Refactor DataSet. Remove suspends where it is possible.

This commit is contained in:
Alexander Nozik 2022-05-04 17:27:56 +03:00
parent bedab0dc86
commit 0622bacc4d
No known key found for this signature in database
GPG Key ID: F7FCF2DD25C71357
26 changed files with 418 additions and 352 deletions

View File

@ -14,6 +14,7 @@
- DataSet `getData` is no longer suspended and renamed to `get`
- DataSet operates with sequences of data instead of flows
- PartialEnvelope uses `Int` instead of `UInt`.
- `ActiveDataSet` renamed to `DataSource`
### Deprecated

View File

@ -4,10 +4,7 @@ plugins {
allprojects {
group = "space.kscience"
version = "0.6.0-dev-4"
repositories{
mavenCentral()
}
version = "0.6.0-dev-5"
}
subprojects {

View File

@ -1,6 +1,5 @@
package space.kscience.dataforge.actions
import kotlinx.coroutines.CoroutineScope
import space.kscience.dataforge.data.DataSet
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFExperimental
@ -9,13 +8,12 @@ import space.kscience.dataforge.misc.DFExperimental
* A simple data transformation on a data node. Actions should avoid doing actual dependency evaluation in [execute].
*/
public interface Action<in T : Any, out R : Any> {
/**
* Transform the data in the node, producing a new node. By default it is assumed that all calculations are lazy
* Transform the data in the node, producing a new node. By default, it is assumed that all calculations are lazy
* so no actual computation is started at this moment.
*
* [scope] context used to compute the initial result, also it is used for updates propagation
*/
public suspend fun execute(dataSet: DataSet<T>, meta: Meta = Meta.EMPTY, scope: CoroutineScope? = null): DataSet<R>
public fun execute(dataSet: DataSet<T>, meta: Meta = Meta.EMPTY): DataSet<R>
public companion object
}
@ -26,16 +24,17 @@ public interface Action<in T : Any, out R : Any> {
public infix fun <T : Any, I : Any, R : Any> Action<T, I>.then(action: Action<I, R>): Action<T, R> {
// TODO introduce a composite action and optimize chaining by adding actions to its list
return object : Action<T, R> {
override suspend fun execute(dataSet: DataSet<T>, meta: Meta, scope: CoroutineScope?): DataSet<R> {
return action.execute(this@then.execute(dataSet, meta, scope), meta, scope)
}
override fun execute(
dataSet: DataSet<T>,
meta: Meta,
): DataSet<R> = action.execute(this@then.execute(dataSet, meta), meta)
}
}
@DFExperimental
public suspend fun <T : Any, R : Any> DataSet<T>.transformWith(
action: Action<T, R>,
public operator fun <T : Any, R : Any> Action<T, R>.invoke(
dataSet: DataSet<T>,
meta: Meta = Meta.EMPTY,
scope: CoroutineScope? = null,
): DataSet<R> = action.execute(this, meta, scope)
): DataSet<R> = execute(dataSet, meta)

View File

@ -0,0 +1,53 @@
package space.kscience.dataforge.actions
import kotlinx.coroutines.launch
import space.kscience.dataforge.data.*
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.startsWith
import kotlin.reflect.KType
/**
 * Drop every entry of this map whose key starts with [name].
 *
 * Matching keys are first collected into a separate list, so the map is not modified
 * while its key view is being traversed.
 */
internal fun MutableMap<Name, *>.removeWhatStartsWith(name: Name) {
    val matchingKeys = keys.filter { key -> key.startsWith(name) }
    for (key in matchingKeys) {
        remove(key)
    }
}
/**
 * An [Action] whose result is built from the sequence produced by [transform].
 *
 * When the input is a [DataSource], the result is a [DataSourceBuilder] that shares the input's coroutine
 * context and is re-populated for every name emitted by the input's update flow. For any other input the
 * result is a [DataTree] that is populated exactly once.
 *
 * @property outputType the data type of the produced [DataSet]
 */
public abstract class CachingAction<in T : Any, out R : Any>(
    public val outputType: KType,
) : Action<T, R> {

    /**
     * Produce the named results for [set] using the action [meta].
     *
     * [execute] calls this with the default [key] for the initial population and with the updated name
     * for every update received from the source.
     */
    protected abstract fun transform(
        set: DataSet<T>,
        meta: Meta,
        key: Name = Name.EMPTY,
    ): Sequence<NamedData<R>>

    override fun execute(
        dataSet: DataSet<T>,
        meta: Meta,
    ): DataSet<R> {
        // An input without updates yields a tree that is filled once and never touched again
        if (dataSet !is DataSource) {
            return DataTree<R>(outputType) {
                populateFrom(transform(dataSet, meta))
            }
        }

        val result = DataSourceBuilder<R>(outputType, dataSet.coroutineContext)
        // Initial content is placed synchronously, before the update listener is started
        result.populateFrom(transform(dataSet, meta))
        result.launch {
            dataSet.updates.collect { updatedName ->
                //clear old nodes
                result.remove(updatedName)
                //collect new items
                result.populateFrom(transform(dataSet, meta, updatedName))
                //FIXME if the target is data, updates are fired twice
            }
        }
        return result
    }
}

View File

@ -1,6 +1,5 @@
package space.kscience.dataforge.actions
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.launch
import space.kscience.dataforge.data.*
import space.kscience.dataforge.meta.Meta
@ -59,11 +58,11 @@ internal class MapAction<in T : Any, out R : Any>(
private val block: MapActionBuilder<T, R>.() -> Unit,
) : Action<T, R> {
override suspend fun execute(
override fun execute(
dataSet: DataSet<T>,
meta: Meta,
scope: CoroutineScope?,
): DataSet<R> {
fun mapOne(data: NamedData<T>): NamedData<R> {
// Creating a new environment for action using **old** name, old meta and task meta
val env = ActionEnv(data.name, data.meta, meta)
@ -92,17 +91,23 @@ internal class MapAction<in T : Any, out R : Any>(
val sequence = dataSet.dataSequence().map(::mapOne)
return ActiveDataTree(outputType) {
populateWith(sequence)
scope?.launch {
return if (dataSet is DataSource ) {
ActiveDataTree(outputType, dataSet) {
populateFrom(sequence)
launch {
dataSet.updates.collect { name ->
//clear old nodes
remove(name)
//collect new items
populateWith(dataSet.children(name).map(::mapOne))
populateFrom(dataSet.children(name).map(::mapOne))
}
}
}
} else {
DataTree(outputType) {
populateFrom(sequence)
}
}
}
}

View File

@ -1,8 +1,5 @@
package space.kscience.dataforge.actions
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flow
import space.kscience.dataforge.data.*
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MutableMeta
@ -38,18 +35,17 @@ public class JoinGroup<T : Any, R : Any>(
@DFBuilder
public class ReduceGroupBuilder<T : Any, R : Any>(
private val scope: CoroutineScope,
public val actionMeta: Meta,
private val outputType: KType
private val outputType: KType,
) {
private val groupRules: MutableList<suspend (DataSet<T>) -> List<JoinGroup<T, R>>> = ArrayList();
private val groupRules: MutableList<(DataSet<T>) -> List<JoinGroup<T, R>>> = ArrayList();
/**
* introduce grouping by meta value
*/
public fun byValue(tag: String, defaultTag: String = "@default", action: JoinGroup<T, R>.() -> Unit) {
groupRules += { node ->
GroupRule.byMetaValue(scope, tag, defaultTag).gather(node).map {
GroupRule.byMetaValue(tag, defaultTag).gather(node).map {
JoinGroup<T, R>(it.key, it.value, outputType).apply(action)
}
}
@ -57,12 +53,12 @@ public class ReduceGroupBuilder<T : Any, R : Any>(
public fun group(
groupName: String,
filter: (Name, Data<T>) -> Boolean,
predicate: (Name, Meta) -> Boolean,
action: JoinGroup<T, R>.() -> Unit,
) {
groupRules += { source ->
listOf(
JoinGroup<T, R>(groupName, source.filter(filter), outputType).apply(action)
JoinGroup<T, R>(groupName, source.filter(predicate), outputType).apply(action)
)
}
}
@ -76,7 +72,7 @@ public class ReduceGroupBuilder<T : Any, R : Any>(
}
}
internal suspend fun buildGroups(input: DataSet<T>): List<JoinGroup<T, R>> =
internal fun buildGroups(input: DataSet<T>): List<JoinGroup<T, R>> =
groupRules.flatMap { it.invoke(input) }
}
@ -89,8 +85,8 @@ internal class ReduceAction<T : Any, R : Any>(
//TODO optimize reduction. Currently the whole action recalculates on push
override fun CoroutineScope.transform(set: DataSet<T>, meta: Meta, key: Name): Flow<NamedData<R>> = flow {
ReduceGroupBuilder<T, R>(this@transform, meta, outputType).apply(action).buildGroups(set).forEach { group ->
override fun transform(set: DataSet<T>, meta: Meta, key: Name): Sequence<NamedData<R>> = sequence {
ReduceGroupBuilder<T, R>(meta, outputType).apply(action).buildGroups(set).forEach { group ->
val dataFlow: Map<Name, Data<T>> = group.set.dataSequence().fold(HashMap()) { acc, value ->
acc.apply {
acc[value.name] = value.data
@ -107,7 +103,7 @@ internal class ReduceAction<T : Any, R : Any>(
meta = groupMeta
) { group.result.invoke(env, it) }
emit(res.named(env.name))
yield(res.named(env.name))
}
}
}

View File

@ -1,6 +1,5 @@
package space.kscience.dataforge.actions
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.launch
import space.kscience.dataforge.data.*
import space.kscience.dataforge.meta.Laminate
@ -51,10 +50,9 @@ internal class SplitAction<T : Any, R : Any>(
private val action: SplitBuilder<T, R>.() -> Unit,
) : Action<T, R> {
override suspend fun execute(
override fun execute(
dataSet: DataSet<T>,
meta: Meta,
scope: CoroutineScope?,
): DataSet<R> {
fun splitOne(data: NamedData<T>): Sequence<NamedData<R>> {
@ -77,17 +75,23 @@ internal class SplitAction<T : Any, R : Any>(
}
}
return ActiveDataTree<R>(outputType) {
populateWith(dataSet.dataSequence().flatMap(transform = ::splitOne))
scope?.launch {
return if (dataSet is DataSource) {
ActiveDataTree<R>(outputType, dataSet) {
populateFrom(dataSet.dataSequence().flatMap(transform = ::splitOne))
launch {
dataSet.updates.collect { name ->
//clear old nodes
remove(name)
//collect new items
populateWith(dataSet.children(name).flatMap(transform = ::splitOne))
populateFrom(dataSet.children(name).flatMap(transform = ::splitOne))
}
}
}
} else {
DataTree<R>(outputType) {
populateFrom(dataSet.dataSequence().flatMap(transform = ::splitOne))
}
}
}
}

View File

@ -1,103 +0,0 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.flow.*
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import space.kscience.dataforge.meta.*
import space.kscience.dataforge.names.*
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
* A mutable [DataTree].
*/
public class ActiveDataTree<T : Any>(
override val dataType: KType,
) : DataTree<T>, DataSetBuilder<T>, ActiveDataSet<T> {
private val mutex = Mutex()
private val treeItems = HashMap<NameToken, DataTreeItem<T>>()
override val items: Map<NameToken, DataTreeItem<T>>
get() = treeItems.filter { !it.key.body.startsWith("@") }
private val _updates = MutableSharedFlow<Name>()
override val updates: Flow<Name>
get() = _updates
private suspend fun remove(token: NameToken) = mutex.withLock {
if (treeItems.remove(token) != null) {
_updates.emit(token.asName())
}
}
override suspend fun remove(name: Name) {
if (name.isEmpty()) error("Can't remove the root node")
(getItem(name.cutLast()).tree as? ActiveDataTree)?.remove(name.lastOrNull()!!)
}
private suspend fun set(token: NameToken, data: Data<T>) = mutex.withLock {
treeItems[token] = DataTreeItem.Leaf(data)
}
private suspend fun getOrCreateNode(token: NameToken): ActiveDataTree<T> =
(treeItems[token] as? DataTreeItem.Node<T>)?.tree as? ActiveDataTree<T>
?: ActiveDataTree<T>(dataType).also {
mutex.withLock {
treeItems[token] = DataTreeItem.Node(it)
}
}
private suspend fun getOrCreateNode(name: Name): ActiveDataTree<T> = when (name.length) {
0 -> this
1 -> getOrCreateNode(name.firstOrNull()!!)
else -> getOrCreateNode(name.firstOrNull()!!).getOrCreateNode(name.cutFirst())
}
override suspend fun data(name: Name, data: Data<T>?) {
if (data == null) {
remove(name)
} else {
when (name.length) {
0 -> error("Can't add data with empty name")
1 -> set(name.firstOrNull()!!, data)
2 -> getOrCreateNode(name.cutLast()).set(name.lastOrNull()!!, data)
}
}
_updates.emit(name)
}
override suspend fun meta(name: Name, meta: Meta) {
val item = getItem(name)
if(item is DataTreeItem.Leaf) error("TODO: Can't change meta of existing leaf item.")
data(name + DataTree.META_ITEM_NAME_TOKEN, Data.empty(meta))
}
}
/**
* Create a dynamic tree. Initial data is placed synchronously by running [block] before the tree is returned.
*/
@Suppress("FunctionName")
public suspend fun <T : Any> ActiveDataTree(
type: KType,
block: suspend ActiveDataTree<T>.() -> Unit,
): ActiveDataTree<T> {
val tree = ActiveDataTree<T>(type)
tree.block()
return tree
}
@Suppress("FunctionName")
public suspend inline fun <reified T : Any> ActiveDataTree(
crossinline block: suspend ActiveDataTree<T>.() -> Unit,
): ActiveDataTree<T> = ActiveDataTree<T>(typeOf<T>()).apply { block() }
public suspend inline fun <reified T : Any> ActiveDataTree<T>.emit(
name: Name,
noinline block: suspend ActiveDataTree<T>.() -> Unit,
): Unit = node(name, ActiveDataTree(typeOf<T>(), block))
public suspend inline fun <reified T : Any> ActiveDataTree<T>.emit(
name: String,
noinline block: suspend ActiveDataTree<T>.() -> Unit,
): Unit = node(Name.parse(name), ActiveDataTree(typeOf<T>(), block))

View File

@ -1,51 +0,0 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.flow.Flow
import space.kscience.dataforge.actions.Action
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.startsWith
import kotlin.reflect.KType
/**
* Remove all values with keys starting with [name]
*/
internal fun MutableMap<Name, *>.removeWhatStartsWith(name: Name) {
val toRemove = keys.filter { it.startsWith(name) }
toRemove.forEach(::remove)
}
/**
* An action that caches results on-demand and recalculates them on source push
*/
public abstract class CachingAction<in T : Any, out R : Any>(
public val outputType: KType,
) : Action<T, R> {
protected abstract fun CoroutineScope.transform(
set: DataSet<T>,
meta: Meta,
key: Name = Name.EMPTY,
): Flow<NamedData<R>>
override suspend fun execute(
dataSet: DataSet<T>,
meta: Meta,
scope: CoroutineScope?,
): DataSet<R> = ActiveDataTree<R>(outputType) {
coroutineScope {
populateWith(transform(dataSet, meta))
}
scope?.let {
dataSet.updates.collect {
//clear old nodes
remove(it)
//collect new items
populateWith(scope.transform(dataSet, meta, it))
//FIXME if the target is data, updates are fired twice
}
}
}
}

View File

@ -62,7 +62,11 @@ DataSet<out T : Any> {
public operator fun <T: Any> DataSet<T>.get(name:String): Data<T>? = get(name.parseAsName())
public interface ActiveDataSet<T : Any> : DataSet<T> {
/**
* A [DataSet] with propagated updates.
*/
public interface DataSource<T : Any> : DataSet<T>, CoroutineScope {
/**
* A flow of updated item names. Updates are propagated in the form of a [Flow] of names of updated nodes.
* Those can include new data items and replacement of existing ones. The replaced items could update existing data content
@ -70,9 +74,16 @@ public interface ActiveDataSet<T : Any> : DataSet<T> {
*
*/
public val updates: Flow<Name>
/**
* Stop generating updates from this [DataSource]
*/
public fun close(){
coroutineContext[Job]?.cancel()
}
}
public val <T : Any> DataSet<T>.updates: Flow<Name> get() = if (this is ActiveDataSet) updates else emptyFlow()
public val <T : Any> DataSet<T>.updates: Flow<Name> get() = if (this is DataSource) updates else emptyFlow()
/**
* Flow all data nodes with names starting with [branchName]

View File

@ -1,7 +1,5 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.flow.Flow
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MutableMeta
import space.kscience.dataforge.misc.DFExperimental
@ -16,14 +14,14 @@ public interface DataSetBuilder<in T : Any> {
/**
* Remove all data items starting with [name]
*/
public suspend fun remove(name: Name)
public fun remove(name: Name)
public suspend fun data(name: Name, data: Data<T>?)
public fun data(name: Name, data: Data<T>?)
/**
* Set a current state of given [dataSet] into a branch [name]. Does not propagate updates
*/
public suspend fun node(name: Name, dataSet: DataSet<T>) {
public fun node(name: Name, dataSet: DataSet<T>) {
//remove previous items
if (name != Name.EMPTY) {
remove(name)
@ -38,19 +36,19 @@ public interface DataSetBuilder<in T : Any> {
/**
* Set meta for the given node
*/
public suspend fun meta(name: Name, meta: Meta)
public fun meta(name: Name, meta: Meta)
}
/**
* Define meta in this [DataSet]
*/
public suspend fun <T : Any> DataSetBuilder<T>.meta(value: Meta): Unit = meta(Name.EMPTY, value)
public fun <T : Any> DataSetBuilder<T>.meta(value: Meta): Unit = meta(Name.EMPTY, value)
/**
* Define meta in this [DataSet]
*/
public suspend fun <T : Any> DataSetBuilder<T>.meta(mutableMeta: MutableMeta.() -> Unit): Unit = meta(Meta(mutableMeta))
public fun <T : Any> DataSetBuilder<T>.meta(mutableMeta: MutableMeta.() -> Unit): Unit = meta(Meta(mutableMeta))
@PublishedApi
internal class SubSetBuilder<in T : Any>(
@ -59,52 +57,52 @@ internal class SubSetBuilder<in T : Any>(
) : DataSetBuilder<T> {
override val dataType: KType get() = parent.dataType
override suspend fun remove(name: Name) {
override fun remove(name: Name) {
parent.remove(branch + name)
}
override suspend fun data(name: Name, data: Data<T>?) {
override fun data(name: Name, data: Data<T>?) {
parent.data(branch + name, data)
}
override suspend fun node(name: Name, dataSet: DataSet<T>) {
override fun node(name: Name, dataSet: DataSet<T>) {
parent.node(branch + name, dataSet)
}
override suspend fun meta(name: Name, meta: Meta) {
override fun meta(name: Name, meta: Meta) {
parent.meta(branch + name, meta)
}
}
public suspend inline fun <T : Any> DataSetBuilder<T>.node(
public inline fun <T : Any> DataSetBuilder<T>.node(
name: Name,
crossinline block: suspend DataSetBuilder<T>.() -> Unit,
crossinline block: DataSetBuilder<T>.() -> Unit,
) {
if (name.isEmpty()) block() else SubSetBuilder(this, name).block()
}
public suspend fun <T : Any> DataSetBuilder<T>.data(name: String, value: Data<T>) {
public fun <T : Any> DataSetBuilder<T>.data(name: String, value: Data<T>) {
data(Name.parse(name), value)
}
public suspend fun <T : Any> DataSetBuilder<T>.node(name: String, set: DataSet<T>) {
public fun <T : Any> DataSetBuilder<T>.node(name: String, set: DataSet<T>) {
node(Name.parse(name), set)
}
public suspend inline fun <T : Any> DataSetBuilder<T>.node(
public inline fun <T : Any> DataSetBuilder<T>.node(
name: String,
crossinline block: suspend DataSetBuilder<T>.() -> Unit,
crossinline block: DataSetBuilder<T>.() -> Unit,
): Unit = node(Name.parse(name), block)
public suspend fun <T : Any> DataSetBuilder<T>.set(value: NamedData<T>) {
public fun <T : Any> DataSetBuilder<T>.set(value: NamedData<T>) {
data(value.name, value.data)
}
/**
* Produce lazy [Data] and emit it into the [DataSetBuilder]
*/
public suspend inline fun <reified T : Any> DataSetBuilder<T>.produce(
public inline fun <reified T : Any> DataSetBuilder<T>.produce(
name: String,
meta: Meta = Meta.EMPTY,
noinline producer: suspend () -> T,
@ -113,7 +111,7 @@ public suspend inline fun <reified T : Any> DataSetBuilder<T>.produce(
data(name, data)
}
public suspend inline fun <reified T : Any> DataSetBuilder<T>.produce(
public inline fun <reified T : Any> DataSetBuilder<T>.produce(
name: Name,
meta: Meta = Meta.EMPTY,
noinline producer: suspend () -> T,
@ -125,19 +123,19 @@ public suspend inline fun <reified T : Any> DataSetBuilder<T>.produce(
/**
* Emit a static data with the fixed value
*/
public suspend inline fun <reified T : Any> DataSetBuilder<T>.static(
public inline fun <reified T : Any> DataSetBuilder<T>.static(
name: String,
data: T,
meta: Meta = Meta.EMPTY,
): Unit = data(name, Data.static(data, meta))
public suspend inline fun <reified T : Any> DataSetBuilder<T>.static(
public inline fun <reified T : Any> DataSetBuilder<T>.static(
name: Name,
data: T,
meta: Meta = Meta.EMPTY,
): Unit = data(name, Data.static(data, meta))
public suspend inline fun <reified T : Any> DataSetBuilder<T>.static(
public inline fun <reified T : Any> DataSetBuilder<T>.static(
name: String,
data: T,
mutableMeta: MutableMeta.() -> Unit,
@ -147,20 +145,20 @@ public suspend inline fun <reified T : Any> DataSetBuilder<T>.static(
* Update data with given node data and meta with node meta.
*/
@DFExperimental
public suspend fun <T : Any> DataSetBuilder<T>.populateFrom(tree: DataSet<T>): Unit = coroutineScope {
public fun <T : Any> DataSetBuilder<T>.populateFrom(tree: DataSet<T>): Unit {
tree.dataSequence().forEach {
//TODO check if the place is occupied
data(it.name, it.data)
}
}
public suspend fun <T : Any> DataSetBuilder<T>.populateWith(flow: Flow<NamedData<T>>) {
flow.collect {
data(it.name, it.data)
}
}
//public fun <T : Any> DataSetBuilder<T>.populateFrom(flow: Flow<NamedData<T>>) {
// flow.collect {
// data(it.name, it.data)
// }
//}
public suspend fun <T : Any> DataSetBuilder<T>.populateWith(sequence: Sequence<NamedData<T>>) {
public fun <T : Any> DataSetBuilder<T>.populateFrom(sequence: Sequence<NamedData<T>>) {
sequence.forEach {
data(it.name, it.data)
}

View File

@ -0,0 +1,122 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Job
import kotlinx.coroutines.flow.MutableSharedFlow
import kotlinx.coroutines.flow.SharedFlow
import kotlinx.coroutines.launch
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.names.*
import kotlin.collections.set
import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.coroutineContext
import kotlin.jvm.Synchronized
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
 * A mutable [DataTree] that propagates updates.
 *
 * Names of changed items are published through [updates]. Emission is performed in coroutines launched in
 * this builder's own scope, so the mutating functions themselves do not suspend.
 *
 * @param coroutineContext the parent context; a [Job] that is a child of the parent's job (if any) and a
 * [GoalExecutionRestriction] are added to it to form the context of this builder.
 */
public class DataSourceBuilder<T : Any>(
    override val dataType: KType,
    coroutineContext: CoroutineContext,
) : DataTree<T>, DataSetBuilder<T>, DataSource<T> {

    override val coroutineContext: CoroutineContext =
        coroutineContext + Job(coroutineContext[Job]) + GoalExecutionRestriction()

    private val treeItems = HashMap<NameToken, DataTreeItem<T>>()

    // Items whose token body starts with "@" are kept in storage but hidden from this view
    override val items: Map<NameToken, DataTreeItem<T>>
        get() = treeItems.filter { !it.key.body.startsWith("@") }

    private val _updates = MutableSharedFlow<Name>()

    override val updates: SharedFlow<Name>
        get() = _updates

    /**
     * Remove a direct child [token]; an update is emitted only when something was actually removed.
     */
    @Synchronized
    private fun remove(token: NameToken) {
        if (treeItems.remove(token) != null) {
            launch {
                _updates.emit(token.asName())
            }
        }
    }

    /**
     * Remove the item with the given [name].
     *
     * Fails for an empty name. Nothing happens when the parent of [name] is not a [DataSourceBuilder].
     */
    override fun remove(name: Name) {
        if (name.isEmpty()) error("Can't remove the root node")
        (getItem(name.cutLast()).tree as? DataSourceBuilder)?.remove(name.lastOrNull()!!)
    }

    @Synchronized
    private fun set(token: NameToken, data: Data<T>) {
        treeItems[token] = DataTreeItem.Leaf(data)
    }

    @Synchronized
    private fun set(token: NameToken, node: DataTree<T>) {
        treeItems[token] = DataTreeItem.Node(node)
    }

    /**
     * Return the child builder stored under [token], creating and registering a new one when the slot is
     * empty or holds anything other than a [DataSourceBuilder] node.
     */
    private fun getOrCreateNode(token: NameToken): DataSourceBuilder<T> =
        (treeItems[token] as? DataTreeItem.Node<T>)?.tree as? DataSourceBuilder<T>
            ?: DataSourceBuilder<T>(dataType, coroutineContext).also { set(token, it) }

    /**
     * Resolve (creating when needed) the builder at an arbitrary depth, one token at a time.
     */
    private fun getOrCreateNode(name: Name): DataSourceBuilder<T> = when (name.length) {
        0 -> this
        1 -> getOrCreateNode(name.firstOrNull()!!)
        else -> getOrCreateNode(name.firstOrNull()!!).getOrCreateNode(name.cutFirst())
    }

    /**
     * Put [data] under [name], or remove the item at [name] when [data] is `null`.
     *
     * An update with [name] is emitted in both cases. Adding data under an empty name is an error.
     */
    override fun data(name: Name, data: Data<T>?) {
        if (data == null) {
            remove(name)
        } else {
            when (name.length) {
                0 -> error("Can't add data with empty name")
                1 -> set(name.firstOrNull()!!, data)
                // Any deeper name: resolve (or create) the parent node and store the leaf there.
                // Previously only two-token names were handled and longer names were silently dropped.
                else -> getOrCreateNode(name.cutLast()).set(name.lastOrNull()!!, data)
            }
        }
        launch {
            _updates.emit(name)
        }
    }

    /**
     * Store [meta] for the node [name] as an empty data item under the reserved meta token.
     *
     * Fails when [name] points to an existing leaf.
     */
    override fun meta(name: Name, meta: Meta) {
        val item = getItem(name)
        if (item is DataTreeItem.Leaf) error("TODO: Can't change meta of existing leaf item.")
        data(name + DataTree.META_ITEM_NAME_TOKEN, Data.empty(meta))
    }
}
/**
 * Create a dynamic tree of the given [type] whose coroutine context is derived from [parent].
 * Initial data is placed synchronously: [block] is applied to the tree before it is returned.
 */
@Suppress("FunctionName")
public fun <T : Any> ActiveDataTree(
    type: KType,
    parent: CoroutineScope,
    block: DataSourceBuilder<T>.() -> Unit,
): DataSourceBuilder<T> = DataSourceBuilder<T>(type, parent.coroutineContext).apply(block)
/**
 * Create a dynamic tree for the reified type [T] using the coroutine context of the calling coroutine.
 * [block] is applied to the tree before it is returned; by default it does nothing.
 */
@Suppress("FunctionName")
public suspend inline fun <reified T : Any> ActiveDataTree(
    crossinline block: DataSourceBuilder<T>.() -> Unit = {},
): DataSourceBuilder<T> {
    val tree = DataSourceBuilder<T>(typeOf<T>(), coroutineContext)
    tree.block()
    return tree
}
/**
 * Build a child tree with [block], using [parent] as its coroutine scope, and place it under [name].
 */
public inline fun <reified T : Any> DataSourceBuilder<T>.emit(
    name: Name,
    parent: CoroutineScope,
    noinline block: DataSourceBuilder<T>.() -> Unit,
) {
    val subtree = ActiveDataTree(typeOf<T>(), parent, block)
    node(name, subtree)
}
/**
 * Build a child tree with [block], using [parent] as its coroutine scope, and place it under the
 * name obtained by parsing the string [name].
 */
public inline fun <reified T : Any> DataSourceBuilder<T>.emit(
    name: String,
    parent: CoroutineScope,
    noinline block: DataSourceBuilder<T>.() -> Unit,
) {
    val parsedName = Name.parse(name)
    node(parsedName, ActiveDataTree(typeOf<T>(), parent, block))
}

View File

@ -15,13 +15,12 @@
*/
package space.kscience.dataforge.data
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.launch
import space.kscience.dataforge.meta.get
import space.kscience.dataforge.meta.string
public interface GroupRule {
public suspend fun <T : Any> gather(set: DataSet<T>): Map<String, DataSet<T>>
public fun <T : Any> gather(set: DataSet<T>): Map<String, DataSet<T>>
public companion object {
/**
@ -33,30 +32,42 @@ public interface GroupRule {
* @return
*/
public fun byMetaValue(
scope: CoroutineScope,
key: String,
defaultTagValue: String,
): GroupRule = object : GroupRule {
override suspend fun <T : Any> gather(
override fun <T : Any> gather(
set: DataSet<T>,
): Map<String, DataSet<T>> {
val map = HashMap<String, ActiveDataTree<T>>()
val map = HashMap<String, DataSet<T>>()
if (set is DataSource) {
set.dataSequence().forEach { data ->
val tagValue = data.meta[key]?.string ?: defaultTagValue
map.getOrPut(tagValue) { ActiveDataTree(set.dataType) }.data(data.name, data.data)
}
val tagValue: String = data.meta[key]?.string ?: defaultTagValue
(map.getOrPut(tagValue) { DataSourceBuilder(set.dataType, set.coroutineContext) } as DataSourceBuilder<T>)
.data(data.name, data.data)
scope.launch {
set.launch {
set.updates.collect { name ->
val data = set.get(name)
val dataUpdate = set[name]
@Suppress("NULLABLE_EXTENSION_OPERATOR_WITH_SAFE_CALL_RECEIVER")
val tagValue = data?.meta?.get(key)?.string ?: defaultTagValue
map.getOrPut(tagValue) { ActiveDataTree(set.dataType) }.data(name, data)
val updateTagValue = dataUpdate?.meta?.get(key)?.string ?: defaultTagValue
map.getOrPut(updateTagValue) {
ActiveDataTree(set.dataType, this) {
data(name, dataUpdate)
}
}
}
}
}
} else {
set.dataSequence().forEach { data ->
val tagValue: String = data.meta[key]?.string ?: defaultTagValue
(map.getOrPut(tagValue) { StaticDataTree(set.dataType) } as StaticDataTree<T>)
.data(data.name, data.data)
}
}
return map
}

View File

@ -1,6 +1,5 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.coroutineScope
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.*
@ -17,7 +16,7 @@ internal class StaticDataTree<T : Any>(
override val items: Map<NameToken, DataTreeItem<T>>
get() = _items.filter { !it.key.body.startsWith("@") }
override suspend fun remove(name: Name) {
override fun remove(name: Name) {
when (name.length) {
0 -> error("Can't remove root tree node")
1 -> _items.remove(name.firstOrNull()!!)
@ -36,7 +35,7 @@ internal class StaticDataTree<T : Any>(
else -> getOrCreateNode(name.cutLast()).getOrCreateNode(name.lastOrNull()!!.asName())
}
private suspend fun set(name: Name, item: DataTreeItem<T>?) {
private fun set(name: Name, item: DataTreeItem<T>?) {
if (name.isEmpty()) error("Can't set top level tree node")
if (item == null) {
remove(name)
@ -45,23 +44,21 @@ internal class StaticDataTree<T : Any>(
}
}
override suspend fun data(name: Name, data: Data<T>?) {
override fun data(name: Name, data: Data<T>?) {
set(name, data?.let { DataTreeItem.Leaf(it) })
}
override suspend fun node(name: Name, dataSet: DataSet<T>) {
override fun node(name: Name, dataSet: DataSet<T>) {
if (dataSet is StaticDataTree) {
set(name, DataTreeItem.Node(dataSet))
} else {
coroutineScope {
dataSet.dataSequence().forEach {
data(name + it.name, it.data)
}
}
}
}
override suspend fun meta(name: Name, meta: Meta) {
override fun meta(name: Name, meta: Meta) {
val item = getItem(name)
if (item is DataTreeItem.Leaf) TODO("Can't change meta of existing leaf item.")
data(name + DataTree.META_ITEM_NAME_TOKEN, Data.empty(meta))
@ -69,17 +66,17 @@ internal class StaticDataTree<T : Any>(
}
@Suppress("FunctionName")
public suspend fun <T : Any> DataTree(
public inline fun <T : Any> DataTree(
dataType: KType,
block: suspend DataSetBuilder<T>.() -> Unit,
block: DataSetBuilder<T>.() -> Unit,
): DataTree<T> = StaticDataTree<T>(dataType).apply { block() }
@Suppress("FunctionName")
public suspend inline fun <reified T : Any> DataTree(
noinline block: suspend DataSetBuilder<T>.() -> Unit,
public inline fun <reified T : Any> DataTree(
noinline block: DataSetBuilder<T>.() -> Unit,
): DataTree<T> = DataTree(typeOf<T>(), block)
@OptIn(DFExperimental::class)
public suspend fun <T : Any> DataSet<T>.seal(): DataTree<T> = DataTree(dataType) {
public fun <T : Any> DataSet<T>.seal(): DataTree<T> = DataTree(dataType) {
populateFrom(this@seal)
}

View File

@ -10,6 +10,8 @@ import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.isEmpty
import space.kscience.dataforge.names.plus
import space.kscience.dataforge.names.removeHeadOrNull
import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.EmptyCoroutineContext
import kotlin.reflect.KType
@ -17,34 +19,42 @@ import kotlin.reflect.KType
* A stateless filtered [DataSet]
*/
public fun <T : Any> DataSet<T>.filter(
predicate: (Name, Data<T>) -> Boolean,
): ActiveDataSet<T> = object : ActiveDataSet<T> {
predicate: (Name, Meta) -> Boolean,
): DataSource<T> = object : DataSource<T> {
override val dataType: KType get() = this@filter.dataType
override val coroutineContext: CoroutineContext
get() = (this@filter as? DataSource)?.coroutineContext ?: EmptyCoroutineContext
override val meta: Meta get() = this@filter.meta
override fun dataSequence(): Sequence<NamedData<T>> =
this@filter.dataSequence().filter { predicate(it.name, it.data) }
this@filter.dataSequence().filter { predicate(it.name, it.meta) }
override fun get(name: Name): Data<T>? = this@filter.get(name)?.takeIf {
predicate(name, it)
predicate(name, it.meta)
}
override val updates: Flow<Name> = this@filter.updates.filter flowFilter@{ name ->
val theData = this@filter.get(name) ?: return@flowFilter false
predicate(name, theData)
val theData = this@filter[name] ?: return@flowFilter false
predicate(name, theData.meta)
}
}
/**
* Generate a wrapper data set with a given name prefix prepended to all names
*/
public fun <T : Any> DataSet<T>.withNamePrefix(prefix: Name): DataSet<T> = if (prefix.isEmpty()) this
else object : ActiveDataSet<T> {
public fun <T : Any> DataSet<T>.withNamePrefix(prefix: Name): DataSet<T> = if (prefix.isEmpty()) {
this
} else object : DataSource<T> {
override val dataType: KType get() = this@withNamePrefix.dataType
override val coroutineContext: CoroutineContext
get() = (this@withNamePrefix as? DataSource)?.coroutineContext ?: EmptyCoroutineContext
override val meta: Meta get() = this@withNamePrefix.meta
@ -62,9 +72,12 @@ else object : ActiveDataSet<T> {
*/
public fun <T : Any> DataSet<T>.branch(branchName: Name): DataSet<T> = if (branchName.isEmpty()) {
this
} else object : ActiveDataSet<T> {
} else object : DataSource<T> {
override val dataType: KType get() = this@branch.dataType
override val coroutineContext: CoroutineContext
get() = (this@branch as? DataSource)?.coroutineContext ?: EmptyCoroutineContext
override val meta: Meta get() = this@branch.meta
override fun dataSequence(): Sequence<NamedData<T>> = this@branch.dataSequence().mapNotNull {

View File

@ -144,7 +144,7 @@ public suspend fun <T : Any, R : Any> DataSet<T>.map(
metaTransform: MutableMeta.() -> Unit = {},
block: suspend (T) -> R,
): DataTree<R> = DataTree<R>(outputType) {
populateWith(
populateFrom(
dataSequence().map {
val newMeta = it.meta.toMutableMeta().apply(metaTransform).seal()
Data(outputType, newMeta, coroutineContext, listOf(it)) {

View File

@ -0,0 +1,2 @@
package space.kscience.dataforge.data

View File

@ -6,6 +6,8 @@ import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.matches
import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.EmptyCoroutineContext
import kotlin.reflect.KType
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.typeOf
@ -28,46 +30,47 @@ private fun <R : Any> Data<*>.castOrNull(type: KType): Data<R>? =
* Select all data matching given type and filters. Does not modify paths
*
* @param namePattern a name match pattern according to [Name.matches]
* @param filter additional filtering condition based on item name and meta. By default, accepts all
* @param predicate additional filtering condition based on item name and meta. By default, accepts all
*/
@OptIn(DFExperimental::class)
public fun <R : Any> DataSet<*>.select(
public fun <R : Any> DataSet<*>.filterIsInstance(
type: KType,
namePattern: Name? = null,
filter: (name: Name, meta: Meta) -> Boolean = { _, _ -> true },
): ActiveDataSet<R> = object : ActiveDataSet<R> {
predicate: (name: Name, meta: Meta) -> Boolean = { _, _ -> true },
): DataSource<R> = object : DataSource<R> {
override val dataType = type
override val meta: Meta get() = this@select.meta
override val coroutineContext: CoroutineContext
get() = (this@filterIsInstance as? DataSource)?.coroutineContext ?: EmptyCoroutineContext
override val meta: Meta get() = this@filterIsInstance.meta
private fun checkDatum(name: Name, datum: Data<*>): Boolean = datum.type.isSubtypeOf(type)
&& (namePattern == null || name.matches(namePattern))
&& filter(name, datum.meta)
&& predicate(name, datum.meta)
override fun dataSequence(): Sequence<NamedData<R>> = this@select.dataSequence().filter {
override fun dataSequence(): Sequence<NamedData<R>> = this@filterIsInstance.dataSequence().filter {
checkDatum(it.name, it.data)
}.map {
@Suppress("UNCHECKED_CAST")
it as NamedData<R>
}
override fun get(name: Name): Data<R>? = this@select[name]?.let { datum ->
override fun get(name: Name): Data<R>? = this@filterIsInstance[name]?.let { datum ->
if (checkDatum(name, datum)) datum.castOrNull(type) else null
}
override val updates: Flow<Name> = this@select.updates.filter {
val datum = this@select[it] ?: return@filter false
checkDatum(it, datum)
override val updates: Flow<Name> = this@filterIsInstance.updates.filter { name ->
get(name)?.let { datum ->
checkDatum(name, datum)
} ?: false
}
}
/**
* Select all data of the appropriate type [R]
*/
public inline fun <reified R : Any> DataSet<*>.select(
namePattern: Name? = null,
noinline filter: (name: Name, meta: Meta) -> Boolean = { _, _ -> true },
): DataSet<R> = select(typeOf<R>(), namePattern, filter)
public inline fun <reified R : Any> DataSet<*>.filterIsInstance(
noinline predicate: (name: Name, meta: Meta) -> Boolean = { _, _ -> true },
): DataSet<R> = filterIsInstance(typeOf<R>(), predicate)
/**
* Select a single datum if it is present and of given [type]

View File

@ -10,24 +10,24 @@ import space.kscience.dataforge.names.plus
/**
* Append data to node
*/
context(DataSetBuilder<T>) public suspend infix fun <T : Any> String.put(data: Data<T>): Unit =
context(DataSetBuilder<T>) public infix fun <T : Any> String.put(data: Data<T>): Unit =
data(Name.parse(this), data)
/**
* Append node
*/
context(DataSetBuilder<T>) public suspend infix fun <T : Any> String.put(dataSet: DataSet<T>): Unit =
context(DataSetBuilder<T>) public infix fun <T : Any> String.put(dataSet: DataSet<T>): Unit =
node(Name.parse(this), dataSet)
/**
* Build and append node
*/
context(DataSetBuilder<T>) public suspend infix fun <T : Any> String.put(
block: suspend DataSetBuilder<T>.() -> Unit,
context(DataSetBuilder<T>) public infix fun <T : Any> String.put(
block: DataSetBuilder<T>.() -> Unit,
): Unit = node(Name.parse(this), block)
/**
* Copy given data set and mirror its changes to this [ActiveDataTree] in [this@setAndObserve]. Returns an update [Job]
* Copy given data set and mirror its changes to this [DataSourceBuilder] in [this@setAndObserve]. Returns an update [Job]
*/
context(DataSetBuilder<T>) public fun <T : Any> CoroutineScope.setAndWatch(
name: Name,

View File

@ -1,44 +1,49 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.delay
import kotlinx.coroutines.test.runTest
import org.junit.jupiter.api.Test
import space.kscience.dataforge.actions.Action
import space.kscience.dataforge.actions.invoke
import space.kscience.dataforge.actions.map
import space.kscience.dataforge.misc.DFExperimental
import kotlin.test.assertEquals
@OptIn(DFExperimental::class)
internal class ActionsTest {
private val data: DataTree<Int> = runBlocking {
DataTree {
@Test
fun testStaticMapAction() = runTest {
val data: DataTree<Int> = DataTree {
repeat(10) {
static(it.toString(), it)
}
}
}
@Test
fun testStaticMapAction() {
val plusOne = Action.map<Int, Int> {
result { it + 1 }
}
runBlocking {
val result = plusOne.execute(data)
val result = plusOne(data)
assertEquals(2, result["1"]?.await())
}
}
@Test
fun testDynamicMapAction() {
fun testDynamicMapAction() = runTest {
val data: DataSourceBuilder<Int> = ActiveDataTree()
val plusOne = Action.map<Int, Int> {
result { it + 1 }
}
val datum = runBlocking {
val result = plusOne.execute(data, scope = this)
result["1"]?.await()
val result = plusOne(data)
repeat(10) {
data.static(it.toString(), it)
}
assertEquals(2, datum)
delay(20)
assertEquals(2, result["1"]?.await())
data.close()
}
}

View File

@ -46,8 +46,8 @@ internal class DataTreeBuilderTest {
}
runBlocking {
assertEquals("a", node.get("update.a")?.await())
assertEquals("a", node.get("primary.a")?.await())
assertEquals("a", node["update.a"]?.await())
assertEquals("a", node["primary.a"]?.await())
}
}

View File

@ -1,12 +1,10 @@
package space.kscience.dataforge.workspace
import kotlinx.coroutines.CoroutineScope
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.context.ContextBuilder
import space.kscience.dataforge.context.Global
import space.kscience.dataforge.data.ActiveDataTree
import space.kscience.dataforge.data.DataSet
import space.kscience.dataforge.data.DataSetBuilder
import space.kscience.dataforge.data.DataTree
import space.kscience.dataforge.data.*
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MetaRepr
import space.kscience.dataforge.meta.MutableMeta
@ -17,8 +15,11 @@ import space.kscience.dataforge.misc.DFBuilder
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.asName
import kotlin.collections.HashMap
import kotlin.collections.set
import kotlin.properties.PropertyDelegateProvider
import kotlin.properties.ReadOnlyProperty
import kotlin.reflect.typeOf
public data class TaskReference<T : Any>(public val taskName: Name, public val task: Task<T>) : DataSelector<T> {
@ -100,13 +101,13 @@ public class WorkspaceBuilder(private val parentContext: Context = Global) : Tas
/**
* Define intrinsic data for the workspace
*/
public suspend fun buildData(builder: suspend DataSetBuilder<Any>.() -> Unit) {
public fun data(builder: DataSetBuilder<Any>.() -> Unit) {
data = DataTree(builder)
}
@DFExperimental
public suspend fun buildActiveData(builder: suspend ActiveDataTree<Any>.() -> Unit) {
data = ActiveDataTree(builder)
public fun buildActiveData(scope: CoroutineScope, builder: DataSourceBuilder<Any>.() -> Unit) {
data = ActiveDataTree(typeOf<Any>(), scope, builder)
}
/**

View File

@ -1,21 +1,24 @@
package space.kscience.dataforge.workspace
import kotlinx.coroutines.runBlocking
import space.kscience.dataforge.data.DataSet
import space.kscience.dataforge.data.DataSetBuilder
import space.kscience.dataforge.data.select
import space.kscience.dataforge.data.filterIsInstance
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.matches
public fun WorkspaceBuilder.data(builder: suspend DataSetBuilder<Any>.() -> Unit): Unit = runBlocking {
buildData(builder)
//public fun WorkspaceBuilder.data(builder: DataSetBuilder<Any>.() -> Unit): Unit = runBlocking {
// data(builder)
//}
public inline fun <reified T : Any> TaskResultBuilder<*>.data(namePattern: Name? = null): DataSelector<T> =
object : DataSelector<T> {
override suspend fun select(workspace: Workspace, meta: Meta): DataSet<T> =
workspace.data.filterIsInstance { name, _ ->
namePattern == null || name.matches(namePattern)
}
public inline fun <reified T: Any> TaskResultBuilder<*>.data(namePattern: Name? = null): DataSelector<T> = object : DataSelector<T> {
override suspend fun select(workspace: Workspace, meta: Meta): DataSet<T> = workspace.data.select(namePattern)
}
public suspend inline fun <reified T : Any> TaskResultBuilder<*>.fromTask(
task: Name,
taskMeta: Meta = Meta.EMPTY,
): DataSet<T> = workspace.produce(task, taskMeta).select()
): DataSet<T> = workspace.produce(task, taskMeta).filterIsInstance()

View File

@ -14,7 +14,7 @@ class DataPropagationTestPlugin : WorkspacePlugin() {
override val tag: PluginTag = Companion.tag
val allData by task<Int> {
val selectedData = workspace.data.select<Int>()
val selectedData = workspace.data.filterIsInstance<Int>()
val result: Data<Int> = selectedData.dataSequence().foldToData(0) { result, data ->
result + data.await()
}
@ -23,7 +23,7 @@ class DataPropagationTestPlugin : WorkspacePlugin() {
val singleData by task<Int> {
workspace.data.select<Int>()["myData[12]"]?.let {
workspace.data.filterIsInstance<Int>()["myData[12]"]?.let {
data("result", it)
}
}

View File

@ -20,8 +20,7 @@ import kotlin.test.assertEquals
class FileDataTest {
val dataNode = runBlocking {
DataTree<String> {
val dataNode = DataTree<String> {
node("dir") {
static("a", "Some string") {
"content" put "Some string"
@ -32,7 +31,7 @@ class FileDataTest {
"content" put "This is root meta node"
}
}
}
object StringIOFormat : IOFormat<String> {

View File

@ -117,16 +117,16 @@ class SimpleWorkspaceTest {
}
val averageByGroup by task<Int> {
val evenSum = workspace.data.filter { name, _ ->
val evenSum = workspace.data.filterIsInstance<Int> { name, _ ->
name.toString().toInt() % 2 == 0
}.select<Int>().foldToData(0) { l, r ->
}.foldToData(0) { l, r ->
l + r.await()
}
data("even", evenSum)
val oddSum = workspace.data.filter { name, _ ->
val oddSum = workspace.data.filterIsInstance<Int> { name, _ ->
name.toString().toInt() % 2 == 1
}.select<Int>().foldToData(0) { l, r ->
}.foldToData(0) { l, r ->
l + r.await()
}
data("odd", oddSum)
@ -143,7 +143,7 @@ class SimpleWorkspaceTest {
}
val customPipe by task<Int> {
workspace.data.select<Int>().forEach { data ->
workspace.data.filterIsInstance<Int>().forEach { data ->
val meta = data.meta.toMutableMeta().apply {
"newValue" put 22
}