Merge remote-tracking branch 'upstream/0.6' into distributed

This commit is contained in:
Andrey Stoyan 2022-06-01 09:38:56 +03:00
commit 99efe3a456
68 changed files with 1681 additions and 1241 deletions

View File

@ -4,15 +4,29 @@
### Added
- Add `specOrNull` delegate to meta and Scheme
- Suspended read methods to the `Binary`
- Synchronously accessed `meta` to all `DataSet`s
- More fine-grained types in Action builders.
### Changed
- `Factory` is now `fun interface` and uses `build` instead of `invoke`. `invoke` moved to an extension.
- KTor 2.0
- DataTree `items` call is blocking.
- DataSet `getData` is no longer suspended and renamed to `get`
- DataSet operates with sequences of data instead of flows
- PartialEnvelope uses `Int` instead of `UInt`.
- `ActiveDataSet` renamed to `DataSource`
- `selectOne`->`getByType`
- Data traversal in `DataSet` is done via iterator
- Remove all unnecessary properties for `IOFormat`
- Separate interfaces for `IOReader` and `IOWriter`
### Deprecated
### Removed
### Fixed
- Meta file name in readMeta from directory
- Tagless and FrontMatter envelope partial readers fix.
### Security

View File

@ -1,17 +1,22 @@
import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
plugins {
id("ru.mipt.npm.gradle.project")
}
allprojects {
group = "space.kscience"
version = "0.5.3-dev-4"
repositories{
mavenCentral()
}
version = "0.6.0-dev-9"
}
subprojects {
apply(plugin = "maven-publish")
tasks.withType<KotlinCompile>{
kotlinOptions{
freeCompilerArgs = freeCompilerArgs + "-Xcontext-receivers"
}
}
}
readme {

View File

@ -75,10 +75,8 @@ public inline fun <reified T : Any> Provider.provide(path: String, targetOverrid
/**
* Typed top level content
*/
public fun <T : Any> Provider.top(target: String, type: KClass<out T>): Map<Name, T> {
return content(target).mapValues {
type.safeCast(it.value) ?: error("The type of element $it is ${it::class} but $type is expected")
}
public fun <T : Any> Provider.top(target: String, type: KClass<out T>): Map<Name, T> = content(target).mapValues {
type.safeCast(it.value) ?: error("The type of element ${it.value} is ${it.value::class} but $type is expected")
}
/**

View File

@ -0,0 +1,65 @@
package space.kscience.dataforge.actions
import kotlinx.coroutines.launch
import space.kscience.dataforge.data.*
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFInternal
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.startsWith
import kotlin.reflect.KType
/**
 * Drop every entry of this map whose key starts with the given [name].
 */
internal fun MutableMap<Name, *>.removeWhatStartsWith(name: Name) {
    // Collect the matching keys first so the map is not modified while its key view is being iterated.
    val matchingKeys = keys.filter { key -> key.startsWith(name) }
    for (key in matchingKeys) {
        remove(key)
    }
}
/**
 * Base class for actions that build their output with [generate] and refresh it with [update]
 * each time the source data set reports a changed name.
 *
 * @param outputType the type marker handed to the data set created by [execute].
 */
public abstract class AbstractAction<in T : Any, R : Any>(
    public val outputType: KType,
) : Action<T, R> {

    /**
     * Fill the output builder with the initial content computed from [data] and [meta].
     */
    protected abstract fun DataSetBuilder<R>.generate(
        data: DataSet<T>,
        meta: Meta,
    )

    /**
     * React to [updateKey] being reported by the source [dataSet].
     *
     * The default implementation ignores [updateKey] and runs [generate] for the whole data set again.
     */
    protected open fun DataSourceBuilder<R>.update(
        dataSet: DataSet<T>,
        meta: Meta,
        updateKey: Name,
    ) {
        generate(dataSet, meta)
    }

    /**
     * Produce the resulting data set.
     *
     * For a [DataSource] input the result is itself a [DataSource] that subscribes to the input updates;
     * for any other input the result is a [DataTree] that is generated once.
     */
    @OptIn(DFInternal::class)
    override fun execute(
        dataSet: DataSet<T>,
        meta: Meta,
    ): DataSet<R> = when (dataSet) {
        is DataSource -> DataSource(outputType, dataSet) {
            generate(dataSet, meta)
            // Forward each updated name of the source to the update hook.
            launch {
                dataSet.updates.collect { changedName ->
                    update(dataSet, meta, changedName)
                }
            }
        }

        else -> DataTree<R>(outputType) {
            generate(dataSet, meta)
        }
    }
}

View File

@ -1,6 +1,5 @@
package space.kscience.dataforge.actions
import kotlinx.coroutines.CoroutineScope
import space.kscience.dataforge.data.DataSet
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFExperimental
@ -9,13 +8,12 @@ import space.kscience.dataforge.misc.DFExperimental
* A simple data transformation on a data node. Actions should avoid doing actual dependency evaluation in [execute].
*/
public interface Action<in T : Any, out R : Any> {
/**
* Transform the data in the node, producing a new node. By default it is assumed that all calculations are lazy
* Transform the data in the node, producing a new node. By default, it is assumed that all calculations are lazy
* so no actual computation is started at this moment.
*
* [scope] context used to compute the initial result, also it is used for updates propagation
*/
public suspend fun execute(dataSet: DataSet<T>, meta: Meta = Meta.EMPTY, scope: CoroutineScope? = null): DataSet<R>
public fun execute(dataSet: DataSet<T>, meta: Meta = Meta.EMPTY): DataSet<R>
public companion object
}
@ -26,16 +24,17 @@ public interface Action<in T : Any, out R : Any> {
public infix fun <T : Any, I : Any, R : Any> Action<T, I>.then(action: Action<I, R>): Action<T, R> {
// TODO introduce composite action and add optimize by adding action to the list
return object : Action<T, R> {
override suspend fun execute(dataSet: DataSet<T>, meta: Meta, scope: CoroutineScope?): DataSet<R> {
return action.execute(this@then.execute(dataSet, meta, scope), meta, scope)
}
override fun execute(
dataSet: DataSet<T>,
meta: Meta,
): DataSet<R> = action.execute(this@then.execute(dataSet, meta), meta)
}
}
@DFExperimental
public suspend fun <T : Any, R : Any> DataSet<T>.transformWith(
action: Action<T, R>,
public operator fun <T : Any, R : Any> Action<T, R>.invoke(
dataSet: DataSet<T>,
meta: Meta = Meta.EMPTY,
scope: CoroutineScope? = null,
): DataSet<R> = action.execute(this, meta, scope)
): DataSet<R> = execute(dataSet, meta)

View File

@ -1,9 +1,5 @@
package space.kscience.dataforge.actions
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.flow.collect
import kotlinx.coroutines.flow.map
import kotlinx.coroutines.launch
import space.kscience.dataforge.data.*
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MutableMeta
@ -29,66 +25,71 @@ public data class ActionEnv(
* Action environment
*/
@DFBuilder
public class MapActionBuilder<T, R>(public var name: Name, public var meta: MutableMeta, public val actionMeta: Meta) {
public class MapActionBuilder<T, R>(
public var name: Name,
public var meta: MutableMeta,
public val actionMeta: Meta,
@PublishedApi internal var outputType: KType,
) {
public lateinit var result: suspend ActionEnv.(T) -> R
/**
* Set unsafe [outputType] for the resulting data. Be sure that it is correct.
*/
public fun <R1 : R> result(outputType: KType, f: suspend ActionEnv.(T) -> R1) {
this.outputType = outputType
result = f;
}
/**
* Calculate the result of goal
*/
public fun result(f: suspend ActionEnv.(T) -> R) {
public inline fun <reified R1 : R> result(noinline f: suspend ActionEnv.(T) -> R1) {
outputType = typeOf<R1>()
result = f;
}
}
@PublishedApi
internal class MapAction<in T : Any, out R : Any>(
private val outputType: KType,
internal class MapAction<in T : Any, R : Any>(
outputType: KType,
private val block: MapActionBuilder<T, R>.() -> Unit,
) : Action<T, R> {
) : AbstractAction<T, R>(outputType) {
override suspend fun execute(
dataSet: DataSet<T>,
meta: Meta,
scope: CoroutineScope?,
): DataSet<R> {
suspend fun mapOne(data: NamedData<T>): NamedData<R> {
// Creating a new environment for action using **old** name, old meta and task meta
val env = ActionEnv(data.name, data.meta, meta)
private fun DataSetBuilder<R>.mapOne(name: Name, data: Data<T>, meta: Meta) {
// Creating a new environment for action using **old** name, old meta and task meta
val env = ActionEnv(name, data.meta, meta)
//applying transformation from builder
val builder = MapActionBuilder<T, R>(
data.name,
data.meta.toMutableMeta(), // using data meta
meta
).apply(block)
//applying transformation from builder
val builder = MapActionBuilder<T, R>(
name,
data.meta.toMutableMeta(), // using data meta
meta,
outputType
).apply(block)
//getting new name
val newName = builder.name
//getting new name
val newName = builder.name
//getting new meta
val newMeta = builder.meta.seal()
//getting new meta
val newMeta = builder.meta.seal()
@OptIn(DFInternal::class)
val newData = Data(outputType, newMeta, dependencies = listOf(data)) {
builder.result(env, data.await())
}
//setting the data node
return newData.named(newName)
@OptIn(DFInternal::class)
val newData = Data(builder.outputType, newMeta, dependencies = listOf(data)) {
builder.result(env, data.await())
}
//setting the data node
data(newName, newData)
}
val flow = dataSet.flowData().map(::mapOne)
override fun DataSetBuilder<R>.generate(data: DataSet<T>, meta: Meta) {
data.forEach { mapOne(it.name, it.data, meta) }
}
return ActiveDataTree(outputType) {
populate(flow)
scope?.launch {
dataSet.updates.collect { name ->
//clear old nodes
remove(name)
//collect new items
populate(dataSet.flowChildren(name).map(::mapOne))
}
}
}
override fun DataSourceBuilder<R>.update(dataSet: DataSet<T>, meta: Meta, updateKey: Name) {
remove(updateKey)
dataSet[updateKey]?.let { mapOne(updateKey, it, meta) }
}
}

View File

@ -1,9 +1,5 @@
package space.kscience.dataforge.actions
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.flow.fold
import space.kscience.dataforge.data.*
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MutableMeta
@ -15,13 +11,23 @@ import kotlin.reflect.KType
import kotlin.reflect.typeOf
public class JoinGroup<T : Any, R : Any>(public var name: String, internal val set: DataSet<T>) {
public class JoinGroup<T : Any, R : Any>(
public var name: String,
internal val set: DataSet<T>,
@PublishedApi internal var outputType: KType,
) {
public var meta: MutableMeta = MutableMeta()
public lateinit var result: suspend ActionEnv.(Map<Name, T>) -> R
public lateinit var result: suspend ActionEnv.(Map<Name, ValueWithMeta<T>>) -> R
public fun result(f: suspend ActionEnv.(Map<Name, T>) -> R) {
internal fun <R1 : R> result(outputType: KType, f: suspend ActionEnv.(Map<Name, ValueWithMeta<T>>) -> R1) {
this.outputType = outputType
this.result = f;
}
public inline fun <reified R1 : R> result(noinline f: suspend ActionEnv.(Map<Name, ValueWithMeta<T>>) -> R1) {
outputType = typeOf<R1>()
this.result = f;
}
@ -29,31 +35,30 @@ public class JoinGroup<T : Any, R : Any>(public var name: String, internal val s
@DFBuilder
public class ReduceGroupBuilder<T : Any, R : Any>(
private val inputType: KType,
private val scope: CoroutineScope,
public val actionMeta: Meta,
private val outputType: KType,
) {
private val groupRules: MutableList<suspend (DataSet<T>) -> List<JoinGroup<T, R>>> = ArrayList();
private val groupRules: MutableList<(DataSet<T>) -> List<JoinGroup<T, R>>> = ArrayList();
/**
* introduce grouping by meta value
*/
public fun byValue(tag: String, defaultTag: String = "@default", action: JoinGroup<T, R>.() -> Unit) {
groupRules += { node ->
GroupRule.byMetaValue(scope, tag, defaultTag).gather(node).map {
JoinGroup<T, R>(it.key, it.value).apply(action)
GroupRule.byMetaValue(tag, defaultTag).gather(node).map {
JoinGroup<T, R>(it.key, it.value, outputType).apply(action)
}
}
}
public fun group(
groupName: String,
filter: suspend (Name, Data<T>) -> Boolean,
predicate: (Name, Meta) -> Boolean,
action: JoinGroup<T, R>.() -> Unit,
) {
groupRules += { source ->
listOf(
JoinGroup<T, R>(groupName, source.filter(filter)).apply(action)
JoinGroup<T, R>(groupName, source.filter(predicate), outputType).apply(action)
)
}
}
@ -61,30 +66,27 @@ public class ReduceGroupBuilder<T : Any, R : Any>(
/**
* Apply transformation to the whole node
*/
public fun result(resultName: String, f: suspend ActionEnv.(Map<Name, T>) -> R) {
public fun result(resultName: String, f: suspend ActionEnv.(Map<Name, ValueWithMeta<T>>) -> R) {
groupRules += { node ->
listOf(JoinGroup<T, R>(resultName, node).apply { result(f) })
listOf(JoinGroup<T, R>(resultName, node, outputType).apply { result(outputType, f) })
}
}
internal suspend fun buildGroups(input: DataSet<T>): List<JoinGroup<T, R>> {
return groupRules.flatMap { it.invoke(input) }
}
internal fun buildGroups(input: DataSet<T>): List<JoinGroup<T, R>> =
groupRules.flatMap { it.invoke(input) }
}
@PublishedApi
internal class ReduceAction<T : Any, R : Any>(
private val inputType: KType,
outputType: KType,
private val action: ReduceGroupBuilder<T, R>.() -> Unit,
) : CachingAction<T, R>(outputType) {
//TODO optimize reduction. Currently the whole action recalculates on push
) : AbstractAction<T, R>(outputType) {
//TODO optimize reduction. Currently, the whole action recalculates on push
override fun CoroutineScope.transform(set: DataSet<T>, meta: Meta, key: Name): Flow<NamedData<R>> = flow {
ReduceGroupBuilder<T, R>(inputType, this@transform, meta).apply(action).buildGroups(set).forEach { group ->
val dataFlow: Map<Name, Data<T>> = group.set.flowData().fold(HashMap()) { acc, value ->
override fun DataSetBuilder<R>.generate(data: DataSet<T>, meta: Meta) {
ReduceGroupBuilder<T, R>(meta, outputType).apply(action).buildGroups(data).forEach { group ->
val dataFlow: Map<Name, Data<T>> = group.set.asSequence().fold(HashMap()) { acc, value ->
acc.apply {
acc[value.name] = value.data
}
@ -96,11 +98,11 @@ internal class ReduceAction<T : Any, R : Any>(
val env = ActionEnv(Name.parse(groupName), groupMeta, meta)
@OptIn(DFInternal::class) val res: Data<R> = dataFlow.reduceToData(
outputType,
group.outputType,
meta = groupMeta
) { group.result.invoke(env, it) }
emit(res.named(env.name))
data(env.name, res)
}
}
}
@ -112,4 +114,4 @@ internal class ReduceAction<T : Any, R : Any>(
@Suppress("FunctionName")
public inline fun <reified T : Any, reified R : Any> Action.Companion.reduce(
noinline builder: ReduceGroupBuilder<T, R>.() -> Unit,
): Action<T, R> = ReduceAction(typeOf<T>(), typeOf<R>(), builder)
): Action<T, R> = ReduceAction(typeOf<R>(), builder)

View File

@ -1,16 +1,11 @@
package space.kscience.dataforge.actions
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.FlowPreview
import kotlinx.coroutines.flow.*
import kotlinx.coroutines.launch
import space.kscience.dataforge.data.*
import space.kscience.dataforge.meta.Laminate
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MutableMeta
import space.kscience.dataforge.meta.toMutableMeta
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.misc.DFInternal
import space.kscience.dataforge.names.Name
import kotlin.collections.set
import kotlin.reflect.KType
@ -19,10 +14,15 @@ import kotlin.reflect.typeOf
public class SplitBuilder<T : Any, R : Any>(public val name: Name, public val meta: Meta) {
public class FragmentRule<T : Any, R : Any>(public val name: Name, public var meta: MutableMeta) {
public class FragmentRule<T : Any, R : Any>(
public val name: Name,
public var meta: MutableMeta,
@PublishedApi internal var outputType: KType,
) {
public lateinit var result: suspend (T) -> R
public fun result(f: suspend (T) -> R) {
public inline fun <reified R1 : R> result(noinline f: suspend (T) -> R1) {
this.outputType = typeOf<R1>()
result = f;
}
}
@ -44,45 +44,42 @@ public class SplitBuilder<T : Any, R : Any>(public val name: Name, public val me
*/
@PublishedApi
internal class SplitAction<T : Any, R : Any>(
private val outputType: KType,
outputType: KType,
private val action: SplitBuilder<T, R>.() -> Unit,
) : Action<T, R> {
) : AbstractAction<T, R>(outputType) {
@OptIn(FlowPreview::class)
override suspend fun execute(
dataSet: DataSet<T>,
meta: Meta,
scope: CoroutineScope?,
): DataSet<R> {
private fun DataSetBuilder<R>.splitOne(name: Name, data: Data<T>, meta: Meta) {
val laminate = Laminate(data.meta, meta)
suspend fun splitOne(data: NamedData<T>): Flow<NamedData<R>> {
val laminate = Laminate(data.meta, meta)
val split = SplitBuilder<T, R>(data.name, data.meta).apply(action)
val split = SplitBuilder<T, R>(name, data.meta).apply(action)
// apply individual fragment rules to result
return split.fragments.entries.asFlow().map { (fragmentName, rule) ->
val env = SplitBuilder.FragmentRule<T, R>(fragmentName, laminate.toMutableMeta()).apply(rule)
//data.map<R>(outputType, meta = env.meta) { env.result(it) }.named(fragmentName)
@OptIn(DFInternal::class) Data(outputType, meta = env.meta, dependencies = listOf(data)) {
// apply individual fragment rules to result
split.fragments.forEach { (fragmentName, rule) ->
val env = SplitBuilder.FragmentRule<T, R>(
fragmentName,
laminate.toMutableMeta(),
outputType
).apply(rule)
//data.map<R>(outputType, meta = env.meta) { env.result(it) }.named(fragmentName)
data(
fragmentName,
@Suppress("OPT_IN_USAGE") Data(outputType, meta = env.meta, dependencies = listOf(data)) {
env.result(data.await())
}.named(fragmentName)
}
}
return ActiveDataTree<R>(outputType) {
populate(dataSet.flowData().flatMapConcat(transform = ::splitOne))
scope?.launch {
dataSet.updates.collect { name ->
//clear old nodes
remove(name)
//collect new items
populate(dataSet.flowChildren(name).flatMapConcat(transform = ::splitOne))
}
}
)
}
}
override fun DataSetBuilder<R>.generate(data: DataSet<T>, meta: Meta) {
data.forEach { splitOne(it.name, it.data, meta) }
}
override fun DataSourceBuilder<R>.update(dataSet: DataSet<T>, meta: Meta, updateKey: Name) {
remove(updateKey)
dataSet[updateKey]?.let { splitOne(updateKey, it, meta) }
}
}
/**

View File

@ -1,118 +0,0 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Job
import kotlinx.coroutines.flow.*
import kotlinx.coroutines.launch
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import space.kscience.dataforge.meta.*
import space.kscience.dataforge.names.*
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
 * A mutable [DataTree] that also acts as a [DataSetBuilder] and reports changed names through [updates].
 *
 * Every write to the backing item map happens under [mutex].
 */
public class ActiveDataTree<T : Any>(
    override val dataType: KType,
) : DataTree<T>, DataSetBuilder<T>, ActiveDataSet<T> {
    private val mutex = Mutex()
    private val treeItems = HashMap<NameToken, DataTreeItem<T>>()

    /**
     * Direct children of this node, without the items whose token body starts with "@".
     */
    override suspend fun items(): Map<NameToken, DataTreeItem<T>> = mutex.withLock {
        treeItems.filter { !it.key.body.startsWith("@") }
    }

    private val _updates = MutableSharedFlow<Name>()

    /**
     * Names of the items that were changed through this tree.
     */
    override val updates: Flow<Name>
        get() = _updates

    /**
     * Remove a direct child and, if it was present, publish its single-token name to [updates].
     */
    private suspend fun remove(token: NameToken) {
        mutex.withLock {
            if (treeItems.remove(token) != null) {
                _updates.emit(token.asName())
            }
        }
    }

    /**
     * Remove the item with the given [name]. Nothing happens when the parent of [name] is not an [ActiveDataTree].
     *
     * @throws IllegalStateException if [name] is empty
     */
    override suspend fun remove(name: Name) {
        if (name.isEmpty()) error("Can't remove the root node")
        (getItem(name.cutLast()).tree as? ActiveDataTree)?.remove(name.lastOrNull()!!)
    }

    /**
     * Store [data] as a leaf directly under this node.
     */
    private suspend fun set(token: NameToken, data: Data<T>) {
        mutex.withLock {
            treeItems[token] = DataTreeItem.Leaf(data)
        }
    }

    /**
     * Return the child [ActiveDataTree] stored under [token], or create and register a new one
     * when there is no such child.
     */
    private suspend fun getOrCreateNode(token: NameToken): ActiveDataTree<T> =
        (treeItems[token] as? DataTreeItem.Node<T>)?.tree as? ActiveDataTree<T>
            ?: ActiveDataTree<T>(dataType).also {
                mutex.withLock {
                    treeItems[token] = DataTreeItem.Node(it)
                }
            }

    /**
     * Walk down [name] token by token, creating the missing nodes on the way. An empty name yields this node.
     */
    private suspend fun getOrCreateNode(name: Name): ActiveDataTree<T> {
        return when (name.length) {
            0 -> this
            1 -> getOrCreateNode(name.firstOrNull()!!)
            else -> getOrCreateNode(name.firstOrNull()!!).getOrCreateNode(name.cutFirst())
        }
    }

    /**
     * Put [data] under [name], or remove the item when [data] is `null`, then publish [name] to [updates].
     *
     * @throws IllegalStateException if [data] is not null and [name] is empty
     */
    override suspend fun emit(name: Name, data: Data<T>?) {
        if (data == null) {
            remove(name)
        } else {
            when (name.length) {
                0 -> error("Can't add data with empty name")
                1 -> set(name.firstOrNull()!!, data)
                // Covers every name longer than one token. Restricting this branch to length 2 would
                // silently drop data whose name has three or more tokens.
                else -> getOrCreateNode(name.cutLast()).set(name.lastOrNull()!!, data)
            }
        }
        _updates.emit(name)
    }

    /**
     * Copy the given [dataSet] into the branch [name] and keep re-emitting the items it reports as updated.
     * The work runs in a coroutine launched in the receiver scope; the returned [Job] controls it.
     */
    public fun CoroutineScope.setAndObserve(name: Name, dataSet: DataSet<T>): Job = launch {
        emit(name, dataSet)
        dataSet.updates.collect { nameInBranch ->
            emit(name + nameInBranch, dataSet.getData(nameInBranch))
        }
    }
}
/**
 * Create an [ActiveDataTree] of the given [type] and run [block] on it before it is returned.
 */
@Suppress("FunctionName")
public suspend fun <T : Any> ActiveDataTree(
    type: KType,
    block: suspend ActiveDataTree<T>.() -> Unit,
): ActiveDataTree<T> = ActiveDataTree<T>(type).apply { block() }
/**
 * Create an [ActiveDataTree] whose type marker is taken from the reified [T] and run [block] on it.
 */
@Suppress("FunctionName")
public suspend inline fun <reified T : Any> ActiveDataTree(
    crossinline block: suspend ActiveDataTree<T>.() -> Unit,
): ActiveDataTree<T> {
    val tree = ActiveDataTree<T>(typeOf<T>())
    tree.block()
    return tree
}
/**
 * Build a new [ActiveDataTree] with [block] and emit it into the branch [name] of this tree.
 */
public suspend inline fun <reified T : Any> ActiveDataTree<T>.emit(
    name: Name,
    noinline block: suspend ActiveDataTree<T>.() -> Unit,
) {
    val branch = ActiveDataTree(typeOf<T>(), block)
    emit(name, branch)
}
/**
 * Build a new [ActiveDataTree] with [block] and emit it into the branch given by the parsed [name].
 */
public suspend inline fun <reified T : Any> ActiveDataTree<T>.emit(
    name: String,
    noinline block: suspend ActiveDataTree<T>.() -> Unit,
) {
    val branchName = Name.parse(name)
    emit(branchName, ActiveDataTree(typeOf<T>(), block))
}

View File

@ -1,52 +0,0 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.collect
import kotlinx.coroutines.launch
import space.kscience.dataforge.actions.Action
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.startsWith
import kotlin.reflect.KType
/**
 * Delete all entries whose keys start with [name].
 */
internal fun MutableMap<Name, *>.removeWhatStartsWith(name: Name) {
    // The filter result is a separate list, so removing from the map while walking it is safe.
    keys.filter { candidate -> candidate.startsWith(name) }
        .forEach { staleKey -> remove(staleKey) }
}
/**
 * An action that fills an [ActiveDataTree] from [transform] and recalculates the affected part on source push.
 *
 * @param outputType the type marker of the produced [ActiveDataTree].
 */
public abstract class CachingAction<in T : Any, out R : Any>(
    public val outputType: KType,
) : Action<T, R> {

    /**
     * Produce the results for [set]. [key] is the name reported by a source update,
     * or [Name.EMPTY] for the initial calculation.
     */
    protected abstract fun CoroutineScope.transform(
        set: DataSet<T>,
        meta: Meta,
        key: Name = Name.EMPTY,
    ): Flow<NamedData<R>>

    /**
     * Build the initial result and, when [scope] is provided, keep it in sync with the updates of [dataSet].
     *
     * The update subscription is launched in [scope], so this function returns as soon as the initial
     * content is in place instead of suspending for as long as the source keeps its update flow open.
     */
    override suspend fun execute(
        dataSet: DataSet<T>,
        meta: Meta,
        scope: CoroutineScope?,
    ): DataSet<R> = ActiveDataTree<R>(outputType) {
        coroutineScope {
            populate(transform(dataSet, meta))
        }
        scope?.launch {
            dataSet.updates.collect { updatedName ->
                //clear old nodes
                remove(updatedName)
                //collect new items
                populate(scope.transform(dataSet, meta, updatedName))
                //FIXME if the target is data, updates are fired twice
            }
        }
    }
}

View File

@ -15,7 +15,7 @@ import kotlin.reflect.typeOf
* A data element characterized by its meta
*/
@Type(Data.TYPE)
public interface Data<out T : Any> : Goal<T>, MetaRepr {
public interface Data<out T> : Goal<T>, MetaRepr {
/**
* Type marker for the data. The type is known before the calculation takes place so it could be checked.
*/
@ -83,6 +83,10 @@ public class StaticData<T : Any>(
override val meta: Meta = Meta.EMPTY,
) : Data<T>, StaticGoal<T>(value)
@Suppress("FunctionName")
public inline fun <reified T : Any> Data(value: T, meta: Meta = Meta.EMPTY): StaticData<T> =
StaticData(typeOf<T>(), value, meta)
@Suppress("FunctionName")
@DFInternal
public fun <T : Any> Data(

View File

@ -1,11 +1,16 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.*
import kotlinx.coroutines.flow.*
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.emptyFlow
import kotlinx.coroutines.flow.mapNotNull
import space.kscience.dataforge.data.Data.Companion.TYPE_OF_NOTHING
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.set
import space.kscience.dataforge.names.*
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.asName
import space.kscience.dataforge.names.endsWith
import space.kscience.dataforge.names.parseAsName
import kotlin.reflect.KType
public interface DataSet<out T : Any> {
@ -16,21 +21,19 @@ public interface DataSet<out T : Any> {
public val dataType: KType
/**
* Traverse this provider or its child. The order is not guaranteed.
* Meta-data associated with this node. If no meta is provided, returns [Meta.EMPTY].
*/
public fun flowData(): Flow<NamedData<T>>
public val meta: Meta
/**
* Traverse this [DataSet] returning named data instances. The order is not guaranteed.
*/
public operator fun iterator(): Iterator<NamedData<T>>
/**
* Get data with given name.
*/
public suspend fun getData(name: Name): Data<T>?
/**
* Get a snapshot of names of top level children of given node. Empty if node does not exist or is a leaf.
*/
public suspend fun listTop(prefix: Name = Name.EMPTY): List<Name> =
flowData().map { it.name }.filter { it.startsWith(prefix) && (it.length == prefix.length + 1) }.toList()
// By default, traverses the whole tree. Could be optimized in descendants
public operator fun get(name: Name): Data<T>?
public companion object {
public val META_KEY: Name = "@meta".asName()
@ -40,17 +43,30 @@ public interface DataSet<out T : Any> {
*/
public val EMPTY: DataSet<Nothing> = object : DataSet<Nothing> {
override val dataType: KType = TYPE_OF_NOTHING
override val meta: Meta get() = Meta.EMPTY
//private val nothing: Nothing get() = error("this is nothing")
override fun iterator(): Iterator<NamedData<Nothing>> = emptySequence<NamedData<Nothing>>().iterator()
override fun flowData(): Flow<NamedData<Nothing>> = emptyFlow()
override suspend fun getData(name: Name): Data<Nothing>? = null
override fun get(name: Name): Data<Nothing>? = null
}
}
}
public interface ActiveDataSet<T : Any> : DataSet<T> {
public fun <T : Any> DataSet<T>.asSequence(): Sequence<NamedData<T>> = object : Sequence<NamedData<T>> {
override fun iterator(): Iterator<NamedData<T>> = this@asSequence.iterator()
}
public fun <T : Any> DataSet<T>.asIterable(): Iterable<NamedData<T>> = object : Iterable<NamedData<T>> {
override fun iterator(): Iterator<NamedData<T>> = this@asIterable.iterator()
}
public operator fun <T : Any> DataSet<T>.get(name: String): Data<T>? = get(name.parseAsName())
/**
* A [DataSet] with propagated updates.
*/
public interface DataSource<out T : Any> : DataSet<T>, CoroutineScope {
/**
* A flow of updated item names. Updates are propagated in a form of [Flow] of names of updated nodes.
* Those can include new data items and replacement of existing ones. The replaced items could update existing data content
@ -58,30 +74,38 @@ public interface ActiveDataSet<T : Any> : DataSet<T> {
*
*/
public val updates: Flow<Name>
/**
* Stop generating updates from this [DataSource]
*/
public fun close() {
coroutineContext[Job]?.cancel()
}
}
public val <T : Any> DataSet<T>.updates: Flow<Name> get() = if (this is ActiveDataSet) updates else emptyFlow()
/**
* Flow all data nodes with names starting with [branchName]
*/
public fun <T : Any> DataSet<T>.flowChildren(branchName: Name): Flow<NamedData<T>> = this@flowChildren.flowData().filter {
it.name.startsWith(branchName)
}
public val <T : Any> DataSet<T>.updates: Flow<Name> get() = if (this is DataSource) updates else emptyFlow()
//
///**
// * Flow all data nodes with names starting with [branchName]
// */
//public fun <T : Any> DataSet<T>.children(branchName: Name): Sequence<NamedData<T>> =
// this@children.asSequence().filter {
// it.name.startsWith(branchName)
// }
/**
* Start computation for all goals in data node and return a job for the whole node
*/
public fun <T : Any> DataSet<T>.startAll(coroutineScope: CoroutineScope): Job = coroutineScope.launch {
flowData().map {
asIterable().map {
it.launch(this@launch)
}.toList().joinAll()
}.joinAll()
}
public suspend fun <T : Any> DataSet<T>.join(): Unit = coroutineScope { startAll(this).join() }
public suspend fun DataSet<*>.toMeta(): Meta = Meta {
flowData().collect {
public fun DataSet<*>.toMeta(): Meta = Meta {
forEach {
if (it.name.endsWith(DataSet.META_KEY)) {
set(it.name, it.meta)
} else {
@ -93,4 +117,4 @@ public suspend fun DataSet<*>.toMeta(): Meta = Meta {
}
}
public val <T : Any> DataSet<T>.updatesWithData: Flow<NamedData<T>> get() = updates.mapNotNull { getData(it)?.named(it) }
public val <T : Any> DataSet<T>.updatesWithData: Flow<NamedData<T>> get() = updates.mapNotNull { get(it)?.named(it) }

View File

@ -1,12 +1,10 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.collect
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MutableMeta
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.isEmpty
import space.kscience.dataforge.names.plus
import kotlin.reflect.KType
@ -16,137 +14,152 @@ public interface DataSetBuilder<in T : Any> {
/**
* Remove all data items starting with [name]
*/
public suspend fun remove(name: Name)
public fun remove(name: Name)
public suspend fun emit(name: Name, data: Data<T>?)
public fun data(name: Name, data: Data<T>?)
/**
* Set a current state of given [dataSet] into a branch [name]. Does not propagate updates
*/
public suspend fun emit(name: Name, dataSet: DataSet<T>) {
public fun node(name: Name, dataSet: DataSet<T>) {
//remove previous items
if (name != Name.EMPTY) {
remove(name)
}
//Set new items
dataSet.flowData().collect {
emit(name + it.name, it.data)
dataSet.forEach {
data(name + it.name, it.data)
}
}
/**
* Append data to node
* Set meta for the given node
*/
public suspend infix fun String.put(data: Data<T>): Unit = emit(Name.parse(this), data)
public fun meta(name: Name, meta: Meta)
/**
* Append node
*/
public suspend infix fun String.put(dataSet: DataSet<T>): Unit = emit(Name.parse(this), dataSet)
/**
* Build and append node
*/
public suspend infix fun String.put(block: suspend DataSetBuilder<T>.() -> Unit): Unit = emit(Name.parse(this), block)
}
private class SubSetBuilder<in T : Any>(
/**
* Define meta in this [DataSet]
*/
public fun <T : Any> DataSetBuilder<T>.meta(value: Meta): Unit = meta(Name.EMPTY, value)
/**
* Define meta in this [DataSet]
*/
public fun <T : Any> DataSetBuilder<T>.meta(mutableMeta: MutableMeta.() -> Unit): Unit = meta(Meta(mutableMeta))
@PublishedApi
internal class SubSetBuilder<in T : Any>(
private val parent: DataSetBuilder<T>,
private val branch: Name,
) : DataSetBuilder<T> {
override val dataType: KType get() = parent.dataType
override suspend fun remove(name: Name) {
override fun remove(name: Name) {
parent.remove(branch + name)
}
override suspend fun emit(name: Name, data: Data<T>?) {
parent.emit(branch + name, data)
override fun data(name: Name, data: Data<T>?) {
parent.data(branch + name, data)
}
override suspend fun emit(name: Name, dataSet: DataSet<T>) {
parent.emit(branch + name, dataSet)
override fun node(name: Name, dataSet: DataSet<T>) {
parent.node(branch + name, dataSet)
}
override fun meta(name: Name, meta: Meta) {
parent.meta(branch + name, meta)
}
}
public suspend fun <T : Any> DataSetBuilder<T>.emit(name: Name, block: suspend DataSetBuilder<T>.() -> Unit) {
SubSetBuilder(this, name).apply { block() }
public inline fun <T : Any> DataSetBuilder<T>.node(
name: Name,
crossinline block: DataSetBuilder<T>.() -> Unit,
) {
if (name.isEmpty()) block() else SubSetBuilder(this, name).block()
}
public suspend fun <T : Any> DataSetBuilder<T>.emit(name: String, data: Data<T>) {
emit(Name.parse(name), data)
public fun <T : Any> DataSetBuilder<T>.data(name: String, value: Data<T>) {
data(Name.parse(name), value)
}
public suspend fun <T : Any> DataSetBuilder<T>.emit(name: String, set: DataSet<T>) {
this.emit(Name.parse(name), set)
public fun <T : Any> DataSetBuilder<T>.node(name: String, set: DataSet<T>) {
node(Name.parse(name), set)
}
public suspend fun <T : Any> DataSetBuilder<T>.emit(name: String, block: suspend DataSetBuilder<T>.() -> Unit): Unit =
this@emit.emit(Name.parse(name), block)
public inline fun <T : Any> DataSetBuilder<T>.node(
name: String,
crossinline block: DataSetBuilder<T>.() -> Unit,
): Unit = node(Name.parse(name), block)
public suspend fun <T : Any> DataSetBuilder<T>.emit(data: NamedData<T>) {
emit(data.name, data.data)
public fun <T : Any> DataSetBuilder<T>.set(value: NamedData<T>) {
data(value.name, value.data)
}
/**
* Produce lazy [Data] and emit it into the [DataSetBuilder]
*/
public suspend inline fun <reified T : Any> DataSetBuilder<T>.produce(
public inline fun <reified T : Any> DataSetBuilder<T>.produce(
name: String,
meta: Meta = Meta.EMPTY,
noinline producer: suspend () -> T,
) {
val data = Data(meta, block = producer)
emit(name, data)
data(name, data)
}
public suspend inline fun <reified T : Any> DataSetBuilder<T>.produce(
public inline fun <reified T : Any> DataSetBuilder<T>.produce(
name: Name,
meta: Meta = Meta.EMPTY,
noinline producer: suspend () -> T,
) {
val data = Data(meta, block = producer)
emit(name, data)
data(name, data)
}
/**
* Emit a static data with the fixed value
*/
public suspend inline fun <reified T : Any> DataSetBuilder<T>.static(
public inline fun <reified T : Any> DataSetBuilder<T>.static(
name: String,
data: T,
meta: Meta = Meta.EMPTY
): Unit =
emit(name, Data.static(data, meta))
meta: Meta = Meta.EMPTY,
): Unit = data(name, Data.static(data, meta))
public suspend inline fun <reified T : Any> DataSetBuilder<T>.static(
public inline fun <reified T : Any> DataSetBuilder<T>.static(
name: Name,
data: T,
meta: Meta = Meta.EMPTY
): Unit =
emit(name, Data.static(data, meta))
meta: Meta = Meta.EMPTY,
): Unit = data(name, Data.static(data, meta))
public suspend inline fun <reified T : Any> DataSetBuilder<T>.static(
public inline fun <reified T : Any> DataSetBuilder<T>.static(
name: String,
data: T,
mutableMeta: MutableMeta.() -> Unit,
): Unit = emit(Name.parse(name), Data.static(data, Meta(mutableMeta)))
): Unit = data(Name.parse(name), Data.static(data, Meta(mutableMeta)))
/**
* Update data with given node data and meta with node meta.
*/
@DFExperimental
public suspend fun <T : Any> DataSetBuilder<T>.populate(tree: DataSet<T>): Unit = coroutineScope {
tree.flowData().collect {
public fun <T : Any> DataSetBuilder<T>.populateFrom(tree: DataSet<T>): Unit {
tree.forEach {
//TODO check if the place is occupied
emit(it.name, it.data)
data(it.name, it.data)
}
}
public suspend fun <T : Any> DataSetBuilder<T>.populate(flow: Flow<NamedData<T>>) {
flow.collect {
emit(it.name, it.data)
//public fun <T : Any> DataSetBuilder<T>.populateFrom(flow: Flow<NamedData<T>>) {
// flow.collect {
// data(it.name, it.data)
// }
//}
public fun <T : Any> DataSetBuilder<T>.populateFrom(sequence: Sequence<NamedData<T>>) {
sequence.forEach {
data(it.name, it.data)
}
}

View File

@ -1,9 +1,6 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.emitAll
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.flow.map
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.Type
import space.kscience.dataforge.names.*
import kotlin.collections.component1
@ -11,8 +8,16 @@ import kotlin.collections.component2
import kotlin.reflect.KType
public sealed class DataTreeItem<out T : Any> {
public class Node<out T : Any>(public val tree: DataTree<T>) : DataTreeItem<T>()
public class Leaf<out T : Any>(public val data: Data<T>) : DataTreeItem<T>()
public abstract val meta: Meta
public class Node<out T : Any>(public val tree: DataTree<T>) : DataTreeItem<T>() {
override val meta: Meta get() = tree.meta
}
public class Leaf<out T : Any>(public val data: Data<T>) : DataTreeItem<T>() {
override val meta: Meta get() = data.meta
}
}
public val <T : Any> DataTreeItem<T>.type: KType
@ -28,59 +33,64 @@ public val <T : Any> DataTreeItem<T>.type: KType
public interface DataTree<out T : Any> : DataSet<T> {
/**
* Children items of this [DataTree] provided asynchronously
* Top-level children items of this [DataTree]
*/
public suspend fun items(): Map<NameToken, DataTreeItem<T>>
public val items: Map<NameToken, DataTreeItem<T>>
override fun flowData(): Flow<NamedData<T>> = flow {
items().forEach { (token, childItem: DataTreeItem<T>) ->
if(!token.body.startsWith("@")) {
override val meta: Meta get() = items[META_ITEM_NAME_TOKEN]?.meta ?: Meta.EMPTY
override fun iterator(): Iterator<NamedData<T>> = iterator {
items.forEach { (token, childItem: DataTreeItem<T>) ->
if (!token.body.startsWith("@")) {
when (childItem) {
is DataTreeItem.Leaf -> emit(childItem.data.named(token.asName()))
is DataTreeItem.Node -> emitAll(childItem.tree.flowData().map { it.named(token + it.name) })
is DataTreeItem.Leaf -> yield(childItem.data.named(token.asName()))
is DataTreeItem.Node -> yieldAll(childItem.tree.asSequence().map { it.named(token + it.name) })
}
}
}
}
override suspend fun listTop(prefix: Name): List<Name> =
getItem(prefix).tree?.items()?.keys?.map { prefix + it } ?: emptyList()
override suspend fun getData(name: Name): Data<T>? = when (name.length) {
override fun get(name: Name): Data<T>? = when (name.length) {
0 -> null
1 -> items()[name.firstOrNull()!!].data
else -> items()[name.firstOrNull()!!].tree?.getData(name.cutFirst())
1 -> items[name.firstOrNull()!!].data
else -> items[name.firstOrNull()!!].tree?.get(name.cutFirst())
}
public companion object {
public const val TYPE: String = "dataTree"
/**
* A name token used to designate tree node meta
*/
public val META_ITEM_NAME_TOKEN: NameToken = NameToken("@meta")
}
}
public suspend fun <T: Any> DataSet<T>.getData(name: String): Data<T>? = getData(Name.parse(name))
public fun <T : Any> DataTree<T>.listChildren(prefix: Name): List<Name> =
getItem(prefix).tree?.items?.keys?.map { prefix + it } ?: emptyList()
/**
* Get a [DataTreeItem] with given [name] or null if the item does not exist
*/
public tailrec suspend fun <T : Any> DataTree<T>.getItem(name: Name): DataTreeItem<T>? = when (name.length) {
public tailrec fun <T : Any> DataTree<T>.getItem(name: Name): DataTreeItem<T>? = when (name.length) {
0 -> DataTreeItem.Node(this)
1 -> items()[name.firstOrNull()]
else -> items()[name.firstOrNull()!!].tree?.getItem(name.cutFirst())
1 -> items[name.firstOrNull()]
else -> items[name.firstOrNull()!!].tree?.getItem(name.cutFirst())
}
public val <T : Any> DataTreeItem<T>?.tree: DataTree<T>? get() = (this as? DataTreeItem.Node<T>)?.tree
public val <T : Any> DataTreeItem<T>?.data: Data<T>? get() = (this as? DataTreeItem.Leaf<T>)?.data
/**
* Flow of all children including nodes
* A [Sequence] of all children including nodes
*/
public fun <T : Any> DataTree<T>.itemFlow(): Flow<Pair<Name, DataTreeItem<T>>> = flow {
items().forEach { (head, item) ->
emit(head.asName() to item)
public fun <T : Any> DataTree<T>.traverseItems(): Sequence<Pair<Name, DataTreeItem<T>>> = sequence {
items.forEach { (head, item) ->
yield(head.asName() to item)
if (item is DataTreeItem.Node) {
val subSequence = item.tree.itemFlow()
val subSequence = item.tree.traverseItems()
.map { (name, data) -> (head.asName() + name) to data }
emitAll(subSequence)
yieldAll(subSequence)
}
}
}
@ -92,5 +102,9 @@ public fun <T : Any> DataTree<T>.itemFlow(): Flow<Pair<Name, DataTreeItem<T>>> =
public fun <T : Any> DataTree<T>.branch(branchName: Name): DataTree<T> = object : DataTree<T> {
override val dataType: KType get() = this@branch.dataType
override suspend fun items(): Map<NameToken, DataTreeItem<T>> = getItem(branchName).tree?.items() ?: emptyMap()
override val meta: Meta
get() = getItem(branchName)?.meta ?: Meta.EMPTY
override val items: Map<NameToken, DataTreeItem<T>>
get() = getItem(branchName).tree?.items ?: emptyMap()
}

View File

@ -0,0 +1,132 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Job
import kotlinx.coroutines.flow.MutableSharedFlow
import kotlinx.coroutines.launch
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFInternal
import space.kscience.dataforge.names.*
import kotlin.collections.set
import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.coroutineContext
import kotlin.jvm.Synchronized
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
 * A builder that is both a [DataSetBuilder] and a [DataSource].
 *
 * The [updates] property is narrowed to a [MutableSharedFlow] of names.
 */
public interface DataSourceBuilder<T : Any> : DataSetBuilder<T>, DataSource<T> {
// Narrowed to a mutable flow type
override val updates: MutableSharedFlow<Name>
}
/**
 * A mutable [DataTree] that propagates updates.
 *
 * Items are stored in a private map keyed by [NameToken]. Every call to [data] launches a coroutine in this
 * builder's scope that emits the affected name into [updates].
 *
 * @param dataType the type reported by [dataType]; child nodes created on demand reuse it.
 * @param coroutineContext parent context; a child [Job] and a [GoalExecutionRestriction] are added on top of it.
 */
@PublishedApi
internal class DataTreeBuilder<T : Any>(
    override val dataType: KType,
    coroutineContext: CoroutineContext,
) : DataTree<T>, DataSourceBuilder<T> {

    override val coroutineContext: CoroutineContext =
        coroutineContext + Job(coroutineContext[Job]) + GoalExecutionRestriction()

    private val treeItems = HashMap<NameToken, DataTreeItem<T>>()

    /**
     * A view of the stored items without service entries (tokens whose body starts with `@`).
     */
    override val items: Map<NameToken, DataTreeItem<T>>
        get() = treeItems.filter { !it.key.body.startsWith("@") }

    /**
     * Meta of this node or [Meta.EMPTY] if none was set.
     *
     * Read directly from the backing map: [items] filters out `@`-tokens, so a lookup of the meta token
     * through [items] could never find the entry written by the [meta] function.
     */
    override val meta: Meta
        get() = treeItems[DataTree.META_ITEM_NAME_TOKEN]?.meta ?: Meta.EMPTY

    override val updates = MutableSharedFlow<Name>()

    /**
     * Remove a direct child. If an item was actually removed, its single-token name is emitted into [updates].
     */
    @Synchronized
    private fun remove(token: NameToken) {
        if (treeItems.remove(token) != null) {
            launch {
                updates.emit(token.asName())
            }
        }
    }

    /**
     * Remove the item with the given [name].
     * Nothing happens when the parent node does not exist or is not a [DataTreeBuilder].
     * Fails for an empty [name].
     */
    override fun remove(name: Name) {
        if (name.isEmpty()) error("Can't remove the root node")
        (getItem(name.cutLast()).tree as? DataTreeBuilder)?.remove(name.lastOrNull()!!)
    }

    /** Store [data] as a leaf directly under this node, replacing any existing item with the same token. */
    @Synchronized
    private fun set(token: NameToken, data: Data<T>) {
        treeItems[token] = DataTreeItem.Leaf(data)
    }

    /** Store [node] as a child node directly under this node, replacing any existing item with the same token. */
    @Synchronized
    private fun set(token: NameToken, node: DataTree<T>) {
        treeItems[token] = DataTreeItem.Node(node)
    }

    /**
     * Return the direct child builder for [token]. A new empty builder is created and stored when the
     * existing item is missing or is not a [DataTreeBuilder] node.
     */
    private fun getOrCreateNode(token: NameToken): DataTreeBuilder<T> =
        (treeItems[token] as? DataTreeItem.Node<T>)?.tree as? DataTreeBuilder<T>
            ?: DataTreeBuilder<T>(dataType, coroutineContext).also { set(token, it) }

    /** Resolve (creating intermediate nodes when needed) the builder located at [name]; empty name is this node. */
    private fun getOrCreateNode(name: Name): DataTreeBuilder<T> = when (name.length) {
        0 -> this
        1 -> getOrCreateNode(name.firstOrNull()!!)
        else -> getOrCreateNode(name.firstOrNull()!!).getOrCreateNode(name.cutFirst())
    }

    /**
     * Put [data] under [name], or remove the item when [data] is `null`.
     * The [name] is emitted into [updates] afterwards in both cases.
     */
    override fun data(name: Name, data: Data<T>?) {
        if (data == null) {
            remove(name)
        } else {
            when (name.length) {
                0 -> error("Can't add data with empty name")
                1 -> set(name.firstOrNull()!!, data)
                // Any deeper name: resolve (or create) the parent chain, then store under the last token.
                // Previously only length 2 was handled and longer names were silently dropped.
                else -> getOrCreateNode(name.cutLast()).set(name.lastOrNull()!!, data)
            }
        }
        launch {
            updates.emit(name)
        }
    }

    /**
     * Attach [meta] to the node at [name] by storing an empty data item under the service meta token.
     * Fails if [name] points to an existing leaf.
     */
    override fun meta(name: Name, meta: Meta) {
        val item = getItem(name)
        if (item is DataTreeItem.Leaf) error("TODO: Can't change meta of existing leaf item.")
        data(name + DataTree.META_ITEM_NAME_TOKEN, Data.empty(meta))
    }
}
/**
 * Build a dynamic [DataSource] of the given [type] that lives in the coroutine context of [parent].
 * The [block] is applied to the fresh builder synchronously, before the source is returned.
 */
@DFInternal
@Suppress("FunctionName")
public fun <T : Any> DataSource(
    type: KType,
    parent: CoroutineScope,
    block: DataSourceBuilder<T>.() -> Unit,
): DataSource<T> = DataTreeBuilder<T>(type, parent.coroutineContext).also { builder ->
    builder.block()
}
/**
 * Build a dynamic [DataSource] whose data type is taken from the reified parameter [T].
 * Delegates to the [KType]-based factory with the same [parent] and [block].
 */
@Suppress("OPT_IN_USAGE","FunctionName")
public inline fun <reified T : Any> DataSource(
    parent: CoroutineScope,
    crossinline block: DataSourceBuilder<T>.() -> Unit,
): DataSource<T> {
    val dataType = typeOf<T>()
    return DataSource<T>(dataType, parent) { block() }
}
/**
 * Create a [DataSourceBuilder] for the reified type [T] using the context of the calling coroutine
 * and apply [block] (empty by default) to it before returning.
 */
@Suppress("FunctionName")
public suspend inline fun <reified T : Any> DataSource(
    crossinline block: DataSourceBuilder<T>.() -> Unit = {},
): DataSourceBuilder<T> {
    val builder = DataTreeBuilder<T>(typeOf<T>(), coroutineContext)
    builder.block()
    return builder
}
/**
 * Create a child [DataSource] in the [parent] scope, fill it with [block]
 * and attach it to this builder as the node called [name].
 */
public inline fun <reified T : Any> DataSourceBuilder<T>.emit(
    name: Name,
    parent: CoroutineScope,
    noinline block: DataSourceBuilder<T>.() -> Unit,
) {
    val child: DataSource<T> = DataSource(parent, block)
    node(name, child)
}
/**
 * Create a child [DataSource] in the [parent] scope, fill it with [block]
 * and attach it to this builder as a node; the node name is parsed from the string [name].
 */
public inline fun <reified T : Any> DataSourceBuilder<T>.emit(
    name: String,
    parent: CoroutineScope,
    noinline block: DataSourceBuilder<T>.() -> Unit,
) {
    val nodeName = Name.parse(name)
    node(nodeName, DataSource(parent, block))
}

View File

@ -15,14 +15,13 @@
*/
package space.kscience.dataforge.data
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.flow.collect
import kotlinx.coroutines.launch
import space.kscience.dataforge.meta.get
import space.kscience.dataforge.meta.string
import space.kscience.dataforge.misc.DFInternal
public interface GroupRule {
public suspend fun <T : Any> gather(set: DataSet<T>): Map<String, DataSet<T>>
public fun <T : Any> gather(set: DataSet<T>): Map<String, DataSet<T>>
public companion object {
/**
@ -33,32 +32,45 @@ public interface GroupRule {
* @param defaultTagValue
* @return
*/
@OptIn(DFInternal::class)
public fun byMetaValue(
scope: CoroutineScope,
key: String,
defaultTagValue: String,
): GroupRule = object : GroupRule {
override suspend fun <T : Any> gather(
override fun <T : Any> gather(
set: DataSet<T>,
): Map<String, DataSet<T>> {
val map = HashMap<String, ActiveDataTree<T>>()
val map = HashMap<String, DataSet<T>>()
set.flowData().collect { data ->
val tagValue = data.meta[key]?.string ?: defaultTagValue
map.getOrPut(tagValue) { ActiveDataTree(set.dataType) }.emit(data.name, data.data)
}
if (set is DataSource) {
set.forEach { data ->
val tagValue: String = data.meta[key]?.string ?: defaultTagValue
(map.getOrPut(tagValue) { DataTreeBuilder(set.dataType, set.coroutineContext) } as DataTreeBuilder<T>)
.data(data.name, data.data)
scope.launch {
set.updates.collect { name ->
val data = set.getData(name)
set.launch {
set.updates.collect { name ->
val dataUpdate = set[name]
@Suppress("NULLABLE_EXTENSION_OPERATOR_WITH_SAFE_CALL_RECEIVER")
val tagValue = data?.meta?.get(key)?.string ?: defaultTagValue
map.getOrPut(tagValue) { ActiveDataTree(set.dataType) }.emit(name, data)
val updateTagValue = dataUpdate?.meta?.get(key)?.string ?: defaultTagValue
map.getOrPut(updateTagValue) {
DataSource(set.dataType, this) {
data(name, dataUpdate)
}
}
}
}
}
} else {
set.forEach { data ->
val tagValue: String = data.meta[key]?.string ?: defaultTagValue
(map.getOrPut(tagValue) { StaticDataTree(set.dataType) } as StaticDataTree<T>)
.data(data.name, data.data)
}
}
return map
}
}

View File

@ -9,6 +9,9 @@ public interface NamedData<out T : Any> : Named, Data<T> {
public val data: Data<T>
}
public operator fun NamedData<*>.component1(): Name = name
public operator fun <T: Any> NamedData<T>.component2(): Data<T> = data
private class NamedDataImpl<out T : Any>(
override val name: Name,
override val data: Data<T>,
@ -30,6 +33,3 @@ public fun <T : Any> Data<T>.named(name: Name): NamedData<T> = if (this is Named
} else {
NamedDataImpl(name, this)
}
public operator fun <T : Any> NamedData<T>.component1(): Name = name
public operator fun <T : Any> NamedData<T>.component2(): Data<T> = data

View File

@ -1,6 +1,7 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.coroutineScope
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.NameToken
@ -21,15 +22,16 @@ internal class StaticDataTree<T : Any>(
override val dataType: KType,
) : DataSetBuilder<T>, DataTree<T> {
private val items: MutableMap<NameToken, DataTreeItem<T>> = HashMap()
private val _items: MutableMap<NameToken, DataTreeItem<T>> = HashMap()
override suspend fun items(): Map<NameToken, DataTreeItem<T>> = items.filter { !it.key.body.startsWith("@") }
override val items: Map<NameToken, DataTreeItem<T>>
get() = _items.filter { !it.key.body.startsWith("@") }
override suspend fun remove(name: Name) {
override fun remove(name: Name) {
when (name.length) {
0 -> error("Can't remove root tree node")
1 -> items.remove(name.firstOrNull()!!)
else -> (items[name.firstOrNull()!!].tree as? StaticDataTree<T>)?.remove(name.cutFirst())
1 -> _items.remove(name.firstOrNull()!!)
else -> (_items[name.firstOrNull()!!].tree as? StaticDataTree<T>)?.remove(name.cutFirst())
}
}
@ -37,57 +39,55 @@ internal class StaticDataTree<T : Any>(
0 -> this
1 -> {
val itemName = name.firstOrNull()!!
(items[itemName].tree as? StaticDataTree<T>) ?: StaticDataTree<T>(dataType).also {
items[itemName] = DataTreeItem.Node(it)
(_items[itemName].tree as? StaticDataTree<T>) ?: StaticDataTree<T>(dataType).also {
_items[itemName] = DataTreeItem.Node(it)
}
}
else -> getOrCreateNode(name.cutLast()).getOrCreateNode(name.lastOrNull()!!.asName())
}
private suspend fun set(name: Name, item: DataTreeItem<T>?) {
private fun set(name: Name, item: DataTreeItem<T>?) {
if (name.isEmpty()) error("Can't set top level tree node")
if (item == null) {
remove(name)
} else {
getOrCreateNode(name.cutLast()).items[name.lastOrNull()!!] = item
getOrCreateNode(name.cutLast())._items[name.lastOrNull()!!] = item
}
}
override suspend fun emit(name: Name, data: Data<T>?) {
override fun data(name: Name, data: Data<T>?) {
set(name, data?.let { DataTreeItem.Leaf(it) })
}
override suspend fun emit(name: Name, dataSet: DataSet<T>) {
override fun node(name: Name, dataSet: DataSet<T>) {
if (dataSet is StaticDataTree) {
set(name, DataTreeItem.Node(dataSet))
} else {
coroutineScope {
dataSet.flowData().collect {
emit(name + it.name, it.data)
}
dataSet.forEach {
data(name + it.name, it.data)
}
}
}
override fun meta(name: Name, meta: Meta) {
val item = getItem(name)
if (item is DataTreeItem.Leaf) TODO("Can't change meta of existing leaf item.")
data(name + DataTree.META_ITEM_NAME_TOKEN, Data.empty(meta))
}
}
@Suppress("FunctionName")
public fun <T : Any> DataTree(dataType: KType): DataTree<T> = StaticDataTree(dataType)
@Suppress("FunctionName")
public inline fun <reified T : Any> DataTree(): DataTree<T> = DataTree(typeOf<T>())
@Suppress("FunctionName")
public suspend fun <T : Any> DataTree(
public inline fun <T : Any> DataTree(
dataType: KType,
block: suspend DataSetBuilder<T>.() -> Unit,
block: DataSetBuilder<T>.() -> Unit,
): DataTree<T> = StaticDataTree<T>(dataType).apply { block() }
@Suppress("FunctionName")
public suspend inline fun <reified T : Any> DataTree(
noinline block: suspend DataSetBuilder<T>.() -> Unit,
public inline fun <reified T : Any> DataTree(
noinline block: DataSetBuilder<T>.() -> Unit = {},
): DataTree<T> = DataTree(typeOf<T>(), block)
@OptIn(DFExperimental::class)
public suspend fun <T : Any> DataSet<T>.seal(): DataTree<T> = DataTree(dataType) {
populate(this@seal)
}
public fun <T : Any> DataSet<T>.seal(): DataTree<T> = DataTree(dataType) {
populateFrom(this@seal)
}

View File

@ -4,11 +4,14 @@ import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.filter
import kotlinx.coroutines.flow.map
import kotlinx.coroutines.flow.mapNotNull
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.isEmpty
import space.kscience.dataforge.names.plus
import space.kscience.dataforge.names.removeHeadOrNull
import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.EmptyCoroutineContext
import kotlin.reflect.KType
@ -16,34 +19,58 @@ import kotlin.reflect.KType
* A stateless filtered [DataSet]
*/
public fun <T : Any> DataSet<T>.filter(
predicate: suspend (Name, Data<T>) -> Boolean,
): ActiveDataSet<T> = object : ActiveDataSet<T> {
predicate: (Name, Meta) -> Boolean,
): DataSource<T> = object : DataSource<T> {
override val dataType: KType get() = this@filter.dataType
override fun flowData(): Flow<NamedData<T>> =
this@filter.flowData().filter { predicate(it.name, it.data) }
override val coroutineContext: CoroutineContext
get() = (this@filter as? DataSource)?.coroutineContext ?: EmptyCoroutineContext
override suspend fun getData(name: Name): Data<T>? = this@filter.getData(name)?.takeIf {
predicate(name, it)
override val meta: Meta get() = this@filter.meta
override fun iterator(): Iterator<NamedData<T>> = iterator {
for(d in this@filter){
if(predicate(d.name, d.meta)){
yield(d)
}
}
}
override fun get(name: Name): Data<T>? = this@filter.get(name)?.takeIf {
predicate(name, it.meta)
}
override val updates: Flow<Name> = this@filter.updates.filter flowFilter@{ name ->
val theData = this@filter.getData(name) ?: return@flowFilter false
predicate(name, theData)
val theData = this@filter[name] ?: return@flowFilter false
predicate(name, theData.meta)
}
}
/**
* Generate a wrapper data set with a given name prefix appended to all names
*/
public fun <T : Any> DataSet<T>.withNamePrefix(prefix: Name): DataSet<T> = if (prefix.isEmpty()) this
else object : ActiveDataSet<T> {
public fun <T : Any> DataSet<T>.withNamePrefix(prefix: Name): DataSet<T> = if (prefix.isEmpty()) {
this
} else object : DataSource<T> {
override val dataType: KType get() = this@withNamePrefix.dataType
override fun flowData(): Flow<NamedData<T>> = this@withNamePrefix.flowData().map { it.data.named(prefix + it.name) }
override val coroutineContext: CoroutineContext
get() = (this@withNamePrefix as? DataSource)?.coroutineContext ?: EmptyCoroutineContext
override suspend fun getData(name: Name): Data<T>? =
name.removeHeadOrNull(name)?.let { this@withNamePrefix.getData(it) }
override val meta: Meta get() = this@withNamePrefix.meta
override fun iterator(): Iterator<NamedData<T>> = iterator {
for(d in this@withNamePrefix){
yield(d.data.named(prefix + d.name))
}
}
// This wrapper exposes every item as `prefix + originalName`, so the lookup has to strip [prefix]
// (not the name itself) before delegating. If [prefix] cannot be removed from [name] the result is null.
override fun get(name: Name): Data<T>? =
    name.removeHeadOrNull(prefix)?.let { this@withNamePrefix.get(it) }
override val updates: Flow<Name> get() = this@withNamePrefix.updates.map { prefix + it }
}
@ -53,16 +80,23 @@ else object : ActiveDataSet<T> {
*/
public fun <T : Any> DataSet<T>.branch(branchName: Name): DataSet<T> = if (branchName.isEmpty()) {
this
} else object : ActiveDataSet<T> {
} else object : DataSource<T> {
override val dataType: KType get() = this@branch.dataType
override fun flowData(): Flow<NamedData<T>> = this@branch.flowData().mapNotNull {
it.name.removeHeadOrNull(branchName)?.let { name ->
it.data.named(name)
override val coroutineContext: CoroutineContext
get() = (this@branch as? DataSource)?.coroutineContext ?: EmptyCoroutineContext
override val meta: Meta get() = this@branch.meta
override fun iterator(): Iterator<NamedData<T>> = iterator {
for(d in this@branch){
d.name.removeHeadOrNull(branchName)?.let { name ->
yield(d.data.named(name))
}
}
}
override suspend fun getData(name: Name): Data<T>? = this@branch.getData(branchName + name)
override fun get(name: Name): Data<T>? = this@branch.get(branchName + name)
override val updates: Flow<Name> get() = this@branch.updates.mapNotNull { it.removeHeadOrNull(branchName) }
}
@ -70,5 +104,5 @@ public fun <T : Any> DataSet<T>.branch(branchName: Name): DataSet<T> = if (branc
public fun <T : Any> DataSet<T>.branch(branchName: String): DataSet<T> = this@branch.branch(Name.parse(branchName))
@DFExperimental
public suspend fun <T : Any> DataSet<T>.rootData(): Data<T>? = getData(Name.EMPTY)
public suspend fun <T : Any> DataSet<T>.rootData(): Data<T>? = get(Name.EMPTY)

View File

@ -1,20 +0,0 @@
package space.kscience.dataforge.data
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MutableMeta
/**
* Get a metadata node for this set if it is present
*/
public suspend fun DataSet<*>.getMeta(): Meta? = getData(DataSet.META_KEY)?.meta
/**
* Add meta-data node to a [DataSet]
*/
public suspend fun DataSetBuilder<*>.meta(meta: Meta): Unit = emit(DataSet.META_KEY, Data.empty(meta))
/**
* Add meta-data node to a [DataSet]
*/
public suspend fun DataSetBuilder<*>.meta(mutableMeta: MutableMeta.() -> Unit): Unit = meta(Meta(mutableMeta))

View File

@ -1,18 +1,27 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.flow.*
import kotlinx.coroutines.flow.map
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MutableMeta
import space.kscience.dataforge.meta.seal
import space.kscience.dataforge.meta.toMutableMeta
import space.kscience.dataforge.misc.DFInternal
import kotlin.contracts.InvocationKind
import kotlin.contracts.contract
import space.kscience.dataforge.names.Name
import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.EmptyCoroutineContext
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
 * A [value] paired with a [meta].
 */
public data class ValueWithMeta<T>(val meta: Meta, val value: T)
/**
 * Await the value of this [Data] and return it together with the data's meta.
 */
public suspend fun <T : Any> Data<T>.awaitWithMeta(): ValueWithMeta<T> {
    return ValueWithMeta(meta = meta, value = await())
}
/**
 * A [value] paired with a [name] and a [meta].
 */
public data class NamedValueWithMeta<T>(val name: Name, val meta: Meta, val value: T)
/**
 * Await the value of this [NamedData] and return it together with the data's name and meta.
 */
public suspend fun <T : Any> NamedData<T>.awaitWithMeta(): NamedValueWithMeta<T> {
    return NamedValueWithMeta(name = name, meta = meta, value = await())
}
/**
* Lazily transform this data to another data. By convention [block] should not use external data (be pure).
* @param coroutineContext additional [CoroutineContext] elements used for data computation.
@ -48,13 +57,13 @@ public inline fun <T1 : Any, T2 : Any, reified R : Any> Data<T1>.combine(
public inline fun <T : Any, reified R : Any> Collection<Data<T>>.reduceToData(
coroutineContext: CoroutineContext = EmptyCoroutineContext,
meta: Meta = Meta.EMPTY,
crossinline block: suspend (Collection<T>) -> R,
crossinline block: suspend (List<ValueWithMeta<T>>) -> R,
): Data<R> = Data(
meta,
coroutineContext,
this
) {
block(map { it.await() })
block(map { it.awaitWithMeta() })
}
@DFInternal
@ -62,17 +71,16 @@ public fun <K, T : Any, R : Any> Map<K, Data<T>>.reduceToData(
outputType: KType,
coroutineContext: CoroutineContext = EmptyCoroutineContext,
meta: Meta = Meta.EMPTY,
block: suspend (Map<K, T>) -> R,
block: suspend (Map<K, ValueWithMeta<T>>) -> R,
): Data<R> = Data(
outputType,
meta,
coroutineContext,
this.values
) {
block(mapValues { it.value.await() })
block(mapValues { it.value.awaitWithMeta() })
}
/**
* Lazily reduce a [Map] of [Data] with any static key.
* @param K type of the map key
@ -82,56 +90,91 @@ public fun <K, T : Any, R : Any> Map<K, Data<T>>.reduceToData(
public inline fun <K, T : Any, reified R : Any> Map<K, Data<T>>.reduceToData(
coroutineContext: CoroutineContext = EmptyCoroutineContext,
meta: Meta = Meta.EMPTY,
noinline block: suspend (Map<K, T>) -> R,
crossinline block: suspend (Map<K, ValueWithMeta<T>>) -> R,
): Data<R> = Data(
meta,
coroutineContext,
this.values
) {
block(mapValues { it.value.await() })
block(mapValues { it.value.awaitWithMeta() })
}
//flow operations
//Iterable operations
/**
* Transform an [Iterable] of [Data] to a single [Data].
*/
@DFInternal
public suspend fun <T : Any, R : Any> Flow<NamedData<T>>.reduceToData(
public inline fun <T : Any, R : Any> Iterable<Data<T>>.reduceToData(
outputType: KType,
coroutineContext: CoroutineContext = EmptyCoroutineContext,
meta: Meta = Meta.EMPTY,
transformation: suspend (Flow<NamedData<T>>) -> R,
crossinline transformation: suspend (Collection<ValueWithMeta<T>>) -> R,
): Data<R> = Data(
outputType,
meta,
coroutineContext,
toList()
) {
transformation(this)
transformation(map { it.awaitWithMeta() })
}
@OptIn(DFInternal::class)
public suspend inline fun <T : Any, reified R : Any> Flow<NamedData<T>>.reduceToData(
public inline fun <T : Any, reified R : Any> Iterable<Data<T>>.reduceToData(
coroutineContext: CoroutineContext = EmptyCoroutineContext,
meta: Meta = Meta.EMPTY,
noinline transformation: suspend (Flow<NamedData<T>>) -> R,
crossinline transformation: suspend (Collection<ValueWithMeta<T>>) -> R,
): Data<R> = reduceToData(typeOf<R>(), coroutineContext, meta) {
transformation(it)
}
/**
* Fold an [Iterable] of [Data] into a single [Data]
*/
public suspend inline fun <T : Any, reified R : Any> Flow<NamedData<T>>.foldToData(
public inline fun <T : Any, reified R : Any> Iterable<Data<T>>.foldToData(
initial: R,
coroutineContext: CoroutineContext = EmptyCoroutineContext,
meta: Meta = Meta.EMPTY,
noinline block: suspend (result: R, data: NamedData<T>) -> R,
crossinline block: suspend (result: R, data: ValueWithMeta<T>) -> R,
): Data<R> = reduceToData(
coroutineContext, meta
) {
it.fold(initial, block)
it.fold(initial) { acc, t -> block(acc, t) }
}
/**
 * Transform an [Iterable] of [NamedData] to a single [Data].
 *
 * The receiver is traversed exactly once, when this function is called. The resulting snapshot is used
 * both as the dependency list of the produced [Data] and as the input of the lazy computation, so the
 * awaited values always correspond to the declared dependencies and one-shot iterables are supported.
 *
 * @param outputType the type of the resulting data.
 * @param coroutineContext additional context elements passed to the produced [Data].
 * @param meta the meta of the resulting data.
 * @param transformation computes the result from the awaited values (with names and meta) of all elements.
 */
@DFInternal
public inline fun <T : Any, R : Any> Iterable<NamedData<T>>.reduceNamedToData(
    outputType: KType,
    coroutineContext: CoroutineContext = EmptyCoroutineContext,
    meta: Meta = Meta.EMPTY,
    crossinline transformation: suspend (Collection<NamedValueWithMeta<T>>) -> R,
): Data<R> {
    // Snapshot once: previously the receiver was iterated here and then again inside the lazy block
    val dependencies = toList()
    return Data(
        outputType,
        meta,
        coroutineContext,
        dependencies
    ) {
        transformation(dependencies.map { it.awaitWithMeta() })
    }
}
/**
 * Transform an [Iterable] of [NamedData] to a single [Data]; the output type is taken from the reified [R].
 */
@OptIn(DFInternal::class)
public inline fun <T : Any, reified R : Any> Iterable<NamedData<T>>.reduceNamedToData(
    coroutineContext: CoroutineContext = EmptyCoroutineContext,
    meta: Meta = Meta.EMPTY,
    crossinline transformation: suspend (Collection<NamedValueWithMeta<T>>) -> R,
): Data<R> {
    val resultType = typeOf<R>()
    return reduceNamedToData(resultType, coroutineContext, meta) { awaited ->
        transformation(awaited)
    }
}
/**
 * Fold an [Iterable] of named data into a single [Data].
 * Starting from [initial], [block] is applied to the accumulated result and each awaited element in iteration order.
 */
public inline fun <T : Any, reified R : Any> Iterable<NamedData<T>>.foldNamedToData(
    initial: R,
    coroutineContext: CoroutineContext = EmptyCoroutineContext,
    meta: Meta = Meta.EMPTY,
    crossinline block: suspend (result: R, data: NamedValueWithMeta<T>) -> R,
): Data<R> = reduceNamedToData(coroutineContext, meta) { awaited ->
    var accumulated = initial
    for (element in awaited) {
        accumulated = block(accumulated, element)
    }
    accumulated
}
//DataSet operations
@ -141,41 +184,39 @@ public suspend fun <T : Any, R : Any> DataSet<T>.map(
outputType: KType,
coroutineContext: CoroutineContext = EmptyCoroutineContext,
metaTransform: MutableMeta.() -> Unit = {},
block: suspend (T) -> R,
block: suspend (NamedValueWithMeta<T>) -> R,
): DataTree<R> = DataTree<R>(outputType) {
populate(
flowData().map {
val newMeta = it.meta.toMutableMeta().apply(metaTransform).seal()
Data(outputType, newMeta, coroutineContext, listOf(it)) {
block(it.await())
}.named(it.name)
forEach {
val newMeta = it.meta.toMutableMeta().apply(metaTransform).seal()
val d = Data(outputType, newMeta, coroutineContext, listOf(it)) {
block(it.awaitWithMeta())
}
)
data(it.name, d)
}
}
@OptIn(DFInternal::class)
public suspend inline fun <T : Any, reified R : Any> DataSet<T>.map(
coroutineContext: CoroutineContext = EmptyCoroutineContext,
noinline metaTransform: MutableMeta.() -> Unit = {},
noinline block: suspend (T) -> R,
noinline block: suspend (NamedValueWithMeta<T>) -> R,
): DataTree<R> = map(typeOf<R>(), coroutineContext, metaTransform, block)
public suspend fun <T : Any> DataSet<T>.forEach(block: suspend (NamedData<T>) -> Unit) {
contract { callsInPlace(block, InvocationKind.EXACTLY_ONCE) }
flowData().collect {
block(it)
public inline fun <T : Any> DataSet<T>.forEach(block: (NamedData<T>) -> Unit) {
for (d in this) {
block(d)
}
}
public suspend inline fun <T : Any, reified R : Any> DataSet<T>.reduceToData(
public inline fun <T : Any, reified R : Any> DataSet<T>.reduceToData(
coroutineContext: CoroutineContext = EmptyCoroutineContext,
meta: Meta = Meta.EMPTY,
noinline transformation: suspend (Flow<NamedData<T>>) -> R,
): Data<R> = flowData().reduceToData(coroutineContext, meta, transformation)
crossinline transformation: suspend (Iterable<NamedValueWithMeta<T>>) -> R,
): Data<R> = asIterable().reduceNamedToData(coroutineContext, meta, transformation)
public suspend inline fun <T : Any, reified R : Any> DataSet<T>.foldToData(
public inline fun <T : Any, reified R : Any> DataSet<T>.foldToData(
initial: R,
coroutineContext: CoroutineContext = EmptyCoroutineContext,
meta: Meta = Meta.EMPTY,
noinline block: suspend (result: R, data: NamedData<T>) -> R,
): Data<R> = flowData().foldToData(initial, coroutineContext, meta, block)
crossinline block: suspend (result: R, data: NamedValueWithMeta<T>) -> R,
): Data<R> = asIterable().foldNamedToData(initial, coroutineContext, meta, block)

View File

@ -0,0 +1,2 @@
package space.kscience.dataforge.data

View File

@ -0,0 +1,85 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.filter
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.Name
import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.EmptyCoroutineContext
import kotlin.reflect.KType
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.typeOf
/**
 * View this data as [Data] of [R] when its declared type is a subtype of [type]; otherwise return null.
 * The returned wrapper delegates everything to the original data but reports [type] as its type.
 */
@Suppress("UNCHECKED_CAST")
private fun <R : Any> Data<*>.castOrNull(type: KType): Data<R>? {
    if (!this.type.isSubtypeOf(type)) return null
    val source = this as Data<R>
    return object : Data<R> by source {
        override val type: KType = type
    }
}
/**
* Select all data matching the given type and filter. Does not modify paths.
*
* The result is a live wrapper: iteration, lookups and updates are delegated to the receiver on each use.
*
* @param type the required data type; an item is accepted when its own type is a subtype of [type]
* @param predicate additional filtering condition based on item name and meta. By default, accepts all
*/
@OptIn(DFExperimental::class)
public fun <R : Any> DataSet<*>.filterByType(
type: KType,
predicate: (name: Name, meta: Meta) -> Boolean = { _, _ -> true },
): DataSource<R> = object : DataSource<R> {
override val dataType = type
// Reuse the context of the receiver when it is a DataSource itself, otherwise fall back to an empty context
override val coroutineContext: CoroutineContext
get() = (this@filterByType as? DataSource)?.coroutineContext ?: EmptyCoroutineContext
override val meta: Meta get() = this@filterByType.meta
// An item passes when its type is a subtype of the requested type and the user predicate accepts it
private fun checkDatum(name: Name, datum: Data<*>): Boolean = datum.type.isSubtypeOf(type)
&& predicate(name, datum.meta)
override fun iterator(): Iterator<NamedData<R>> = iterator {
for(d in this@filterByType){
if(checkDatum(d.name,d.data)){
// NOTE(review): items are yielded via an unchecked cast, not via castOrNull, so they keep their original `type`
@Suppress("UNCHECKED_CAST")
yield(d as NamedData<R>)
}
}
}
override fun get(name: Name): Data<R>? = this@filterByType[name]?.let { datum ->
if (checkDatum(name, datum)) datum.castOrNull(type) else null
}
// Initialized once at construction; passes only names that currently resolve through this wrapper's own `get`
override val updates: Flow<Name> = this@filterByType.updates.filter { name ->
get(name)?.let { datum ->
checkDatum(name, datum)
} ?: false
}
}
/**
 * Select all data matching the type [R] and the given [predicate]. Does not modify paths.
 *
 * Reified shortcut that passes `typeOf<R>()` to the [KType]-based overload.
 *
 * @param predicate additional filtering condition based on item name and meta. By default, accepts all
 */
public inline fun <reified R : Any> DataSet<*>.filterByType(
    noinline predicate: (name: Name, meta: Meta) -> Boolean = { _, _ -> true },
): DataSet<R> {
    val requestedType = typeOf<R>()
    return filterByType(requestedType, predicate)
}
/**
 * Look up a single datum by [name] and return it as [NamedData] of type [R].
 *
 * Returns `null` when there is no datum with this name or when its type is not compatible with [type].
 */
public fun <R : Any> DataSet<*>.getByType(type: KType, name: Name): NamedData<R>? {
    val datum = get(name) ?: return null
    val typed = datum.castOrNull<R>(type) ?: return null
    return typed.named(name)
}
/**
 * Reified shortcut: look up a single datum by [name], using `typeOf<R>()` as the requested type.
 */
public inline fun <reified R : Any> DataSet<*>.getByType(name: Name): NamedData<R>? {
    val requestedType = typeOf<R>()
    return getByType<R>(requestedType, name)
}
/**
 * Reified shortcut: parse [name] into a [Name] and look up a single datum of type [R] under it.
 */
public inline fun <reified R : Any> DataSet<*>.getByType(name: String): NamedData<R>? {
    val parsedName = Name.parse(name)
    return getByType<R>(typeOf<R>(), parsedName)
}

View File

@ -0,0 +1,40 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Job
import kotlinx.coroutines.launch
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.plus
/**
 * Append [data] to the [DataSetBuilder] in context, under the name parsed from this string.
 */
context(DataSetBuilder<T>) public infix fun <T : Any> String.put(data: Data<T>): Unit {
    val target = Name.parse(this)
    data(target, data)
}
/**
 * Append [dataSet] as a node to the [DataSetBuilder] in context, under the name parsed from this string.
 */
context(DataSetBuilder<T>) public infix fun <T : Any> String.put(dataSet: DataSet<T>): Unit {
    val target = Name.parse(this)
    node(target, dataSet)
}
/**
 * Build a node with [block] and append it to the [DataSetBuilder] in context, under the name parsed from this
 * string.
 */
context(DataSetBuilder<T>) public infix fun <T : Any> String.put(
    block: DataSetBuilder<T>.() -> Unit,
): Unit {
    val target = Name.parse(this)
    node(target, block)
}
/**
 * Copy the given [dataSet] into the [DataSetBuilder] in context under [name] and mirror its subsequent updates
 * there. Both the initial copy and the mirroring run in a coroutine launched in this [CoroutineScope].
 *
 * @return the update [Job]
 */
context(DataSetBuilder<T>) public fun <T : Any> CoroutineScope.setAndWatch(
    name: Name,
    dataSet: DataSet<T>,
): Job = launch {
    // Initial snapshot of the source set.
    node(name, dataSet)
    // Forward each changed item; the looked-up value is passed on as is, including `null`.
    dataSet.updates.collect { changed ->
        val current = dataSet.get(changed)
        data(name + changed, current)
    }
}

View File

@ -1,81 +0,0 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.filter
import kotlinx.coroutines.flow.map
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.matches
import kotlin.reflect.KType
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.typeOf
/**
 * View this datum as [Data] of type [R].
 *
 * Returns `null` when the declared type of the datum is not a subtype of [type]. Otherwise, the result delegates
 * everything to the receiver except the `type` property, which is overridden.
 */
@Suppress("UNCHECKED_CAST")
private fun <R : Any> Data<*>.castOrNull(type: KType): Data<R>? {
    // Guard clause: incompatible declared type means no cast is possible.
    if (!this.type.isSubtypeOf(type)) return null
    val typed = this as Data<R>
    return object : Data<R> by typed {
        override val type: KType = type
    }
}
/**
 * Select all data matching given type and filters. Does not modify paths.
 *
 * The result is a live view over the receiver: data flow, lookup and updates are forwarded and filtered on the fly.
 *
 * @param type a datum is accepted only when its declared type is a subtype of this type
 * @param namePattern a name match pattern according to [Name.matches]; `null` accepts every name
 * @param filter additional filtering condition based on item name and meta. By default, accepts all
 */
@OptIn(DFExperimental::class)
public fun <R : Any> DataSet<*>.select(
    type: KType,
    namePattern: Name? = null,
    filter: (name: Name, meta: Meta) -> Boolean = { _, _ -> true }
): ActiveDataSet<R> = object : ActiveDataSet<R> {
    override val dataType = type

    /** Checks, in order: type compatibility, the optional name pattern, and the user filter. */
    private fun checkDatum(name: Name, datum: Data<*>): Boolean {
        if (!datum.type.isSubtypeOf(type)) return false
        if (namePattern != null && !name.matches(namePattern)) return false
        return filter(name, datum.meta)
    }

    override fun flowData(): Flow<NamedData<R>> {
        val accepted = this@select.flowData().filter { item -> checkDatum(item.name, item.data) }
        return accepted.map { item ->
            @Suppress("UNCHECKED_CAST")
            item as NamedData<R>
        }
    }

    override suspend fun getData(name: Name): Data<R>? {
        val datum = this@select.getData(name) ?: return null
        return if (checkDatum(name, datum)) datum.castOrNull(type) else null
    }

    override val updates: Flow<Name> = this@select.updates.filter { changed ->
        this@select.getData(changed)?.let { datum -> checkDatum(changed, datum) } ?: false
    }
}
/**
 * Select all data matching the type [R] and the given filters. Does not modify paths.
 *
 * Reified shortcut that passes `typeOf<R>()` to the [KType]-based overload.
 *
 * @param namePattern a name match pattern according to [Name.matches]; `null` accepts every name
 * @param filter additional filtering condition based on item name and meta. By default, accepts all
 */
public inline fun <reified R : Any> DataSet<*>.select(
    namePattern: Name? = null,
    noinline filter: (name: Name, meta: Meta) -> Boolean = { _, _ -> true }
): DataSet<R> {
    val requestedType = typeOf<R>()
    return select(requestedType, namePattern, filter)
}
/**
 * Look up a single datum by [name] and return it as [NamedData] of type [R].
 *
 * Returns `null` when there is no datum with this name or when its type is not compatible with [type].
 */
public suspend fun <R : Any> DataSet<*>.selectOne(type: KType, name: Name): NamedData<R>? {
    val datum = getData(name) ?: return null
    val typed = datum.castOrNull<R>(type) ?: return null
    return typed.named(name)
}
/**
 * Reified shortcut: look up a single datum by [name], using `typeOf<R>()` as the requested type.
 */
public suspend inline fun <reified R : Any> DataSet<*>.selectOne(name: Name): NamedData<R>? {
    val requestedType = typeOf<R>()
    return selectOne<R>(requestedType, name)
}
/**
 * Reified shortcut: parse [name] into a [Name] and look up a single datum of type [R] under it.
 */
public suspend inline fun <reified R : Any> DataSet<*>.selectOne(name: String): NamedData<R>? {
    val parsedName = Name.parse(name)
    return selectOne<R>(typeOf<R>(), parsedName)
}

View File

@ -1,42 +1,50 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.delay
import kotlinx.coroutines.test.runTest
import org.junit.jupiter.api.Test
import space.kscience.dataforge.actions.Action
import space.kscience.dataforge.actions.invoke
import space.kscience.dataforge.actions.map
import space.kscience.dataforge.misc.DFExperimental
import kotlin.test.assertEquals
@Suppress("EXPERIMENTAL_API_USAGE")
class ActionsTest {
val data: DataTree<Int> = runBlocking {
DataTree {
@OptIn(DFExperimental::class, ExperimentalCoroutinesApi::class)
internal class ActionsTest {
@Test
fun testStaticMapAction() = runTest {
val data: DataTree<Int> = DataTree {
repeat(10) {
static(it.toString(), it)
}
}
}
@Test
fun testStaticMapAction() {
val plusOne = Action.map<Int, Int> {
result { it + 1 }
}
runBlocking {
val result = plusOne.execute(data)
assertEquals(2, result.getData("1")?.await())
}
val result = plusOne(data)
assertEquals(2, result["1"]?.await())
}
@Test
fun testDynamicMapAction() {
fun testDynamicMapAction() = runTest {
val data: DataSourceBuilder<Int> = DataSource()
val plusOne = Action.map<Int, Int> {
result { it + 1 }
}
val datum = runBlocking {
val result = plusOne.execute(data, scope = this)
result.getData("1")?.await()
val result = plusOne(data)
repeat(10) {
data.static(it.toString(), it)
}
assertEquals(2, datum)
delay(20)
assertEquals(2, result["1"]?.await())
data.close()
}
}

View File

@ -1,7 +1,6 @@
package space.kscience.dataforge.data
import kotlinx.coroutines.*
import kotlinx.coroutines.flow.collect
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.asName
import kotlin.test.Test
@ -20,10 +19,10 @@ internal class DataTreeBuilderTest {
static("c.f", "c.f")
}
runBlocking {
assertEquals("a", node.getData("primary.a")?.await())
assertEquals("b", node.getData("primary.b")?.await())
assertEquals("c.d", node.getData("c.d")?.await())
assertEquals("c.f", node.getData("c.f")?.await())
assertEquals("a", node["primary.a"]?.await())
assertEquals("b", node["primary.b"]?.await())
assertEquals("c.d", node["c.d"]?.await())
assertEquals("c.f", node["c.f"]?.await())
}
}
@ -43,12 +42,12 @@ internal class DataTreeBuilderTest {
static("b", "b")
}
static("root", "root")
populate(updateData)
populateFrom(updateData)
}
runBlocking {
assertEquals("a", node.getData("update.a")?.await())
assertEquals("a", node.getData("primary.a")?.await())
assertEquals("a", node["update.a"]?.await())
assertEquals("a", node["primary.a"]?.await())
}
}
@ -57,7 +56,7 @@ internal class DataTreeBuilderTest {
try {
lateinit var updateJob: Job
supervisorScope {
val subNode = ActiveDataTree<Int> {
val subNode = DataSource<Int> {
updateJob = launch {
repeat(10) {
delay(10)
@ -71,8 +70,8 @@ internal class DataTreeBuilderTest {
println(it)
}
}
val rootNode = ActiveDataTree<Int> {
setAndObserve("sub".asName(), subNode)
val rootNode = DataSource<Int> {
setAndWatch("sub".asName(), subNode)
}
launch {
@ -81,11 +80,11 @@ internal class DataTreeBuilderTest {
}
}
updateJob.join()
assertEquals(9, rootNode.getData("sub.value")?.await())
assertEquals(9, rootNode["sub.value"]?.await())
cancel()
}
} catch (t: Throwable) {
if (t !is CancellationException) throw t
if (t !is CancellationException) throw t
}
}

View File

@ -1,19 +1,16 @@
package space.kscience.dataforge.distributed.serialization
import kotlinx.coroutines.Deferred
import kotlinx.coroutines.async
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.asFlow
import kotlinx.coroutines.flow.map
import kotlinx.coroutines.flow.toList
import kotlinx.serialization.KSerializer
import kotlinx.serialization.Serializable
import space.kscience.dataforge.data.Data
import space.kscience.dataforge.data.DataSet
import space.kscience.dataforge.data.NamedData
import space.kscience.dataforge.data.asIterable
import space.kscience.dataforge.data.component1
import space.kscience.dataforge.data.component2
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.names.Name
import kotlin.reflect.KType
@ -21,21 +18,21 @@ import kotlin.reflect.KType
* [DataSet] representation that is trivially serializable.
*/
@Serializable
internal data class DataSetPrototype(val data: Map<String, DataPrototype>) {
internal data class DataSetPrototype(val meta: Meta, val data: Map<String, DataPrototype>) {
fun <T : Any> toDataSet(type: KType, serializer: KSerializer<T>): DataSet<T> {
val data = data
.mapKeys { (name, _) -> Name.of(name) }
.mapValues { (_, dataPrototype) -> dataPrototype.toData(type, serializer) }
return SerializableDataSetImpl(type, data)
return SerializableDataSetImpl(type, data, meta)
}
companion object {
suspend fun <T : Any> of(dataSet: DataSet<T>, serializer: KSerializer<T>): DataSetPrototype = coroutineScope {
val flow = mutableListOf<Pair<String, Deferred<DataPrototype>>>()
dataSet.flowData().map { (name, data) ->
val prototypes = dataSet.asIterable().map { (name, data) ->
name.toString() to async { DataPrototype.of(data, serializer) }
}.toList(flow)
DataSetPrototype(flow.associate { (name, deferred) -> name to deferred.await() })
}
val map = prototypes.associate { (name, deferred) -> name to deferred.await() }
DataSetPrototype(dataSet.meta, map)
}
}
}
@ -46,16 +43,20 @@ internal data class DataSetPrototype(val data: Map<String, DataPrototype>) {
private class SerializableDataSetImpl<T : Any>(
override val dataType: KType,
private val data: Map<Name, Data<T>>,
override val meta: Meta,
) : DataSet<T> {
override fun flowData(): Flow<NamedData<T>> =
data.map { (name, data) -> SimpleNamedData(name, data) }.asFlow()
override suspend fun getData(name: Name): Data<T>? = data[name]
/**
* Trivial named data implementation.
*/
private class SimpleNamedData<T : Any>(override val name: Name, override val data: Data<T>) :
NamedData<T>, Data<T> by data
override fun iterator(): Iterator<NamedData<T>> =
data
.asSequence()
.map { (name, data) -> SimpleNamedData(name, data) }
.iterator()
override fun get(name: Name): Data<T>? = data[name]
}

View File

@ -6,8 +6,9 @@ import space.kscience.dataforge.context.PluginFactory
import space.kscience.dataforge.context.PluginTag
import space.kscience.dataforge.context.info
import space.kscience.dataforge.context.logger
import space.kscience.dataforge.data.data
import space.kscience.dataforge.data.getByType
import space.kscience.dataforge.data.map
import space.kscience.dataforge.data.select
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.asName
@ -22,9 +23,8 @@ internal class MyPlugin1 : WorkspacePlugin() {
val task by task<Int>(serializer()) {
workspace.logger.info { "In ${tag.name}.task" }
val myInt = workspace.data.select<Int>()
val res = myInt.getData("int".asName())!!
emit("result".asName(), res.map { it + 1 })
val myInt = workspace.data.getByType<Int>("int")!!
data("result", myInt.data.map { it + 1 })
}
companion object Factory : PluginFactory<MyPlugin1> {
@ -45,8 +45,8 @@ internal class MyPlugin2 : WorkspacePlugin() {
val task by task<Int>(serializer()) {
workspace.logger.info { "In ${tag.name}.task" }
val dataSet = fromTask<Int>(Name.of(MyPlugin1.tag.name, "task"))
val data = dataSet.getData("result".asName())!!
emit("result".asName(), data.map { it + 1 })
val data = dataSet["result".asName()]!!
data("result", data.map { it + 1 })
}
companion object Factory : PluginFactory<MyPlugin2> {

View File

@ -8,7 +8,7 @@ import org.junit.jupiter.api.TestInstance
import space.kscience.dataforge.context.Global
import space.kscience.dataforge.data.DataTree
import space.kscience.dataforge.data.await
import space.kscience.dataforge.data.getData
import space.kscience.dataforge.data.get
import space.kscience.dataforge.data.static
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.names.Name
@ -59,10 +59,9 @@ internal class RemoteCallTest {
@Test
fun `local execution`() = runBlocking {
assertEquals(42, worker1.data.getData("int")!!.await())
assertEquals(42, worker1.data["int".asName()]!!.await())
val res = worker1
.produce(Name.of(MyPlugin1.tag.name, "task"), Meta.EMPTY)
.getData("result".asName())!!
.produce(Name.of(MyPlugin1.tag.name, "task"), Meta.EMPTY)["result"]!!
.await()
assertEquals(43, res)
}
@ -70,8 +69,7 @@ internal class RemoteCallTest {
@Test
fun `remote execution`() = runBlocking {
val remoteRes = workspace
.produce(Name.of(MyPlugin1.tag.name, "task"), Meta.EMPTY)
.getData("result".asName())!!
.produce(Name.of(MyPlugin1.tag.name, "task"), Meta.EMPTY)["result"]!!
.await()
assertEquals(43, remoteRes)
}
@ -79,8 +77,7 @@ internal class RemoteCallTest {
@Test
fun `transitive execution`() = runBlocking {
val remoteRes = workspace
.produce(Name.of(MyPlugin2.tag.name, "task"), Meta.EMPTY)
.getData("result".asName())!!
.produce(Name.of(MyPlugin2.tag.name, "task"), Meta.EMPTY)["result"]!!
.await()
assertEquals(44, remoteRes)
}

View File

@ -1,3 +1,5 @@
import ru.mipt.npm.gradle.KScienceVersions
plugins {
id("ru.mipt.npm.gradle.mpp")
id("ru.mipt.npm.gradle.native")
@ -18,7 +20,7 @@ kotlin {
commonMain {
dependencies {
api(project(":dataforge-context"))
api(npmlibs.ktor.io)
api("io.ktor:ktor-io:${KScienceVersions.ktorVersion}")
}
}
}

View File

@ -2,68 +2,48 @@ package space.kscience.dataforge.io.yaml
import io.ktor.utils.io.core.Input
import io.ktor.utils.io.core.Output
import io.ktor.utils.io.core.buildPacket
import io.ktor.utils.io.core.readBytes
import io.ktor.utils.io.core.readUTF8Line
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.context.Global
import space.kscience.dataforge.io.*
import space.kscience.dataforge.io.IOFormat.Companion.META_KEY
import space.kscience.dataforge.io.IOFormat.Companion.NAME_KEY
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.plus
@DFExperimental
public class FrontMatterEnvelopeFormat(
private val io: IOPlugin,
private val meta: Meta = Meta.EMPTY,
) : EnvelopeFormat {
override fun readPartial(input: Input): PartialEnvelope {
var line: String
var offset = 0u
do {
line = input.readUTF8Line() ?: error("Input does not contain front matter separator")
offset += line.encodeToByteArray().size.toUInt()
} while (!line.startsWith(SEPARATOR))
var offset = 0
val readMetaFormat =
metaTypeRegex.matchEntire(line)?.groupValues?.first()
?.let { io.resolveMetaFormat(it) } ?: YamlMetaFormat
offset += input.discardWithSeparator(
SEPARATOR.encodeToByteArray(),
atMost = 1024,
skipUntilEndOfLine = false
)
val line = input.readSafeUtf8Line()
val readMetaFormat = line.trim().takeIf { it.isNotBlank() }?.let { io.resolveMetaFormat(it) } ?: YamlMetaFormat
//TODO replace by preview
val meta = Binary {
do {
line = input.readSafeUtf8Line()
writeUtf8String(line + "\r\n")
offset += line.encodeToByteArray().size.toUInt()
} while (!line.startsWith(SEPARATOR))
}.read {
readMetaFormat.readMeta(input)
val packet = buildPacket {
offset += input.readBytesWithSeparatorTo(
this,
SEPARATOR.encodeToByteArray(),
skipUntilEndOfLine = true
)
}
val meta = readMetaFormat.readMeta(packet)
return PartialEnvelope(meta, offset, null)
}
override fun readObject(input: Input): Envelope {
var line: String
do {
line = input.readSafeUtf8Line() //?: error("Input does not contain front matter separator")
} while (!line.startsWith(SEPARATOR))
val readMetaFormat =
metaTypeRegex.matchEntire(line)?.groupValues?.first()
?.let { io.resolveMetaFormat(it) } ?: YamlMetaFormat
val meta = Binary {
do {
writeUtf8String(input.readSafeUtf8Line() + "\r\n")
} while (!line.startsWith(SEPARATOR))
}.read {
readMetaFormat.readMeta(input)
}
val bytes = input.readBytes()
val data = bytes.asBinary()
return SimpleEnvelope(meta, data)
val partial = readPartial(input)
val data = input.readBytes().asBinary()
return SimpleEnvelope(partial.meta, data)
}
override fun writeEnvelope(
@ -82,16 +62,13 @@ public class FrontMatterEnvelopeFormat(
}
}
override fun toMeta(): Meta = Meta {
NAME_KEY put name.toString()
META_KEY put meta
}
public companion object : EnvelopeFormatFactory {
public const val SEPARATOR: String = "---"
private val metaTypeRegex = "---(\\w*)\\s*".toRegex()
override val name: Name = EnvelopeFormatFactory.ENVELOPE_FACTORY_NAME + "frontMatter"
override fun build(context: Context, meta: Meta): EnvelopeFormat {
return FrontMatterEnvelopeFormat(context.io, meta)
}
@ -115,7 +92,7 @@ public class FrontMatterEnvelopeFormat(
envelope: Envelope,
metaFormatFactory: MetaFormatFactory,
formatMeta: Meta,
): Unit = FrontMatterEnvelopeFormat.default.writeEnvelope(output, envelope, metaFormatFactory, formatMeta)
): Unit = default.writeEnvelope(output, envelope, metaFormatFactory, formatMeta)
override fun readObject(input: Input): Envelope = default.readObject(input)

View File

@ -4,8 +4,6 @@ import io.ktor.utils.io.core.Input
import io.ktor.utils.io.core.Output
import net.mamoe.yamlkt.*
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.io.IOFormat.Companion.META_KEY
import space.kscience.dataforge.io.IOFormat.Companion.NAME_KEY
import space.kscience.dataforge.io.MetaFormat
import space.kscience.dataforge.io.MetaFormatFactory
import space.kscience.dataforge.io.readUtf8String
@ -14,7 +12,6 @@ import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.descriptors.MetaDescriptor
import space.kscience.dataforge.meta.descriptors.get
import space.kscience.dataforge.meta.isLeaf
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.NameToken
import space.kscience.dataforge.names.withIndex
import space.kscience.dataforge.values.ListValue
@ -95,7 +92,6 @@ public fun YamlMap.toMeta(): Meta = YamlMeta(this)
/**
* Represent meta as Yaml
*/
@DFExperimental
public class YamlMetaFormat(private val meta: Meta) : MetaFormat {
override fun writeMeta(output: Output, meta: Meta, descriptor: MetaDescriptor?) {
@ -109,11 +105,6 @@ public class YamlMetaFormat(private val meta: Meta) : MetaFormat {
return yaml.toMeta()
}
override fun toMeta(): Meta = Meta {
NAME_KEY put FrontMatterEnvelopeFormat.name.toString()
META_KEY put meta
}
public companion object : MetaFormatFactory {
override fun build(context: Context, meta: Meta): MetaFormat = YamlMetaFormat(meta)

View File

@ -4,6 +4,7 @@ import space.kscience.dataforge.context.AbstractPlugin
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.context.PluginFactory
import space.kscience.dataforge.context.PluginTag
import space.kscience.dataforge.io.EnvelopeFormatFactory
import space.kscience.dataforge.io.IOPlugin
import space.kscience.dataforge.io.MetaFormatFactory
import space.kscience.dataforge.meta.Meta
@ -12,7 +13,6 @@ import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.asName
import kotlin.reflect.KClass
@DFExperimental
public class YamlPlugin(meta: Meta) : AbstractPlugin(meta) {
public val io: IOPlugin by require(IOPlugin)
@ -20,6 +20,7 @@ public class YamlPlugin(meta: Meta) : AbstractPlugin(meta) {
override fun content(target: String): Map<Name, Any> = when (target) {
MetaFormatFactory.META_FORMAT_TYPE -> mapOf("yaml".asName() to YamlMetaFormat)
EnvelopeFormatFactory.ENVELOPE_FORMAT_TYPE -> mapOf(FrontMatterEnvelopeFormat.name to FrontMatterEnvelopeFormat)
else -> super.content(target)
}

View File

@ -0,0 +1,37 @@
@file:OptIn(DFExperimental::class)
package space.kscience.dataforge.io.yaml
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.io.io
import space.kscience.dataforge.io.readEnvelope
import space.kscience.dataforge.io.toByteArray
import space.kscience.dataforge.meta.get
import space.kscience.dataforge.meta.string
import space.kscience.dataforge.misc.DFExperimental
import kotlin.test.Test
import kotlin.test.assertEquals
/**
 * Reads a front matter document through the IO plugin of a context with [YamlPlugin] loaded and checks both the
 * text body and a value from the parsed meta.
 */
internal class FrontMatterEnvelopeFormatTest {

    val context = Context {
        plugin(YamlPlugin)
    }

    @Test
    fun frontMatter() {
        // A meta header between two `---` separators, followed by the text body.
        val text = """
            ---
            content_type: magprog
            magprog_section: contacts
            section_title: Контакты
            language: ru
            ---
            Some text here
        """.trimIndent()

        val envelope = context.io.readEnvelope(text)

        val body = envelope.data!!.toByteArray().decodeToString().trim()
        assertEquals("Some text here", body)

        val contentType = envelope.meta["content_type"].string
        assertEquals("magprog", contentType)
    }
}

View File

@ -62,7 +62,7 @@ internal class ByteArrayBinary(
public fun ByteArray.asBinary(): Binary = ByteArrayBinary(this)
/**
* Produce a [buildByteArray] representing an exact copy of this [Binary]
* Produce a [ByteArray] representing an exact copy of this [Binary]
*/
public fun Binary.toByteArray(): ByteArray = if (this is ByteArrayBinary) {
array.copyOf() // TODO do we need to ensure data safety here?

View File

@ -34,7 +34,7 @@ public class EnvelopeBuilder : Envelope {
* Construct a data binary from given builder
*/
public inline fun data(block: Output.() -> Unit) {
data = buildByteArray { block() }.asBinary()
data = ByteArray { block() }.asBinary()
}
public fun seal(): Envelope = SimpleEnvelope(metaBuilder.seal(), data)

View File

@ -14,11 +14,12 @@ import kotlin.reflect.typeOf
/**
* A partially read envelope with meta, but without data
*/
public data class PartialEnvelope(val meta: Meta, val dataOffset: UInt, val dataSize: ULong?)
public data class PartialEnvelope(val meta: Meta, val dataOffset: Int, val dataSize: ULong?)
public interface EnvelopeFormat : IOFormat<Envelope> {
override val type: KType get() = typeOf<Envelope>()
override val type: KType get() = typeOf<Envelope>()
public val defaultMetaFormat: MetaFormatFactory get() = JsonMetaFormat
public fun readPartial(input: Input): PartialEnvelope
@ -39,7 +40,6 @@ public fun EnvelopeFormat.read(input: Input): Envelope = readObject(input)
@Type(ENVELOPE_FORMAT_TYPE)
public interface EnvelopeFormatFactory : IOFormatFactory<Envelope>, EnvelopeFormat {
override val name: Name get() = "envelope".asName()
override val type: KType get() = typeOf<Envelope>()
override fun build(context: Context, meta: Meta): EnvelopeFormat
@ -51,6 +51,7 @@ public interface EnvelopeFormatFactory : IOFormatFactory<Envelope>, EnvelopeForm
public fun peekFormat(io: IOPlugin, binary: Binary): EnvelopeFormat?
public companion object {
public val ENVELOPE_FACTORY_NAME: Name = "envelope".asName()
public const val ENVELOPE_FORMAT_TYPE: String = "io.format.envelope"
}
}

View File

@ -1,5 +1,6 @@
package space.kscience.dataforge.io
import space.kscience.dataforge.context.invoke
import space.kscience.dataforge.io.Envelope.Companion.ENVELOPE_NODE_KEY
import space.kscience.dataforge.io.PartDescriptor.Companion.DEFAULT_MULTIPART_DATA_SEPARATOR
import space.kscience.dataforge.io.PartDescriptor.Companion.MULTIPART_DATA_TYPE
@ -35,7 +36,7 @@ public typealias EnvelopeParts = List<EnvelopePart>
public fun EnvelopeBuilder.multipart(
parts: EnvelopeParts,
separator: String = DEFAULT_MULTIPART_DATA_SEPARATOR
separator: String = DEFAULT_MULTIPART_DATA_SEPARATOR,
) {
dataType = MULTIPART_DATA_TYPE
@ -67,17 +68,25 @@ public fun EnvelopeBuilder.multipart(
}
}
/**
* Put a list of envelopes as parts of given envelope
*/
public fun EnvelopeBuilder.envelopes(
envelopes: List<Envelope>,
format: EnvelopeFormat = TaggedEnvelopeFormat,
separator: String = DEFAULT_MULTIPART_DATA_SEPARATOR
formatFactory: EnvelopeFormatFactory = TaggedEnvelopeFormat,
formatMeta: Meta? = null,
separator: String = DEFAULT_MULTIPART_DATA_SEPARATOR,
) {
val parts = envelopes.map {
val binary = format.toBinary(it)
val format = formatMeta?.let { formatFactory(formatMeta) } ?: formatFactory
val binary = Binary(it, format)
EnvelopePart(binary, null)
}
meta{
set(MULTIPART_KEY + PART_FORMAT_KEY, format.toMeta())
meta {
(MULTIPART_KEY + PART_FORMAT_KEY) put {
IOFormatFactory.NAME_KEY put formatFactory.name.toString()
formatMeta?.let { IOFormatFactory.META_KEY put formatMeta }
}
}
multipart(parts, separator)
}

View File

@ -1,12 +1,13 @@
package space.kscience.dataforge.io
import io.ktor.utils.io.core.*
import io.ktor.utils.io.core.Input
import io.ktor.utils.io.core.Output
import io.ktor.utils.io.core.readDouble
import io.ktor.utils.io.core.writeDouble
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.context.Factory
import space.kscience.dataforge.io.IOFormat.Companion.NAME_KEY
import space.kscience.dataforge.io.IOFormatFactory.Companion.IO_FORMAT_TYPE
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MetaRepr
import space.kscience.dataforge.misc.Named
import space.kscience.dataforge.misc.Type
import space.kscience.dataforge.names.Name
@ -15,91 +16,64 @@ import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
* An interface for reading and writing objects with IO streams
* Reader of a custom object from input
*/
public interface IOFormat<T : Any> : MetaRepr {
public interface IOReader<out T> {
/**
* The type of object being read
*/
public val type: KType
public fun writeObject(output: Output, obj: T)
public fun readObject(input: Input): T
public companion object {
public val NAME_KEY: Name = "name".asName()
public val META_KEY: Name = "meta".asName()
}
}
public fun <T : Any> Input.readWith(format: IOFormat<T>): T = format.readObject(this@readWith)
public inline fun <reified T> IOReader(crossinline read: Input.() -> T): IOReader<T> = object : IOReader<T> {
override val type: KType = typeOf<T>()
public fun <T: Any> IOFormat<T>.readObject(binary: Binary): T = binary.read {
override fun readObject(input: Input): T = input.read()
}
public fun interface IOWriter<in T> {
public fun writeObject(output: Output, obj: T)
}
/**
* An interface for reading and writing objects with IO streams
*/
public interface IOFormat<T> : IOReader<T>, IOWriter<T>
public fun <T : Any> Input.readObject(format: IOReader<T>): T = format.readObject(this@readObject)
public fun <T : Any> IOFormat<T>.readObjectFrom(binary: Binary): T = binary.read {
readObject(this)
}
/**
* Read given binary as object using given format
*/
public fun <T : Any> Binary.readWith(format: IOFormat<T>): T = read {
readWith(format)
public fun <T : Any> Binary.readWith(format: IOReader<T>): T = read {
readObject(format)
}
public fun <T : Any> Output.writeWith(format: IOFormat<T>, obj: T): Unit =
format.run { writeObject(this@writeWith, obj) }
public fun <T : Any> Output.writeObject(format: IOWriter<T>, obj: T): Unit =
format.writeObject(this@writeObject, obj)
public inline fun <reified T : Any> IOFormat.Companion.listOf(
format: IOFormat<T>,
): IOFormat<List<T>> = object : IOFormat<List<T>> {
override val type: KType = typeOf<List<T>>()
override fun writeObject(output: Output, obj: List<T>) {
output.writeInt(obj.size)
format.run {
obj.forEach {
writeObject(output, it)
}
}
}
override fun readObject(input: Input): List<T> {
val size = input.readInt()
return format.run {
List(size) { readObject(input) }
}
}
override fun toMeta(): Meta = Meta {
NAME_KEY put "list"
"contentFormat" put format.toMeta()
}
}
//public fun ObjectPool<Buffer>.fill(block: Buffer.() -> Unit): Buffer {
// val buffer = borrow()
// return try {
// buffer.apply(block)
// } catch (ex: Exception) {
// //recycle(buffer)
// throw ex
// }
//}
@Type(IO_FORMAT_TYPE)
public interface IOFormatFactory<T : Any> : Factory<IOFormat<T>>, Named, MetaRepr {
public interface IOFormatFactory<T : Any> : Factory<IOFormat<T>>, Named {
/**
* Explicit type for dynamic type checks
*/
public val type: KType
override fun toMeta(): Meta = Meta {
NAME_KEY put name.toString()
}
public companion object {
public const val IO_FORMAT_TYPE: String = "io.format"
public val NAME_KEY: Name = "name".asName()
public val META_KEY: Name = "meta".asName()
}
}
public fun <T : Any> IOFormat<T>.toBinary(obj: T): Binary = Binary { writeObject(this, obj) }
public fun <T : Any> Binary(obj: T, format: IOWriter<T>): Binary = Binary { format.writeObject(this, obj) }
public object DoubleIOFormat : IOFormat<Double>, IOFormatFactory<Double> {
override fun build(context: Context, meta: Meta): IOFormat<Double> = this
@ -108,26 +82,9 @@ public object DoubleIOFormat : IOFormat<Double>, IOFormatFactory<Double> {
override val type: KType get() = typeOf<Double>()
override fun writeObject(output: Output, obj: kotlin.Double) {
override fun writeObject(output: Output, obj: Double) {
output.writeDouble(obj)
}
override fun readObject(input: Input): Double = input.readDouble()
}
//public object ValueIOFormat : IOFormat<Value>, IOFormatFactory<Value> {
// override fun invoke(meta: Meta, context: Context): IOFormat<Value> = this
//
// override val name: Name = "value".asName()
//
// override val type: KType get() = typeOf<Value>()
//
// override fun writeObject(output: Output, obj: Value) {
// BinaryMetaFormat.run { output.writeValue(obj) }
// }
//
// override fun readObject(input: Input): Value {
// return (BinaryMetaFormat.run { input.readMetaItem() } as? MetaItemValue)?.value
// ?: error("The item is not a value")
// }
//}
}

View File

@ -2,8 +2,6 @@ package space.kscience.dataforge.io
import space.kscience.dataforge.context.*
import space.kscience.dataforge.io.EnvelopeFormatFactory.Companion.ENVELOPE_FORMAT_TYPE
import space.kscience.dataforge.io.IOFormat.Companion.META_KEY
import space.kscience.dataforge.io.IOFormat.Companion.NAME_KEY
import space.kscience.dataforge.io.IOFormatFactory.Companion.IO_FORMAT_TYPE
import space.kscience.dataforge.io.MetaFormatFactory.Companion.META_FORMAT_TYPE
import space.kscience.dataforge.meta.Meta
@ -20,12 +18,12 @@ public class IOPlugin(meta: Meta) : AbstractPlugin(meta) {
}
public fun <T : Any> resolveIOFormat(item: Meta, type: KClass<out T>): IOFormat<T>? {
val key = item.string ?: item[NAME_KEY]?.string ?: error("Format name not defined")
val key = item.string ?: item[IOFormatFactory.NAME_KEY]?.string ?: error("Format name not defined")
val name = Name.parse(key)
return ioFormatFactories.find { it.name == name }?.let {
@Suppress("UNCHECKED_CAST")
if (it.type != type) error("Format type ${it.type} is not the same as requested type $type")
else it.build(context, item[META_KEY] ?: Meta.EMPTY) as IOFormat<T>
else it.build(context, item[IOFormatFactory.META_KEY] ?: Meta.EMPTY) as IOFormat<T>
}
}
@ -47,21 +45,24 @@ public class IOPlugin(meta: Meta) : AbstractPlugin(meta) {
envelopeFormatFactories.find { it.name == name }?.build(context, meta)
public fun resolveEnvelopeFormat(item: Meta): EnvelopeFormat? {
val name = item.string ?: item[NAME_KEY]?.string ?: error("Envelope format name not defined")
val meta = item[META_KEY] ?: Meta.EMPTY
val name = item.string ?: item[IOFormatFactory.NAME_KEY]?.string ?: error("Envelope format name not defined")
val meta = item[IOFormatFactory.META_KEY] ?: Meta.EMPTY
return resolveEnvelopeFormat(Name.parse(name), meta)
}
override fun content(target: String): Map<Name, Any> = when (target) {
META_FORMAT_TYPE -> defaultMetaFormats.toMap()
ENVELOPE_FORMAT_TYPE -> defaultEnvelopeFormats.toMap()
IO_FORMAT_TYPE -> content(META_FORMAT_TYPE) + content(ENVELOPE_FORMAT_TYPE)
else -> super.content(target)
}
public companion object : PluginFactory<IOPlugin> {
public val defaultMetaFormats: List<MetaFormatFactory> = listOf(JsonMetaFormat)
public val defaultEnvelopeFormats: List<EnvelopeFormatFactory> =
listOf(TaggedEnvelopeFormat, TaglessEnvelopeFormat)
public val defaultEnvelopeFormats: List<EnvelopeFormatFactory> = listOf(
TaggedEnvelopeFormat,
TaglessEnvelopeFormat
)
override val tag: PluginTag = PluginTag("io", group = PluginTag.DATAFORGE_GROUP)

View File

@ -8,30 +8,21 @@ import io.ktor.utils.io.core.Output
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonObject
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.io.IOFormat.Companion.NAME_KEY
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.descriptors.MetaDescriptor
import space.kscience.dataforge.meta.toJson
import space.kscience.dataforge.meta.toMeta
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
* A Json format for Meta representation
*/
public class JsonMetaFormat(private val json: Json = DEFAULT_JSON) : MetaFormat {
override val type: KType get() = typeOf<Meta>()
override fun writeMeta(output: Output, meta: Meta, descriptor: MetaDescriptor?) {
val jsonObject = meta.toJson(descriptor)
output.writeUtf8String(json.encodeToString(JsonObject.serializer(), jsonObject))
}
override fun toMeta(): Meta = Meta {
NAME_KEY put name.toString()
}
override fun readMeta(input: Input, descriptor: MetaDescriptor?): Meta {
val str = input.readUtf8String()//readByteArray().decodeToString()
val jsonElement = json.parseToJsonElement(str)

View File

@ -20,6 +20,7 @@ import kotlin.reflect.typeOf
* A format for meta serialization
*/
public interface MetaFormat : IOFormat<Meta> {
override val type: KType get() = typeOf<Meta>()
override fun writeObject(output: Output, obj: Meta) {
@ -54,9 +55,9 @@ public interface MetaFormatFactory : IOFormatFactory<Meta>, MetaFormat {
}
}
public fun Meta.toString(format: MetaFormat): String = buildByteArray {
public fun Meta.toString(format: MetaFormat): String = ByteArray {
format.run {
writeObject(this@buildByteArray, this@toString)
writeObject(this@ByteArray, this@toString)
}
}.decodeToString()

View File

@ -3,8 +3,6 @@ package space.kscience.dataforge.io
import io.ktor.utils.io.core.*
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.context.Global
import space.kscience.dataforge.io.IOFormat.Companion.META_KEY
import space.kscience.dataforge.io.IOFormat.Companion.NAME_KEY
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.enum
import space.kscience.dataforge.meta.get
@ -26,7 +24,7 @@ public class TaggedEnvelopeFormat(
// ?: error("Meta format with key $metaFormatKey could not be resolved in $io")
private fun Tag.toBinary() = Binary(24) {
private fun Tag.toBinary() = Binary {
writeRawString(START_SEQUENCE)
writeRawString(version.name)
writeShort(metaFormatKey)
@ -49,7 +47,7 @@ public class TaggedEnvelopeFormat(
formatMeta: Meta,
) {
val metaFormat = metaFormatFactory.build(this@TaggedEnvelopeFormat.io.context, formatMeta)
val metaBytes = metaFormat.toBinary(envelope.meta)
val metaBytes = Binary(envelope.meta,metaFormat)
val actualSize: ULong = (envelope.data?.size ?: 0).toULong()
val tag = Tag(metaFormatFactory.key, metaBytes.size.toUInt() + 2u, actualSize)
output.writeBinary(tag.toBinary())
@ -74,7 +72,7 @@ public class TaggedEnvelopeFormat(
val metaBinary = input.readBinary(tag.metaSize.toInt())
val meta: Meta = metaFormat.readObject(metaBinary)
val meta: Meta = metaFormat.readObjectFrom(metaBinary)
val data = input.readBinary(tag.dataSize.toInt())
@ -89,10 +87,10 @@ public class TaggedEnvelopeFormat(
val metaBinary = input.readBinary(tag.metaSize.toInt())
val meta: Meta = metaFormat.readObject(metaBinary)
val meta: Meta = metaFormat.readObjectFrom(metaBinary)
return PartialEnvelope(meta, version.tagSize + tag.metaSize, tag.dataSize)
return PartialEnvelope(meta, (version.tagSize + tag.metaSize).toInt(), tag.dataSize)
}
private data class Tag(
@ -106,18 +104,11 @@ public class TaggedEnvelopeFormat(
DF03(24u)
}
override fun toMeta(): Meta = Meta {
NAME_KEY put name.toString()
META_KEY put {
"version" put version
}
}
public companion object : EnvelopeFormatFactory {
private const val START_SEQUENCE = "#~"
private const val END_SEQUENCE = "~#\r\n"
override val name: Name = super.name + "tagged"
override val name: Name = EnvelopeFormatFactory.ENVELOPE_FACTORY_NAME + "tagged"
override fun build(context: Context, meta: Meta): EnvelopeFormat {
val io = context.io
@ -149,7 +140,7 @@ public class TaggedEnvelopeFormat(
override fun peekFormat(io: IOPlugin, binary: Binary): EnvelopeFormat? {
return try {
binary.read{
binary.read {
val header = readRawString(6)
return@read when (header.substring(2..5)) {
VERSION.DF02.name -> TaggedEnvelopeFormat(io, VERSION.DF02)

View File

@ -3,14 +3,12 @@ package space.kscience.dataforge.io
import io.ktor.utils.io.core.*
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.context.Global
import space.kscience.dataforge.io.IOFormat.Companion.META_KEY
import space.kscience.dataforge.io.IOFormat.Companion.NAME_KEY
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.get
import space.kscience.dataforge.meta.isEmpty
import space.kscience.dataforge.meta.string
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.asName
import space.kscience.dataforge.names.plus
import kotlin.collections.set
/**
@ -33,7 +31,7 @@ public class TaglessEnvelopeFormat(
output: Output,
envelope: Envelope,
metaFormatFactory: MetaFormatFactory,
formatMeta: Meta
formatMeta: Meta,
) {
val metaFormat = metaFormatFactory.build(this.io.context, formatMeta)
@ -50,11 +48,11 @@ public class TaglessEnvelopeFormat(
//Printing meta
if (!envelope.meta.isEmpty()) {
val metaBytes = metaFormat.toBinary(envelope.meta)
val metaBinary = Binary(envelope.meta, metaFormat)
output.writeProperty(META_LENGTH_PROPERTY,
metaBytes.size + 2)
metaBinary.size + 2)
output.writeUtf8String(this.metaStart + "\r\n")
output.writeBinary(metaBytes)
output.writeBinary(metaBinary)
output.writeRawString("\r\n")
}
@ -66,13 +64,16 @@ public class TaglessEnvelopeFormat(
}
override fun readObject(input: Input): Envelope {
var line: String
do {
line = input.readSafeUtf8Line() // ?: error("Input does not contain tagless envelope header")
} while (!line.startsWith(TAGLESS_ENVELOPE_HEADER))
//read preamble
input.discardWithSeparator(
TAGLESS_ENVELOPE_HEADER.encodeToByteArray(),
atMost = 1024,
skipUntilEndOfLine = true
)
val properties = HashMap<String, String>()
line = ""
var line = ""
while (line.isBlank() || line.startsWith("#?")) {
if (line.startsWith("#?")) {
val match = propertyPattern.find(line)
@ -80,9 +81,17 @@ public class TaglessEnvelopeFormat(
val (key, value) = match.destructured
properties[key] = value
}
//If can't read line, return envelope without data
if (input.endOfInput) return SimpleEnvelope(Meta.EMPTY, null)
line = input.readSafeUtf8Line()
try {
line = ByteArray {
try {
input.readBytesWithSeparatorTo(this, byteArrayOf('\n'.code.toByte()), 1024)
} catch (ex: BufferLimitExceededException) {
throw IllegalStateException("Property line exceeds maximum line length (1024)", ex)
}
}.decodeToString().trim()
} catch (ex: EOFException) {
return SimpleEnvelope(Meta.EMPTY, Binary.EMPTY)
}
}
var meta: Meta = Meta.EMPTY
@ -91,20 +100,18 @@ public class TaglessEnvelopeFormat(
val metaFormat = properties[META_TYPE_PROPERTY]?.let { io.resolveMetaFormat(it) } ?: JsonMetaFormat
val metaSize = properties[META_LENGTH_PROPERTY]?.toInt()
meta = if (metaSize != null) {
metaFormat.readObject(input.readBinary(metaSize))
metaFormat.readObjectFrom(input.readBinary(metaSize))
} else {
metaFormat.readObject(input)
error("Can't partially read an envelope with undefined meta size")
}
}
do {
try {
line = input.readSafeUtf8Line()
} catch (ex: EOFException) {
//returning an Envelope without data if end of input is reached
return SimpleEnvelope(meta, null)
}
} while (!line.startsWith(dataStart))
//skip until data start
input.discardWithSeparator(
dataStart.encodeToByteArray(),
atMost = 1024,
skipUntilEndOfLine = true
)
val data: Binary = if (properties.containsKey(DATA_LENGTH_PROPERTY)) {
input.readBinary(properties[DATA_LENGTH_PROPERTY]!!.toInt())
@ -112,24 +119,27 @@ public class TaglessEnvelopeFormat(
// readByteArray(bytes)
// bytes.asBinary()
} else {
Binary {
input.copyTo(this)
}
input.readBytes().asBinary()
}
return SimpleEnvelope(meta, data)
}
override fun readPartial(input: Input): PartialEnvelope {
var offset = 0u
var line: String
do {
line = input.readSafeUtf8Line()// ?: error("Input does not contain tagless envelope header")
offset += line.encodeToByteArray().size.toUInt()
} while (!line.startsWith(TAGLESS_ENVELOPE_HEADER))
var offset = 0
//read preamble
offset += input.discardWithSeparator(
TAGLESS_ENVELOPE_HEADER.encodeToByteArray(),
atMost = 1024,
skipUntilEndOfLine = true
)
val properties = HashMap<String, String>()
line = ""
var line = ""
while (line.isBlank() || line.startsWith("#?")) {
if (line.startsWith("#?")) {
val match = propertyPattern.find(line)
@ -138,10 +148,16 @@ public class TaglessEnvelopeFormat(
properties[key] = value
}
try {
line = input.readSafeUtf8Line()
offset += line.encodeToByteArray().size.toUInt()
line = ByteArray {
val read = try {
input.readBytesWithSeparatorTo(this, byteArrayOf('\n'.code.toByte()), 1024)
} catch (ex: BufferLimitExceededException) {
throw IllegalStateException("Property line exceeds maximum line length (1024)", ex)
}
offset += read
}.decodeToString().trim()
} catch (ex: EOFException) {
return PartialEnvelope(Meta.EMPTY, offset.toUInt(), 0.toULong())
return PartialEnvelope(Meta.EMPTY, offset, 0.toULong())
}
}
@ -151,28 +167,24 @@ public class TaglessEnvelopeFormat(
val metaFormat = properties[META_TYPE_PROPERTY]?.let { io.resolveMetaFormat(it) } ?: JsonMetaFormat
val metaSize = properties[META_LENGTH_PROPERTY]?.toInt()
meta = if (metaSize != null) {
offset += metaSize.toUInt()
metaFormat.readObject(input.readBinary(metaSize))
offset += metaSize
metaFormat.readObjectFrom(input.readBinary(metaSize))
} else {
error("Can't partially read an envelope with undefined meta size")
}
}
do {
line = input.readSafeUtf8Line() //?: return PartialEnvelope(Meta.EMPTY, offset.toUInt(), 0.toULong())
offset += line.encodeToByteArray().size.toUInt()
//returning an Envelope without data if end of input is reached
} while (!line.startsWith(dataStart))
//skip until data start
offset += input.discardWithSeparator(
dataStart.encodeToByteArray(),
atMost = 1024,
skipUntilEndOfLine = true
)
val dataSize = properties[DATA_LENGTH_PROPERTY]?.toULong()
return PartialEnvelope(meta, offset, dataSize)
}
override fun toMeta(): Meta = Meta {
NAME_KEY put name.toString()
META_KEY put meta
}
public companion object : EnvelopeFormatFactory {
private val propertyPattern = "#\\?\\s*([\\w.]*)\\s*:\\s*([^;]*);?".toRegex()
@ -192,7 +204,7 @@ public class TaglessEnvelopeFormat(
public const val code: Int = 0x4446544c //DFTL
override val name: Name = TAGLESS_ENVELOPE_TYPE.asName()
override val name: Name = EnvelopeFormatFactory.ENVELOPE_FACTORY_NAME + TAGLESS_ENVELOPE_TYPE
override fun build(context: Context, meta: Meta): EnvelopeFormat = TaglessEnvelopeFormat(context.io, meta)

View File

@ -1,8 +1,10 @@
package space.kscience.dataforge.io
import io.ktor.utils.io.bits.Memory
import io.ktor.utils.io.charsets.Charsets
import io.ktor.utils.io.charsets.decodeExactBytes
import io.ktor.utils.io.core.*
import io.ktor.utils.io.core.internal.ChunkBuffer
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFExperimental
import kotlin.math.min
@ -15,7 +17,6 @@ public fun Output.writeUtf8String(str: String) {
writeFully(str.encodeToByteArray())
}
@OptIn(ExperimentalIoApi::class)
public fun Input.readRawString(size: Int): String {
return Charsets.ISO_8859_1.newDecoder().decodeExactBytes(this, size)
}
@ -24,14 +25,11 @@ public fun Input.readUtf8String(): String = readBytes().decodeToString()
public fun Input.readSafeUtf8Line(): String = readUTF8Line() ?: error("Line not found")
public inline fun buildByteArray(expectedSize: Int = 16, block: Output.() -> Unit): ByteArray {
val builder = BytePacketBuilder(expectedSize)
builder.block()
return builder.build().readBytes()
}
/**
 * Build a [ByteArray] from everything [block] writes to the supplied [Output].
 */
public inline fun ByteArray(block: Output.() -> Unit): ByteArray {
    val packet = buildPacket(block)
    return packet.readBytes()
}
public inline fun Binary(expectedSize: Int = 16, block: Output.() -> Unit): Binary =
buildByteArray(expectedSize, block).asBinary()
/**
 * Build a [Binary] from everything [block] writes to the supplied [Output].
 */
public inline fun Binary(block: Output.() -> Unit): Binary {
    val bytes = ByteArray(block)
    return bytes.asBinary()
}
/**
* View section of a [Binary] as an independent binary
@ -90,11 +88,110 @@ public fun EnvelopeFormat.readBinary(binary: Binary): Envelope {
* A zero-copy read from
*/
@DFExperimental
public fun IOPlugin.readEnvelopeBinary(
public fun IOPlugin.readEnvelope(
binary: Binary,
readNonEnvelopes: Boolean = false,
formatPicker: IOPlugin.(Binary) -> EnvelopeFormat? = IOPlugin::peekBinaryEnvelopeFormat,
): Envelope = formatPicker(binary)?.readBinary(binary) ?: if (readNonEnvelopes) {
// if no format accepts file, read it as binary
SimpleEnvelope(Meta.EMPTY, binary)
} else error("Can't infer format for $binary")
} else error("Can't infer format for $binary")
/**
 * Read an [Envelope] from the UTF-8 bytes of [string] by delegating to the [Binary]-based `readEnvelope`.
 *
 * [readNonEnvelopes] and [formatPicker] are passed through unchanged.
 */
@DFExperimental
public fun IOPlugin.readEnvelope(
    string: String,
    readNonEnvelopes: Boolean = false,
    formatPicker: IOPlugin.(Binary) -> EnvelopeFormat? = IOPlugin::peekBinaryEnvelopeFormat,
): Envelope {
    val binary = string.encodeToByteArray().asBinary()
    return readEnvelope(binary, readNonEnvelopes, formatPicker)
}
/**
 * A fixed-capacity circular byte buffer backed by [buffer].
 *
 * While fewer bytes than the capacity have been pushed, [size] grows with every [push]. Once the ring
 * [isFull], each [push] overwrites the oldest byte, so the ring always holds the most recently pushed
 * bytes in insertion order (logical index `0` is the oldest one).
 */
private class RingByteArray(
    private val buffer: ByteArray,
    private var startIndex: Int = 0,
    var size: Int = 0,
) {
    /**
     * Get the byte at logical position [index], counted from the oldest stored byte.
     */
    operator fun get(index: Int): Byte {
        require(index >= 0) { "Index must be positive" }
        require(index < size) { "Index $index is out of circular buffer size $size" }
        return buffer[startIndex.forward(index)]
    }

    /**
     * `true` when the number of stored bytes equals the capacity of the backing array.
     */
    fun isFull(): Boolean = size == buffer.size

    /**
     * Append [element], evicting the oldest byte when the ring is already full.
     */
    fun push(element: Byte) {
        buffer[startIndex.forward(size)] = element
        // Wrap startIndex instead of incrementing it forever: an unbounded counter would eventually
        // overflow Int and make `%` produce a negative (out-of-bounds) position.
        if (isFull()) startIndex = startIndex.forward(1) else size++
    }

    // Physical position of the element that is n steps after this physical position.
    private fun Int.forward(n: Int): Int = (this + n) % (buffer.size)

    /**
     * Check that the ring is full and its content, oldest byte first, equals [inputArray].
     */
    fun compare(inputArray: ByteArray): Boolean = when {
        inputArray.size != buffer.size -> false
        size < buffer.size -> false
        else -> inputArray.indices.all { inputArray[it] == get(it) }
    }
}
/**
 * Read this [Input] into [output] until the designated multi-byte [separator] is encountered and, when
 * [skipUntilEndOfLine] is set, keep consuming bytes up to and including the next line feed
 * (or up to the end of input if no line feed follows).
 *
 * The separator itself and the bytes skipped after it are not written to [output].
 *
 * Fails with [IllegalStateException] when [atMost] bytes have been consumed or when the input ends before
 * [separator] is found. Fails with [IllegalArgumentException] when [separator] is empty.
 *
 * @return the number of bytes actually read, including the separator and the skipped rest of the line
 */
public fun Input.readBytesWithSeparatorTo(
    output: Output,
    separator: ByteArray,
    atMost: Int = Int.MAX_VALUE,
    skipUntilEndOfLine: Boolean = false,
): Int {
    // An empty separator would create a zero-capacity ring buffer and fail with a division by zero.
    require(separator.isNotEmpty()) { "Separator must not be empty" }
    var counter = 0
    val rb = RingByteArray(ByteArray(separator.size))
    var separatorFound = false
    takeWhile { buffer ->
        while (buffer.canRead()) {
            val byte = buffer.readByte()
            counter++
            if (counter >= atMost) error("Maximum number of bytes to be read $atMost reached.")
            //If end-of-line-search is on, terminate
            if (separatorFound) {
                if (endOfInput || byte == '\n'.code.toByte()) {
                    return counter
                }
            } else {
                rb.push(byte)
                if (rb.compare(separator)) {
                    separatorFound = true
                    if (!skipUntilEndOfLine) {
                        return counter
                    }
                } else if (rb.isFull()) {
                    // The oldest byte of the window can no longer be a part of the separator
                    output.writeByte(rb[0])
                }
            }
        }
        !endOfInput
    }
    // The separator was found, but the input ended before a line feed: there is nothing more to skip.
    if (separatorFound) return counter
    error("Read to the end of input without encountering ${separator.decodeToString()}")
}
/**
 * Consume this [Input] up to and including [separator] (see [readBytesWithSeparatorTo]) and drop
 * everything that was read.
 *
 * @return the number of bytes consumed, as reported by [readBytesWithSeparatorTo]
 */
public fun Input.discardWithSeparator(
    separator: ByteArray,
    atMost: Int = Int.MAX_VALUE,
    skipUntilEndOfLine: Boolean = false,
): Int {
    // A sink that silently drops everything written to it
    val dummy: Output = object : Output(ChunkBuffer.Pool) {
        override fun closeDestination() {
            // Do nothing
        }

        override fun flush(source: Memory, offset: Int, length: Int) {
            // Do nothing
        }
    }
    return try {
        readBytesWithSeparatorTo(dummy, separator, atMost, skipUntilEndOfLine)
    } finally {
        // Close the sink on both success and failure so that it does not keep its buffers
        dummy.close()
    }
}

View File

@ -1,5 +1,6 @@
package space.kscience.dataforge.io
import io.ktor.utils.io.core.ByteReadPacket
import io.ktor.utils.io.core.readDouble
import io.ktor.utils.io.core.writeDouble
import kotlin.test.Test
@ -9,10 +10,10 @@ import kotlin.test.assertEquals
class EnvelopeFormatTest {
val envelope = Envelope {
type = "test.format"
meta{
meta {
"d" put 22.2
}
data{
data {
writeDouble(22.2)
// repeat(2000){
// writeInt(it)
@ -21,12 +22,12 @@ class EnvelopeFormatTest {
}
@Test
fun testTaggedFormat(){
fun testTaggedFormat() {
TaggedEnvelopeFormat.run {
val byteArray = writeToByteArray(envelope)
//println(byteArray.decodeToString())
val res = readFromByteArray(byteArray)
assertEquals(envelope.meta,res.meta)
assertEquals(envelope.meta, res.meta)
val double = res.data?.read {
readDouble()
}
@ -35,12 +36,14 @@ class EnvelopeFormatTest {
}
@Test
fun testTaglessFormat(){
fun testTaglessFormat() {
TaglessEnvelopeFormat.run {
val byteArray = writeToByteArray(envelope)
//println(byteArray.decodeToString())
val partial = readPartial(ByteReadPacket(byteArray))
assertEquals(8, partial.dataSize?.toInt())
val res = readFromByteArray(byteArray)
assertEquals(envelope.meta,res.meta)
assertEquals(envelope.meta, res.meta)
val double = res.data?.read {
readDouble()
}

View File

@ -2,8 +2,10 @@ package space.kscience.dataforge.io
import io.ktor.utils.io.core.ByteReadPacket
import io.ktor.utils.io.core.readBytes
import io.ktor.utils.io.core.readUTF8Line
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertFails
class IOTest {
@Test
@ -14,4 +16,42 @@ class IOTest {
val second = input.readBytes(4)
assertEquals(4.toByte(), second[0])
}
// Checks readBytesWithSeparatorTo/discardWithSeparator on a small multi-line text with a "---" separator line.
@Test
fun readUntilSeparator() {
val source = """
aaa
bbb
---
ccc
ddd
""".trimIndent()
val binary = source.encodeToByteArray().asBinary()
binary.read {
val array = ByteArray {
val read = readBytesWithSeparatorTo(this, "---".encodeToByteArray(), skipUntilEndOfLine = true)
// 12 = "aaa\n" (4) + "bbb\n" (4) + "---" (3) + the line feed skipped after the separator (1)
assertEquals(12, read)
}
// Only the text before the separator is copied to the output
assertEquals("""
aaa
bbb
""".trimIndent(),array.decodeToString().trim())
// The input is positioned at the line that follows the separator line
assertEquals("ccc", readUTF8Line()?.trim())
}
// The separator does not occur within the first 3 bytes, so the atMost limit is hit
assertFails {
binary.read {
discardWithSeparator("---".encodeToByteArray(), atMost = 3)
}
}
// "-+-" never occurs in the source, so the end of input is reached without a match
assertFails {
binary.read{
discardWithSeparator("-+-".encodeToByteArray())
}
}
}
}

View File

@ -8,8 +8,8 @@ import kotlin.test.Test
import kotlin.test.assertEquals
fun Meta.toByteArray(format: MetaFormat = JsonMetaFormat) = buildByteArray {
format.writeObject(this@buildByteArray, this@toByteArray)
fun Meta.toByteArray(format: MetaFormat = JsonMetaFormat) = ByteArray {
format.writeObject(this@ByteArray, this@toByteArray)
}
fun MetaFormat.fromByteArray(packet: ByteArray): Meta {

View File

@ -32,18 +32,17 @@ class MultipartTest {
@Test
fun testParts() {
TaglessEnvelopeFormat.run {
val singleEnvelopeData = toBinary(envelopes[0])
val singleEnvelopeSize = singleEnvelopeData.size
val bytes = toBinary(partsEnvelope)
assertTrue(envelopes.size * singleEnvelopeSize < bytes.size)
val reconstructed = bytes.readWith(this)
println(reconstructed.meta)
val parts = reconstructed.parts()
val envelope = parts[2].envelope(io)
assertEquals(2, envelope.meta["value"].int)
println(reconstructed.data!!.size)
}
val format = TaglessEnvelopeFormat
val singleEnvelopeData = Binary(envelopes[0], format)
val singleEnvelopeSize = singleEnvelopeData.size
val bytes = Binary(partsEnvelope, format)
assertTrue(envelopes.size * singleEnvelopeSize < bytes.size)
val reconstructed = bytes.readWith(format)
println(reconstructed.meta)
val parts = reconstructed.parts()
val envelope = parts[2].envelope(io)
assertEquals(2, envelope.meta["value"].int)
println(reconstructed.data!!.size)
}
}

View File

@ -4,7 +4,7 @@ import io.ktor.utils.io.core.ByteReadPacket
import io.ktor.utils.io.core.use
fun <T : Any> IOFormat<T>.writeToByteArray(obj: T): ByteArray = buildByteArray {
fun <T : Any> IOFormat<T>.writeToByteArray(obj: T): ByteArray = ByteArray {
writeObject(this, obj)
}
fun <T : Any> IOFormat<T>.readFromByteArray(array: ByteArray): T = ByteReadPacket(array).use {

View File

@ -88,13 +88,45 @@ public fun EnvelopeFormat.readFile(path: Path): Envelope {
*/
@Suppress("UNCHECKED_CAST")
@DFExperimental
public inline fun <reified T : Any> IOPlugin.resolveIOFormat(): IOFormat<T>? {
return ioFormatFactories.find { it.type.isSupertypeOf(typeOf<T>()) } as IOFormat<T>?
public inline fun <reified T : Any> IOPlugin.resolveIOFormat(): IOFormat<T>? =
ioFormatFactories.find { it.type.isSupertypeOf(typeOf<T>()) } as IOFormat<T>?
public val IOPlugin.Companion.META_FILE_NAME: String get() = "@meta"
public val IOPlugin.Companion.DATA_FILE_NAME: String get() = "@data"
/**
 * Read file containing meta using given [formatOverride] or file extension to infer meta type.
 * If [path] is a directory, search it for a file whose name starts with [IOPlugin.META_FILE_NAME].
 *
 * Returns null if [path] does not exist, if the directory contains no meta file,
 * or if no [formatOverride] is given and the meta format could not be resolved from the file extension.
 */
public fun IOPlugin.readMetaFileOrNull(
    path: Path,
    formatOverride: MetaFormat? = null,
    descriptor: MetaDescriptor? = null,
): Meta? {
    if (!Files.exists(path)) return null

    val actualPath: Path = if (Files.isDirectory(path)) {
        // Files.list holds an open directory handle, so the stream must be closed after use.
        Files.list(path).use { entries ->
            // Compare the file name as a string: Path.startsWith matches whole path elements only,
            // so "@meta.json" would not be recognized as starting with "@meta".
            entries.asSequence().find { it.fileName.toString().startsWith(IOPlugin.META_FILE_NAME) }
        } ?: return null
    } else {
        path
    }
    val extension = actualPath.fileName.toString().substringAfterLast('.')

    val metaFormat = formatOverride ?: resolveMetaFormat(extension) ?: return null
    return actualPath.read {
        metaFormat.readMeta(this, descriptor)
    }
}
/**
* Read file containing meta using given [formatOverride] or file extension to infer meta type.
* If [path] is a directory search for file starting with `meta` in it
* If [path] is a directory search for file starting with `meta` in it.
*
* Fails if nothing works.
*/
public fun IOPlugin.readMetaFile(
path: Path,
@ -104,7 +136,7 @@ public fun IOPlugin.readMetaFile(
if (!Files.exists(path)) error("Meta file $path does not exist")
val actualPath: Path = if (Files.isDirectory(path)) {
Files.list(path).asSequence().find { it.fileName.startsWith("meta") }
Files.list(path).asSequence().find { it.fileName.startsWith(IOPlugin.META_FILE_NAME) }
?: error("The directory $path does not contain meta file")
} else {
path
@ -117,6 +149,7 @@ public fun IOPlugin.readMetaFile(
}
}
/**
* Write meta to file using [metaFormat]. If [path] is a directory, write a file with name equals name of [metaFormat].
* Like "meta.json"
@ -148,8 +181,6 @@ public fun IOPlugin.peekFileEnvelopeFormat(path: Path): EnvelopeFormat? {
return peekBinaryEnvelopeFormat(binary)
}
public val IOPlugin.Companion.META_FILE_NAME: String get() = "@meta"
public val IOPlugin.Companion.DATA_FILE_NAME: String get() = "@data"
/**
* Read an envelope from file if the file exists, return null if file does not exist.
@ -195,22 +226,11 @@ public fun IOPlugin.readEnvelopeFile(
return SimpleEnvelope(meta, data)
}
return formatPicker(path)?.let { format ->
format.readFile(path)
} ?: if (readNonEnvelopes) { // if no format accepts file, read it as binary
return formatPicker(path)?.readFile(path) ?: if (readNonEnvelopes) { // if no format accepts file, read it as binary
SimpleEnvelope(Meta.EMPTY, path.asBinary())
} else error("Can't infer format for file $path")
}
/**
* Write a binary into file. Throws an error if file already exists
*/
public fun <T : Any> IOFormat<T>.writeToFile(path: Path, obj: T) {
path.write {
writeObject(this, obj)
}
}
/**
* Write envelope file to given [path] using [envelopeFormat] and optional [metaFormat]
*/

View File

@ -102,12 +102,14 @@ public operator fun Meta.get(token: NameToken): Meta? = items[token]
*
* If [name] is empty return current [Meta]
*/
public operator fun Meta.get(name: Name): Meta? = getMeta(name)
public operator fun Meta.get(name: Name): Meta? = this.getMeta(name)
//TODO allow nullable receivers after Kotlin 1.7
/**
* Parse [Name] from [key] using full name notation and pass it to [Meta.get]
*/
public operator fun Meta.get(key: String): Meta? = this[Name.parse(key)]
public operator fun Meta.get(key: String): Meta? = this.get(Name.parse(key))
/**
* Get all items matching given name. The index of the last element, if present is used as a [Regex],
@ -133,6 +135,7 @@ public fun Meta.getIndexed(name: Name): Map<String?, Meta> {
}
}
public fun Meta.getIndexed(name: String): Map<String?, Meta> = getIndexed(name.parseAsName())
/**
* A meta node that ensures that all of its descendants has at least the same type.

View File

@ -1,6 +1,7 @@
package space.kscience.dataforge.names
import kotlinx.serialization.Serializable
import space.kscience.dataforge.misc.DFExperimental
/**
* A single name token. Body is not allowed to be empty.
@ -25,6 +26,20 @@ public data class NameToken(val body: String, val index: String? = null) {
} else {
body.escape()
}
public companion object {

    /**
     * Parse name token from a string of the form `body` or `body[index]`.
     *
     * A string without an opening bracket produces a token without an index.
     * Fails if an opening bracket is present, but the string does not end with `]`.
     */
    @DFExperimental
    public fun parse(string: String): NameToken {
        val body = string.substringBefore('[')
        // No opening bracket: the token has no index at all (as opposed to an empty one)
        if ('[' !in string) return NameToken(body)
        val index = string.substringAfter('[')
        if (!index.endsWith(']')) error("NameToken with index must end with ']'")
        return NameToken(body, index.removeSuffix("]"))
    }
}
}
/**

View File

@ -27,7 +27,7 @@ public object Builders {
dependenciesFromCurrentContext(wholeClasspath = true)
}
hostConfiguration(defaultJvmScriptingHostConfiguration)
compilerOptions("-jvm-target", Runtime.version().feature().toString())
compilerOptions("-jvm-target", Runtime.version().feature().toString(),"-Xcontext-receivers")
}
val evaluationConfiguration = ScriptEvaluationConfiguration {

View File

@ -1,8 +1,7 @@
package space.kscience.dataforge.workspace
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.map
import space.kscience.dataforge.data.DataSet
import space.kscience.dataforge.data.forEach
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.names.Name
@ -25,8 +24,8 @@ public interface TaskResult<out T : Any> : DataSet<T> {
*/
public val taskMeta: Meta
override fun flowData(): Flow<TaskData<T>>
override suspend fun getData(name: Name): TaskData<T>?
override fun iterator(): Iterator<TaskData<T>>
override fun get(name: Name): TaskData<T>?
}
private class TaskResultImpl<out T : Any>(
@ -36,11 +35,13 @@ private class TaskResultImpl<out T : Any>(
override val taskMeta: Meta,
) : TaskResult<T>, DataSet<T> by dataSet {
override fun flowData(): Flow<TaskData<T>> = dataSet.flowData().map {
workspace.wrapData(it, it.name, taskName, taskMeta)
override fun iterator(): Iterator<TaskData<T>> = iterator {
dataSet.forEach {
yield(workspace.wrapData(it, it.name, taskName, taskMeta))
}
}
override suspend fun getData(name: Name): TaskData<T>? = dataSet.getData(name)?.let {
override fun get(name: Name): TaskData<T>? = dataSet.get(name)?.let {
workspace.wrapData(it, name, taskName, taskMeta)
}
}

View File

@ -1,7 +1,9 @@
package space.kscience.dataforge.workspace
import space.kscience.dataforge.context.ContextAware
import space.kscience.dataforge.data.Data
import space.kscience.dataforge.data.DataSet
import space.kscience.dataforge.data.asSequence
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MutableMeta
import space.kscience.dataforge.misc.Type
@ -34,7 +36,7 @@ public interface Workspace : ContextAware, Provider {
return when (target) {
"target", Meta.TYPE -> targets.mapKeys { Name.parse(it.key)}
Task.TYPE -> tasks
//Data.TYPE -> data.flow().toMap()
Data.TYPE -> data.asSequence().associateBy { it.name }
else -> emptyMap()
}
}
@ -46,7 +48,7 @@ public interface Workspace : ContextAware, Provider {
}
public suspend fun produceData(taskName: Name, taskMeta: Meta, name: Name): TaskData<*>? =
produce(taskName, taskMeta).getData(name)
produce(taskName, taskMeta)[name]
public companion object {
public const val TYPE: String = "workspace"

View File

@ -1,13 +1,11 @@
package space.kscience.dataforge.workspace
import kotlinx.serialization.KSerializer
import kotlinx.coroutines.CoroutineScope
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.context.ContextBuilder
import space.kscience.dataforge.context.Global
import space.kscience.dataforge.data.ActiveDataTree
import space.kscience.dataforge.data.DataSet
import space.kscience.dataforge.data.DataSetBuilder
import space.kscience.dataforge.data.DataTree
import space.kscience.dataforge.data.*
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MetaRepr
import space.kscience.dataforge.meta.MutableMeta
@ -18,6 +16,7 @@ import space.kscience.dataforge.misc.DFBuilder
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.asName
import kotlin.collections.set
import kotlin.properties.PropertyDelegateProvider
import kotlin.properties.ReadOnlyProperty
@ -110,13 +109,13 @@ public class WorkspaceBuilder(private val parentContext: Context = Global) : Tas
/**
* Define intrinsic data for the workspace
*/
public suspend fun buildData(builder: suspend DataSetBuilder<Any>.() -> Unit) {
public fun data(builder: DataSetBuilder<Any>.() -> Unit) {
data = DataTree(builder)
}
@DFExperimental
public suspend fun buildActiveData(builder: suspend ActiveDataTree<Any>.() -> Unit) {
data = ActiveDataTree(builder)
public fun data(scope: CoroutineScope, builder: DataSourceBuilder<Any>.() -> Unit) {
data = DataSource(scope, builder)
}
/**

View File

@ -4,19 +4,23 @@ import space.kscience.dataforge.data.Data
import space.kscience.dataforge.data.await
import space.kscience.dataforge.io.*
import space.kscience.dataforge.misc.DFInternal
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
 * Wrap this [Envelope] into a [Data] of the given [type] that keeps the envelope meta.
 * The envelope data is read with [format] inside the block passed to [Data];
 * that block fails if the envelope has no data.
 */
@DFInternal
public fun <T : Any> Envelope.toData(type: KType, format: IOReader<T>): Data<T> {
    return Data(type, meta) {
        val binary = data ?: error("Can't convert envelope without data to Data")
        binary.readWith(format)
    }
}
/**
* Convert an [Envelope] to a data via given format. The actual parsing is done lazily.
*/
@OptIn(DFInternal::class)
public fun <T : Any> Envelope.toData(format: IOFormat<T>): Data<T> {
return Data(format.type, meta) {
data?.readWith(format) ?: error("Can't convert envelope without data to Data")
}
}
public inline fun <reified T : Any> Envelope.toData(format: IOReader<T>): Data<T> = toData(typeOf<T>(), format)
public suspend fun <T : Any> Data<T>.toEnvelope(format: IOFormat<T>): Envelope {
public suspend fun <T : Any> Data<T>.toEnvelope(format: IOWriter<T>): Envelope {
val obj = await()
val binary = format.toBinary(obj)
val binary = Binary(obj, format)
return SimpleEnvelope(meta, binary)
}

View File

@ -38,7 +38,7 @@ public suspend inline fun <T : Any, reified R : Any> TaskResultBuilder<R>.pipeFr
action(it, data.name, meta)
}
emit(data.name, res)
data(data.name, res)
}
}

View File

@ -1,94 +1,94 @@
package space.kscience.dataforge.workspace
import io.ktor.utils.io.streams.asOutput
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.isActive
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import space.kscience.dataforge.context.error
import space.kscience.dataforge.context.logger
import space.kscience.dataforge.data.*
import space.kscience.dataforge.io.*
import space.kscience.dataforge.meta.*
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.copy
import space.kscience.dataforge.meta.get
import space.kscience.dataforge.meta.string
import space.kscience.dataforge.misc.DFExperimental
import java.nio.file.FileSystem
import space.kscience.dataforge.misc.DFInternal
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.NameToken
import space.kscience.dataforge.names.asName
import space.kscience.dataforge.names.plus
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.StandardOpenOption
import java.nio.file.StandardWatchEventKinds
import java.nio.file.WatchEvent
import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.spi.FileSystemProvider
import java.util.zip.ZipEntry
import java.util.zip.ZipOutputStream
import java.time.Instant
import kotlin.io.path.extension
import kotlin.io.path.nameWithoutExtension
import kotlin.io.path.readAttributes
import kotlin.reflect.KType
import kotlin.reflect.typeOf
import kotlin.streams.toList
//public typealias FileFormatResolver<T> = (Path, Meta) -> IOFormat<T>
public interface FileFormatResolver<T : Any> {
public val type: KType
public operator fun invoke(path: Path, meta: Meta): IOFormat<T>
}
public typealias FileFormatResolver<T> = (path: Path, meta: Meta) -> IOReader<T>
public class FileData<T> internal constructor(private val data: Data<T>) : Data<T> by data {
@PublishedApi
internal inline fun <reified T : Any> IOPlugin.formatResolver(): FileFormatResolver<T> =
object : FileFormatResolver<T> {
override val type: KType = typeOf<T>()
public val path: String? get() = meta[META_FILE_PATH_KEY].string
public val extension: String? get() = meta[META_FILE_EXTENSION_KEY].string
@OptIn(DFExperimental::class)
override fun invoke(path: Path, meta: Meta): IOFormat<T> =
resolveIOFormat<T>() ?: error("Can't resolve IO format for ${T::class}")
public val createdTime: Instant? get() = meta[META_FILE_CREATE_TIME_KEY].string?.let { Instant.parse(it) }
public val updatedTime: Instant? get() = meta[META_FILE_UPDATE_TIME_KEY].string?.let { Instant.parse(it) }
public companion object {
public val META_FILE_KEY: Name = "file".asName()
public val META_FILE_PATH_KEY: Name = META_FILE_KEY + "path"
public val META_FILE_EXTENSION_KEY: Name = META_FILE_KEY + "extension"
public val META_FILE_CREATE_TIME_KEY: Name = META_FILE_KEY + "created"
public val META_FILE_UPDATE_TIME_KEY: Name = META_FILE_KEY + "updated"
}
private fun newZFS(path: Path): FileSystem {
val fsProvider = FileSystemProvider.installedProviders().find { it.scheme == "jar" }
?: error("Zip file system provider not found")
return fsProvider.newFileSystem(path, mapOf("create" to "true"))
}
/**
 * Read the file at [path] as an envelope and expose its content as data, using the reader chosen by [formatResolver].
 * The operation is blocking since it must read the meta header. The reading of the envelope body is lazy.
 * @param path the file to read
 * @param formatResolver selects the reader for the envelope body from the file path and the envelope meta
 */
@OptIn(DFInternal::class)
@DFExperimental
public fun <T : Any> IOPlugin.readDataFile(
path: Path,
formatResolver: FileFormatResolver<T>,
): Data<T> {
): FileData<T> {
val envelope = readEnvelopeFile(path, true)
val format = formatResolver(path, envelope.meta)
return envelope.toData(format)
val updatedMeta = envelope.meta.copy {
FileData.META_FILE_PATH_KEY put path.toString()
FileData.META_FILE_EXTENSION_KEY put path.extension
val attributes = path.readAttributes<BasicFileAttributes>()
FileData.META_FILE_UPDATE_TIME_KEY put attributes.lastModifiedTime().toInstant().toString()
FileData.META_FILE_CREATE_TIME_KEY put attributes.creationTime().toInstant().toString()
}
return FileData(Data(format.type, updatedMeta) {
envelope.data?.readWith(format) ?: error("Can't convert envelope without content to Data")
})
}
@DFExperimental
public inline fun <reified T : Any> IOPlugin.readDataFile(path: Path): Data<T> = readDataFile(path, formatResolver())
/**
* Add file/directory-based data tree item
*/
@DFExperimental
public suspend fun <T : Any> DataSetBuilder<T>.file(
plugin: IOPlugin,
path: Path,
formatResolver: FileFormatResolver<T>,
) {
//If path is a single file or a special directory, read it as single datum
if (!Files.isDirectory(path) || Files.list(path).allMatch { it.fileName.toString().startsWith("@") }) {
plugin.run {
val data = readDataFile(path, formatResolver)
val name = data.meta[Envelope.ENVELOPE_NAME_KEY].string
?: path.fileName.toString().replace(".df", "")
emit(name, data)
}
} else {
//otherwise, read as directory
plugin.run {
val data = readDataDirectory(path, formatResolver)
val name = data.getMeta()?.get(Envelope.ENVELOPE_NAME_KEY).string
?: path.fileName.toString().replace(".df", "")
emit(name, data)
/**
 * Fill this builder from the direct children of the directory at [path].
 *
 * Children whose file name starts with [IOPlugin.META_FILE_NAME] are read with `readMetaFile` and set as meta,
 * children whose file name starts with `@` are skipped, and every other child is delegated to `file`.
 *
 * @param path the directory to list
 * @param formatResolver passed through to `file` for each data child
 */
context(IOPlugin) @DFExperimental
private fun <T : Any> DataSetBuilder<T>.directory(path: Path, formatResolver: FileFormatResolver<T>) {
    // Files.list holds an open directory handle until the stream is closed, so snapshot the listing and close it
    val children = Files.list(path).use { stream -> stream.toList() }
    children.forEach { childPath ->
        val fileName = childPath.fileName.toString()
        if (fileName.startsWith(IOPlugin.META_FILE_NAME)) {
            meta(readMetaFile(childPath))
        } else if (!fileName.startsWith("@")) {
            file(childPath, formatResolver)
        }
    }
}
@ -97,34 +97,92 @@ public suspend fun <T : Any> DataSetBuilder<T>.file(
* Read the directory as a data node. If [path] is a zip archive, read it as directory
*/
@DFExperimental
public suspend fun <T : Any> IOPlugin.readDataDirectory(
@DFInternal
public fun <T : Any> IOPlugin.readDataDirectory(
type: KType,
path: Path,
formatResolver: FileFormatResolver<T>,
): DataTree<T> {
//read zipped data node
if (path.fileName != null && path.fileName.toString().endsWith(".zip")) {
//Using explicit Zip file system to avoid bizarre compatibility bugs
val fs = newZFS(path)
return readDataDirectory(fs.rootDirectories.first(), formatResolver)
val fsProvider = FileSystemProvider.installedProviders().find { it.scheme == "jar" }
?: error("Zip file system provider not found")
val fs = fsProvider.newFileSystem(path, mapOf("create" to "true"))
return readDataDirectory(type, fs.rootDirectories.first(), formatResolver)
}
if (!Files.isDirectory(path)) error("Provided path $path is not a directory")
return DataTree(formatResolver.type) {
Files.list(path).toList().forEach { path ->
val fileName = path.fileName.toString()
if (fileName.startsWith(IOPlugin.META_FILE_NAME)) {
meta(readMetaFile(path))
} else if (!fileName.startsWith("@")) {
runBlocking {
file(this@readDataDirectory, path, formatResolver)
return DataTree(type) {
directory(path, formatResolver)
}
}
/**
 * Read the directory at [path] as a [DataTree], taking the data type from the reified parameter [T].
 *
 * Delegates to the overload that accepts an explicit [KType].
 *
 * @param path the directory to read
 * @param formatResolver selects the reader for each data file
 * @return the tree typed with [T], so that callers keep the element type
 */
@OptIn(DFInternal::class)
@DFExperimental
public inline fun <reified T : Any> IOPlugin.readDataDirectory(
    path: Path,
    noinline formatResolver: FileFormatResolver<T>,
): DataTree<T> = readDataDirectory(typeOf<T>(), path, formatResolver)
/**
 * Convert this path to a [Name]: each path element becomes one token,
 * parsed from the element's file name without its extension.
 */
@OptIn(DFExperimental::class)
private fun Path.toName(): Name {
    val tokens = map { segment -> NameToken.parse(segment.nameWithoutExtension) }
    return Name(tokens)
}
/**
 * Read the directory at [path] into a [DataSource] and keep it updated from file system events.
 *
 * The directory is loaded once, then a coroutine launched on [Dispatchers.IO] registers a watch service
 * for create, modify and delete events on [path]. A delete event removes the corresponding name; any other event
 * re-reads the affected entry: entries starting with [IOPlugin.META_FILE_NAME] update the meta, entries starting
 * with `@` are ignored, everything else is read as data.
 *
 * @param type the data type of the resulting source
 * @param path the directory to monitor; paths ending with `.zip` are rejected
 * @param formatResolver selects the reader for each data file
 */
@DFInternal
@DFExperimental
public fun <T : Any> IOPlugin.monitorDataDirectory(
    type: KType,
    path: Path,
    formatResolver: FileFormatResolver<T>,
): DataSource<T> {
    // fileName is null for a root path, so guard it the same way readDataDirectory does
    if (path.fileName?.toString()?.endsWith(".zip") == true) error("Monitoring not supported for ZipFS")
    if (!Files.isDirectory(path)) error("Provided path $path is not a directory")
    return DataSource(type, context) {
        directory(path, formatResolver)
        launch(Dispatchers.IO) {
            // Release the watch service once monitoring stops
            path.fileSystem.newWatchService().use { watchService ->
                path.register(
                    watchService,
                    StandardWatchEventKinds.ENTRY_DELETE,
                    StandardWatchEventKinds.ENTRY_MODIFY,
                    StandardWatchEventKinds.ENTRY_CREATE
                )

                do {
                    val key = watchService.take()
                    if (key != null) {
                        for (event: WatchEvent<*> in key.pollEvents()) {
                            // Events without a path context (e.g. OVERFLOW) carry nothing to process
                            val relativePath = event.context() as? Path ?: continue
                            if (event.kind() == StandardWatchEventKinds.ENTRY_DELETE) {
                                remove(relativePath.toName())
                            } else {
                                // The event context is relative to the watched directory, so resolve it before reading
                                val eventPath = path.resolve(relativePath)
                                val fileName = eventPath.fileName.toString()
                                if (fileName.startsWith(IOPlugin.META_FILE_NAME)) {
                                    meta(readMetaFile(eventPath))
                                } else if (!fileName.startsWith("@")) {
                                    file(eventPath, formatResolver)
                                }
                            }
                        }
                        key.reset()
                    }
                } while (isActive && key != null)
            }
        }
    }
}
/**
* Start monitoring given directory ([path]) as a [DataSource].
*/
@OptIn(DFInternal::class)
@DFExperimental
public suspend inline fun <reified T : Any> IOPlugin.readDataDirectory(path: Path): DataTree<T> =
readDataDirectory(path, formatResolver())
public inline fun <reified T : Any> IOPlugin.monitorDataDirectory(
    path: Path,
    noinline formatResolver: FileFormatResolver<T>,
): DataSource<T> {
    // Capture the reified type and hand over to the KType-based overload
    val dataType = typeOf<T>()
    return monitorDataDirectory(dataType, path, formatResolver)
}
/**
* Write data tree to existing directory or create a new one using default [java.nio.file.FileSystem] provider
@ -133,7 +191,7 @@ public suspend inline fun <reified T : Any> IOPlugin.readDataDirectory(path: Pat
public suspend fun <T : Any> IOPlugin.writeDataDirectory(
path: Path,
tree: DataTree<T>,
format: IOFormat<T>,
format: IOWriter<T>,
envelopeFormat: EnvelopeFormat? = null,
metaFormat: MetaFormatFactory? = null,
) {
@ -143,7 +201,7 @@ public suspend fun <T : Any> IOPlugin.writeDataDirectory(
} else if (!Files.isDirectory(path)) {
error("Can't write a node into file")
}
tree.items().forEach { (token, item) ->
tree.items.forEach { (token, item) ->
val childPath = path.resolve(token.toString())
when (item) {
is DataTreeItem.Node -> {
@ -159,70 +217,35 @@ public suspend fun <T : Any> IOPlugin.writeDataDirectory(
}
}
}
val treeMeta = tree.getMeta()
if (treeMeta != null) {
writeMetaFile(path, treeMeta, metaFormat ?: JsonMetaFormat)
}
val treeMeta = tree.meta
writeMetaFile(path, treeMeta, metaFormat ?: JsonMetaFormat)
}
}
@Suppress("BlockingMethodInNonBlockingContext")
private suspend fun <T : Any> ZipOutputStream.writeNode(
name: String,
treeItem: DataTreeItem<T>,
dataFormat: IOFormat<T>,
envelopeFormat: EnvelopeFormat = TaggedEnvelopeFormat,
) {
withContext(Dispatchers.IO) {
when (treeItem) {
is DataTreeItem.Leaf -> {
//TODO add directory-based envelope writer
val envelope = treeItem.data.toEnvelope(dataFormat)
val entry = ZipEntry(name)
putNextEntry(entry)
envelopeFormat.run {
asOutput().run {
writeEnvelope(this, envelope)
flush()
}
}
}
is DataTreeItem.Node -> {
val entry = ZipEntry("$name/")
putNextEntry(entry)
closeEntry()
treeItem.tree.items().forEach { (token, item) ->
val childName = "$name/$token"
writeNode(childName, item, dataFormat, envelopeFormat)
}
}
}
}
}
@Suppress("BlockingMethodInNonBlockingContext")
/**
* Add file/directory-based data tree item
*/
context(IOPlugin) @OptIn(DFInternal::class)
@DFExperimental
public suspend fun <T : Any> IOPlugin.writeZip(
public fun <T : Any> DataSetBuilder<T>.file(
path: Path,
tree: DataTree<T>,
format: IOFormat<T>,
envelopeFormat: EnvelopeFormat = TaggedEnvelopeFormat,
formatResolver: FileFormatResolver<out T>,
) {
withContext(Dispatchers.IO) {
val actualFile = if (path.toString().endsWith(".zip")) {
path
try {
//If path is a single file or a special directory, read it as single datum
if (!Files.isDirectory(path) || Files.list(path).allMatch { it.fileName.toString().startsWith("@") }) {
val data = readDataFile(path, formatResolver)
val name = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: path.nameWithoutExtension
data(name, data)
} else {
path.resolveSibling(path.fileName.toString() + ".zip")
}
val fos = Files.newOutputStream(actualFile,
StandardOpenOption.WRITE,
StandardOpenOption.CREATE,
StandardOpenOption.TRUNCATE_EXISTING)
val zos = ZipOutputStream(fos)
zos.use {
it.writeNode("", DataTreeItem.Node(tree), format, envelopeFormat)
//otherwise, read as directory
val data = readDataDirectory(dataType, path, formatResolver)
val name = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: path.nameWithoutExtension
node(name, data)
}
} catch (ex: Exception) {
logger.error { "Failed to read file or directory at $path: ${ex.message}" }
}
}

View File

@ -1,21 +1,26 @@
package space.kscience.dataforge.workspace
import kotlinx.coroutines.runBlocking
import space.kscience.dataforge.data.DataSet
import space.kscience.dataforge.data.DataSetBuilder
import space.kscience.dataforge.data.select
import space.kscience.dataforge.data.filterByType
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.matches
public fun WorkspaceBuilder.data(builder: suspend DataSetBuilder<Any>.() -> Unit): Unit = runBlocking {
buildData(builder)
}
//public fun WorkspaceBuilder.data(builder: DataSetBuilder<Any>.() -> Unit): Unit = runBlocking {
// data(builder)
//}
public inline fun <reified T: Any> TaskResultBuilder<*>.data(namePattern: Name? = null): DataSelector<T> = object : DataSelector<T> {
override suspend fun select(workspace: Workspace, meta: Meta): DataSet<T> = workspace.data.select(namePattern)
}
/**
 * Create a [DataSelector] that picks workspace data of type [T].
 * When [namePattern] is given, only data whose name matches the pattern is selected.
 */
@OptIn(DFExperimental::class)
public inline fun <reified T : Any> TaskResultBuilder<*>.data(namePattern: Name? = null): DataSelector<T> {
    return object : DataSelector<T> {
        override suspend fun select(workspace: Workspace, meta: Meta): DataSet<T> {
            return workspace.data.filterByType<T> { name, _ ->
                // No pattern means every name is accepted
                namePattern?.let { pattern -> name.matches(pattern) } ?: true
            }
        }
    }
}
public suspend inline fun <reified T : Any> TaskResultBuilder<*>.fromTask(
task: Name,
taskMeta: Meta = Meta.EMPTY,
): DataSet<T> = workspace.produce(task, taskMeta).select()
): DataSet<T> = workspace.produce(task, taskMeta).filterByType()

View File

@ -0,0 +1,72 @@
package space.kscience.dataforge.workspace
import io.ktor.utils.io.streams.asOutput
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import space.kscience.dataforge.data.DataTree
import space.kscience.dataforge.data.DataTreeItem
import space.kscience.dataforge.io.EnvelopeFormat
import space.kscience.dataforge.io.IOFormat
import space.kscience.dataforge.io.TaggedEnvelopeFormat
import space.kscience.dataforge.misc.DFExperimental
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.StandardOpenOption
import java.util.zip.ZipEntry
import java.util.zip.ZipOutputStream
/**
 * Write [treeItem] into this zip stream under the entry [name].
 *
 * A leaf is converted to an envelope with [dataFormat] and written with [envelopeFormat] as a single entry.
 * A node becomes a directory entry (`name/`) followed by its children, written recursively as `name/token`.
 */
private suspend fun <T : Any> ZipOutputStream.writeNode(
    name: String,
    treeItem: DataTreeItem<T>,
    dataFormat: IOFormat<T>,
    envelopeFormat: EnvelopeFormat = TaggedEnvelopeFormat,
): Unit = withContext(Dispatchers.IO) {
    when (treeItem) {
        is DataTreeItem.Leaf -> {
            //TODO add directory-based envelope writer
            val envelope = treeItem.data.toEnvelope(dataFormat)
            putNextEntry(ZipEntry(name))
            val output = asOutput()
            envelopeFormat.writeEnvelope(output, envelope)
            output.flush()
        }
        is DataTreeItem.Node -> {
            // An empty entry with a trailing slash marks a directory
            putNextEntry(ZipEntry("$name/"))
            closeEntry()
            treeItem.tree.items.forEach { (childToken, childItem) ->
                writeNode("$name/$childToken", childItem, dataFormat, envelopeFormat)
            }
        }
    }
}
/**
 * Write this [DataTree] as a zip archive.
 *
 * If [path] does not end with `.zip`, the archive is written to a sibling file with `.zip` appended to the name.
 * An existing file is truncated.
 *
 * @param path target location of the archive
 * @param format the format used to convert data to envelopes
 * @param envelopeFormat the format used to write each envelope
 */
@DFExperimental
public suspend fun <T : Any> DataTree<T>.writeZip(
    path: Path,
    format: IOFormat<T>,
    envelopeFormat: EnvelopeFormat = TaggedEnvelopeFormat,
): Unit = withContext(Dispatchers.IO) {
    val target = when {
        path.toString().endsWith(".zip") -> path
        else -> path.resolveSibling(path.fileName.toString() + ".zip")
    }
    val fileStream = Files.newOutputStream(
        target,
        StandardOpenOption.WRITE,
        StandardOpenOption.CREATE,
        StandardOpenOption.TRUNCATE_EXISTING
    )
    ZipOutputStream(fileStream).use { zip ->
        // The whole tree is written as the root node with an empty name
        zip.writeNode("", DataTreeItem.Node(this@writeZip), format, envelopeFormat)
    }
}

View File

@ -1,7 +1,9 @@
@file:OptIn(ExperimentalCoroutinesApi::class)
package space.kscience.dataforge.workspace
import kotlinx.coroutines.flow.single
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.test.runTest
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.context.PluginFactory
import space.kscience.dataforge.context.PluginTag
@ -15,17 +17,17 @@ class DataPropagationTestPlugin : WorkspacePlugin() {
override val tag: PluginTag = Companion.tag
val allData by task<Int> {
val selectedData = workspace.data.select<Int>()
val result: Data<Int> = selectedData.flowData().foldToData(0) { result, data ->
result + data.await()
val selectedData = workspace.data.filterByType<Int>()
val result: Data<Int> = selectedData.foldToData(0) { result, data ->
result + data.value
}
emit("result", result)
data("result", result)
}
val singleData by task<Int> {
workspace.data.select<Int>().getData("myData[12]")?.let {
emit("result", it)
workspace.data.filterByType<Int>()["myData[12]"]?.let {
data("result", it)
}
}
@ -45,28 +47,22 @@ class DataPropagationTest {
context {
plugin(DataPropagationTestPlugin)
}
runBlocking {
data {
repeat(100) {
static("myData[$it]", it)
}
data {
repeat(100) {
static("myData[$it]", it)
}
}
}
@Test
fun testAllData() {
runBlocking {
val node = testWorkspace.produce("Test.allData")
assertEquals(4950, node.flowData().single().await())
}
fun testAllData() = runTest {
val node = testWorkspace.produce("Test.allData")
assertEquals(4950, node.asSequence().single().await())
}
@Test
fun testSingleData() {
runBlocking {
val node = testWorkspace.produce("Test.singleData")
assertEquals(12, node.flowData().single().await())
}
fun testSingleData() = runTest {
val node = testWorkspace.produce("Test.singleData")
assertEquals(12, node.asSequence().single().await())
}
}

View File

@ -9,10 +9,9 @@ import space.kscience.dataforge.io.IOFormat
import space.kscience.dataforge.io.io
import space.kscience.dataforge.io.readUtf8String
import space.kscience.dataforge.io.writeUtf8String
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.get
import space.kscience.dataforge.misc.DFExperimental
import java.nio.file.Files
import java.nio.file.Path
import kotlin.reflect.KType
import kotlin.reflect.typeOf
import kotlin.test.Test
@ -20,43 +19,27 @@ import kotlin.test.assertEquals
class FileDataTest {
val dataNode = runBlocking {
DataTree<String> {
emit("dir") {
static("a", "Some string") {
"content" put "Some string"
}
}
static("b", "root data")
meta {
"content" put "This is root meta node"
val dataNode = DataTree<String> {
node("dir") {
static("a", "Some string") {
"content" put "Some string"
}
}
static("b", "root data")
meta {
"content" put "This is root meta node"
}
}
object StringIOFormat : IOFormat<String> {
override val type: KType = typeOf<String>()
object StringIOFormat : IOFormat<String> {
override val type: KType get() = typeOf<String>()
override fun writeObject(output: Output, obj: String) {
output.writeUtf8String(obj)
}
override fun readObject(input: Input): String {
return input.readUtf8String()
}
override fun toMeta(): Meta = Meta {
IOFormat.NAME_KEY put "string"
}
}
object StringFormatResolver : FileFormatResolver<String> {
override val type: KType = typeOf<String>()
override fun invoke(path: Path, meta: Meta): IOFormat<String> = StringIOFormat
override fun readObject(input: Input): String = input.readUtf8String()
}
@Test
@ -67,9 +50,9 @@ class FileDataTest {
runBlocking {
writeDataDirectory(dir, dataNode, StringIOFormat)
println(dir.toUri().toString())
val reconstructed = readDataDirectory(dir, StringFormatResolver)
assertEquals(dataNode.getData("dir.a")?.meta, reconstructed.getData("dir.a")?.meta)
assertEquals(dataNode.getData("b")?.await(), reconstructed.getData("b")?.await())
val reconstructed = readDataDirectory(dir) { _, _ -> StringIOFormat }
assertEquals(dataNode["dir.a"]?.meta?.get("content"), reconstructed["dir.a"]?.meta?.get("content"))
assertEquals(dataNode["b"]?.await(), reconstructed["b"]?.await())
}
}
}
@ -81,11 +64,11 @@ class FileDataTest {
Global.io.run {
val zip = Files.createTempFile("df_data_node", ".zip")
runBlocking {
writeZip(zip, dataNode, StringIOFormat)
dataNode.writeZip(zip, StringIOFormat)
println(zip.toUri().toString())
val reconstructed = readDataDirectory(zip, StringFormatResolver)
assertEquals(dataNode.getData("dir.a")?.meta, reconstructed.getData("dir.a")?.meta)
assertEquals(dataNode.getData("b")?.await(), reconstructed.getData("b")?.await())
val reconstructed = readDataDirectory(zip) { _, _ -> StringIOFormat }
assertEquals(dataNode["dir.a"]?.meta?.get("content"), reconstructed["dir.a"]?.meta?.get("content"))
assertEquals(dataNode["b"]?.await(), reconstructed["b"]?.await())
}
}
}

View File

@ -1,10 +1,11 @@
@file:Suppress("UNUSED_VARIABLE")
@file:OptIn(ExperimentalCoroutinesApi::class)
package space.kscience.dataforge.workspace
import kotlinx.coroutines.flow.first
import kotlinx.coroutines.flow.single
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.test.runTest
import org.junit.jupiter.api.Timeout
import space.kscience.dataforge.context.*
import space.kscience.dataforge.data.*
@ -28,7 +29,7 @@ public inline fun <reified P : Plugin> P.toFactory(): PluginFactory<P> = object
override val type: KClass<out P> = P::class
}
public fun Workspace.runBlocking(task: String, block: MutableMeta.() -> Unit = {}): DataSet<Any> = runBlocking {
public fun Workspace.produceBlocking(task: String, block: MutableMeta.() -> Unit = {}): DataSet<Any> = runBlocking {
produce(task, block)
}
@ -39,7 +40,7 @@ class SimpleWorkspaceTest {
override val tag: PluginTag = PluginTag("test")
val test by task<Any> {
populate(
populateFrom(
workspace.data.map {
it.also {
logger.info { "Test: $it" }
@ -65,8 +66,8 @@ class SimpleWorkspaceTest {
}
val filterOne by task<Int> {
workspace.data.selectOne<Int>("myData[12]")?.let { source ->
emit(source.name, source.map { it })
workspace.data.getByType<Int>("myData[12]")?.let { source ->
data(source.name, source.map { it })
}
}
@ -103,54 +104,53 @@ class SimpleWorkspaceTest {
val squareData = from(square)
val linearData = from(linear)
squareData.forEach { data ->
val newData: Data<Int> = data.combine(linearData.getData(data.name)!!) { l, r ->
val newData: Data<Int> = data.combine(linearData.get(data.name)!!) { l, r ->
l + r
}
emit(data.name, newData)
data(data.name, newData)
}
}
val sum by task<Int> {
workspace.logger.info { "Starting sum" }
val res = from(square).foldToData(0) { l, r ->
l + r.await()
l + r.value
}
emit("sum", res)
data("sum", res)
}
val averageByGroup by task<Int> {
val evenSum = workspace.data.filter { name, _ ->
val evenSum = workspace.data.filterByType<Int> { name, _ ->
name.toString().toInt() % 2 == 0
}.select<Int>().foldToData(0) { l, r ->
l + r.await()
}.foldToData(0) { l, r ->
l + r.value
}
emit("even", evenSum)
val oddSum = workspace.data.filter { name, _ ->
data("even", evenSum)
val oddSum = workspace.data.filterByType<Int> { name, _ ->
name.toString().toInt() % 2 == 1
}.select<Int>().foldToData(0) { l, r ->
l + r.await()
}.foldToData(0) { l, r ->
l + r.value
}
emit("odd", oddSum)
data("odd", oddSum)
}
val delta by task<Int> {
val averaged = from(averageByGroup)
val even = averaged.getData("event")!!
val odd = averaged.getData("odd")!!
val even = averaged["event"]!!
val odd = averaged["odd"]!!
val res = even.combine(odd) { l, r ->
l - r
}
emit("res", res)
data("res", res)
}
val customPipe by task<Int> {
workspace.data.select<Int>().forEach { data ->
workspace.data.filterByType<Int>().forEach { data ->
val meta = data.meta.toMutableMeta().apply {
"newValue" put 22
}
emit(data.name + "new", data.map { (data.meta["value"].int ?: 0) + it })
data(data.name + "new", data.map { (data.meta["value"].int ?: 0) + it })
}
}
@ -159,21 +159,17 @@ class SimpleWorkspaceTest {
@Test
@Timeout(1)
fun testWorkspace() {
runBlocking {
val node = workspace.runBlocking("sum")
val res = node.flowData().single()
assertEquals(328350, res.await())
}
fun testWorkspace() = runTest {
val node = workspace.produce("sum")
val res = node.asSequence().single()
assertEquals(328350, res.await())
}
@Test
@Timeout(1)
fun testMetaPropagation() {
runBlocking {
val node = workspace.produce("sum") { "testFlag" put true }
val res = node.flowData().single().await()
}
fun testMetaPropagation() = runTest {
val node = workspace.produce("sum") { "testFlag" put true }
val res = node.asSequence().single().await()
}
@Test
@ -195,7 +191,7 @@ class SimpleWorkspaceTest {
fun testFilter() {
runBlocking {
val node = workspace.produce("filterOne")
assertEquals(12, node.flowData().first().await())
assertEquals(12, node.asSequence().first().await())
}
}
}

View File

@ -1,13 +1,8 @@
org.gradle.jvmargs=-XX:MaxMetaspaceSize=1G
org.gradle.parallel=true
org.gradle.jvmargs=-Xmx4096m
kotlin.code.style=official
#kotlin.mpp.enableGranularSourceSetsMetadata=true
#kotlin.native.enableDependencyPropagation=false
kotlin.mpp.stability.nowarn=true
#kotlin.incremental.js.ir=true
publishing.github=false
publishing.sonatype=false
toolsVersion=0.11.1-kotlin-1.6.10
toolsVersion=0.11.5-kotlin-1.6.21

View File

@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.2-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.4.2-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists