diff --git a/CHANGELOG.md b/CHANGELOG.md
index 48da46a3..32be6572 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@
 - More fine-grained types in Action builders.
 
 ### Changed
+- `Name::replaceLast` API
 - `PluginFactory` no longer requires plugin class
 - Collection<Named> toMap -> associateByName
 - Simplified `DFTL` envelope format. Closing symbols are unnecessary. Properties are discontinued.
@@ -36,6 +37,8 @@
 ### Removed
 
 ### Fixed
+- `readDataDirectory` does not split names with dots
+- Front matter reader does not crash on non-UTF files
 - Meta file name in readMeta from directory
 - Tagless and FrontMatter envelope partial readers fix.
 
diff --git a/build.gradle.kts b/build.gradle.kts
index 8df17860..01cb83df 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -9,7 +9,7 @@ plugins {
 
 allprojects {
     group = "space.kscience"
-    version = "0.6.1-dev-5"
+    version = "0.6.1-dev-6"
 }
 
 subprojects {
diff --git a/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/DataSet.kt b/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/DataSet.kt
index 72e4eb38..36428cd4 100644
--- a/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/DataSet.kt
+++ b/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/DataSet.kt
@@ -107,7 +107,7 @@ public fun DataSet<*>.startAll(coroutineScope: CoroutineScope): Job =
     }.joinAll()
 }
 
-public suspend fun DataSet<*>.join(): Unit = coroutineScope { startAll(this).join() }
+public suspend fun DataSet<*>.computeAndJoinAll(): Unit = coroutineScope { startAll(this).join() }
 
 public fun DataSet<*>.toMeta(): Meta = Meta {
     forEach {
diff --git a/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/DataTree.kt b/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/DataTree.kt
index 1cf8971c..b70744ce 100644
--- a/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/DataTree.kt
+++ b/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/DataTree.kt
@@ -1,6 +1,7 @@
 package space.kscience.dataforge.data
 
 import space.kscience.dataforge.meta.Meta
+import space.kscience.dataforge.misc.DFInternal
 import space.kscience.dataforge.misc.Type
 import space.kscience.dataforge.names.*
 import kotlin.collections.component1
@@ -65,11 +66,16 @@ public interface DataTree<out T : Any> : DataSet<T> {
          */
         public val META_ITEM_NAME_TOKEN: NameToken = NameToken("@meta")
 
-        public inline fun <reified T : Any> empty(meta: Meta = Meta.EMPTY): DataTree<T> = object : DataTree<T> {
+        @DFInternal
+        public fun <T : Any> emptyWithType(type: KType, meta: Meta = Meta.EMPTY): DataTree<T> = object : DataTree<T> {
             override val items: Map<NameToken, DataTreeItem<T>> get() = emptyMap()
-            override val dataType: KType get() = typeOf<T>()
+            override val dataType: KType get() = type
             override val meta: Meta get() = meta
         }
+
+        @OptIn(DFInternal::class)
+        public inline fun <reified T : Any> empty(meta: Meta = Meta.EMPTY): DataTree<T> =
+            emptyWithType(typeOf<T>(), meta)
     }
 }
@@ -106,12 +112,8 @@ public fun <T : Any> DataTree<T>.traverseItems(): Sequence<Pair<Name, DataTreeItem<T>>>
-public fun <T : Any> DataTree<T>.branch(branchName: Name): DataTree<T> = object : DataTree<T> {
-    override val dataType: KType get() = this@branch.dataType
+@OptIn(DFInternal::class)
+public fun <T : Any> DataTree<T>.branch(branchName: Name): DataTree<T> =
+    getItem(branchName)?.tree ?: DataTree.emptyWithType(dataType)
 
-    override val meta: Meta
-        get() = getItem(branchName)?.meta ?: Meta.EMPTY
-
-    override val items: Map<NameToken, DataTreeItem<T>>
-        get() = getItem(branchName).tree?.items ?: emptyMap()
-}
+public fun <T : Any> DataTree<T>.branch(branchName: String): DataTree<T> = branch(branchName.parseAsName())
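
Not part of the patch: a minimal Kotlin sketch of how the reworked tree API above is expected to behave — `empty` delegating to the new `@DFInternal` `emptyWithType`, `branch` resolving a missing branch to an empty tree instead of wrapping the receiver in an anonymous object, and `join` renamed to `computeAndJoinAll`. It assumes a JVM target with `dataforge-data` 0.6.1-dev-6 and kotlinx-coroutines on the classpath.

```kotlin
import kotlinx.coroutines.runBlocking
import space.kscience.dataforge.data.DataTree
import space.kscience.dataforge.data.branch
import space.kscience.dataforge.data.computeAndJoinAll

fun main() = runBlocking {
    // `empty` now delegates to the @DFInternal `emptyWithType`, carrying an explicit KType.
    val tree = DataTree.empty<Any>()

    // A missing branch resolves to an empty tree with the same dataType.
    val missing = tree.branch("does.not.exist")
    println(missing.items.isEmpty()) // true

    // The former `DataSet.join()` is now `computeAndJoinAll()`.
    tree.computeAndJoinAll()
}
```
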
diff --git a/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/DataTreeBuilder.kt b/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/DataTreeBuilder.kt
index b23d594b..303ba44e 100644
--- a/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/DataTreeBuilder.kt
+++ b/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/DataTreeBuilder.kt
@@ -124,4 +124,4 @@ public inline fun <reified T : Any> DataSourceBuilder<T>.emit(
     name: String,
     parent: CoroutineScope,
     noinline block: DataSourceBuilder<T>.() -> Unit,
-): Unit = node(Name.parse(name), DataSource(parent, block))
+): Unit = node(Name.parse(name), DataSource(parent, block))
\ No newline at end of file
diff --git a/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormat.kt b/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormat.kt
index c1366145..046f08fa 100644
--- a/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormat.kt
+++ b/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormat.kt
@@ -68,8 +68,9 @@ public class FrontMatterEnvelopeFormat(
     }
 
     override fun peekFormat(io: IOPlugin, binary: Binary): EnvelopeFormat? = binary.read {
-        val line = readSafeUtf8Line()
-        return@read if (line.startsWith("---")) {
+        //read raw string to avoid UTF issues
+        val line = readRawString(3)
+        return@read if (line == "---") {
             default
         } else {
             null
diff --git a/dataforge-meta/src/commonMain/kotlin/space/kscience/dataforge/names/Name.kt b/dataforge-meta/src/commonMain/kotlin/space/kscience/dataforge/names/Name.kt
index c24cf58d..6896437f 100644
--- a/dataforge-meta/src/commonMain/kotlin/space/kscience/dataforge/names/Name.kt
+++ b/dataforge-meta/src/commonMain/kotlin/space/kscience/dataforge/names/Name.kt
@@ -144,6 +144,15 @@ public fun Name.firstOrNull(): NameToken? = tokens.firstOrNull()
  */
 public fun Name.first(): NameToken = tokens.first()
 
+/**
+ * Return a [Name] with its last token replaced via the [replacement] rule.
+ * If the initial [Name] is empty, return an empty name.
+ */
+public fun Name.replaceLast(replacement: (NameToken) -> NameToken): Name {
+    if (isEmpty()) return Name.EMPTY
+    return cutLast() + replacement(lastOrNull()!!)
+}
+
 
 /**
  * Convert the [String] to a [Name] by simply wrapping it in a single name token without parsing.
@@ -227,7 +236,6 @@ public fun Name.removeFirstOrNull(first: Name): Name? = if (startsWith(first)) {
     null
 }
 
-
 @ThreadLocal
 private val nameCache = HashMap<String, Name>()
 
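
A quick illustration of the new `Name.replaceLast` helper (not part of the patch; the token names below are made up). Only the last token is rewritten, and an empty name is returned unchanged rather than failing:

```kotlin
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.NameToken
import space.kscience.dataforge.names.replaceLast

fun main() {
    val name = Name.parse("data.spectrum")

    // Rewrite only the final token: "data.spectrum" -> "data.spectrum_backup"
    val renamed = name.replaceLast { NameToken(it.body + "_backup") }
    println(renamed)

    // An empty name short-circuits to Name.EMPTY instead of tripping lastOrNull()!!
    println(Name.EMPTY.replaceLast { it } == Name.EMPTY) // true
}
```
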
diff --git a/dataforge-workspace/build.gradle.kts b/dataforge-workspace/build.gradle.kts
index 3e20f40f..28a633c8 100644
--- a/dataforge-workspace/build.gradle.kts
+++ b/dataforge-workspace/build.gradle.kts
@@ -17,6 +17,7 @@ kscience{
     }
     dependencies(jvmTest){
         implementation(spclibs.logback.classic)
+        implementation(projects.dataforgeIo.dataforgeIoYaml)
     }
 }
 
diff --git a/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/fileData.kt b/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/fileData.kt
index 683dd1ea..d9f678b3 100644
--- a/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/fileData.kt
+++ b/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/fileData.kt
@@ -19,6 +19,7 @@ import space.kscience.dataforge.names.Name
 import space.kscience.dataforge.names.NameToken
 import space.kscience.dataforge.names.asName
 import space.kscience.dataforge.names.plus
+import space.kscience.dataforge.workspace.FileData.Companion.DEFAULT_IGNORE_EXTENSIONS
 import java.nio.file.Files
 import java.nio.file.Path
 import java.nio.file.StandardWatchEventKinds
@@ -27,6 +28,7 @@ import java.nio.file.attribute.BasicFileAttributes
 import java.nio.file.spi.FileSystemProvider
 import java.time.Instant
 import kotlin.io.path.extension
+import kotlin.io.path.name
 import kotlin.io.path.nameWithoutExtension
 import kotlin.io.path.readAttributes
 import kotlin.reflect.KType
@@ -54,6 +56,8 @@ public class FileData internal constructor(private val data: Data, public
         public val FILE_EXTENSION_KEY: Name = FILE_KEY + "extension"
         public val FILE_CREATE_TIME_KEY: Name = FILE_KEY + "created"
         public val FILE_UPDATE_TIME_KEY: Name = FILE_KEY + "updated"
+        public const val DF_FILE_EXTENSION: String = "df"
+        public val DEFAULT_IGNORE_EXTENSIONS: Set<String> = setOf(DF_FILE_EXTENSION)
     }
 }
 
@@ -88,13 +92,17 @@ public fun IOPlugin.readDataFile(
 
 context(IOPlugin)
 @DFExperimental
-private fun <T : Any> DataSetBuilder<T>.directory(path: Path, formatResolver: FileFormatResolver<T>) {
+private fun <T : Any> DataSetBuilder<T>.directory(
+    path: Path,
+    ignoreExtensions: Set<String>,
+    formatResolver: FileFormatResolver<T>,
+) {
     Files.list(path).forEach { childPath ->
         val fileName = childPath.fileName.toString()
         if (fileName.startsWith(IOPlugin.META_FILE_NAME)) {
             meta(readMetaFile(childPath))
         } else if (!fileName.startsWith("@")) {
-            file(childPath, formatResolver)
+            file(childPath, ignoreExtensions, formatResolver)
         }
     }
 }
@@ -107,6 +115,7 @@ private fun DataSetBuilder.directory(path: Path, formatResolver: Fi
 public fun <T : Any> IOPlugin.readDataDirectory(
     type: KType,
     path: Path,
+    ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
     formatResolver: FileFormatResolver<T>,
 ): DataTree<T> {
     //read zipped data node
@@ -116,14 +125,14 @@ public fun IOPlugin.readDataDirectory(
             ?: error("Zip file system provider not found")
         val fs = fsProvider.newFileSystem(path, mapOf("create" to "true"))
-        return readDataDirectory(type, fs.rootDirectories.first(), formatResolver)
+        return readDataDirectory(type, fs.rootDirectories.first(), ignoreExtensions, formatResolver)
     }
 
     if (!Files.isDirectory(path)) error("Provided path $path is not a directory")
     return DataTree(type) {
         meta {
-                FileData.FILE_PATH_KEY put path.toString()
+            FileData.FILE_PATH_KEY put path.toString()
         }
-        directory(path, formatResolver)
+        directory(path, ignoreExtensions, formatResolver)
     }
 }
 
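
For clarity (not part of the patch), the rule driven by `DEFAULT_IGNORE_EXTENSIONS` can be mirrored in isolation: only extensions from the ignore set are stripped when deriving an item name, so dots inside file names no longer split or truncate it. `itemName` below is a hypothetical stand-in for the `defaultPath()` helper introduced further down in `DataSetBuilder.file`.

```kotlin
import java.nio.file.Path
import kotlin.io.path.extension
import kotlin.io.path.name
import kotlin.io.path.nameWithoutExtension

// Hypothetical mirror of the naming rule: strip the extension only when it is in the ignore set.
fun itemName(path: Path, ignoreExtensions: Set<String> = setOf("df")): String =
    if (path.extension in ignoreExtensions) path.nameWithoutExtension else path.name

fun main() {
    println(itemName(Path.of("spectrum.df")))    // spectrum
    println(itemName(Path.of("picture.v1.png"))) // picture.v1.png (dots are preserved)
}
```
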
@@ -131,8 +140,9 @@ public fun IOPlugin.readDataDirectory(
 @DFExperimental
 public inline fun <reified T : Any> IOPlugin.readDataDirectory(
     path: Path,
+    ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
     noinline formatResolver: FileFormatResolver<T>,
-): DataTree<T> = readDataDirectory(typeOf<T>(), path, formatResolver)
+): DataTree<T> = readDataDirectory(typeOf<T>(), path, ignoreExtensions, formatResolver)
 
 /**
  * Read raw binary data tree from the directory. All files are read as-is (save for meta files).
@@ -140,7 +150,8 @@ public fun IOPlugin.readDataDirectory(
 @DFExperimental
 public fun IOPlugin.readRawDirectory(
     path: Path,
-): DataTree<Binary> = readDataDirectory(path) { _, _ -> IOReader.binary }
+    ignoreExtensions: Set<String> = emptySet(),
+): DataTree<Binary> = readDataDirectory(path, ignoreExtensions) { _, _ -> IOReader.binary }
 
 
 private fun Path.toName() = Name(map { NameToken.parse(it.nameWithoutExtension) })
@@ -150,12 +161,13 @@ private fun Path.toName() = Name(map { NameToken.parse(it.nameWithoutExtension) })
 public fun <T : Any> IOPlugin.monitorDataDirectory(
     type: KType,
     path: Path,
+    ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
     formatResolver: FileFormatResolver<T>,
 ): DataSource<T> {
     if (path.fileName.toString().endsWith(".zip")) error("Monitoring not supported for ZipFS")
     if (!Files.isDirectory(path)) error("Provided path $path is not a directory")
     return DataSource(type, context) {
-        directory(path, formatResolver)
+        directory(path, ignoreExtensions, formatResolver)
 
         launch(Dispatchers.IO) {
             val watchService = path.fileSystem.newWatchService()
@@ -178,7 +190,7 @@ public fun IOPlugin.monitorDataDirectory(
                     if (fileName.startsWith(IOPlugin.META_FILE_NAME)) {
                         meta(readMetaFile(eventPath))
                     } else if (!fileName.startsWith("@")) {
-                        file(eventPath, formatResolver)
+                        file(eventPath, ignoreExtensions, formatResolver)
                     }
                 }
             }
@@ -197,8 +209,9 @@ public fun IOPlugin.monitorDataDirectory(
 @DFExperimental
 public inline fun <reified T : Any> IOPlugin.monitorDataDirectory(
     path: Path,
+    ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
     noinline formatResolver: FileFormatResolver<T>,
-): DataSource<T> = monitorDataDirectory(typeOf<T>(), path, formatResolver)
+): DataSource<T> = monitorDataDirectory(typeOf<T>(), path, ignoreExtensions, formatResolver)
 
 /**
  * Read and monitor raw binary data tree from the directory. All files are read as-is (save for meta files).
@@ -206,7 +219,8 @@ public inline fun IOPlugin.monitorDataDirectory(
 @DFExperimental
 public fun IOPlugin.monitorRawDirectory(
     path: Path,
-): DataSource<Binary> = monitorDataDirectory(path) { _, _ -> IOReader.binary }
+    ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
+): DataSource<Binary> = monitorDataDirectory(path, ignoreExtensions) { _, _ -> IOReader.binary }
 
 /**
  * Write data tree to existing directory or create a new one using default [java.nio.file.FileSystem] provider
@@ -248,14 +262,20 @@ public suspend fun IOPlugin.writeDataDirectory(
 
 /**
  * Add file/directory-based data tree item
+ *
+ * @param ignoreExtensions a set of file extensions for which the extension is stripped from the resulting item name
  */
 context(IOPlugin)
 @OptIn(DFInternal::class)
 @DFExperimental
 public fun <T : Any> DataSetBuilder<T>.file(
     path: Path,
+    ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
     formatResolver: FileFormatResolver<T>,
 ) {
+
+    fun defaultPath() = if (path.extension in ignoreExtensions) path.nameWithoutExtension else path.name
+
     try {
         //If path is a single file or a special directory, read it as single datum
         if (!Files.isDirectory(path) || Files.list(path).allMatch { it.fileName.toString().startsWith("@") }) {
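
A usage sketch for the new `ignoreExtensions` parameter (not part of the patch; the `data` directory is hypothetical). It assumes a JVM context with the workspace file APIs shown above and the `@DFExperimental` opt-in they require.

```kotlin
import space.kscience.dataforge.context.Global
import space.kscience.dataforge.io.io
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.workspace.FileData
import space.kscience.dataforge.workspace.readRawDirectory
import java.nio.file.Path

@OptIn(DFExperimental::class)
fun main() {
    // Raw reading keeps every file name as-is by default (ignoreExtensions = emptySet()).
    val raw = Global.io.readRawDirectory(Path.of("data"))

    // Explicitly strip the DataForge extension, so "spectrum.df" becomes the item "spectrum".
    val stripped = Global.io.readRawDirectory(Path.of("data"), FileData.DEFAULT_IGNORE_EXTENSIONS)

    println(raw.items.keys)
    println(stripped.items.keys)
}
```
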
@@ -264,13 +284,13 @@ public fun DataSetBuilder.file(
                 logger.warn { "File format is not resolved for $path. Skipping." }
                 return
             }
-            val name = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: path.nameWithoutExtension
-            data(name, data)
+            val name: String = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: defaultPath()
+            data(name.asName(), data)
         } else {
             //otherwise, read as directory
-            val data: DataTree<T> = readDataDirectory(dataType, path, formatResolver)
-            val name = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: path.nameWithoutExtension
-            node(name, data)
+            val data: DataTree<T> = readDataDirectory(dataType, path, ignoreExtensions, formatResolver)
+            val name = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: defaultPath()
+            node(name.asName(), data)
         }
     } catch (ex: Exception) {
         logger.error { "Failed to read file or directory at $path: ${ex.message}" }
diff --git a/dataforge-workspace/src/jvmTest/kotlin/space/kscience/dataforge/workspace/FileDataTest.kt b/dataforge-workspace/src/jvmTest/kotlin/space/kscience/dataforge/workspace/FileDataTest.kt
index 6bfde195..6dfb8fb6 100644
--- a/dataforge-workspace/src/jvmTest/kotlin/space/kscience/dataforge/workspace/FileDataTest.kt
+++ b/dataforge-workspace/src/jvmTest/kotlin/space/kscience/dataforge/workspace/FileDataTest.kt
@@ -3,15 +3,16 @@ package space.kscience.dataforge.workspace
 import io.ktor.utils.io.core.Input
 import io.ktor.utils.io.core.Output
 import kotlinx.coroutines.runBlocking
+import space.kscience.dataforge.context.Context
 import space.kscience.dataforge.context.Global
 import space.kscience.dataforge.data.*
-import space.kscience.dataforge.io.IOFormat
-import space.kscience.dataforge.io.io
-import space.kscience.dataforge.io.readUtf8String
-import space.kscience.dataforge.io.writeUtf8String
+import space.kscience.dataforge.io.*
+import space.kscience.dataforge.io.yaml.YamlPlugin
 import space.kscience.dataforge.meta.get
 import space.kscience.dataforge.misc.DFExperimental
 import java.nio.file.Files
+import kotlin.io.path.fileSize
+import kotlin.io.path.toPath
 import kotlin.reflect.KType
 import kotlin.reflect.typeOf
 import kotlin.test.Test
@@ -44,32 +45,38 @@ class FileDataTest {
 
     @Test
     @DFExperimental
-    fun testDataWriteRead() {
-        Global.io.run {
-            val dir = Files.createTempDirectory("df_data_node")
-            runBlocking {
-                writeDataDirectory(dir, dataNode, StringIOFormat)
-                println(dir.toUri().toString())
-                val reconstructed = readDataDirectory(dir) { _, _ -> StringIOFormat }
-                assertEquals(dataNode["dir.a"]?.meta?.get("content"), reconstructed["dir.a"]?.meta?.get("content"))
-                assertEquals(dataNode["b"]?.await(), reconstructed["b"]?.await())
-            }
+    fun testDataWriteRead() = with(Global.io) {
+        val dir = Files.createTempDirectory("df_data_node")
+        runBlocking {
+            writeDataDirectory(dir, dataNode, StringIOFormat)
+            println(dir.toUri().toString())
+            val reconstructed = readDataDirectory(dir) { _, _ -> StringIOFormat }
+            assertEquals(dataNode["dir.a"]?.meta?.get("content"), reconstructed["dir.a"]?.meta?.get("content"))
+            assertEquals(dataNode["b"]?.await(), reconstructed["b"]?.await())
         }
     }
 
 
     @Test
     @DFExperimental
-    fun testZipWriteRead() {
-        Global.io.run {
-            val zip = Files.createTempFile("df_data_node", ".zip")
-            runBlocking {
-                dataNode.writeZip(zip, StringIOFormat)
-                println(zip.toUri().toString())
-                val reconstructed = readDataDirectory(zip) { _, _ -> StringIOFormat }
-                assertEquals(dataNode["dir.a"]?.meta?.get("content"), reconstructed["dir.a"]?.meta?.get("content"))
-                assertEquals(dataNode["b"]?.await(), reconstructed["b"]?.await())
-            }
+    fun testZipWriteRead() = with(Global.io) {
+        val zip = Files.createTempFile("df_data_node", ".zip")
+        runBlocking {
+            dataNode.writeZip(zip, StringIOFormat)
+            println(zip.toUri().toString())
+            val reconstructed = readDataDirectory(zip) { _, _ -> StringIOFormat }
+            assertEquals(dataNode["dir.a"]?.meta?.get("content"), reconstructed["dir.a"]?.meta?.get("content"))
+            assertEquals(dataNode["b"]?.await(), reconstructed["b"]?.await())
         }
     }
+
+    @Test
+    fun testNonEnvelope() {
+        val context = Context {
+            plugin(YamlPlugin)
+        }
+        val resource = javaClass.classLoader.getResource("SPC.png")!!
+        val data: Envelope = context.io.readEnvelopeFile(resource.toURI().toPath(), true)
+        assertEquals(resource.toURI().toPath().fileSize(), data.data?.size?.toLong())
+    }
 }
\ No newline at end of file
diff --git a/dataforge-workspace/src/jvmTest/resources/SPC.png b/dataforge-workspace/src/jvmTest/resources/SPC.png
new file mode 100644
index 00000000..b8164b68
Binary files /dev/null and b/dataforge-workspace/src/jvmTest/resources/SPC.png differ
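
The new test above exercises the front-matter fix in `FrontMatterEnvelopeFormat.peekFormat`: format probing now reads only three raw bytes, so binary files no longer break UTF-8 decoding. A rough standalone sketch of the same behaviour (not part of the patch; `SPC.png` stands in for any local binary file, and installing `YamlPlugin` mirrors the test setup):

```kotlin
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.io.*
import space.kscience.dataforge.io.yaml.YamlPlugin
import kotlin.io.path.Path
import kotlin.io.path.fileSize

fun main() {
    val context = Context {
        plugin(YamlPlugin)
    }
    val path = Path("SPC.png")
    // `true` enables the non-envelope fallback: the file is wrapped into a data-only envelope.
    val envelope: Envelope = context.io.readEnvelopeFile(path, true)
    println(envelope.data?.size?.toLong() == path.fileSize()) // true: the whole file becomes the payload
}
```
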