Fixes in Envelope format and data tree

This commit is contained in:
Alexander Nozik 2023-03-27 09:45:51 +03:00
parent 29fa30fb51
commit 2c2f33427a
11 changed files with 98 additions and 56 deletions

View File

@ -11,6 +11,7 @@
- More fine-grained types in Action builders.
### Changed
- `Name::replaceLast` API
- `PluginFactory` no longer requires plugin class
- Collection<Named> toMap -> associateByName
- Simplified `DFTL` envelope format. Closing symbols are unnecessary. Properties are discontinued.
@ -36,6 +37,8 @@
### Removed
### Fixed
- `readDataDirectory` does not split names with dots
- Front matter reader does not crash on non-UTF files
- Meta file name in readMeta from directory
- Tagless and FrontMatter envelope partial readers fix.

View File

@ -9,7 +9,7 @@ plugins {
allprojects {
group = "space.kscience"
version = "0.6.1-dev-5"
version = "0.6.1-dev-6"
}
subprojects {

View File

@ -107,7 +107,7 @@ public fun <T : Any> DataSet<T>.startAll(coroutineScope: CoroutineScope): Job =
}.joinAll()
}
public suspend fun <T : Any> DataSet<T>.join(): Unit = coroutineScope { startAll(this).join() }
public suspend fun <T : Any> DataSet<T>.computeAndJoinAll(): Unit = coroutineScope { startAll(this).join() }
public fun DataSet<*>.toMeta(): Meta = Meta {
forEach {

View File

@ -1,6 +1,7 @@
package space.kscience.dataforge.data
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFInternal
import space.kscience.dataforge.misc.Type
import space.kscience.dataforge.names.*
import kotlin.collections.component1
@ -65,11 +66,16 @@ public interface DataTree<out T : Any> : DataSet<T> {
*/
public val META_ITEM_NAME_TOKEN: NameToken = NameToken("@meta")
public inline fun <reified T : Any> empty(meta: Meta = Meta.EMPTY): DataTree<T> = object : DataTree<T> {
@DFInternal
public fun <T : Any> emptyWithType(type: KType, meta: Meta = Meta.EMPTY): DataTree<T> = object : DataTree<T> {
override val items: Map<NameToken, DataTreeItem<T>> get() = emptyMap()
override val dataType: KType get() = typeOf<T>()
override val dataType: KType get() = type
override val meta: Meta get() = meta
}
@OptIn(DFInternal::class)
public inline fun <reified T : Any> empty(meta: Meta = Meta.EMPTY): DataTree<T> =
emptyWithType<T>(typeOf<T>(), meta)
}
}
@ -106,12 +112,8 @@ public fun <T : Any> DataTree<T>.traverseItems(): Sequence<Pair<Name, DataTreeIt
* Get a branch of this [DataTree] with a given [branchName].
* The difference from similar method for [DataSet] is that internal logic is more simple and the return value is a [DataTree]
*/
public fun <T : Any> DataTree<T>.branch(branchName: Name): DataTree<T> = object : DataTree<T> {
override val dataType: KType get() = this@branch.dataType
@OptIn(DFInternal::class)
public fun <T : Any> DataTree<T>.branch(branchName: Name): DataTree<T> =
getItem(branchName)?.tree ?: DataTree.emptyWithType(dataType)
override val meta: Meta
get() = getItem(branchName)?.meta ?: Meta.EMPTY
override val items: Map<NameToken, DataTreeItem<T>>
get() = getItem(branchName).tree?.items ?: emptyMap()
}
public fun <T : Any> DataTree<T>.branch(branchName: String): DataTree<T> = branch(branchName.parseAsName())

View File

@ -124,4 +124,4 @@ public inline fun <reified T : Any> DataSourceBuilder<T>.emit(
name: String,
parent: CoroutineScope,
noinline block: DataSourceBuilder<T>.() -> Unit,
): Unit = node(Name.parse(name), DataSource(parent, block))
): Unit = node(Name.parse(name), DataSource(parent, block))

View File

@ -68,8 +68,9 @@ public class FrontMatterEnvelopeFormat(
}
override fun peekFormat(io: IOPlugin, binary: Binary): EnvelopeFormat? = binary.read {
val line = readSafeUtf8Line()
return@read if (line.startsWith("---")) {
//read raw string to avoid UTF issues
val line = readRawString(3)
return@read if (line == "---") {
default
} else {
null

View File

@ -144,6 +144,15 @@ public fun Name.firstOrNull(): NameToken? = tokens.firstOrNull()
*/
public fun Name.first(): NameToken = tokens.first()
/**
 * Return a [Name] with its last token replaced via [replacement] rule.
 * If initial [Name] is empty, return empty name.
 *
 * @param replacement a function that receives the current last token and produces the token to put in its place.
 * It is not invoked when the name is empty.
 * @return the name without its last token (see [cutLast]) followed by the token produced by [replacement],
 * or [Name.EMPTY] if this name has no tokens.
 */
public fun Name.replaceLast(replacement: (NameToken) -> NameToken): Name {
    // A missing last token means the name is empty; bail out without a non-null assertion
    val last = lastOrNull() ?: return Name.EMPTY
    return cutLast() + replacement(last)
}
/**
* Convert the [String] to a [Name] by simply wrapping it in a single name token without parsing.
@ -227,7 +236,6 @@ public fun Name.removeFirstOrNull(first: Name): Name? = if (startsWith(first)) {
null
}
@ThreadLocal
private val nameCache = HashMap<String, Name>()

View File

@ -17,6 +17,7 @@ kscience{
}
dependencies(jvmTest){
implementation(spclibs.logback.classic)
implementation(projects.dataforgeIo.dataforgeIoYaml)
}
}

View File

@ -19,6 +19,7 @@ import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.NameToken
import space.kscience.dataforge.names.asName
import space.kscience.dataforge.names.plus
import space.kscience.dataforge.workspace.FileData.Companion.DEFAULT_IGNORE_EXTENSIONS
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.StandardWatchEventKinds
@ -27,6 +28,7 @@ import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.spi.FileSystemProvider
import java.time.Instant
import kotlin.io.path.extension
import kotlin.io.path.name
import kotlin.io.path.nameWithoutExtension
import kotlin.io.path.readAttributes
import kotlin.reflect.KType
@ -54,6 +56,8 @@ public class FileData<T> internal constructor(private val data: Data<T>, public
public val FILE_EXTENSION_KEY: Name = FILE_KEY + "extension"
public val FILE_CREATE_TIME_KEY: Name = FILE_KEY + "created"
public val FILE_UPDATE_TIME_KEY: Name = FILE_KEY + "updated"
public const val DF_FILE_EXTENSION: String = "df"
public val DEFAULT_IGNORE_EXTENSIONS: Set<String> = setOf(DF_FILE_EXTENSION)
}
}
@ -88,13 +92,17 @@ public fun <T : Any> IOPlugin.readDataFile(
context(IOPlugin) @DFExperimental
private fun <T : Any> DataSetBuilder<T>.directory(path: Path, formatResolver: FileFormatResolver<T>) {
private fun <T : Any> DataSetBuilder<T>.directory(
path: Path,
ignoreExtensions: Set<String>,
formatResolver: FileFormatResolver<T>,
) {
Files.list(path).forEach { childPath ->
val fileName = childPath.fileName.toString()
if (fileName.startsWith(IOPlugin.META_FILE_NAME)) {
meta(readMetaFile(childPath))
} else if (!fileName.startsWith("@")) {
file(childPath, formatResolver)
file(childPath, ignoreExtensions, formatResolver)
}
}
}
@ -107,6 +115,7 @@ private fun <T : Any> DataSetBuilder<T>.directory(path: Path, formatResolver: Fi
public fun <T : Any> IOPlugin.readDataDirectory(
type: KType,
path: Path,
ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
formatResolver: FileFormatResolver<T>,
): DataTree<T> {
//read zipped data node
@ -116,14 +125,14 @@ public fun <T : Any> IOPlugin.readDataDirectory(
?: error("Zip file system provider not found")
val fs = fsProvider.newFileSystem(path, mapOf("create" to "true"))
return readDataDirectory(type, fs.rootDirectories.first(), formatResolver)
return readDataDirectory(type, fs.rootDirectories.first(), ignoreExtensions, formatResolver)
}
if (!Files.isDirectory(path)) error("Provided path $path is not a directory")
return DataTree(type) {
meta {
FileData.FILE_PATH_KEY put path.toString()
FileData.FILE_PATH_KEY put path.toString()
}
directory(path, formatResolver)
directory(path, ignoreExtensions, formatResolver)
}
}
@ -131,8 +140,9 @@ public fun <T : Any> IOPlugin.readDataDirectory(
@DFExperimental
public inline fun <reified T : Any> IOPlugin.readDataDirectory(
path: Path,
ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
noinline formatResolver: FileFormatResolver<T>,
): DataTree<T> = readDataDirectory(typeOf<T>(), path, formatResolver)
): DataTree<T> = readDataDirectory(typeOf<T>(), path, ignoreExtensions, formatResolver)
/**
* Read raw binary data tree from the directory. All files are read as-is (save for meta files).
@ -140,7 +150,8 @@ public inline fun <reified T : Any> IOPlugin.readDataDirectory(
@DFExperimental
public fun IOPlugin.readRawDirectory(
path: Path,
): DataTree<Binary> = readDataDirectory(path) { _, _ -> IOReader.binary }
ignoreExtensions: Set<String> = emptySet(),
): DataTree<Binary> = readDataDirectory(path, ignoreExtensions) { _, _ -> IOReader.binary }
private fun Path.toName() = Name(map { NameToken.parse(it.nameWithoutExtension) })
@ -150,12 +161,13 @@ private fun Path.toName() = Name(map { NameToken.parse(it.nameWithoutExtension)
public fun <T : Any> IOPlugin.monitorDataDirectory(
type: KType,
path: Path,
ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
formatResolver: FileFormatResolver<T>,
): DataSource<T> {
if (path.fileName.toString().endsWith(".zip")) error("Monitoring not supported for ZipFS")
if (!Files.isDirectory(path)) error("Provided path $path is not a directory")
return DataSource(type, context) {
directory(path, formatResolver)
directory(path, ignoreExtensions, formatResolver)
launch(Dispatchers.IO) {
val watchService = path.fileSystem.newWatchService()
@ -178,7 +190,7 @@ public fun <T : Any> IOPlugin.monitorDataDirectory(
if (fileName.startsWith(IOPlugin.META_FILE_NAME)) {
meta(readMetaFile(eventPath))
} else if (!fileName.startsWith("@")) {
file(eventPath, formatResolver)
file(eventPath, ignoreExtensions, formatResolver)
}
}
}
@ -197,8 +209,9 @@ public fun <T : Any> IOPlugin.monitorDataDirectory(
@DFExperimental
public inline fun <reified T : Any> IOPlugin.monitorDataDirectory(
path: Path,
ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
noinline formatResolver: FileFormatResolver<T>,
): DataSource<T> = monitorDataDirectory(typeOf<T>(), path, formatResolver)
): DataSource<T> = monitorDataDirectory(typeOf<T>(), path, ignoreExtensions, formatResolver)
/**
* Read and monitor raw binary data tree from the directory. All files are read as-is (save for meta files).
@ -206,7 +219,8 @@ public inline fun <reified T : Any> IOPlugin.monitorDataDirectory(
@DFExperimental
public fun IOPlugin.monitorRawDirectory(
path: Path,
): DataSource<Binary> = monitorDataDirectory(path) { _, _ -> IOReader.binary }
ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
): DataSource<Binary> = monitorDataDirectory(path, ignoreExtensions) { _, _ -> IOReader.binary }
/**
* Write data tree to existing directory or create a new one using default [java.nio.file.FileSystem] provider
@ -248,14 +262,20 @@ public suspend fun <T : Any> IOPlugin.writeDataDirectory(
/**
* Add file/directory-based data tree item
*
* @param ignoreExtensions a set of file extensions that are cut from the resulting item name; files with any other extension keep their full file name
*/
context(IOPlugin)
@OptIn(DFInternal::class)
@DFExperimental
public fun <T : Any> DataSetBuilder<T>.file(
path: Path,
ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
formatResolver: FileFormatResolver<out T>,
) {
fun defaultPath() = if (path.extension in ignoreExtensions) path.nameWithoutExtension else path.name
try {
//If path is a single file or a special directory, read it as single datum
if (!Files.isDirectory(path) || Files.list(path).allMatch { it.fileName.toString().startsWith("@") }) {
@ -264,13 +284,13 @@ public fun <T : Any> DataSetBuilder<T>.file(
logger.warn { "File format is not resolved for $path. Skipping." }
return
}
val name = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: path.nameWithoutExtension
data(name, data)
val name: String = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: defaultPath()
data(name.asName(), data)
} else {
//otherwise, read as directory
val data: DataTree<T> = readDataDirectory(dataType, path, formatResolver)
val name = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: path.nameWithoutExtension
node(name, data)
val data: DataTree<T> = readDataDirectory(dataType, path, ignoreExtensions, formatResolver)
val name = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: defaultPath()
node(name.asName(), data)
}
} catch (ex: Exception) {
logger.error { "Failed to read file or directory at $path: ${ex.message}" }

View File

@ -3,15 +3,16 @@ package space.kscience.dataforge.workspace
import io.ktor.utils.io.core.Input
import io.ktor.utils.io.core.Output
import kotlinx.coroutines.runBlocking
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.context.Global
import space.kscience.dataforge.data.*
import space.kscience.dataforge.io.IOFormat
import space.kscience.dataforge.io.io
import space.kscience.dataforge.io.readUtf8String
import space.kscience.dataforge.io.writeUtf8String
import space.kscience.dataforge.io.*
import space.kscience.dataforge.io.yaml.YamlPlugin
import space.kscience.dataforge.meta.get
import space.kscience.dataforge.misc.DFExperimental
import java.nio.file.Files
import kotlin.io.path.fileSize
import kotlin.io.path.toPath
import kotlin.reflect.KType
import kotlin.reflect.typeOf
import kotlin.test.Test
@ -44,32 +45,38 @@ class FileDataTest {
@Test
@DFExperimental
fun testDataWriteRead() {
Global.io.run {
val dir = Files.createTempDirectory("df_data_node")
runBlocking {
writeDataDirectory(dir, dataNode, StringIOFormat)
println(dir.toUri().toString())
val reconstructed = readDataDirectory(dir) { _, _ -> StringIOFormat }
assertEquals(dataNode["dir.a"]?.meta?.get("content"), reconstructed["dir.a"]?.meta?.get("content"))
assertEquals(dataNode["b"]?.await(), reconstructed["b"]?.await())
}
fun testDataWriteRead() = with(Global.io) {
val dir = Files.createTempDirectory("df_data_node")
runBlocking {
writeDataDirectory(dir, dataNode, StringIOFormat)
println(dir.toUri().toString())
val reconstructed = readDataDirectory(dir) { _, _ -> StringIOFormat }
assertEquals(dataNode["dir.a"]?.meta?.get("content"), reconstructed["dir.a"]?.meta?.get("content"))
assertEquals(dataNode["b"]?.await(), reconstructed["b"]?.await())
}
}
@Test
@DFExperimental
fun testZipWriteRead() {
Global.io.run {
val zip = Files.createTempFile("df_data_node", ".zip")
runBlocking {
dataNode.writeZip(zip, StringIOFormat)
println(zip.toUri().toString())
val reconstructed = readDataDirectory(zip) { _, _ -> StringIOFormat }
assertEquals(dataNode["dir.a"]?.meta?.get("content"), reconstructed["dir.a"]?.meta?.get("content"))
assertEquals(dataNode["b"]?.await(), reconstructed["b"]?.await())
}
fun testZipWriteRead() = with(Global.io) {
val zip = Files.createTempFile("df_data_node", ".zip")
runBlocking {
dataNode.writeZip(zip, StringIOFormat)
println(zip.toUri().toString())
val reconstructed = readDataDirectory(zip) { _, _ -> StringIOFormat }
assertEquals(dataNode["dir.a"]?.meta?.get("content"), reconstructed["dir.a"]?.meta?.get("content"))
assertEquals(dataNode["b"]?.await(), reconstructed["b"]?.await())
}
}
@Test
fun testNonEnvelope() {
val context = Context {
plugin(YamlPlugin)
}
val resource = javaClass.classLoader.getResource("SPC.png")!!
val data: Envelope = context.io.readEnvelopeFile(resource.toURI().toPath(), true)
assertEquals(resource.toURI().toPath().fileSize(), data.data?.size?.toLong())
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB