Fixes in Envelope format and data tree
This commit is contained in:
parent
29fa30fb51
commit
2c2f33427a
@@ -11,6 +11,7 @@
- More fine-grained types in Action builders.

### Changed
- `Name::replaceLast` API
- `PluginFactory` no longer requires plugin class
- Collection<Named> toMap -> associateByName
- Simplified `DFTL` envelope format. Closing symbols are unnecessary. Properties are discontinued.
@@ -36,6 +37,8 @@
### Removed

### Fixed
- `readDataDirectory` does not split names with dots
- Front matter reader does not crash on non-UTF files
- Meta file name in readMeta from directory
- Tagless and FrontMatter envelope partial readers fix.

@@ -9,7 +9,7 @@ plugins {

allprojects {
    group = "space.kscience"
    version = "0.6.1-dev-5"
    version = "0.6.1-dev-6"
}

subprojects {
@@ -107,7 +107,7 @@ public fun <T : Any> DataSet<T>.startAll(coroutineScope: CoroutineScope): Job =
    }.joinAll()
}

public suspend fun <T : Any> DataSet<T>.join(): Unit = coroutineScope { startAll(this).join() }
public suspend fun <T : Any> DataSet<T>.computeAndJoinAll(): Unit = coroutineScope { startAll(this).join() }

public fun DataSet<*>.toMeta(): Meta = Meta {
    forEach {
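The hunk above renames `DataSet.join()` to `computeAndJoinAll()`, which better describes what it does: start every lazy computation in the set and suspend until all of them finish. A minimal usage sketch, not part of the commit (the package for the import is an assumption):

```kotlin
import kotlinx.coroutines.runBlocking
import space.kscience.dataforge.data.DataSet
import space.kscience.dataforge.data.computeAndJoinAll

// Blocks the calling thread until every lazy Data in the set has been computed.
fun <T : Any> awaitAllBlocking(dataSet: DataSet<T>): Unit = runBlocking {
    dataSet.computeAndJoinAll()
}
```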
@@ -1,6 +1,7 @@
package space.kscience.dataforge.data

import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFInternal
import space.kscience.dataforge.misc.Type
import space.kscience.dataforge.names.*
import kotlin.collections.component1
@@ -65,11 +66,16 @@ public interface DataTree<out T : Any> : DataSet<T> {
         */
        public val META_ITEM_NAME_TOKEN: NameToken = NameToken("@meta")

        public inline fun <reified T : Any> empty(meta: Meta = Meta.EMPTY): DataTree<T> = object : DataTree<T> {
        @DFInternal
        public fun <T : Any> emptyWithType(type: KType, meta: Meta = Meta.EMPTY): DataTree<T> = object : DataTree<T> {
            override val items: Map<NameToken, DataTreeItem<T>> get() = emptyMap()
            override val dataType: KType get() = typeOf<T>()
            override val dataType: KType get() = type
            override val meta: Meta get() = meta
        }

        @OptIn(DFInternal::class)
        public inline fun <reified T : Any> empty(meta: Meta = Meta.EMPTY): DataTree<T> =
            emptyWithType<T>(typeOf<T>(), meta)
    }
}

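The hunk above splits the empty-tree builder in two: a type-token based `emptyWithType` (internal API) and the reified `empty` that delegates to it. A short sketch of how the two entry points relate, not part of the commit (names are illustrative):

```kotlin
import space.kscience.dataforge.data.DataTree
import space.kscience.dataforge.misc.DFInternal
import kotlin.reflect.typeOf

// Reified entry point: the element type is captured from the type argument.
val emptyIntTree: DataTree<Int> = DataTree.empty<Int>()

// Explicit entry point for callers that only have a KType; requires the internal-API opt-in.
@OptIn(DFInternal::class)
val sameShape: DataTree<Int> = DataTree.emptyWithType(typeOf<Int>())
```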
@@ -106,12 +112,8 @@ public fun <T : Any> DataTree<T>.traverseItems(): Sequence<Pair<Name, DataTreeIt
 * Get a branch of this [DataTree] with a given [branchName].
 * The difference from similar method for [DataSet] is that internal logic is more simple and the return value is a [DataTree]
 */
public fun <T : Any> DataTree<T>.branch(branchName: Name): DataTree<T> = object : DataTree<T> {
    override val dataType: KType get() = this@branch.dataType
@OptIn(DFInternal::class)
public fun <T : Any> DataTree<T>.branch(branchName: Name): DataTree<T> =
    getItem(branchName)?.tree ?: DataTree.emptyWithType(dataType)

    override val meta: Meta
        get() = getItem(branchName)?.meta ?: Meta.EMPTY

    override val items: Map<NameToken, DataTreeItem<T>>
        get() = getItem(branchName).tree?.items ?: emptyMap()
}
public fun <T : Any> DataTree<T>.branch(branchName: String): DataTree<T> = branch(branchName.parseAsName())

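After this rewrite, `branch` simply returns the sub-tree stored under the given name, falling back to an empty tree with the same `dataType` when the branch is absent. An illustrative sketch, not part of the commit (import locations are assumptions):

```kotlin
import space.kscience.dataforge.data.DataTree
import space.kscience.dataforge.data.branch

fun main() {
    // A missing branch no longer produces a lazy wrapper object; it is just an empty tree.
    val missing: DataTree<Int> = DataTree.empty<Int>().branch("a.b")
    println(missing.items.isEmpty()) // true
}
```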
@@ -68,8 +68,9 @@ public class FrontMatterEnvelopeFormat(
    }

    override fun peekFormat(io: IOPlugin, binary: Binary): EnvelopeFormat? = binary.read {
        val line = readSafeUtf8Line()
        return@read if (line.startsWith("---")) {
        //read raw string to avoid UTF issues
        val line = readRawString(3)
        return@read if (line == "---") {
            default
        } else {
            null
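The change above stops decoding a whole line as UTF-8 just to detect the `---` front matter marker; only the first three raw characters are compared. A self-contained sketch of the same idea in plain Kotlin (not the library code):

```kotlin
// Peeking at a fixed three-byte prefix avoids decoding arbitrary (possibly binary,
// non-UTF-8) content as a text line, which is what used to crash the reader.
fun looksLikeFrontMatter(bytes: ByteArray): Boolean =
    bytes.size >= 3 && bytes.copyOfRange(0, 3).decodeToString() == "---"

fun main() {
    println(looksLikeFrontMatter("---\ntitle: test\n---\n".encodeToByteArray())) // true
    println(looksLikeFrontMatter(byteArrayOf(0x89.toByte(), 0x50, 0x4E)))        // false (PNG header)
}
```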
@@ -144,6 +144,15 @@ public fun Name.firstOrNull(): NameToken? = tokens.firstOrNull()
 */
public fun Name.first(): NameToken = tokens.first()

/**
 * Return a [Name] with its last token replaced via [replacement] rule.
 * If initial [Name] is empty, return empty name.
 */
public fun Name.replaceLast(replacement: (NameToken) -> NameToken): Name {
    if (isEmpty()) return Name.EMPTY
    return cutLast() + replacement(lastOrNull()!!)
}


/**
 * Convert the [String] to a [Name] by simply wrapping it in a single name token without parsing.
@@ -227,7 +236,6 @@ public fun Name.removeFirstOrNull(first: Name): Name? = if (startsWith(first)) {
    null
}

@ThreadLocal
private val nameCache = HashMap<String, Name>()

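The new `Name.replaceLast` extension rewrites only the final token of a name. A minimal usage sketch, not part of the commit (import locations assumed from the `space.kscience.dataforge.names` package shown in the diff):

```kotlin
import space.kscience.dataforge.names.NameToken
import space.kscience.dataforge.names.parseAsName
import space.kscience.dataforge.names.replaceLast

fun main() {
    val name = "data.raw.image".parseAsName()
    // Swap the final token while keeping the leading tokens intact.
    val renamed = name.replaceLast { NameToken("preview") }
    println(renamed) // data.raw.preview
}
```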
@@ -17,6 +17,7 @@ kscience{
    }
    dependencies(jvmTest){
        implementation(spclibs.logback.classic)
        implementation(projects.dataforgeIo.dataforgeIoYaml)
    }
}

@@ -19,6 +19,7 @@ import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.NameToken
import space.kscience.dataforge.names.asName
import space.kscience.dataforge.names.plus
import space.kscience.dataforge.workspace.FileData.Companion.DEFAULT_IGNORE_EXTENSIONS
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.StandardWatchEventKinds
@@ -27,6 +28,7 @@ import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.spi.FileSystemProvider
import java.time.Instant
import kotlin.io.path.extension
import kotlin.io.path.name
import kotlin.io.path.nameWithoutExtension
import kotlin.io.path.readAttributes
import kotlin.reflect.KType
@@ -54,6 +56,8 @@ public class FileData<T> internal constructor(private val data: Data<T>, public
        public val FILE_EXTENSION_KEY: Name = FILE_KEY + "extension"
        public val FILE_CREATE_TIME_KEY: Name = FILE_KEY + "created"
        public val FILE_UPDATE_TIME_KEY: Name = FILE_KEY + "updated"
        public const val DF_FILE_EXTENSION: String = "df"
        public val DEFAULT_IGNORE_EXTENSIONS: Set<String> = setOf(DF_FILE_EXTENSION)
    }
}

@@ -88,13 +92,17 @@ public fun <T : Any> IOPlugin.readDataFile(


context(IOPlugin) @DFExperimental
private fun <T : Any> DataSetBuilder<T>.directory(path: Path, formatResolver: FileFormatResolver<T>) {
private fun <T : Any> DataSetBuilder<T>.directory(
    path: Path,
    ignoreExtensions: Set<String>,
    formatResolver: FileFormatResolver<T>,
) {
    Files.list(path).forEach { childPath ->
        val fileName = childPath.fileName.toString()
        if (fileName.startsWith(IOPlugin.META_FILE_NAME)) {
            meta(readMetaFile(childPath))
        } else if (!fileName.startsWith("@")) {
            file(childPath, formatResolver)
            file(childPath, ignoreExtensions, formatResolver)
        }
    }
}
@@ -107,6 +115,7 @@ private fun <T : Any> DataSetBuilder<T>.directory(path: Path, formatResolver: Fi
public fun <T : Any> IOPlugin.readDataDirectory(
    type: KType,
    path: Path,
    ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
    formatResolver: FileFormatResolver<T>,
): DataTree<T> {
    //read zipped data node
@@ -116,14 +125,14 @@ public fun <T : Any> IOPlugin.readDataDirectory(
            ?: error("Zip file system provider not found")
        val fs = fsProvider.newFileSystem(path, mapOf("create" to "true"))

        return readDataDirectory(type, fs.rootDirectories.first(), formatResolver)
        return readDataDirectory(type, fs.rootDirectories.first(), ignoreExtensions, formatResolver)
    }
    if (!Files.isDirectory(path)) error("Provided path $path is not a directory")
    return DataTree(type) {
        meta {
            FileData.FILE_PATH_KEY put path.toString()
        }
        directory(path, formatResolver)
        directory(path, ignoreExtensions, formatResolver)
    }
}

@@ -131,8 +140,9 @@ public fun <T : Any> IOPlugin.readDataDirectory(
@DFExperimental
public inline fun <reified T : Any> IOPlugin.readDataDirectory(
    path: Path,
    ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
    noinline formatResolver: FileFormatResolver<T>,
): DataTree<T> = readDataDirectory(typeOf<T>(), path, formatResolver)
): DataTree<T> = readDataDirectory(typeOf<T>(), path, ignoreExtensions, formatResolver)

/**
 * Read raw binary data tree from the directory. All files are read as-is (save for meta files).
@@ -140,7 +150,8 @@ public inline fun <reified T : Any> IOPlugin.readDataDirectory(
@DFExperimental
public fun IOPlugin.readRawDirectory(
    path: Path,
): DataTree<Binary> = readDataDirectory(path) { _, _ -> IOReader.binary }
    ignoreExtensions: Set<String> = emptySet(),
): DataTree<Binary> = readDataDirectory(path, ignoreExtensions) { _, _ -> IOReader.binary }


private fun Path.toName() = Name(map { NameToken.parse(it.nameWithoutExtension) })
@@ -150,12 +161,13 @@ private fun Path.toName() = Name(map { NameToken.parse(it.nameWithoutExtension)
public fun <T : Any> IOPlugin.monitorDataDirectory(
    type: KType,
    path: Path,
    ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
    formatResolver: FileFormatResolver<T>,
): DataSource<T> {
    if (path.fileName.toString().endsWith(".zip")) error("Monitoring not supported for ZipFS")
    if (!Files.isDirectory(path)) error("Provided path $path is not a directory")
    return DataSource(type, context) {
        directory(path, formatResolver)
        directory(path, ignoreExtensions, formatResolver)
        launch(Dispatchers.IO) {
            val watchService = path.fileSystem.newWatchService()

@@ -178,7 +190,7 @@ public fun <T : Any> IOPlugin.monitorDataDirectory(
                        if (fileName.startsWith(IOPlugin.META_FILE_NAME)) {
                            meta(readMetaFile(eventPath))
                        } else if (!fileName.startsWith("@")) {
                            file(eventPath, formatResolver)
                            file(eventPath, ignoreExtensions, formatResolver)
                        }
                    }
                }
@@ -197,8 +209,9 @@ public fun <T : Any> IOPlugin.monitorDataDirectory(
@DFExperimental
public inline fun <reified T : Any> IOPlugin.monitorDataDirectory(
    path: Path,
    ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
    noinline formatResolver: FileFormatResolver<T>,
): DataSource<T> = monitorDataDirectory(typeOf<T>(), path, formatResolver)
): DataSource<T> = monitorDataDirectory(typeOf<T>(), path, ignoreExtensions, formatResolver)

/**
 * Read and monitor raw binary data tree from the directory. All files are read as-is (save for meta files).
@@ -206,7 +219,8 @@ public inline fun <reified T : Any> IOPlugin.monitorDataDirectory(
@DFExperimental
public fun IOPlugin.monitorRawDirectory(
    path: Path,
): DataSource<Binary> = monitorDataDirectory(path) { _, _ -> IOReader.binary }
    ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
): DataSource<Binary> = monitorDataDirectory(path, ignoreExtensions) { _, _ -> IOReader.binary }

/**
 * Write data tree to existing directory or create a new one using default [java.nio.file.FileSystem] provider
@@ -248,14 +262,20 @@ public suspend fun <T : Any> IOPlugin.writeDataDirectory(

/**
 * Add file/directory-based data tree item
 *
 * @param ignoreExtensions a list of file extensions for which extension should be cut from the resulting item name
 */
context(IOPlugin)
@OptIn(DFInternal::class)
@DFExperimental
public fun <T : Any> DataSetBuilder<T>.file(
    path: Path,
    ignoreExtensions: Set<String> = DEFAULT_IGNORE_EXTENSIONS,
    formatResolver: FileFormatResolver<out T>,
) {

    fun defaultPath() = if (path.extension in ignoreExtensions) path.nameWithoutExtension else path.name

    try {
        //If path is a single file or a special directory, read it as single datum
        if (!Files.isDirectory(path) || Files.list(path).allMatch { it.fileName.toString().startsWith("@") }) {
@@ -264,13 +284,13 @@ public fun <T : Any> DataSetBuilder<T>.file(
                logger.warn { "File format is not resolved for $path. Skipping." }
                return
            }
            val name = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: path.nameWithoutExtension
            data(name, data)
            val name: String = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: defaultPath()
            data(name.asName(), data)
        } else {
            //otherwise, read as directory
            val data: DataTree<T> = readDataDirectory(dataType, path, formatResolver)
            val name = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: path.nameWithoutExtension
            node(name, data)
            val data: DataTree<T> = readDataDirectory(dataType, path, ignoreExtensions, formatResolver)
            val name = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: defaultPath()
            node(name.asName(), data)
        }
    } catch (ex: Exception) {
        logger.error { "Failed to read file or directory at $path: ${ex.message}" }
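The `ignoreExtensions` parameter threaded through the hunks above controls which file extensions are cut from the resulting item names (the defaults are visible in the signatures above). A minimal usage sketch, not part of the commit (the `io` plugin instance and directory are placeholders; import locations assumed):

```kotlin
import space.kscience.dataforge.io.IOPlugin
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.workspace.readRawDirectory
import java.nio.file.Path

// With "df" ignored, "data.df" is registered under the item name "data",
// while "image.png" keeps its full file name as the item name.
@OptIn(DFExperimental::class)
fun readMyData(io: IOPlugin, dir: Path) =
    io.readRawDirectory(dir, ignoreExtensions = setOf("df"))
```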
@@ -3,15 +3,16 @@ package space.kscience.dataforge.workspace

import io.ktor.utils.io.core.Input
import io.ktor.utils.io.core.Output
import kotlinx.coroutines.runBlocking
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.context.Global
import space.kscience.dataforge.data.*
import space.kscience.dataforge.io.IOFormat
import space.kscience.dataforge.io.io
import space.kscience.dataforge.io.readUtf8String
import space.kscience.dataforge.io.writeUtf8String
import space.kscience.dataforge.io.*
import space.kscience.dataforge.io.yaml.YamlPlugin
import space.kscience.dataforge.meta.get
import space.kscience.dataforge.misc.DFExperimental
import java.nio.file.Files
import kotlin.io.path.fileSize
import kotlin.io.path.toPath
import kotlin.reflect.KType
import kotlin.reflect.typeOf
import kotlin.test.Test
@@ -44,8 +45,7 @@ class FileDataTest {

    @Test
    @DFExperimental
    fun testDataWriteRead() {
        Global.io.run {
    fun testDataWriteRead() = with(Global.io) {
        val dir = Files.createTempDirectory("df_data_node")
        runBlocking {
            writeDataDirectory(dir, dataNode, StringIOFormat)
@@ -55,13 +55,11 @@ class FileDataTest {
            assertEquals(dataNode["b"]?.await(), reconstructed["b"]?.await())
        }
    }
    }

    @Test
    @DFExperimental
    fun testZipWriteRead() {
        Global.io.run {
    fun testZipWriteRead() = with(Global.io) {
        val zip = Files.createTempFile("df_data_node", ".zip")
        runBlocking {
            dataNode.writeZip(zip, StringIOFormat)
@@ -71,5 +69,14 @@ class FileDataTest {
            assertEquals(dataNode["b"]?.await(), reconstructed["b"]?.await())
        }
    }

    @Test
    fun testNonEnvelope() {
        val context = Context {
            plugin(YamlPlugin)
        }
        val resource = javaClass.classLoader.getResource("SPC.png")!!
        val data: Envelope = context.io.readEnvelopeFile(resource.toURI().toPath(), true)
        assertEquals(resource.toURI().toPath().fileSize(), data.data?.size?.toLong())
    }
}
BIN dataforge-workspace/src/jvmTest/resources/SPC.png (new file, 22 KiB; binary file not shown)