From 48331288578371c544ef22d3f4f04811add0519b Mon Sep 17 00:00:00 2001 From: Alexander Nozik Date: Sat, 21 May 2022 11:08:59 +0300 Subject: [PATCH] refactor file reading --- .../space/kscience/dataforge/data/Data.kt | 2 +- .../space/kscience/dataforge/io/IOFormat.kt | 4 +- .../kscience/dataforge/workspace/fileData.kt | 128 +++++------------- .../kscience/dataforge/workspace/zipData.kt | 72 ++++++++++ .../dataforge/workspace/FileDataTest.kt | 2 +- 5 files changed, 111 insertions(+), 97 deletions(-) create mode 100644 dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/zipData.kt diff --git a/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/Data.kt b/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/Data.kt index 41182882..c484927f 100644 --- a/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/Data.kt +++ b/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/Data.kt @@ -15,7 +15,7 @@ import kotlin.reflect.typeOf * A data element characterized by its meta */ @Type(Data.TYPE) -public interface Data : Goal, MetaRepr { +public interface Data : Goal, MetaRepr { /** * Type marker for the data. The type is known before the calculation takes place so it could be checked. */ diff --git a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/IOFormat.kt b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/IOFormat.kt index 75c53e70..3a0d4eea 100644 --- a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/IOFormat.kt +++ b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/IOFormat.kt @@ -27,7 +27,7 @@ public interface IOReader { public fun readObject(input: Input): T } -public inline fun IOReader(crossinline read: Input.() -> T): IOReader = object : IOReader { +public inline fun IOReader(crossinline read: Input.() -> T): IOReader = object : IOReader { override val type: KType = typeOf() override fun readObject(input: Input): T = input.read() @@ -41,7 +41,7 @@ public fun interface IOWriter { /** * And interface for reading and writing objects into with IO streams */ -public interface IOFormat : IOReader, IOWriter +public interface IOFormat : IOReader, IOWriter public fun Input.readObject(format: IOReader): T = format.readObject(this@readObject) diff --git a/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/fileData.kt b/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/fileData.kt index c15972b8..67f1ca12 100644 --- a/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/fileData.kt +++ b/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/fileData.kt @@ -1,6 +1,5 @@ package space.kscience.dataforge.workspace -import io.ktor.utils.io.streams.asOutput import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.isActive import kotlinx.coroutines.launch @@ -17,11 +16,13 @@ import space.kscience.dataforge.names.Name import space.kscience.dataforge.names.NameToken import space.kscience.dataforge.names.asName import space.kscience.dataforge.names.plus -import java.nio.file.* +import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.StandardWatchEventKinds +import java.nio.file.WatchEvent import java.nio.file.attribute.BasicFileAttributes import java.nio.file.spi.FileSystemProvider -import java.util.zip.ZipEntry -import java.util.zip.ZipOutputStream +import java.time.Instant import kotlin.io.path.extension import kotlin.io.path.nameWithoutExtension import kotlin.io.path.readAttributes @@ -34,34 +35,20 @@ import kotlin.streams.toList public typealias FileFormatResolver = (path: Path, meta: Meta) -> IOReader -public object FileData { - public val META_FILE_KEY: Name = "file".asName() - public val META_FILE_PATH_KEY: Name = META_FILE_KEY + "path" - public val META_FILE_EXTENSION_KEY: Name = META_FILE_KEY + "extension" - public val META_FILE_CREATE_TIME_KEY: Name = META_FILE_KEY + "created" - public val META_FILE_UPDATE_TIME_KEY: Name = META_FILE_KEY + "update" -} +public class FileData internal constructor(private val data: Data) : Data by data { + public val path: String? get() = meta[META_FILE_PATH_KEY].string + public val extension: String? get() = meta[META_FILE_EXTENSION_KEY].string -@DFInternal -@DFExperimental -public fun IOPlugin.readDataFile( - type: KType, - path: Path, - formatResolver: FileFormatResolver, -): Data { - val envelope = readEnvelopeFile(path, true) - val format = formatResolver(path, envelope.meta) - val updatedMeta = envelope.meta.copy { - FileData.META_FILE_PATH_KEY put path.toString() - FileData.META_FILE_EXTENSION_KEY put path.extension + public val createdTime: Instant? get() = meta[META_FILE_CREATE_TIME_KEY].string?.let { Instant.parse(it) } + public val updatedTime: Instant? get() = meta[META_FILE_UPDATE_TIME_KEY].string?.let { Instant.parse(it) } - val attributes = path.readAttributes() - FileData.META_FILE_UPDATE_TIME_KEY put attributes.lastModifiedTime().toInstant().toString() - FileData.META_FILE_CREATE_TIME_KEY put attributes.creationTime().toInstant().toString() - } - return Data(type, updatedMeta) { - envelope.data?.readWith(format) ?: error("Can't convert envelope without content to Data") + public companion object { + public val META_FILE_KEY: Name = "file".asName() + public val META_FILE_PATH_KEY: Name = META_FILE_KEY + "path" + public val META_FILE_EXTENSION_KEY: Name = META_FILE_KEY + "extension" + public val META_FILE_CREATE_TIME_KEY: Name = META_FILE_KEY + "created" + public val META_FILE_UPDATE_TIME_KEY: Name = META_FILE_KEY + "update" } } @@ -72,10 +59,25 @@ public fun IOPlugin.readDataFile( */ @OptIn(DFInternal::class) @DFExperimental -public inline fun IOPlugin.readDataFile( +public fun IOPlugin.readDataFile( path: Path, - noinline formatResolver: FileFormatResolver, -): Data = readDataFile(typeOf(), path, formatResolver) + formatResolver: FileFormatResolver, +): FileData { + val envelope = readEnvelopeFile(path, true) + val format = formatResolver(path, envelope.meta) + val updatedMeta = envelope.meta.copy { + FileData.META_FILE_PATH_KEY put path.toString() + FileData.META_FILE_EXTENSION_KEY put path.extension + + val attributes = path.readAttributes() + FileData.META_FILE_UPDATE_TIME_KEY put attributes.lastModifiedTime().toInstant().toString() + FileData.META_FILE_CREATE_TIME_KEY put attributes.creationTime().toInstant().toString() + } + return FileData(Data(format.type, updatedMeta) { + envelope.data?.readWith(format) ?: error("Can't convert envelope without content to Data") + }) +} + context(IOPlugin) @DFExperimental private fun DataSetBuilder.directory(path: Path, formatResolver: FileFormatResolver) { @@ -122,7 +124,6 @@ public inline fun IOPlugin.readDataDirectory( ): DataTree = readDataDirectory(typeOf(), path, formatResolver) - @OptIn(DFExperimental::class) private fun Path.toName() = Name(map { NameToken.parse(it.nameWithoutExtension) }) @@ -219,65 +220,6 @@ public suspend fun IOPlugin.writeDataDirectory( } } - -@Suppress("BlockingMethodInNonBlockingContext") -private suspend fun ZipOutputStream.writeNode( - name: String, - treeItem: DataTreeItem, - dataFormat: IOFormat, - envelopeFormat: EnvelopeFormat = TaggedEnvelopeFormat, -) { - withContext(Dispatchers.IO) { - when (treeItem) { - is DataTreeItem.Leaf -> { - //TODO add directory-based envelope writer - val envelope = treeItem.data.toEnvelope(dataFormat) - val entry = ZipEntry(name) - putNextEntry(entry) - asOutput().run { - envelopeFormat.writeEnvelope(this, envelope) - flush() - } - } - is DataTreeItem.Node -> { - val entry = ZipEntry("$name/") - putNextEntry(entry) - closeEntry() - treeItem.tree.items.forEach { (token, item) -> - val childName = "$name/$token" - writeNode(childName, item, dataFormat, envelopeFormat) - } - } - } - } -} - -@DFExperimental -public suspend fun FileData.writeZip( - path: Path, - tree: DataTree, - format: IOFormat, - envelopeFormat: EnvelopeFormat = TaggedEnvelopeFormat, -) { - withContext(Dispatchers.IO) { - val actualFile = if (path.toString().endsWith(".zip")) { - path - } else { - path.resolveSibling(path.fileName.toString() + ".zip") - } - val fos = Files.newOutputStream( - actualFile, - StandardOpenOption.WRITE, - StandardOpenOption.CREATE, - StandardOpenOption.TRUNCATE_EXISTING - ) - val zos = ZipOutputStream(fos) - zos.use { - it.writeNode("", DataTreeItem.Node(tree), format, envelopeFormat) - } - } -} - /** * Add file/directory-based data tree item */ @@ -289,7 +231,7 @@ public fun DataSetBuilder.file( ) { //If path is a single file or a special directory, read it as single datum if (!Files.isDirectory(path) || Files.list(path).allMatch { it.fileName.toString().startsWith("@") }) { - val data = readDataFile(dataType, path, formatResolver) + val data = readDataFile(path, formatResolver) val name = data.meta[Envelope.ENVELOPE_NAME_KEY].string ?: path.nameWithoutExtension data(name, data) } else { diff --git a/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/zipData.kt b/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/zipData.kt new file mode 100644 index 00000000..b596d1cb --- /dev/null +++ b/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/zipData.kt @@ -0,0 +1,72 @@ +package space.kscience.dataforge.workspace + +import io.ktor.utils.io.streams.asOutput +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.withContext +import space.kscience.dataforge.data.DataTree +import space.kscience.dataforge.data.DataTreeItem +import space.kscience.dataforge.io.EnvelopeFormat +import space.kscience.dataforge.io.IOFormat +import space.kscience.dataforge.io.TaggedEnvelopeFormat +import space.kscience.dataforge.misc.DFExperimental +import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.StandardOpenOption +import java.util.zip.ZipEntry +import java.util.zip.ZipOutputStream + + +private suspend fun ZipOutputStream.writeNode( + name: String, + treeItem: DataTreeItem, + dataFormat: IOFormat, + envelopeFormat: EnvelopeFormat = TaggedEnvelopeFormat, +): Unit = withContext(Dispatchers.IO) { + when (treeItem) { + is DataTreeItem.Leaf -> { + //TODO add directory-based envelope writer + val envelope = treeItem.data.toEnvelope(dataFormat) + val entry = ZipEntry(name) + putNextEntry(entry) + asOutput().run { + envelopeFormat.writeEnvelope(this, envelope) + flush() + } + } + is DataTreeItem.Node -> { + val entry = ZipEntry("$name/") + putNextEntry(entry) + closeEntry() + treeItem.tree.items.forEach { (token, item) -> + val childName = "$name/$token" + writeNode(childName, item, dataFormat, envelopeFormat) + } + } + } +} + +/** + * Write this [DataTree] as a zip archive + */ +@DFExperimental +public suspend fun DataTree.writeZip( + path: Path, + format: IOFormat, + envelopeFormat: EnvelopeFormat = TaggedEnvelopeFormat, +): Unit = withContext(Dispatchers.IO) { + val actualFile = if (path.toString().endsWith(".zip")) { + path + } else { + path.resolveSibling(path.fileName.toString() + ".zip") + } + val fos = Files.newOutputStream( + actualFile, + StandardOpenOption.WRITE, + StandardOpenOption.CREATE, + StandardOpenOption.TRUNCATE_EXISTING + ) + val zos = ZipOutputStream(fos) + zos.use { + it.writeNode("", DataTreeItem.Node(this@writeZip), format, envelopeFormat) + } +} \ No newline at end of file diff --git a/dataforge-workspace/src/jvmTest/kotlin/space/kscience/dataforge/workspace/FileDataTest.kt b/dataforge-workspace/src/jvmTest/kotlin/space/kscience/dataforge/workspace/FileDataTest.kt index a048f3fc..6bfde195 100644 --- a/dataforge-workspace/src/jvmTest/kotlin/space/kscience/dataforge/workspace/FileDataTest.kt +++ b/dataforge-workspace/src/jvmTest/kotlin/space/kscience/dataforge/workspace/FileDataTest.kt @@ -64,7 +64,7 @@ class FileDataTest { Global.io.run { val zip = Files.createTempFile("df_data_node", ".zip") runBlocking { - FileData.writeZip(zip, dataNode, StringIOFormat) + dataNode.writeZip(zip, StringIOFormat) println(zip.toUri().toString()) val reconstructed = readDataDirectory(zip) { _, _ -> StringIOFormat } assertEquals(dataNode["dir.a"]?.meta?.get("content"), reconstructed["dir.a"]?.meta?.get("content"))