From 5d7ddb4e001f17564de3d847008b14558dd8a532 Mon Sep 17 00:00:00 2001 From: Alexander Nozik Date: Wed, 25 Jan 2023 18:56:19 +0300 Subject: [PATCH] Simplify DFTL. fix io bugs --- CHANGELOG.md | 1 + build.gradle.kts | 2 +- .../dataforge-io-yaml/build.gradle.kts | 4 +- .../io/yaml/FrontMatterEnvelopeFormat.kt | 61 +++--- .../space/kscience/dataforge/io/Binary.kt | 15 +- .../space/kscience/dataforge/io/Envelope.kt | 2 +- .../kscience/dataforge/io/EnvelopeFormat.kt | 21 -- .../kscience/dataforge/io/EnvelopeParts.kt | 27 +-- .../space/kscience/dataforge/io/IOFormat.kt | 2 + .../dataforge/io/TaggedEnvelopeFormat.kt | 39 ++-- .../dataforge/io/TaglessEnvelopeFormat.kt | 206 ++++++------------ .../space/kscience/dataforge/io/ioMisc.kt | 96 +++----- .../dataforge/io/EnvelopeFormatTest.kt | 68 +++--- .../space/kscience/dataforge/io/IOTest.kt | 6 +- .../kscience/dataforge/io/MultipartTest.kt | 6 +- .../space/kscience/dataforge/io/fileIO.kt | 15 +- .../dataforge/workspace/envelopeData.kt | 2 +- .../kscience/dataforge/workspace/fileData.kt | 8 +- .../kscience/dataforge/workspace/zipData.kt | 2 +- .../workspace/CachingWorkspaceTest.kt | 4 +- gradle.properties | 1 + 21 files changed, 216 insertions(+), 372 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f663d2d7..49956a9e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - More fine-grained types in Action builders. ### Changed +- Simplified `DFTL` envelope format. Closing symbols are unnecessary. Properties are discontinued. - Meta `get` method allows nullable receiver - `withDefault` functions do not add new keys to meta children and are consistent. - `dataforge.meta.values` package is merged into `dataforge.meta` for better star imports diff --git a/build.gradle.kts b/build.gradle.kts index 7ffa0076..ec560a10 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -9,7 +9,7 @@ plugins { allprojects { group = "space.kscience" - version = "0.6.0-dev-15" + version = "0.6.1-dev-1" } subprojects { diff --git a/dataforge-io/dataforge-io-yaml/build.gradle.kts b/dataforge-io/dataforge-io-yaml/build.gradle.kts index f42b2911..abab21ee 100644 --- a/dataforge-io/dataforge-io-yaml/build.gradle.kts +++ b/dataforge-io/dataforge-io-yaml/build.gradle.kts @@ -1,13 +1,13 @@ plugins { id("space.kscience.gradle.mpp") -// id("space.kscience.gradle.native") } description = "YAML meta IO" kscience { + native() useSerialization{ - yamlKt("0.9.0-dev-1") + yamlKt() } } diff --git a/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormat.kt b/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormat.kt index 244284c8..c1366145 100644 --- a/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormat.kt +++ b/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormat.kt @@ -2,8 +2,6 @@ package space.kscience.dataforge.io.yaml import io.ktor.utils.io.core.Input import io.ktor.utils.io.core.Output -import io.ktor.utils.io.core.buildPacket -import io.ktor.utils.io.core.readBytes import space.kscience.dataforge.context.Context import space.kscience.dataforge.context.Global import space.kscience.dataforge.io.* @@ -14,50 +12,46 @@ import space.kscience.dataforge.names.plus public class FrontMatterEnvelopeFormat( private val io: IOPlugin, private val meta: Meta = Meta.EMPTY, + private val metaFormatFactory: MetaFormatFactory = YamlMetaFormat, ) : EnvelopeFormat { - override fun readPartial(input: Input): PartialEnvelope { + override fun readObject(binary: Binary): Envelope = binary.read { var offset = 0 - offset += input.discardWithSeparator( + offset += discardWithSeparator( SEPARATOR.encodeToByteArray(), atMost = 1024, - skipUntilEndOfLine = false ) - val line = input.readSafeUtf8Line() + val line = ByteArray { + offset += readWithSeparatorTo(this, "\n".encodeToByteArray()) + }.decodeToString() + val readMetaFormat = line.trim().takeIf { it.isNotBlank() }?.let { io.resolveMetaFormat(it) } ?: YamlMetaFormat - //TODO replace by preview - val packet = buildPacket { - offset += input.readBytesWithSeparatorTo( - this, - SEPARATOR.encodeToByteArray(), - skipUntilEndOfLine = true - ) + val packet = ByteArray { + offset += readWithSeparatorTo(this, SEPARATOR.encodeToByteArray()) } - val meta = readMetaFormat.readMeta(packet) - return PartialEnvelope(meta, offset, null) + + offset += discardLine() + + val meta = readMetaFormat.readObject(packet.asBinary()) + Envelope(meta, binary.view(offset)) } - override fun readObject(input: Input): Envelope { - val partial = readPartial(input) - val data = input.readBytes().asBinary() - return SimpleEnvelope(partial.meta, data) - } + override fun readObject(input: Input): Envelope = readObject(input.readBinary()) - override fun writeEnvelope( + override fun writeObject( output: Output, - envelope: Envelope, - metaFormatFactory: MetaFormatFactory, - formatMeta: Meta, + obj: Envelope, ) { - val metaFormat = metaFormatFactory.build(this@FrontMatterEnvelopeFormat.io.context, formatMeta) - output.writeRawString("$SEPARATOR\r\n") - metaFormat.run { this.writeObject(output, envelope.meta) } + val metaFormat = metaFormatFactory.build(io.context, meta) + val formatSuffix = if (metaFormat is YamlMetaFormat) "" else metaFormatFactory.shortName + output.writeRawString("$SEPARATOR${formatSuffix}\r\n") + metaFormat.run { metaFormat.writeObject(output, obj.meta) } output.writeRawString("$SEPARATOR\r\n") //Printing data - envelope.data?.let { data -> + obj.data?.let { data -> output.writeBinary(data) } } @@ -84,15 +78,12 @@ public class FrontMatterEnvelopeFormat( private val default by lazy { build(Global, Meta.EMPTY) } - override fun readPartial(input: Input): PartialEnvelope = - default.readPartial(input) + override fun readObject(binary: Binary): Envelope = default.readObject(binary) - override fun writeEnvelope( + override fun writeObject( output: Output, - envelope: Envelope, - metaFormatFactory: MetaFormatFactory, - formatMeta: Meta, - ): Unit = default.writeEnvelope(output, envelope, metaFormatFactory, formatMeta) + obj: Envelope, + ): Unit = default.writeObject(output, obj) override fun readObject(input: Input): Envelope = default.readObject(input) diff --git a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/Binary.kt b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/Binary.kt index e8b2ae16..bcc20064 100644 --- a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/Binary.kt +++ b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/Binary.kt @@ -21,6 +21,12 @@ public interface Binary { public suspend fun readSuspend(offset: Int = 0, atMost: Int = size - offset, block: suspend Input.() -> R): R + /** + * Read a binary with given [offset] relative to this binary and given [binarySize]. + * In general, resulting binary is of the same type as this one, but it is not guaranteed. + */ + public fun view(offset: Int, binarySize: Int = size - offset): Binary + public companion object { public val EMPTY: Binary = ByteArrayBinary(ByteArray(0)) } @@ -57,6 +63,9 @@ internal class ByteArrayBinary( input.close() } } + + override fun view(offset: Int, binarySize: Int): ByteArrayBinary = + ByteArrayBinary(array, start + offset, binarySize) } public fun ByteArray.asBinary(): Binary = ByteArrayBinary(this) @@ -65,7 +74,7 @@ public fun ByteArray.asBinary(): Binary = ByteArrayBinary(this) * Produce a [ByteArray] representing an exact copy of this [Binary] */ public fun Binary.toByteArray(): ByteArray = if (this is ByteArrayBinary) { - array.copyOf() // TODO do we need to ensure data safety here? + array.copyOfRange(start, start + size) // TODO do we need to ensure data safety here? } else { read { readBytes() @@ -73,8 +82,8 @@ public fun Binary.toByteArray(): ByteArray = if (this is ByteArrayBinary) { } //TODO optimize for file-based Inputs -public fun Input.readBinary(size: Int): Binary { - val array = readBytes(size) +public fun Input.readBinary(size: Int? = null): Binary { + val array = if (size == null) readBytes() else readBytes(size) return ByteArrayBinary(array) } diff --git a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/Envelope.kt b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/Envelope.kt index d03932d7..728a0e69 100644 --- a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/Envelope.kt +++ b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/Envelope.kt @@ -34,7 +34,7 @@ public interface Envelope { } } -public class SimpleEnvelope(override val meta: Meta, override val data: Binary?) : Envelope +internal class SimpleEnvelope(override val meta: Meta, override val data: Binary?) : Envelope public fun Envelope(meta: Meta, data: Binary?): Envelope = SimpleEnvelope(meta, data) diff --git a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/EnvelopeFormat.kt b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/EnvelopeFormat.kt index 502bcba6..28b59abb 100644 --- a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/EnvelopeFormat.kt +++ b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/EnvelopeFormat.kt @@ -1,7 +1,6 @@ package space.kscience.dataforge.io import io.ktor.utils.io.core.Input -import io.ktor.utils.io.core.Output import space.kscience.dataforge.context.Context import space.kscience.dataforge.io.EnvelopeFormatFactory.Companion.ENVELOPE_FORMAT_TYPE import space.kscience.dataforge.meta.Meta @@ -11,29 +10,9 @@ import space.kscience.dataforge.names.asName import kotlin.reflect.KType import kotlin.reflect.typeOf -/** - * A partially read envelope with meta, but without data - */ -public data class PartialEnvelope(val meta: Meta, val dataOffset: Int, val dataSize: ULong?) - public interface EnvelopeFormat : IOFormat { override val type: KType get() = typeOf() - - public val defaultMetaFormat: MetaFormatFactory get() = JsonMetaFormat - - public fun readPartial(input: Input): PartialEnvelope - - public fun writeEnvelope( - output: Output, - envelope: Envelope, - metaFormatFactory: MetaFormatFactory = defaultMetaFormat, - formatMeta: Meta = Meta.EMPTY, - ) - - override fun readObject(input: Input): Envelope - - override fun writeObject(output: Output, obj: Envelope): Unit = writeEnvelope(output, obj) } public fun EnvelopeFormat.read(input: Input): Envelope = readObject(input) diff --git a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/EnvelopeParts.kt b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/EnvelopeParts.kt index 72e9dfd4..248ecf19 100644 --- a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/EnvelopeParts.kt +++ b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/EnvelopeParts.kt @@ -1,12 +1,10 @@ package space.kscience.dataforge.io -import space.kscience.dataforge.context.invoke import space.kscience.dataforge.io.Envelope.Companion.ENVELOPE_NODE_KEY import space.kscience.dataforge.io.PartDescriptor.Companion.DEFAULT_MULTIPART_DATA_SEPARATOR import space.kscience.dataforge.io.PartDescriptor.Companion.MULTIPART_DATA_TYPE import space.kscience.dataforge.io.PartDescriptor.Companion.MULTIPART_KEY import space.kscience.dataforge.io.PartDescriptor.Companion.PARTS_KEY -import space.kscience.dataforge.io.PartDescriptor.Companion.PART_FORMAT_KEY import space.kscience.dataforge.io.PartDescriptor.Companion.SEPARATOR_KEY import space.kscience.dataforge.meta.* import space.kscience.dataforge.names.asName @@ -24,8 +22,6 @@ private class PartDescriptor : Scheme() { const val DEFAULT_MULTIPART_DATA_SEPARATOR = "\r\n#~PART~#\r\n" - val PART_FORMAT_KEY = "format".asName() - const val MULTIPART_DATA_TYPE = "envelope.multipart" } } @@ -73,21 +69,12 @@ public fun EnvelopeBuilder.multipart( */ public fun EnvelopeBuilder.envelopes( envelopes: List, - formatFactory: EnvelopeFormatFactory = TaggedEnvelopeFormat, - formatMeta: Meta? = null, separator: String = DEFAULT_MULTIPART_DATA_SEPARATOR, ) { val parts = envelopes.map { - val format = formatMeta?.let { formatFactory(formatMeta) } ?: formatFactory - val binary = Binary(it, format) + val binary = Binary(it, TaggedEnvelopeFormat) EnvelopePart(binary, null) } - meta { - (MULTIPART_KEY + PART_FORMAT_KEY) put { - IOFormatFactory.NAME_KEY put formatFactory.name.toString() - formatMeta?.let { IOFormatFactory.META_KEY put formatMeta } - } - } multipart(parts, separator) } @@ -115,14 +102,4 @@ public val EnvelopePart.name: String? get() = description?.get("name").string /** * Represent envelope part by an envelope */ -public fun EnvelopePart.envelope(plugin: IOPlugin): Envelope { - val formatItem = description?.get(PART_FORMAT_KEY) - return if (formatItem != null) { - val format: EnvelopeFormat = plugin.resolveEnvelopeFormat(formatItem) - ?: error("Envelope format for $formatItem is not resolved") - binary.readWith(format) - } else { - error("Envelope description not found") - //SimpleEnvelope(description ?: Meta.EMPTY, binary) - } -} \ No newline at end of file +public fun EnvelopePart.envelope(): Envelope = binary.readWith(TaggedEnvelopeFormat) \ No newline at end of file diff --git a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/IOFormat.kt b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/IOFormat.kt index f4735878..10f33e19 100644 --- a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/IOFormat.kt +++ b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/IOFormat.kt @@ -25,6 +25,8 @@ public interface IOReader { public val type: KType public fun readObject(input: Input): T + + public fun readObject(binary: Binary): T = binary.read { readObject(this) } } public inline fun IOReader(crossinline read: Input.() -> T): IOReader = object : IOReader { diff --git a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaggedEnvelopeFormat.kt b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaggedEnvelopeFormat.kt index 2e5d3cc6..a320837d 100644 --- a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaggedEnvelopeFormat.kt +++ b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaggedEnvelopeFormat.kt @@ -18,6 +18,7 @@ import space.kscience.dataforge.names.plus public class TaggedEnvelopeFormat( public val io: IOPlugin, public val version: VERSION = VERSION.DF02, + public val metaFormatFactory: MetaFormatFactory = JsonMetaFormat ) : EnvelopeFormat { // private val metaFormat = io.metaFormat(metaFormatKey) @@ -40,20 +41,18 @@ public class TaggedEnvelopeFormat( writeRawString(END_SEQUENCE) } - override fun writeEnvelope( + override fun writeObject( output: Output, - envelope: Envelope, - metaFormatFactory: MetaFormatFactory, - formatMeta: Meta, + obj: Envelope, ) { - val metaFormat = metaFormatFactory.build(this@TaggedEnvelopeFormat.io.context, formatMeta) - val metaBytes = Binary(envelope.meta,metaFormat) - val actualSize: ULong = (envelope.data?.size ?: 0).toULong() + val metaFormat = metaFormatFactory.build(io.context, Meta.EMPTY) + val metaBytes = Binary(obj.meta,metaFormat) + val actualSize: ULong = (obj.data?.size ?: 0).toULong() val tag = Tag(metaFormatFactory.key, metaBytes.size.toUInt() + 2u, actualSize) output.writeBinary(tag.toBinary()) output.writeBinary(metaBytes) output.writeRawString("\r\n") - envelope.data?.let { + obj.data?.let { output.writeBinary(it) } } @@ -79,18 +78,18 @@ public class TaggedEnvelopeFormat( return SimpleEnvelope(meta, data) } - override fun readPartial(input: Input): PartialEnvelope { - val tag = input.readTag(this.version) + override fun readObject(binary: Binary): Envelope = binary.read{ + val tag = readTag(version) val metaFormat = io.resolveMetaFormat(tag.metaFormatKey) ?: error("Meta format with key ${tag.metaFormatKey} not found") - val metaBinary = input.readBinary(tag.metaSize.toInt()) + val metaBinary = readBinary(tag.metaSize.toInt()) val meta: Meta = metaFormat.readObjectFrom(metaBinary) - return PartialEnvelope(meta, (version.tagSize + tag.metaSize).toInt(), tag.dataSize) + SimpleEnvelope(meta, binary.view((version.tagSize + tag.metaSize).toInt(), tag.dataSize.toInt())) } private data class Tag( @@ -155,20 +154,16 @@ public class TaggedEnvelopeFormat( private val default by lazy { build(Global, Meta.EMPTY) } - override fun readPartial(input: Input): PartialEnvelope = - default.run { readPartial(input) } + override fun readObject(binary: Binary): Envelope = + default.run { readObject(binary) } - override fun writeEnvelope( + override fun writeObject( output: Output, - envelope: Envelope, - metaFormatFactory: MetaFormatFactory, - formatMeta: Meta, + obj: Envelope, ): Unit = default.run { - writeEnvelope( + writeObject( output, - envelope, - metaFormatFactory, - formatMeta + obj, ) } diff --git a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaglessEnvelopeFormat.kt b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaglessEnvelopeFormat.kt index 4e2bacfc..e6dc054f 100644 --- a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaglessEnvelopeFormat.kt +++ b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaglessEnvelopeFormat.kt @@ -1,64 +1,54 @@ package space.kscience.dataforge.io -import io.ktor.utils.io.core.* +import io.ktor.utils.io.core.ByteReadPacket +import io.ktor.utils.io.core.Input +import io.ktor.utils.io.core.Output +import io.ktor.utils.io.core.readUTF8UntilDelimiterTo import space.kscience.dataforge.context.Context import space.kscience.dataforge.context.Global import space.kscience.dataforge.meta.Meta -import space.kscience.dataforge.meta.get import space.kscience.dataforge.meta.isEmpty -import space.kscience.dataforge.meta.string import space.kscience.dataforge.names.Name import space.kscience.dataforge.names.plus -import kotlin.collections.set /** - * A text envelope format with human-readable tag. + * A text envelope format based on block separators. * TODO add description */ public class TaglessEnvelopeFormat( public val io: IOPlugin, public val meta: Meta = Meta.EMPTY, + public val metaFormatFactory: MetaFormatFactory = JsonMetaFormat, ) : EnvelopeFormat { - private val metaStart = meta[META_START_PROPERTY].string ?: DEFAULT_META_START - private val dataStart = meta[DATA_START_PROPERTY].string ?: DEFAULT_DATA_START +// private val metaStart = meta[META_START_PROPERTY].string ?: DEFAULT_META_START +// private val dataStart = meta[DATA_START_PROPERTY].string ?: DEFAULT_DATA_START - private fun Output.writeProperty(key: String, value: Any) { - writeFully("#? $key: $value;\r\n".encodeToByteArray()) - } +// private fun Output.writeProperty(key: String, value: Any) { +// writeFully("#? $key: $value;\r\n".encodeToByteArray()) +// } - override fun writeEnvelope( + override fun writeObject( output: Output, - envelope: Envelope, - metaFormatFactory: MetaFormatFactory, - formatMeta: Meta, + obj: Envelope, ) { - val metaFormat = metaFormatFactory.build(this.io.context, formatMeta) + val metaFormat = metaFormatFactory.build(this.io.context, meta) //printing header output.writeRawString(TAGLESS_ENVELOPE_HEADER + "\r\n") - //printing all properties - output.writeProperty(META_TYPE_PROPERTY, - metaFormatFactory.shortName) - //TODO add optional metaFormat properties - val actualSize: Int = envelope.data?.size ?: 0 - - output.writeProperty(DATA_LENGTH_PROPERTY, actualSize) - //Printing meta - if (!envelope.meta.isEmpty()) { - val metaBinary = Binary(envelope.meta, metaFormat) - output.writeProperty(META_LENGTH_PROPERTY, - metaBinary.size + 2) - output.writeUtf8String(this.metaStart + "\r\n") + if (!obj.meta.isEmpty()) { + val metaBinary = Binary(obj.meta, metaFormat) + output.writeUtf8String(META_START + "-${metaFormatFactory.shortName}\r\n") output.writeBinary(metaBinary) output.writeRawString("\r\n") } //Printing data - envelope.data?.let { data -> - output.writeUtf8String(this.dataStart + "\r\n") + obj.data?.let { data -> + //val actualSize: Int = envelope.data?.size ?: 0 + output.writeUtf8String(DATA_START + "\r\n") output.writeBinary(data) } } @@ -68,121 +58,46 @@ public class TaglessEnvelopeFormat( input.discardWithSeparator( TAGLESS_ENVELOPE_HEADER.encodeToByteArray(), atMost = 1024, - skipUntilEndOfLine = true ) - val properties = HashMap() - - var line = "" - while (line.isBlank() || line.startsWith("#?")) { - if (line.startsWith("#?")) { - val match = propertyPattern.find(line) - ?: error("Line $line does not match property declaration pattern") - val (key, value) = match.destructured - properties[key] = value - } - try { - line = ByteArray { - try { - input.readBytesWithSeparatorTo(this, byteArrayOf('\n'.code.toByte()), 1024) - } catch (ex: BufferLimitExceededException) { - throw IllegalStateException("Property line exceeds maximum line length (1024)", ex) - } - }.decodeToString().trim() - } catch (ex: EOFException) { - return SimpleEnvelope(Meta.EMPTY, Binary.EMPTY) - } - } - var meta: Meta = Meta.EMPTY - if (line.startsWith(metaStart)) { - val metaFormat = properties[META_TYPE_PROPERTY]?.let { io.resolveMetaFormat(it) } ?: JsonMetaFormat - val metaSize = properties[META_LENGTH_PROPERTY]?.toInt() - meta = if (metaSize != null) { - metaFormat.readObjectFrom(input.readBinary(metaSize)) - } else { - error("Can't partially read an envelope with undefined meta size") - } - } + var data: Binary? = null - //skip until data start input.discardWithSeparator( - dataStart.encodeToByteArray(), + SEPARATOR_PREFIX, atMost = 1024, - skipUntilEndOfLine = true ) - val data: Binary = if (properties.containsKey(DATA_LENGTH_PROPERTY)) { - input.readBinary(properties[DATA_LENGTH_PROPERTY]!!.toInt()) -// val bytes = ByteArray(properties[DATA_LENGTH_PROPERTY]!!.toInt()) -// readByteArray(bytes) -// bytes.asBinary() - } else { - input.readBytes().asBinary() - } + var header: String = ByteArray { + input.readUTF8UntilDelimiterTo(this, "\n") + }.decodeToString() - return SimpleEnvelope(meta, data) - } - - - override fun readPartial(input: Input): PartialEnvelope { - var offset = 0 - - //read preamble - - offset += input.discardWithSeparator( - TAGLESS_ENVELOPE_HEADER.encodeToByteArray(), - atMost = 1024, - skipUntilEndOfLine = true - ) - - val properties = HashMap() - - var line = "" - while (line.isBlank() || line.startsWith("#?")) { - if (line.startsWith("#?")) { - val match = propertyPattern.find(line) - ?: error("Line $line does not match property declaration pattern") - val (key, value) = match.destructured - properties[key] = value + while (!input.endOfInput) { + val block = ByteArray { + input.readWithSeparatorTo(this, SEPARATOR_PREFIX) } - try { - line = ByteArray { - val read = try { - input.readBytesWithSeparatorTo(this, byteArrayOf('\n'.code.toByte()), 1024) - } catch (ex: BufferLimitExceededException) { - throw IllegalStateException("Property line exceeds maximum line length (1024)", ex) - } - offset += read - }.decodeToString().trim() - } catch (ex: EOFException) { - return PartialEnvelope(Meta.EMPTY, offset, 0.toULong()) + + val nextHeader = ByteArray { + input.readWithSeparatorTo(this, "\n".encodeToByteArray()) + }.decodeToString() + + //terminate on end + if (header.startsWith("END")) break + + + if (header.startsWith("META")) { + //TODO check format + val metaFormat: MetaFormatFactory = JsonMetaFormat + meta = metaFormat.readMeta(ByteReadPacket(block)) } - } - var meta: Meta = Meta.EMPTY - - if (line.startsWith(metaStart)) { - val metaFormat = properties[META_TYPE_PROPERTY]?.let { io.resolveMetaFormat(it) } ?: JsonMetaFormat - val metaSize = properties[META_LENGTH_PROPERTY]?.toInt() - meta = if (metaSize != null) { - offset += metaSize - metaFormat.readObjectFrom(input.readBinary(metaSize)) - } else { - error("Can't partially read an envelope with undefined meta size") + if (header.startsWith("DATA")) { + data = block.asBinary() } + header = nextHeader } - - //skip until data start - offset += input.discardWithSeparator( - dataStart.encodeToByteArray(), - atMost = 1024, - skipUntilEndOfLine = true - ) - - val dataSize = properties[DATA_LENGTH_PROPERTY]?.toULong() - return PartialEnvelope(meta, offset, dataSize) + return Envelope(meta, data) } public companion object : EnvelopeFormatFactory { @@ -196,11 +111,17 @@ public class TaglessEnvelopeFormat( public const val TAGLESS_ENVELOPE_TYPE: String = "tagless" - public const val TAGLESS_ENVELOPE_HEADER: String = "#~DFTL~#" - public const val META_START_PROPERTY: String = "metaSeparator" - public const val DEFAULT_META_START: String = "#~META~#" - public const val DATA_START_PROPERTY: String = "dataSeparator" - public const val DEFAULT_DATA_START: String = "#~DATA~#" + public val SEPARATOR_PREFIX: ByteArray = "\n#~".encodeToByteArray() + + public const val TAGLESS_ENVELOPE_HEADER: String = "#~DFTL" + + // public const val META_START_PROPERTY: String = "metaSeparator" + public const val META_START: String = "#~META" + + // public const val DATA_START_PROPERTY: String = "dataSeparator" + public const val DATA_START: String = "#~DATA" + + public const val END: String = "#~END" public const val code: Int = 0x4446544c //DFTL @@ -210,20 +131,15 @@ public class TaglessEnvelopeFormat( private val default by lazy { build(Global, Meta.EMPTY) } - override fun readPartial(input: Input): PartialEnvelope = - default.run { readPartial(input) } + override fun readObject(binary: Binary): Envelope = default.run { readObject(binary) } - override fun writeEnvelope( + override fun writeObject( output: Output, - envelope: Envelope, - metaFormatFactory: MetaFormatFactory, - formatMeta: Meta, + obj: Envelope, ): Unit = default.run { - writeEnvelope( + writeObject( output, - envelope, - metaFormatFactory, - formatMeta + obj, ) } diff --git a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/ioMisc.kt b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/ioMisc.kt index 5c17e9ac..ad03ba6e 100644 --- a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/ioMisc.kt +++ b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/ioMisc.kt @@ -7,7 +7,6 @@ import io.ktor.utils.io.core.* import io.ktor.utils.io.core.internal.ChunkBuffer import space.kscience.dataforge.meta.Meta import space.kscience.dataforge.misc.DFExperimental -import kotlin.math.min public fun Output.writeRawString(str: String) { writeFully(str.toByteArray(Charsets.ISO_8859_1)) @@ -31,26 +30,7 @@ public inline fun ByteArray(block: Output.() -> Unit): ByteArray = public inline fun Binary(block: Output.() -> Unit): Binary = ByteArray(block).asBinary() -/** - * View section of a [Binary] as an independent binary - */ -public class BinaryView(private val source: Binary, private val start: Int, override val size: Int) : Binary { - - init { - require(start > 0) - require(start + size <= source.size) { "View boundary is outside source binary size" } - } - - override fun read(offset: Int, atMost: Int, block: Input.() -> R): R = - source.read(start + offset, min(size, atMost), block) - - override suspend fun readSuspend(offset: Int, atMost: Int, block: suspend Input.() -> R): R = - source.readSuspend(start + offset, min(size, atMost), block) -} - -public fun Binary.view(start: Int, size: Int): BinaryView = BinaryView(this, start, size) - -public operator fun Binary.get(range: IntRange): BinaryView = view(range.first, range.last - range.first) +public operator fun Binary.get(range: IntRange): Binary = view(range.first, range.last - range.first) /** * Return inferred [EnvelopeFormat] if only one format could read given file. If no format accepts the binary, return null. If @@ -68,22 +48,6 @@ public fun IOPlugin.peekBinaryEnvelopeFormat(binary: Binary): EnvelopeFormat? { } } -/** - * Zero-copy read this binary as an envelope using given [this@toEnvelope] - */ -@DFExperimental -public fun EnvelopeFormat.readBinary(binary: Binary): Envelope { - val partialEnvelope: PartialEnvelope = binary.read { - run { - readPartial(this@read) - } - } - val offset: Int = partialEnvelope.dataOffset.toInt() - val size: Int = partialEnvelope.dataSize?.toInt() ?: (binary.size - offset) - val envelopeBinary = BinaryView(binary, offset, size) - return SimpleEnvelope(partialEnvelope.meta, envelopeBinary) -} - /** * A zero-copy read from */ @@ -92,9 +56,9 @@ public fun IOPlugin.readEnvelope( binary: Binary, readNonEnvelopes: Boolean = false, formatPicker: IOPlugin.(Binary) -> EnvelopeFormat? = IOPlugin::peekBinaryEnvelopeFormat, -): Envelope = formatPicker(binary)?.readBinary(binary) ?: if (readNonEnvelopes) { +): Envelope = formatPicker(binary)?.readObject(binary) ?: if (readNonEnvelopes) { // if no format accepts file, read it as binary - SimpleEnvelope(Meta.EMPTY, binary) + Envelope(Meta.EMPTY, binary) } else error("Can't infer format for $binary") @DFExperimental @@ -126,62 +90,70 @@ private class RingByteArray( private fun Int.forward(n: Int): Int = (this + n) % (buffer.size) - fun compare(inputArray: ByteArray): Boolean = when { + fun contentEquals(inputArray: ByteArray): Boolean = when { inputArray.size != buffer.size -> false size < buffer.size -> false else -> inputArray.indices.all { inputArray[it] == get(it) } } + } +private fun RingByteArray.toArray(): ByteArray = ByteArray(size) { get(it) } + /** - * Read [Input] into [output] until designated multy-byte [separator] and optionally continues until + * Read [Input] into [output] until designated multibyte [separator] and optionally continues until * the end of the line after it. Throw error if [separator] not found and [atMost] bytes are read. * Also fails if [separator] not found until the end of input. * * Separator itself is not read into Output. * + * @param errorOnEof if true error is thrown if separator is never encountered + * * @return bytes actually being read, including separator */ -public fun Input.readBytesWithSeparatorTo( +public fun Input.readWithSeparatorTo( output: Output, separator: ByteArray, atMost: Int = Int.MAX_VALUE, - skipUntilEndOfLine: Boolean = false, + errorOnEof: Boolean = false, ): Int { var counter = 0 val rb = RingByteArray(ByteArray(separator.size)) - var separatorFound = false takeWhile { buffer -> while (buffer.canRead()) { val byte = buffer.readByte() counter++ if (counter >= atMost) error("Maximum number of bytes to be read $atMost reached.") - //If end-of-line-search is on, terminate - if (separatorFound) { - if (endOfInput || byte == '\n'.code.toByte()) { - return counter - } - } else { - rb.push(byte) - if (rb.compare(separator)) { - separatorFound = true - if (!skipUntilEndOfLine) { - return counter - } - } else if (rb.isFull()) { - output.writeByte(rb[0]) - } + rb.push(byte) + if (rb.contentEquals(separator)) { + return counter + } else if (rb.isFull()) { + output.writeByte(rb[0]) } } !endOfInput } - error("Read to the end of input without encountering ${separator.decodeToString()}") + if (errorOnEof) { + error("Read to the end of input without encountering ${separator.decodeToString()}") + } else { + for(i in 1 until rb.size){ + output.writeByte(rb[i]) + } + counter += (rb.size - 1) + return counter + } +} + +public fun Input.discardLine(): Int { + return discardUntilDelimiter('\n'.code.toByte()).also { + discard(1) + }.toInt() + 1 } public fun Input.discardWithSeparator( separator: ByteArray, atMost: Int = Int.MAX_VALUE, - skipUntilEndOfLine: Boolean = false, + errorOnEof: Boolean = false, ): Int { val dummy: Output = object : Output(ChunkBuffer.Pool) { override fun closeDestination() { @@ -193,5 +165,5 @@ public fun Input.discardWithSeparator( } } - return readBytesWithSeparatorTo(dummy, separator, atMost, skipUntilEndOfLine) + return readWithSeparatorTo(dummy, separator, atMost, errorOnEof) } diff --git a/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/EnvelopeFormatTest.kt b/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/EnvelopeFormatTest.kt index f6106f4b..999f3175 100644 --- a/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/EnvelopeFormatTest.kt +++ b/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/EnvelopeFormatTest.kt @@ -1,8 +1,6 @@ package space.kscience.dataforge.io -import io.ktor.utils.io.core.ByteReadPacket -import io.ktor.utils.io.core.readDouble -import io.ktor.utils.io.core.writeDouble +import io.ktor.utils.io.core.readBytes import kotlin.test.Test import kotlin.test.assertEquals @@ -14,40 +12,52 @@ class EnvelopeFormatTest { "d" put 22.2 } data { - writeDouble(22.2) -// repeat(2000){ -// writeInt(it) -// } + writeUtf8String("12345678") } } @Test - fun testTaggedFormat() { - TaggedEnvelopeFormat.run { - val byteArray = writeToByteArray(envelope) - //println(byteArray.decodeToString()) - val res = readFromByteArray(byteArray) - assertEquals(envelope.meta, res.meta) - val double = res.data?.read { - readDouble() - } - assertEquals(22.2, double) + fun testTaggedFormat() = with(TaggedEnvelopeFormat) { + val byteArray = writeToByteArray(envelope) + val res = readFromByteArray(byteArray) + assertEquals(envelope.meta, res.meta) + val bytes = res.data?.read { + readBytes() } + assertEquals("12345678", bytes?.decodeToString()) } @Test - fun testTaglessFormat() { - TaglessEnvelopeFormat.run { - val byteArray = writeToByteArray(envelope) - //println(byteArray.decodeToString()) - val partial = readPartial(ByteReadPacket(byteArray)) - assertEquals(8, partial.dataSize?.toInt()) - val res = readFromByteArray(byteArray) - assertEquals(envelope.meta, res.meta) - val double = res.data?.read { - readDouble() - } - assertEquals(22.2, double) + fun testTaglessFormat() = with(TaglessEnvelopeFormat) { + val byteArray = writeToByteArray(envelope) + println(byteArray.decodeToString()) + val res = readFromByteArray(byteArray) + assertEquals(envelope.meta, res.meta) + val bytes = res.data?.read { + readBytes() } + assertEquals("12345678", bytes?.decodeToString()) + } + + @Test + fun testManualDftl(){ + val envelopeString = """ + #~DFTL + #~META + { + "@envelope": { + "type": "test.format" + }, + "d": 22.2 + } + #~DATA + 12345678 + """.trimIndent() + val res = TaglessEnvelopeFormat.readFromByteArray(envelopeString.encodeToByteArray()) + assertEquals(envelope.meta, res.meta) + val bytes = res.data?.read { + readBytes() + } + assertEquals("12345678", bytes?.decodeToString()) } } \ No newline at end of file diff --git a/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/IOTest.kt b/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/IOTest.kt index a2583bb1..f02066da 100644 --- a/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/IOTest.kt +++ b/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/IOTest.kt @@ -31,7 +31,7 @@ class IOTest { binary.read { val array = ByteArray { - val read = readBytesWithSeparatorTo(this, "---".encodeToByteArray(), skipUntilEndOfLine = true) + val read = readWithSeparatorTo(this, "---".encodeToByteArray()) + discardLine() assertEquals(12, read) } assertEquals(""" @@ -43,13 +43,13 @@ class IOTest { assertFails { binary.read { - discardWithSeparator("---".encodeToByteArray(), atMost = 3) + discardWithSeparator("---".encodeToByteArray(), atMost = 3 ) } } assertFails { binary.read{ - discardWithSeparator("-+-".encodeToByteArray()) + discardWithSeparator("-+-".encodeToByteArray(), errorOnEof = true) } } diff --git a/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/MultipartTest.kt b/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/MultipartTest.kt index 6315d744..2412b969 100644 --- a/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/MultipartTest.kt +++ b/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/MultipartTest.kt @@ -27,12 +27,12 @@ class MultipartTest { } val partsEnvelope = Envelope { - envelopes(envelopes, TaglessEnvelopeFormat) + envelopes(envelopes) } @Test fun testParts() { - val format = TaglessEnvelopeFormat + val format = TaggedEnvelopeFormat val singleEnvelopeData = Binary(envelopes[0], format) val singleEnvelopeSize = singleEnvelopeData.size val bytes = Binary(partsEnvelope, format) @@ -40,7 +40,7 @@ class MultipartTest { val reconstructed = bytes.readWith(format) println(reconstructed.meta) val parts = reconstructed.parts() - val envelope = parts[2].envelope(io) + val envelope = parts[2].envelope() assertEquals(2, envelope.meta["value"].int) println(reconstructed.data!!.size) } diff --git a/dataforge-io/src/jvmMain/kotlin/space/kscience/dataforge/io/fileIO.kt b/dataforge-io/src/jvmMain/kotlin/space/kscience/dataforge/io/fileIO.kt index 64f4907e..cf88d3ae 100644 --- a/dataforge-io/src/jvmMain/kotlin/space/kscience/dataforge/io/fileIO.kt +++ b/dataforge-io/src/jvmMain/kotlin/space/kscience/dataforge/io/fileIO.kt @@ -36,6 +36,8 @@ internal class PathBinary( } return ByteReadPacket(array).block() } + + override fun view(offset: Int, binarySize: Int) = PathBinary(path, fileOffset + offset, binarySize) } public fun Path.asBinary(): Binary = PathBinary(this) @@ -73,15 +75,7 @@ public fun Path.rewrite(block: Output.() -> Unit): Unit { } @DFExperimental -public fun EnvelopeFormat.readFile(path: Path): Envelope { - val partialEnvelope: PartialEnvelope = path.asBinary().read { - readPartial(this@read) - } - val offset: Int = partialEnvelope.dataOffset.toInt() - val size: Int = partialEnvelope.dataSize?.toInt() ?: (Files.size(path).toInt() - offset) - val binary = PathBinary(path, offset, size) - return SimpleEnvelope(partialEnvelope.meta, binary) -} +public fun EnvelopeFormat.readFile(path: Path): Envelope = readObject(path.asBinary()) /** * Resolve IOFormat based on type @@ -239,10 +233,9 @@ public fun IOPlugin.writeEnvelopeFile( path: Path, envelope: Envelope, envelopeFormat: EnvelopeFormat = TaggedEnvelopeFormat, - metaFormat: MetaFormatFactory? = null, ) { path.rewrite { - envelopeFormat.writeEnvelope(this, envelope, metaFormat ?: envelopeFormat.defaultMetaFormat) + envelopeFormat.writeObject(this, envelope) } } diff --git a/dataforge-workspace/src/commonMain/kotlin/space/kscience/dataforge/workspace/envelopeData.kt b/dataforge-workspace/src/commonMain/kotlin/space/kscience/dataforge/workspace/envelopeData.kt index d88a7333..39bb0726 100644 --- a/dataforge-workspace/src/commonMain/kotlin/space/kscience/dataforge/workspace/envelopeData.kt +++ b/dataforge-workspace/src/commonMain/kotlin/space/kscience/dataforge/workspace/envelopeData.kt @@ -17,5 +17,5 @@ public fun Envelope.toData(format: IOReader): Data = Data(format public suspend fun Data.toEnvelope(format: IOWriter): Envelope { val obj = await() val binary = Binary(obj, format) - return SimpleEnvelope(meta, binary) + return Envelope(meta, binary) } \ No newline at end of file diff --git a/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/fileData.kt b/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/fileData.kt index 21151113..f94a3090 100644 --- a/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/fileData.kt +++ b/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/fileData.kt @@ -30,7 +30,6 @@ import kotlin.io.path.nameWithoutExtension import kotlin.io.path.readAttributes import kotlin.reflect.KType import kotlin.reflect.typeOf -import kotlin.streams.toList //public typealias FileFormatResolver = (Path, Meta) -> IOFormat @@ -193,7 +192,6 @@ public suspend fun IOPlugin.writeDataDirectory( tree: DataTree, format: IOWriter, envelopeFormat: EnvelopeFormat? = null, - metaFormat: MetaFormatFactory? = null, ) { withContext(Dispatchers.IO) { if (!Files.exists(path)) { @@ -210,15 +208,15 @@ public suspend fun IOPlugin.writeDataDirectory( is DataTreeItem.Leaf -> { val envelope = item.data.toEnvelope(format) if (envelopeFormat != null) { - writeEnvelopeFile(childPath, envelope, envelopeFormat, metaFormat) + writeEnvelopeFile(childPath, envelope, envelopeFormat) } else { - writeEnvelopeDirectory(childPath, envelope, metaFormat ?: JsonMetaFormat) + writeEnvelopeDirectory(childPath, envelope) } } } } val treeMeta = tree.meta - writeMetaFile(path, treeMeta, metaFormat ?: JsonMetaFormat) + writeMetaFile(path, treeMeta) } } diff --git a/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/zipData.kt b/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/zipData.kt index b596d1cb..706a2d0b 100644 --- a/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/zipData.kt +++ b/dataforge-workspace/src/jvmMain/kotlin/space/kscience/dataforge/workspace/zipData.kt @@ -29,7 +29,7 @@ private suspend fun ZipOutputStream.writeNode( val entry = ZipEntry(name) putNextEntry(entry) asOutput().run { - envelopeFormat.writeEnvelope(this, envelope) + envelopeFormat.writeObject(this, envelope) flush() } } diff --git a/dataforge-workspace/src/jvmTest/kotlin/space/kscience/dataforge/workspace/CachingWorkspaceTest.kt b/dataforge-workspace/src/jvmTest/kotlin/space/kscience/dataforge/workspace/CachingWorkspaceTest.kt index 49265ca0..8021c29f 100644 --- a/dataforge-workspace/src/jvmTest/kotlin/space/kscience/dataforge/workspace/CachingWorkspaceTest.kt +++ b/dataforge-workspace/src/jvmTest/kotlin/space/kscience/dataforge/workspace/CachingWorkspaceTest.kt @@ -23,13 +23,13 @@ internal class CachingWorkspaceTest { useCache() val doFirst by task { - pipeFrom(data()) { _, name, meta -> + pipeFrom(data()) { _, name, _ -> println("Done first on $name with flag=${taskMeta["flag"].boolean ?: false}") } } val doSecond by task{ - pipeFrom(doFirst) { _, name, meta -> + pipeFrom(doFirst) { _, name, _ -> println("Done second on $name with flag=${taskMeta["flag"].boolean ?: false}") } } diff --git a/gradle.properties b/gradle.properties index 2542b286..0a7416e5 100644 --- a/gradle.properties +++ b/gradle.properties @@ -4,5 +4,6 @@ org.gradle.jvmargs=-Xmx4096m kotlin.code.style=official kotlin.mpp.stability.nowarn=true kotlin.incremental.js.ir=true +kotlin.native.ignoreDisabledTargets=true toolsVersion=0.13.3-kotlin-1.7.20