From 82d37f4b5582a4f8c955c0be7aa73727ae96135b Mon Sep 17 00:00:00 2001 From: Alexander Nozik Date: Fri, 29 Apr 2022 18:35:44 +0300 Subject: [PATCH] Fix text envelope formats partial reads --- CHANGELOG.md | 2 + build.gradle.kts | 2 +- .../space/kscience/dataforge/data/Data.kt | 4 + .../io/yaml/FrontMatterEnvelopeFormat.kt | 62 ++++------- .../kscience/dataforge/io/yaml/YamlPlugin.kt | 2 + .../io/yaml/FrontMatterEnvelopeFormatTest.kt | 34 ++++++ .../kscience/dataforge/io/EnvelopeFormat.kt | 4 +- .../dataforge/io/TaggedEnvelopeFormat.kt | 4 +- .../dataforge/io/TaglessEnvelopeFormat.kt | 97 +++++++++------- .../space/kscience/dataforge/io/ioMisc.kt | 105 +++++++++++++++++- .../dataforge/io/EnvelopeFormatTest.kt | 15 ++- .../space/kscience/dataforge/io/IOTest.kt | 40 +++++++ .../kscience/dataforge/workspace/Workspace.kt | 5 +- 13 files changed, 284 insertions(+), 92 deletions(-) create mode 100644 dataforge-io/dataforge-io-yaml/src/commonTest/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormatTest.kt diff --git a/CHANGELOG.md b/CHANGELOG.md index 52af8804..26208ef7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - DataTree `items` call is blocking. - DataSet `getData` is no longer suspended and renamed to `get` - DataSet operates with sequences of data instead of flows +- PartialEnvelope uses `Int` instead `UInt`. ### Deprecated @@ -19,6 +20,7 @@ ### Fixed - Meta file name in readMeta from directory +- Tagless and FrontMatter envelope partial readers fix. ### Security diff --git a/build.gradle.kts b/build.gradle.kts index 977b44f8..9db78013 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -4,7 +4,7 @@ plugins { allprojects { group = "space.kscience" - version = "0.6.0-dev-2" + version = "0.6.0-dev-3" repositories{ mavenCentral() } diff --git a/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/Data.kt b/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/Data.kt index 8e681678..41182882 100644 --- a/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/Data.kt +++ b/dataforge-data/src/commonMain/kotlin/space/kscience/dataforge/data/Data.kt @@ -83,6 +83,10 @@ public class StaticData( override val meta: Meta = Meta.EMPTY, ) : Data, StaticGoal(value) +@Suppress("FunctionName") +public inline fun Data(value: T, meta: Meta = Meta.EMPTY): StaticData = + StaticData(typeOf(), value, meta) + @Suppress("FunctionName") @DFInternal public fun Data( diff --git a/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormat.kt b/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormat.kt index 001c8ef0..2734ebd9 100644 --- a/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormat.kt +++ b/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormat.kt @@ -2,8 +2,8 @@ package space.kscience.dataforge.io.yaml import io.ktor.utils.io.core.Input import io.ktor.utils.io.core.Output +import io.ktor.utils.io.core.buildPacket import io.ktor.utils.io.core.readBytes -import io.ktor.utils.io.core.readUTF8Line import space.kscience.dataforge.context.Context import space.kscience.dataforge.context.Global import space.kscience.dataforge.io.* @@ -11,6 +11,8 @@ import space.kscience.dataforge.io.IOFormat.Companion.META_KEY import space.kscience.dataforge.io.IOFormat.Companion.NAME_KEY import space.kscience.dataforge.meta.Meta import space.kscience.dataforge.misc.DFExperimental +import space.kscience.dataforge.names.Name +import space.kscience.dataforge.names.plus @DFExperimental public class FrontMatterEnvelopeFormat( @@ -19,51 +21,33 @@ public class FrontMatterEnvelopeFormat( ) : EnvelopeFormat { override fun readPartial(input: Input): PartialEnvelope { - var line: String - var offset = 0u - do { - line = input.readUTF8Line() ?: error("Input does not contain front matter separator") - offset += line.encodeToByteArray().size.toUInt() - } while (!line.startsWith(SEPARATOR)) + var offset = 0 - val readMetaFormat = - metaTypeRegex.matchEntire(line)?.groupValues?.first() - ?.let { io.resolveMetaFormat(it) } ?: YamlMetaFormat + offset += input.discardWithSeparator( + SEPARATOR.encodeToByteArray(), + atMost = 1024, + skipUntilEndOfLine = false + ) + + val line = input.readSafeUtf8Line() + val readMetaFormat = line.trim().takeIf { it.isNotBlank() }?.let { io.resolveMetaFormat(it) } ?: YamlMetaFormat //TODO replace by preview - val meta = Binary { - do { - line = input.readSafeUtf8Line() - writeUtf8String(line + "\r\n") - offset += line.encodeToByteArray().size.toUInt() - } while (!line.startsWith(SEPARATOR)) - }.read { - readMetaFormat.readMeta(input) - + val packet = buildPacket { + offset += input.readBytesWithSeparatorTo( + this, + SEPARATOR.encodeToByteArray(), + skipUntilEndOfLine = true + ) } + val meta = readMetaFormat.readMeta(packet) return PartialEnvelope(meta, offset, null) } override fun readObject(input: Input): Envelope { - var line: String - do { - line = input.readSafeUtf8Line() //?: error("Input does not contain front matter separator") - } while (!line.startsWith(SEPARATOR)) - - val readMetaFormat = - metaTypeRegex.matchEntire(line)?.groupValues?.first() - ?.let { io.resolveMetaFormat(it) } ?: YamlMetaFormat - - val meta = Binary { - do { - writeUtf8String(input.readSafeUtf8Line() + "\r\n") - } while (!line.startsWith(SEPARATOR)) - }.read { - readMetaFormat.readMeta(input) - } - val bytes = input.readBytes() - val data = bytes.asBinary() - return SimpleEnvelope(meta, data) + val partial = readPartial(input) + val data = input.readBytes().asBinary() + return SimpleEnvelope(partial.meta, data) } override fun writeEnvelope( @@ -92,6 +76,8 @@ public class FrontMatterEnvelopeFormat( private val metaTypeRegex = "---(\\w*)\\s*".toRegex() + override val name: Name = EnvelopeFormatFactory.ENVELOPE_FACTORY_NAME + "frontMatter" + override fun build(context: Context, meta: Meta): EnvelopeFormat { return FrontMatterEnvelopeFormat(context.io, meta) } diff --git a/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/YamlPlugin.kt b/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/YamlPlugin.kt index 1e7530f9..dea3a38c 100644 --- a/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/YamlPlugin.kt +++ b/dataforge-io/dataforge-io-yaml/src/commonMain/kotlin/space/kscience/dataforge/io/yaml/YamlPlugin.kt @@ -4,6 +4,7 @@ import space.kscience.dataforge.context.AbstractPlugin import space.kscience.dataforge.context.Context import space.kscience.dataforge.context.PluginFactory import space.kscience.dataforge.context.PluginTag +import space.kscience.dataforge.io.EnvelopeFormatFactory import space.kscience.dataforge.io.IOPlugin import space.kscience.dataforge.io.MetaFormatFactory import space.kscience.dataforge.meta.Meta @@ -20,6 +21,7 @@ public class YamlPlugin(meta: Meta) : AbstractPlugin(meta) { override fun content(target: String): Map = when (target) { MetaFormatFactory.META_FORMAT_TYPE -> mapOf("yaml".asName() to YamlMetaFormat) + EnvelopeFormatFactory.ENVELOPE_FORMAT_TYPE -> mapOf(FrontMatterEnvelopeFormat.name to FrontMatterEnvelopeFormat) else -> super.content(target) } diff --git a/dataforge-io/dataforge-io-yaml/src/commonTest/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormatTest.kt b/dataforge-io/dataforge-io-yaml/src/commonTest/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormatTest.kt new file mode 100644 index 00000000..0c242597 --- /dev/null +++ b/dataforge-io/dataforge-io-yaml/src/commonTest/kotlin/space/kscience/dataforge/io/yaml/FrontMatterEnvelopeFormatTest.kt @@ -0,0 +1,34 @@ +package space.kscience.dataforge.io.yaml + +import space.kscience.dataforge.context.Context +import space.kscience.dataforge.io.io +import space.kscience.dataforge.io.readEnvelope +import space.kscience.dataforge.io.toByteArray +import space.kscience.dataforge.meta.get +import space.kscience.dataforge.meta.string +import kotlin.test.Test +import kotlin.test.assertEquals + +internal class FrontMatterEnvelopeFormatTest { + + val context = Context { + plugin(YamlPlugin) + } + + @Test + fun frontMatter(){ + val text = """ + --- + content_type: magprog + magprog_section: contacts + section_title: Контакты + language: ru + --- + Some text here + """.trimIndent() + + val envelope = context.io.readEnvelope(text) + assertEquals("Some text here", envelope.data!!.toByteArray().decodeToString().trim()) + assertEquals("magprog", envelope.meta["content_type"].string) + } +} \ No newline at end of file diff --git a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/EnvelopeFormat.kt b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/EnvelopeFormat.kt index 9cb2cef0..4dea572b 100644 --- a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/EnvelopeFormat.kt +++ b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/EnvelopeFormat.kt @@ -14,7 +14,7 @@ import kotlin.reflect.typeOf /** * A partially read envelope with meta, but without data */ -public data class PartialEnvelope(val meta: Meta, val dataOffset: UInt, val dataSize: ULong?) +public data class PartialEnvelope(val meta: Meta, val dataOffset: Int, val dataSize: ULong?) public interface EnvelopeFormat : IOFormat { override val type: KType get() = typeOf() @@ -39,7 +39,6 @@ public fun EnvelopeFormat.read(input: Input): Envelope = readObject(input) @Type(ENVELOPE_FORMAT_TYPE) public interface EnvelopeFormatFactory : IOFormatFactory, EnvelopeFormat { - override val name: Name get() = "envelope".asName() override val type: KType get() = typeOf() override fun build(context: Context, meta: Meta): EnvelopeFormat @@ -51,6 +50,7 @@ public interface EnvelopeFormatFactory : IOFormatFactory, EnvelopeForm public fun peekFormat(io: IOPlugin, binary: Binary): EnvelopeFormat? public companion object { + public val ENVELOPE_FACTORY_NAME: Name = "envelope".asName() public const val ENVELOPE_FORMAT_TYPE: String = "io.format.envelope" } } \ No newline at end of file diff --git a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaggedEnvelopeFormat.kt b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaggedEnvelopeFormat.kt index 96eaddb6..933dc343 100644 --- a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaggedEnvelopeFormat.kt +++ b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaggedEnvelopeFormat.kt @@ -92,7 +92,7 @@ public class TaggedEnvelopeFormat( val meta: Meta = metaFormat.readObject(metaBinary) - return PartialEnvelope(meta, version.tagSize + tag.metaSize, tag.dataSize) + return PartialEnvelope(meta, (version.tagSize + tag.metaSize).toInt(), tag.dataSize) } private data class Tag( @@ -117,7 +117,7 @@ public class TaggedEnvelopeFormat( private const val START_SEQUENCE = "#~" private const val END_SEQUENCE = "~#\r\n" - override val name: Name = super.name + "tagged" + override val name: Name = EnvelopeFormatFactory.ENVELOPE_FACTORY_NAME + "tagged" override fun build(context: Context, meta: Meta): EnvelopeFormat { val io = context.io diff --git a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaglessEnvelopeFormat.kt b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaglessEnvelopeFormat.kt index 56e3f582..9a0f4a98 100644 --- a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaglessEnvelopeFormat.kt +++ b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/TaglessEnvelopeFormat.kt @@ -10,7 +10,7 @@ import space.kscience.dataforge.meta.get import space.kscience.dataforge.meta.isEmpty import space.kscience.dataforge.meta.string import space.kscience.dataforge.names.Name -import space.kscience.dataforge.names.asName +import space.kscience.dataforge.names.plus import kotlin.collections.set /** @@ -33,7 +33,7 @@ public class TaglessEnvelopeFormat( output: Output, envelope: Envelope, metaFormatFactory: MetaFormatFactory, - formatMeta: Meta + formatMeta: Meta, ) { val metaFormat = metaFormatFactory.build(this.io.context, formatMeta) @@ -66,13 +66,16 @@ public class TaglessEnvelopeFormat( } override fun readObject(input: Input): Envelope { - var line: String - do { - line = input.readSafeUtf8Line() // ?: error("Input does not contain tagless envelope header") - } while (!line.startsWith(TAGLESS_ENVELOPE_HEADER)) + //read preamble + input.discardWithSeparator( + TAGLESS_ENVELOPE_HEADER.encodeToByteArray(), + atMost = 1024, + skipUntilEndOfLine = true + ) + val properties = HashMap() - line = "" + var line = "" while (line.isBlank() || line.startsWith("#?")) { if (line.startsWith("#?")) { val match = propertyPattern.find(line) @@ -80,9 +83,17 @@ public class TaglessEnvelopeFormat( val (key, value) = match.destructured properties[key] = value } - //If can't read line, return envelope without data - if (input.endOfInput) return SimpleEnvelope(Meta.EMPTY, null) - line = input.readSafeUtf8Line() + try { + line = ByteArray { + try { + input.readBytesWithSeparatorTo(this, byteArrayOf('\n'.code.toByte()), 1024) + } catch (ex: BufferLimitExceededException) { + throw IllegalStateException("Property line exceeds maximum line length (1024)", ex) + } + }.decodeToString().trim() + } catch (ex: EOFException) { + return SimpleEnvelope(Meta.EMPTY, Binary.EMPTY) + } } var meta: Meta = Meta.EMPTY @@ -93,18 +104,16 @@ public class TaglessEnvelopeFormat( meta = if (metaSize != null) { metaFormat.readObject(input.readBinary(metaSize)) } else { - metaFormat.readObject(input) + error("Can't partially read an envelope with undefined meta size") } } - do { - try { - line = input.readSafeUtf8Line() - } catch (ex: EOFException) { - //returning an Envelope without data if end of input is reached - return SimpleEnvelope(meta, null) - } - } while (!line.startsWith(dataStart)) + //skip until data start + input.discardWithSeparator( + dataStart.encodeToByteArray(), + atMost = 1024, + skipUntilEndOfLine = true + ) val data: Binary = if (properties.containsKey(DATA_LENGTH_PROPERTY)) { input.readBinary(properties[DATA_LENGTH_PROPERTY]!!.toInt()) @@ -112,24 +121,27 @@ public class TaglessEnvelopeFormat( // readByteArray(bytes) // bytes.asBinary() } else { - Binary { - input.copyTo(this) - } + input.readBytes().asBinary() } return SimpleEnvelope(meta, data) } + override fun readPartial(input: Input): PartialEnvelope { - var offset = 0u - var line: String - do { - line = input.readSafeUtf8Line()// ?: error("Input does not contain tagless envelope header") - offset += line.encodeToByteArray().size.toUInt() - } while (!line.startsWith(TAGLESS_ENVELOPE_HEADER)) + var offset = 0 + + //read preamble + + offset += input.discardWithSeparator( + TAGLESS_ENVELOPE_HEADER.encodeToByteArray(), + atMost = 1024, + skipUntilEndOfLine = true + ) + val properties = HashMap() - line = "" + var line = "" while (line.isBlank() || line.startsWith("#?")) { if (line.startsWith("#?")) { val match = propertyPattern.find(line) @@ -138,10 +150,16 @@ public class TaglessEnvelopeFormat( properties[key] = value } try { - line = input.readSafeUtf8Line() - offset += line.encodeToByteArray().size.toUInt() + line = ByteArray { + val read = try { + input.readBytesWithSeparatorTo(this, byteArrayOf('\n'.code.toByte()), 1024) + } catch (ex: BufferLimitExceededException) { + throw IllegalStateException("Property line exceeds maximum line length (1024)", ex) + } + offset += read + }.decodeToString().trim() } catch (ex: EOFException) { - return PartialEnvelope(Meta.EMPTY, offset.toUInt(), 0.toULong()) + return PartialEnvelope(Meta.EMPTY, offset, 0.toULong()) } } @@ -151,18 +169,19 @@ public class TaglessEnvelopeFormat( val metaFormat = properties[META_TYPE_PROPERTY]?.let { io.resolveMetaFormat(it) } ?: JsonMetaFormat val metaSize = properties[META_LENGTH_PROPERTY]?.toInt() meta = if (metaSize != null) { - offset += metaSize.toUInt() + offset += metaSize metaFormat.readObject(input.readBinary(metaSize)) } else { error("Can't partially read an envelope with undefined meta size") } } - do { - line = input.readSafeUtf8Line() //?: return PartialEnvelope(Meta.EMPTY, offset.toUInt(), 0.toULong()) - offset += line.encodeToByteArray().size.toUInt() - //returning an Envelope without data if end of input is reached - } while (!line.startsWith(dataStart)) + //skip until data start + offset += input.discardWithSeparator( + dataStart.encodeToByteArray(), + atMost = 1024, + skipUntilEndOfLine = true + ) val dataSize = properties[DATA_LENGTH_PROPERTY]?.toULong() return PartialEnvelope(meta, offset, dataSize) @@ -192,7 +211,7 @@ public class TaglessEnvelopeFormat( public const val code: Int = 0x4446544c //DFTL - override val name: Name = TAGLESS_ENVELOPE_TYPE.asName() + override val name: Name = EnvelopeFormatFactory.ENVELOPE_FACTORY_NAME + TAGLESS_ENVELOPE_TYPE override fun build(context: Context, meta: Meta): EnvelopeFormat = TaglessEnvelopeFormat(context.io, meta) diff --git a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/ioMisc.kt b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/ioMisc.kt index acb16916..e72c0eed 100644 --- a/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/ioMisc.kt +++ b/dataforge-io/src/commonMain/kotlin/space/kscience/dataforge/io/ioMisc.kt @@ -1,8 +1,10 @@ package space.kscience.dataforge.io +import io.ktor.utils.io.bits.Memory import io.ktor.utils.io.charsets.Charsets import io.ktor.utils.io.charsets.decodeExactBytes import io.ktor.utils.io.core.* +import io.ktor.utils.io.core.internal.ChunkBuffer import space.kscience.dataforge.meta.Meta import space.kscience.dataforge.misc.DFExperimental import kotlin.math.min @@ -86,11 +88,110 @@ public fun EnvelopeFormat.readBinary(binary: Binary): Envelope { * A zero-copy read from */ @DFExperimental -public fun IOPlugin.readEnvelopeBinary( +public fun IOPlugin.readEnvelope( binary: Binary, readNonEnvelopes: Boolean = false, formatPicker: IOPlugin.(Binary) -> EnvelopeFormat? = IOPlugin::peekBinaryEnvelopeFormat, ): Envelope = formatPicker(binary)?.readBinary(binary) ?: if (readNonEnvelopes) { // if no format accepts file, read it as binary SimpleEnvelope(Meta.EMPTY, binary) -} else error("Can't infer format for $binary") \ No newline at end of file +} else error("Can't infer format for $binary") + +@DFExperimental +public fun IOPlugin.readEnvelope( + string: String, + readNonEnvelopes: Boolean = false, + formatPicker: IOPlugin.(Binary) -> EnvelopeFormat? = IOPlugin::peekBinaryEnvelopeFormat, +): Envelope = readEnvelope(string.encodeToByteArray().asBinary(), readNonEnvelopes, formatPicker) + + +private class RingByteArray( + private val buffer: ByteArray, + private var startIndex: Int = 0, + var size: Int = 0, +) { + operator fun get(index: Int): Byte { + require(index >= 0) { "Index must be positive" } + require(index < size) { "Index $index is out of circular buffer size $size" } + return buffer[startIndex.forward(index)] + } + + fun isFull(): Boolean = size == buffer.size + + fun push(element: Byte) { + buffer[startIndex.forward(size)] = element + if (isFull()) startIndex++ else size++ + + } + + private fun Int.forward(n: Int): Int = (this + n) % (buffer.size) + + fun compare(inputArray: ByteArray): Boolean = when { + inputArray.size != buffer.size -> false + size < buffer.size -> false + else -> inputArray.indices.all { inputArray[it] == get(it) } + } +} + +/** + * Read [Input] into [output] until designated multy-byte [separator] and optionally continues until + * the end of the line after it. Throw error if [separator] not found and [atMost] bytes are read. + * Also fails if [separator] not found until the end of input. + * + * Separator itself is not read into Output. + * + * @return bytes actually being read, including separator + */ +public fun Input.readBytesWithSeparatorTo( + output: Output, + separator: ByteArray, + atMost: Int = Int.MAX_VALUE, + skipUntilEndOfLine: Boolean = false, +): Int { + var counter = 0 + val rb = RingByteArray(ByteArray(separator.size)) + var separatorFound = false + takeWhile { buffer -> + while (buffer.canRead()) { + val byte = buffer.readByte() + counter++ + if (counter >= atMost) error("Maximum number of bytes to be read $atMost reached.") + //If end-of-line-search is on, terminate + if (separatorFound) { + if (endOfInput || byte == '\n'.code.toByte()) { + return counter + } + } else { + rb.push(byte) + if (rb.compare(separator)) { + separatorFound = true + if (!skipUntilEndOfLine) { + return counter + } + } else if (rb.isFull()) { + output.writeByte(rb[0]) + } + } + } + !endOfInput + } + error("Read to the end of input without encountering ${separator.decodeToString()}") +} + +public fun Input.discardWithSeparator( + separator: ByteArray, + atMost: Int = Int.MAX_VALUE, + skipUntilEndOfLine: Boolean = false, +): Int { + val dummy: Output = object :Output(ChunkBuffer.Pool){ + override fun closeDestination() { + // Do nothing + } + + override fun flush(source: Memory, offset: Int, length: Int) { + // Do nothing + } + } + + return readBytesWithSeparatorTo(dummy, separator, atMost, skipUntilEndOfLine) +} diff --git a/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/EnvelopeFormatTest.kt b/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/EnvelopeFormatTest.kt index edadc7f8..f6106f4b 100644 --- a/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/EnvelopeFormatTest.kt +++ b/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/EnvelopeFormatTest.kt @@ -1,5 +1,6 @@ package space.kscience.dataforge.io +import io.ktor.utils.io.core.ByteReadPacket import io.ktor.utils.io.core.readDouble import io.ktor.utils.io.core.writeDouble import kotlin.test.Test @@ -9,10 +10,10 @@ import kotlin.test.assertEquals class EnvelopeFormatTest { val envelope = Envelope { type = "test.format" - meta{ + meta { "d" put 22.2 } - data{ + data { writeDouble(22.2) // repeat(2000){ // writeInt(it) @@ -21,12 +22,12 @@ class EnvelopeFormatTest { } @Test - fun testTaggedFormat(){ + fun testTaggedFormat() { TaggedEnvelopeFormat.run { val byteArray = writeToByteArray(envelope) //println(byteArray.decodeToString()) val res = readFromByteArray(byteArray) - assertEquals(envelope.meta,res.meta) + assertEquals(envelope.meta, res.meta) val double = res.data?.read { readDouble() } @@ -35,12 +36,14 @@ class EnvelopeFormatTest { } @Test - fun testTaglessFormat(){ + fun testTaglessFormat() { TaglessEnvelopeFormat.run { val byteArray = writeToByteArray(envelope) //println(byteArray.decodeToString()) + val partial = readPartial(ByteReadPacket(byteArray)) + assertEquals(8, partial.dataSize?.toInt()) val res = readFromByteArray(byteArray) - assertEquals(envelope.meta,res.meta) + assertEquals(envelope.meta, res.meta) val double = res.data?.read { readDouble() } diff --git a/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/IOTest.kt b/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/IOTest.kt index 197d1c30..a2583bb1 100644 --- a/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/IOTest.kt +++ b/dataforge-io/src/commonTest/kotlin/space/kscience/dataforge/io/IOTest.kt @@ -2,8 +2,10 @@ package space.kscience.dataforge.io import io.ktor.utils.io.core.ByteReadPacket import io.ktor.utils.io.core.readBytes +import io.ktor.utils.io.core.readUTF8Line import kotlin.test.Test import kotlin.test.assertEquals +import kotlin.test.assertFails class IOTest { @Test @@ -14,4 +16,42 @@ class IOTest { val second = input.readBytes(4) assertEquals(4.toByte(), second[0]) } + + @Test + fun readUntilSeparator() { + val source = """ + aaa + bbb + --- + ccc + ddd + """.trimIndent() + + val binary = source.encodeToByteArray().asBinary() + + binary.read { + val array = ByteArray { + val read = readBytesWithSeparatorTo(this, "---".encodeToByteArray(), skipUntilEndOfLine = true) + assertEquals(12, read) + } + assertEquals(""" + aaa + bbb + """.trimIndent(),array.decodeToString().trim()) + assertEquals("ccc", readUTF8Line()?.trim()) + } + + assertFails { + binary.read { + discardWithSeparator("---".encodeToByteArray(), atMost = 3) + } + } + + assertFails { + binary.read{ + discardWithSeparator("-+-".encodeToByteArray()) + } + } + + } } \ No newline at end of file diff --git a/dataforge-workspace/src/commonMain/kotlin/space/kscience/dataforge/workspace/Workspace.kt b/dataforge-workspace/src/commonMain/kotlin/space/kscience/dataforge/workspace/Workspace.kt index 3aa2c0f4..6b120c76 100644 --- a/dataforge-workspace/src/commonMain/kotlin/space/kscience/dataforge/workspace/Workspace.kt +++ b/dataforge-workspace/src/commonMain/kotlin/space/kscience/dataforge/workspace/Workspace.kt @@ -1,6 +1,7 @@ package space.kscience.dataforge.workspace import space.kscience.dataforge.context.ContextAware +import space.kscience.dataforge.data.Data import space.kscience.dataforge.data.DataSet import space.kscience.dataforge.meta.Meta import space.kscience.dataforge.meta.MutableMeta @@ -34,7 +35,7 @@ public interface Workspace : ContextAware, Provider { return when (target) { "target", Meta.TYPE -> targets.mapKeys { Name.parse(it.key)} Task.TYPE -> tasks - //Data.TYPE -> data.flow().toMap() + Data.TYPE -> data.dataSequence().associateBy { it.name } else -> emptyMap() } } @@ -46,7 +47,7 @@ public interface Workspace : ContextAware, Provider { } public suspend fun produceData(taskName: Name, taskMeta: Meta, name: Name): TaskData<*>? = - produce(taskName, taskMeta).get(name) + produce(taskName, taskMeta)[name] public companion object { public const val TYPE: String = "workspace"