Fix text envelope formats partial reads
This commit is contained in:
parent
6d396368b7
commit
82d37f4b55
@ -12,6 +12,7 @@
|
|||||||
- DataTree `items` call is blocking.
|
- DataTree `items` call is blocking.
|
||||||
- DataSet `getData` is no longer suspended and renamed to `get`
|
- DataSet `getData` is no longer suspended and renamed to `get`
|
||||||
- DataSet operates with sequences of data instead of flows
|
- DataSet operates with sequences of data instead of flows
|
||||||
|
- PartialEnvelope uses `Int` instead `UInt`.
|
||||||
|
|
||||||
### Deprecated
|
### Deprecated
|
||||||
|
|
||||||
@ -19,6 +20,7 @@
|
|||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
- Meta file name in readMeta from directory
|
- Meta file name in readMeta from directory
|
||||||
|
- Tagless and FrontMatter envelope partial readers fix.
|
||||||
|
|
||||||
### Security
|
### Security
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ plugins {
|
|||||||
|
|
||||||
allprojects {
|
allprojects {
|
||||||
group = "space.kscience"
|
group = "space.kscience"
|
||||||
version = "0.6.0-dev-2"
|
version = "0.6.0-dev-3"
|
||||||
repositories{
|
repositories{
|
||||||
mavenCentral()
|
mavenCentral()
|
||||||
}
|
}
|
||||||
|
@ -83,6 +83,10 @@ public class StaticData<T : Any>(
|
|||||||
override val meta: Meta = Meta.EMPTY,
|
override val meta: Meta = Meta.EMPTY,
|
||||||
) : Data<T>, StaticGoal<T>(value)
|
) : Data<T>, StaticGoal<T>(value)
|
||||||
|
|
||||||
|
@Suppress("FunctionName")
|
||||||
|
public inline fun <reified T : Any> Data(value: T, meta: Meta = Meta.EMPTY): StaticData<T> =
|
||||||
|
StaticData(typeOf<T>(), value, meta)
|
||||||
|
|
||||||
@Suppress("FunctionName")
|
@Suppress("FunctionName")
|
||||||
@DFInternal
|
@DFInternal
|
||||||
public fun <T : Any> Data(
|
public fun <T : Any> Data(
|
||||||
|
@ -2,8 +2,8 @@ package space.kscience.dataforge.io.yaml
|
|||||||
|
|
||||||
import io.ktor.utils.io.core.Input
|
import io.ktor.utils.io.core.Input
|
||||||
import io.ktor.utils.io.core.Output
|
import io.ktor.utils.io.core.Output
|
||||||
|
import io.ktor.utils.io.core.buildPacket
|
||||||
import io.ktor.utils.io.core.readBytes
|
import io.ktor.utils.io.core.readBytes
|
||||||
import io.ktor.utils.io.core.readUTF8Line
|
|
||||||
import space.kscience.dataforge.context.Context
|
import space.kscience.dataforge.context.Context
|
||||||
import space.kscience.dataforge.context.Global
|
import space.kscience.dataforge.context.Global
|
||||||
import space.kscience.dataforge.io.*
|
import space.kscience.dataforge.io.*
|
||||||
@ -11,6 +11,8 @@ import space.kscience.dataforge.io.IOFormat.Companion.META_KEY
|
|||||||
import space.kscience.dataforge.io.IOFormat.Companion.NAME_KEY
|
import space.kscience.dataforge.io.IOFormat.Companion.NAME_KEY
|
||||||
import space.kscience.dataforge.meta.Meta
|
import space.kscience.dataforge.meta.Meta
|
||||||
import space.kscience.dataforge.misc.DFExperimental
|
import space.kscience.dataforge.misc.DFExperimental
|
||||||
|
import space.kscience.dataforge.names.Name
|
||||||
|
import space.kscience.dataforge.names.plus
|
||||||
|
|
||||||
@DFExperimental
|
@DFExperimental
|
||||||
public class FrontMatterEnvelopeFormat(
|
public class FrontMatterEnvelopeFormat(
|
||||||
@ -19,51 +21,33 @@ public class FrontMatterEnvelopeFormat(
|
|||||||
) : EnvelopeFormat {
|
) : EnvelopeFormat {
|
||||||
|
|
||||||
override fun readPartial(input: Input): PartialEnvelope {
|
override fun readPartial(input: Input): PartialEnvelope {
|
||||||
var line: String
|
var offset = 0
|
||||||
var offset = 0u
|
|
||||||
do {
|
|
||||||
line = input.readUTF8Line() ?: error("Input does not contain front matter separator")
|
|
||||||
offset += line.encodeToByteArray().size.toUInt()
|
|
||||||
} while (!line.startsWith(SEPARATOR))
|
|
||||||
|
|
||||||
val readMetaFormat =
|
offset += input.discardWithSeparator(
|
||||||
metaTypeRegex.matchEntire(line)?.groupValues?.first()
|
SEPARATOR.encodeToByteArray(),
|
||||||
?.let { io.resolveMetaFormat(it) } ?: YamlMetaFormat
|
atMost = 1024,
|
||||||
|
skipUntilEndOfLine = false
|
||||||
|
)
|
||||||
|
|
||||||
|
val line = input.readSafeUtf8Line()
|
||||||
|
val readMetaFormat = line.trim().takeIf { it.isNotBlank() }?.let { io.resolveMetaFormat(it) } ?: YamlMetaFormat
|
||||||
|
|
||||||
//TODO replace by preview
|
//TODO replace by preview
|
||||||
val meta = Binary {
|
val packet = buildPacket {
|
||||||
do {
|
offset += input.readBytesWithSeparatorTo(
|
||||||
line = input.readSafeUtf8Line()
|
this,
|
||||||
writeUtf8String(line + "\r\n")
|
SEPARATOR.encodeToByteArray(),
|
||||||
offset += line.encodeToByteArray().size.toUInt()
|
skipUntilEndOfLine = true
|
||||||
} while (!line.startsWith(SEPARATOR))
|
)
|
||||||
}.read {
|
|
||||||
readMetaFormat.readMeta(input)
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
val meta = readMetaFormat.readMeta(packet)
|
||||||
return PartialEnvelope(meta, offset, null)
|
return PartialEnvelope(meta, offset, null)
|
||||||
}
|
}
|
||||||
|
|
||||||
override fun readObject(input: Input): Envelope {
|
override fun readObject(input: Input): Envelope {
|
||||||
var line: String
|
val partial = readPartial(input)
|
||||||
do {
|
val data = input.readBytes().asBinary()
|
||||||
line = input.readSafeUtf8Line() //?: error("Input does not contain front matter separator")
|
return SimpleEnvelope(partial.meta, data)
|
||||||
} while (!line.startsWith(SEPARATOR))
|
|
||||||
|
|
||||||
val readMetaFormat =
|
|
||||||
metaTypeRegex.matchEntire(line)?.groupValues?.first()
|
|
||||||
?.let { io.resolveMetaFormat(it) } ?: YamlMetaFormat
|
|
||||||
|
|
||||||
val meta = Binary {
|
|
||||||
do {
|
|
||||||
writeUtf8String(input.readSafeUtf8Line() + "\r\n")
|
|
||||||
} while (!line.startsWith(SEPARATOR))
|
|
||||||
}.read {
|
|
||||||
readMetaFormat.readMeta(input)
|
|
||||||
}
|
|
||||||
val bytes = input.readBytes()
|
|
||||||
val data = bytes.asBinary()
|
|
||||||
return SimpleEnvelope(meta, data)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
override fun writeEnvelope(
|
override fun writeEnvelope(
|
||||||
@ -92,6 +76,8 @@ public class FrontMatterEnvelopeFormat(
|
|||||||
|
|
||||||
private val metaTypeRegex = "---(\\w*)\\s*".toRegex()
|
private val metaTypeRegex = "---(\\w*)\\s*".toRegex()
|
||||||
|
|
||||||
|
override val name: Name = EnvelopeFormatFactory.ENVELOPE_FACTORY_NAME + "frontMatter"
|
||||||
|
|
||||||
override fun build(context: Context, meta: Meta): EnvelopeFormat {
|
override fun build(context: Context, meta: Meta): EnvelopeFormat {
|
||||||
return FrontMatterEnvelopeFormat(context.io, meta)
|
return FrontMatterEnvelopeFormat(context.io, meta)
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ import space.kscience.dataforge.context.AbstractPlugin
|
|||||||
import space.kscience.dataforge.context.Context
|
import space.kscience.dataforge.context.Context
|
||||||
import space.kscience.dataforge.context.PluginFactory
|
import space.kscience.dataforge.context.PluginFactory
|
||||||
import space.kscience.dataforge.context.PluginTag
|
import space.kscience.dataforge.context.PluginTag
|
||||||
|
import space.kscience.dataforge.io.EnvelopeFormatFactory
|
||||||
import space.kscience.dataforge.io.IOPlugin
|
import space.kscience.dataforge.io.IOPlugin
|
||||||
import space.kscience.dataforge.io.MetaFormatFactory
|
import space.kscience.dataforge.io.MetaFormatFactory
|
||||||
import space.kscience.dataforge.meta.Meta
|
import space.kscience.dataforge.meta.Meta
|
||||||
@ -20,6 +21,7 @@ public class YamlPlugin(meta: Meta) : AbstractPlugin(meta) {
|
|||||||
|
|
||||||
override fun content(target: String): Map<Name, Any> = when (target) {
|
override fun content(target: String): Map<Name, Any> = when (target) {
|
||||||
MetaFormatFactory.META_FORMAT_TYPE -> mapOf("yaml".asName() to YamlMetaFormat)
|
MetaFormatFactory.META_FORMAT_TYPE -> mapOf("yaml".asName() to YamlMetaFormat)
|
||||||
|
EnvelopeFormatFactory.ENVELOPE_FORMAT_TYPE -> mapOf(FrontMatterEnvelopeFormat.name to FrontMatterEnvelopeFormat)
|
||||||
else -> super.content(target)
|
else -> super.content(target)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,34 @@
|
|||||||
|
package space.kscience.dataforge.io.yaml
|
||||||
|
|
||||||
|
import space.kscience.dataforge.context.Context
|
||||||
|
import space.kscience.dataforge.io.io
|
||||||
|
import space.kscience.dataforge.io.readEnvelope
|
||||||
|
import space.kscience.dataforge.io.toByteArray
|
||||||
|
import space.kscience.dataforge.meta.get
|
||||||
|
import space.kscience.dataforge.meta.string
|
||||||
|
import kotlin.test.Test
|
||||||
|
import kotlin.test.assertEquals
|
||||||
|
|
||||||
|
internal class FrontMatterEnvelopeFormatTest {
|
||||||
|
|
||||||
|
val context = Context {
|
||||||
|
plugin(YamlPlugin)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun frontMatter(){
|
||||||
|
val text = """
|
||||||
|
---
|
||||||
|
content_type: magprog
|
||||||
|
magprog_section: contacts
|
||||||
|
section_title: Контакты
|
||||||
|
language: ru
|
||||||
|
---
|
||||||
|
Some text here
|
||||||
|
""".trimIndent()
|
||||||
|
|
||||||
|
val envelope = context.io.readEnvelope(text)
|
||||||
|
assertEquals("Some text here", envelope.data!!.toByteArray().decodeToString().trim())
|
||||||
|
assertEquals("magprog", envelope.meta["content_type"].string)
|
||||||
|
}
|
||||||
|
}
|
@ -14,7 +14,7 @@ import kotlin.reflect.typeOf
|
|||||||
/**
|
/**
|
||||||
* A partially read envelope with meta, but without data
|
* A partially read envelope with meta, but without data
|
||||||
*/
|
*/
|
||||||
public data class PartialEnvelope(val meta: Meta, val dataOffset: UInt, val dataSize: ULong?)
|
public data class PartialEnvelope(val meta: Meta, val dataOffset: Int, val dataSize: ULong?)
|
||||||
|
|
||||||
public interface EnvelopeFormat : IOFormat<Envelope> {
|
public interface EnvelopeFormat : IOFormat<Envelope> {
|
||||||
override val type: KType get() = typeOf<Envelope>()
|
override val type: KType get() = typeOf<Envelope>()
|
||||||
@ -39,7 +39,6 @@ public fun EnvelopeFormat.read(input: Input): Envelope = readObject(input)
|
|||||||
|
|
||||||
@Type(ENVELOPE_FORMAT_TYPE)
|
@Type(ENVELOPE_FORMAT_TYPE)
|
||||||
public interface EnvelopeFormatFactory : IOFormatFactory<Envelope>, EnvelopeFormat {
|
public interface EnvelopeFormatFactory : IOFormatFactory<Envelope>, EnvelopeFormat {
|
||||||
override val name: Name get() = "envelope".asName()
|
|
||||||
override val type: KType get() = typeOf<Envelope>()
|
override val type: KType get() = typeOf<Envelope>()
|
||||||
|
|
||||||
override fun build(context: Context, meta: Meta): EnvelopeFormat
|
override fun build(context: Context, meta: Meta): EnvelopeFormat
|
||||||
@ -51,6 +50,7 @@ public interface EnvelopeFormatFactory : IOFormatFactory<Envelope>, EnvelopeForm
|
|||||||
public fun peekFormat(io: IOPlugin, binary: Binary): EnvelopeFormat?
|
public fun peekFormat(io: IOPlugin, binary: Binary): EnvelopeFormat?
|
||||||
|
|
||||||
public companion object {
|
public companion object {
|
||||||
|
public val ENVELOPE_FACTORY_NAME: Name = "envelope".asName()
|
||||||
public const val ENVELOPE_FORMAT_TYPE: String = "io.format.envelope"
|
public const val ENVELOPE_FORMAT_TYPE: String = "io.format.envelope"
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -92,7 +92,7 @@ public class TaggedEnvelopeFormat(
|
|||||||
val meta: Meta = metaFormat.readObject(metaBinary)
|
val meta: Meta = metaFormat.readObject(metaBinary)
|
||||||
|
|
||||||
|
|
||||||
return PartialEnvelope(meta, version.tagSize + tag.metaSize, tag.dataSize)
|
return PartialEnvelope(meta, (version.tagSize + tag.metaSize).toInt(), tag.dataSize)
|
||||||
}
|
}
|
||||||
|
|
||||||
private data class Tag(
|
private data class Tag(
|
||||||
@ -117,7 +117,7 @@ public class TaggedEnvelopeFormat(
|
|||||||
private const val START_SEQUENCE = "#~"
|
private const val START_SEQUENCE = "#~"
|
||||||
private const val END_SEQUENCE = "~#\r\n"
|
private const val END_SEQUENCE = "~#\r\n"
|
||||||
|
|
||||||
override val name: Name = super.name + "tagged"
|
override val name: Name = EnvelopeFormatFactory.ENVELOPE_FACTORY_NAME + "tagged"
|
||||||
|
|
||||||
override fun build(context: Context, meta: Meta): EnvelopeFormat {
|
override fun build(context: Context, meta: Meta): EnvelopeFormat {
|
||||||
val io = context.io
|
val io = context.io
|
||||||
|
@ -10,7 +10,7 @@ import space.kscience.dataforge.meta.get
|
|||||||
import space.kscience.dataforge.meta.isEmpty
|
import space.kscience.dataforge.meta.isEmpty
|
||||||
import space.kscience.dataforge.meta.string
|
import space.kscience.dataforge.meta.string
|
||||||
import space.kscience.dataforge.names.Name
|
import space.kscience.dataforge.names.Name
|
||||||
import space.kscience.dataforge.names.asName
|
import space.kscience.dataforge.names.plus
|
||||||
import kotlin.collections.set
|
import kotlin.collections.set
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -33,7 +33,7 @@ public class TaglessEnvelopeFormat(
|
|||||||
output: Output,
|
output: Output,
|
||||||
envelope: Envelope,
|
envelope: Envelope,
|
||||||
metaFormatFactory: MetaFormatFactory,
|
metaFormatFactory: MetaFormatFactory,
|
||||||
formatMeta: Meta
|
formatMeta: Meta,
|
||||||
) {
|
) {
|
||||||
val metaFormat = metaFormatFactory.build(this.io.context, formatMeta)
|
val metaFormat = metaFormatFactory.build(this.io.context, formatMeta)
|
||||||
|
|
||||||
@ -66,13 +66,16 @@ public class TaglessEnvelopeFormat(
|
|||||||
}
|
}
|
||||||
|
|
||||||
override fun readObject(input: Input): Envelope {
|
override fun readObject(input: Input): Envelope {
|
||||||
var line: String
|
//read preamble
|
||||||
do {
|
input.discardWithSeparator(
|
||||||
line = input.readSafeUtf8Line() // ?: error("Input does not contain tagless envelope header")
|
TAGLESS_ENVELOPE_HEADER.encodeToByteArray(),
|
||||||
} while (!line.startsWith(TAGLESS_ENVELOPE_HEADER))
|
atMost = 1024,
|
||||||
|
skipUntilEndOfLine = true
|
||||||
|
)
|
||||||
|
|
||||||
val properties = HashMap<String, String>()
|
val properties = HashMap<String, String>()
|
||||||
|
|
||||||
line = ""
|
var line = ""
|
||||||
while (line.isBlank() || line.startsWith("#?")) {
|
while (line.isBlank() || line.startsWith("#?")) {
|
||||||
if (line.startsWith("#?")) {
|
if (line.startsWith("#?")) {
|
||||||
val match = propertyPattern.find(line)
|
val match = propertyPattern.find(line)
|
||||||
@ -80,9 +83,17 @@ public class TaglessEnvelopeFormat(
|
|||||||
val (key, value) = match.destructured
|
val (key, value) = match.destructured
|
||||||
properties[key] = value
|
properties[key] = value
|
||||||
}
|
}
|
||||||
//If can't read line, return envelope without data
|
try {
|
||||||
if (input.endOfInput) return SimpleEnvelope(Meta.EMPTY, null)
|
line = ByteArray {
|
||||||
line = input.readSafeUtf8Line()
|
try {
|
||||||
|
input.readBytesWithSeparatorTo(this, byteArrayOf('\n'.code.toByte()), 1024)
|
||||||
|
} catch (ex: BufferLimitExceededException) {
|
||||||
|
throw IllegalStateException("Property line exceeds maximum line length (1024)", ex)
|
||||||
|
}
|
||||||
|
}.decodeToString().trim()
|
||||||
|
} catch (ex: EOFException) {
|
||||||
|
return SimpleEnvelope(Meta.EMPTY, Binary.EMPTY)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var meta: Meta = Meta.EMPTY
|
var meta: Meta = Meta.EMPTY
|
||||||
@ -93,18 +104,16 @@ public class TaglessEnvelopeFormat(
|
|||||||
meta = if (metaSize != null) {
|
meta = if (metaSize != null) {
|
||||||
metaFormat.readObject(input.readBinary(metaSize))
|
metaFormat.readObject(input.readBinary(metaSize))
|
||||||
} else {
|
} else {
|
||||||
metaFormat.readObject(input)
|
error("Can't partially read an envelope with undefined meta size")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
do {
|
//skip until data start
|
||||||
try {
|
input.discardWithSeparator(
|
||||||
line = input.readSafeUtf8Line()
|
dataStart.encodeToByteArray(),
|
||||||
} catch (ex: EOFException) {
|
atMost = 1024,
|
||||||
//returning an Envelope without data if end of input is reached
|
skipUntilEndOfLine = true
|
||||||
return SimpleEnvelope(meta, null)
|
)
|
||||||
}
|
|
||||||
} while (!line.startsWith(dataStart))
|
|
||||||
|
|
||||||
val data: Binary = if (properties.containsKey(DATA_LENGTH_PROPERTY)) {
|
val data: Binary = if (properties.containsKey(DATA_LENGTH_PROPERTY)) {
|
||||||
input.readBinary(properties[DATA_LENGTH_PROPERTY]!!.toInt())
|
input.readBinary(properties[DATA_LENGTH_PROPERTY]!!.toInt())
|
||||||
@ -112,24 +121,27 @@ public class TaglessEnvelopeFormat(
|
|||||||
// readByteArray(bytes)
|
// readByteArray(bytes)
|
||||||
// bytes.asBinary()
|
// bytes.asBinary()
|
||||||
} else {
|
} else {
|
||||||
Binary {
|
input.readBytes().asBinary()
|
||||||
input.copyTo(this)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return SimpleEnvelope(meta, data)
|
return SimpleEnvelope(meta, data)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
override fun readPartial(input: Input): PartialEnvelope {
|
override fun readPartial(input: Input): PartialEnvelope {
|
||||||
var offset = 0u
|
var offset = 0
|
||||||
var line: String
|
|
||||||
do {
|
//read preamble
|
||||||
line = input.readSafeUtf8Line()// ?: error("Input does not contain tagless envelope header")
|
|
||||||
offset += line.encodeToByteArray().size.toUInt()
|
offset += input.discardWithSeparator(
|
||||||
} while (!line.startsWith(TAGLESS_ENVELOPE_HEADER))
|
TAGLESS_ENVELOPE_HEADER.encodeToByteArray(),
|
||||||
|
atMost = 1024,
|
||||||
|
skipUntilEndOfLine = true
|
||||||
|
)
|
||||||
|
|
||||||
val properties = HashMap<String, String>()
|
val properties = HashMap<String, String>()
|
||||||
|
|
||||||
line = ""
|
var line = ""
|
||||||
while (line.isBlank() || line.startsWith("#?")) {
|
while (line.isBlank() || line.startsWith("#?")) {
|
||||||
if (line.startsWith("#?")) {
|
if (line.startsWith("#?")) {
|
||||||
val match = propertyPattern.find(line)
|
val match = propertyPattern.find(line)
|
||||||
@ -138,10 +150,16 @@ public class TaglessEnvelopeFormat(
|
|||||||
properties[key] = value
|
properties[key] = value
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
line = input.readSafeUtf8Line()
|
line = ByteArray {
|
||||||
offset += line.encodeToByteArray().size.toUInt()
|
val read = try {
|
||||||
|
input.readBytesWithSeparatorTo(this, byteArrayOf('\n'.code.toByte()), 1024)
|
||||||
|
} catch (ex: BufferLimitExceededException) {
|
||||||
|
throw IllegalStateException("Property line exceeds maximum line length (1024)", ex)
|
||||||
|
}
|
||||||
|
offset += read
|
||||||
|
}.decodeToString().trim()
|
||||||
} catch (ex: EOFException) {
|
} catch (ex: EOFException) {
|
||||||
return PartialEnvelope(Meta.EMPTY, offset.toUInt(), 0.toULong())
|
return PartialEnvelope(Meta.EMPTY, offset, 0.toULong())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -151,18 +169,19 @@ public class TaglessEnvelopeFormat(
|
|||||||
val metaFormat = properties[META_TYPE_PROPERTY]?.let { io.resolveMetaFormat(it) } ?: JsonMetaFormat
|
val metaFormat = properties[META_TYPE_PROPERTY]?.let { io.resolveMetaFormat(it) } ?: JsonMetaFormat
|
||||||
val metaSize = properties[META_LENGTH_PROPERTY]?.toInt()
|
val metaSize = properties[META_LENGTH_PROPERTY]?.toInt()
|
||||||
meta = if (metaSize != null) {
|
meta = if (metaSize != null) {
|
||||||
offset += metaSize.toUInt()
|
offset += metaSize
|
||||||
metaFormat.readObject(input.readBinary(metaSize))
|
metaFormat.readObject(input.readBinary(metaSize))
|
||||||
} else {
|
} else {
|
||||||
error("Can't partially read an envelope with undefined meta size")
|
error("Can't partially read an envelope with undefined meta size")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
do {
|
//skip until data start
|
||||||
line = input.readSafeUtf8Line() //?: return PartialEnvelope(Meta.EMPTY, offset.toUInt(), 0.toULong())
|
offset += input.discardWithSeparator(
|
||||||
offset += line.encodeToByteArray().size.toUInt()
|
dataStart.encodeToByteArray(),
|
||||||
//returning an Envelope without data if end of input is reached
|
atMost = 1024,
|
||||||
} while (!line.startsWith(dataStart))
|
skipUntilEndOfLine = true
|
||||||
|
)
|
||||||
|
|
||||||
val dataSize = properties[DATA_LENGTH_PROPERTY]?.toULong()
|
val dataSize = properties[DATA_LENGTH_PROPERTY]?.toULong()
|
||||||
return PartialEnvelope(meta, offset, dataSize)
|
return PartialEnvelope(meta, offset, dataSize)
|
||||||
@ -192,7 +211,7 @@ public class TaglessEnvelopeFormat(
|
|||||||
|
|
||||||
public const val code: Int = 0x4446544c //DFTL
|
public const val code: Int = 0x4446544c //DFTL
|
||||||
|
|
||||||
override val name: Name = TAGLESS_ENVELOPE_TYPE.asName()
|
override val name: Name = EnvelopeFormatFactory.ENVELOPE_FACTORY_NAME + TAGLESS_ENVELOPE_TYPE
|
||||||
|
|
||||||
override fun build(context: Context, meta: Meta): EnvelopeFormat = TaglessEnvelopeFormat(context.io, meta)
|
override fun build(context: Context, meta: Meta): EnvelopeFormat = TaglessEnvelopeFormat(context.io, meta)
|
||||||
|
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
package space.kscience.dataforge.io
|
package space.kscience.dataforge.io
|
||||||
|
|
||||||
|
import io.ktor.utils.io.bits.Memory
|
||||||
import io.ktor.utils.io.charsets.Charsets
|
import io.ktor.utils.io.charsets.Charsets
|
||||||
import io.ktor.utils.io.charsets.decodeExactBytes
|
import io.ktor.utils.io.charsets.decodeExactBytes
|
||||||
import io.ktor.utils.io.core.*
|
import io.ktor.utils.io.core.*
|
||||||
|
import io.ktor.utils.io.core.internal.ChunkBuffer
|
||||||
import space.kscience.dataforge.meta.Meta
|
import space.kscience.dataforge.meta.Meta
|
||||||
import space.kscience.dataforge.misc.DFExperimental
|
import space.kscience.dataforge.misc.DFExperimental
|
||||||
import kotlin.math.min
|
import kotlin.math.min
|
||||||
@ -86,11 +88,110 @@ public fun EnvelopeFormat.readBinary(binary: Binary): Envelope {
|
|||||||
* A zero-copy read from
|
* A zero-copy read from
|
||||||
*/
|
*/
|
||||||
@DFExperimental
|
@DFExperimental
|
||||||
public fun IOPlugin.readEnvelopeBinary(
|
public fun IOPlugin.readEnvelope(
|
||||||
binary: Binary,
|
binary: Binary,
|
||||||
readNonEnvelopes: Boolean = false,
|
readNonEnvelopes: Boolean = false,
|
||||||
formatPicker: IOPlugin.(Binary) -> EnvelopeFormat? = IOPlugin::peekBinaryEnvelopeFormat,
|
formatPicker: IOPlugin.(Binary) -> EnvelopeFormat? = IOPlugin::peekBinaryEnvelopeFormat,
|
||||||
): Envelope = formatPicker(binary)?.readBinary(binary) ?: if (readNonEnvelopes) {
|
): Envelope = formatPicker(binary)?.readBinary(binary) ?: if (readNonEnvelopes) {
|
||||||
// if no format accepts file, read it as binary
|
// if no format accepts file, read it as binary
|
||||||
SimpleEnvelope(Meta.EMPTY, binary)
|
SimpleEnvelope(Meta.EMPTY, binary)
|
||||||
} else error("Can't infer format for $binary")
|
} else error("Can't infer format for $binary")
|
||||||
|
|
||||||
|
@DFExperimental
|
||||||
|
public fun IOPlugin.readEnvelope(
|
||||||
|
string: String,
|
||||||
|
readNonEnvelopes: Boolean = false,
|
||||||
|
formatPicker: IOPlugin.(Binary) -> EnvelopeFormat? = IOPlugin::peekBinaryEnvelopeFormat,
|
||||||
|
): Envelope = readEnvelope(string.encodeToByteArray().asBinary(), readNonEnvelopes, formatPicker)
|
||||||
|
|
||||||
|
|
||||||
|
private class RingByteArray(
|
||||||
|
private val buffer: ByteArray,
|
||||||
|
private var startIndex: Int = 0,
|
||||||
|
var size: Int = 0,
|
||||||
|
) {
|
||||||
|
operator fun get(index: Int): Byte {
|
||||||
|
require(index >= 0) { "Index must be positive" }
|
||||||
|
require(index < size) { "Index $index is out of circular buffer size $size" }
|
||||||
|
return buffer[startIndex.forward(index)]
|
||||||
|
}
|
||||||
|
|
||||||
|
fun isFull(): Boolean = size == buffer.size
|
||||||
|
|
||||||
|
fun push(element: Byte) {
|
||||||
|
buffer[startIndex.forward(size)] = element
|
||||||
|
if (isFull()) startIndex++ else size++
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun Int.forward(n: Int): Int = (this + n) % (buffer.size)
|
||||||
|
|
||||||
|
fun compare(inputArray: ByteArray): Boolean = when {
|
||||||
|
inputArray.size != buffer.size -> false
|
||||||
|
size < buffer.size -> false
|
||||||
|
else -> inputArray.indices.all { inputArray[it] == get(it) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read [Input] into [output] until designated multy-byte [separator] and optionally continues until
|
||||||
|
* the end of the line after it. Throw error if [separator] not found and [atMost] bytes are read.
|
||||||
|
* Also fails if [separator] not found until the end of input.
|
||||||
|
*
|
||||||
|
* Separator itself is not read into Output.
|
||||||
|
*
|
||||||
|
* @return bytes actually being read, including separator
|
||||||
|
*/
|
||||||
|
public fun Input.readBytesWithSeparatorTo(
|
||||||
|
output: Output,
|
||||||
|
separator: ByteArray,
|
||||||
|
atMost: Int = Int.MAX_VALUE,
|
||||||
|
skipUntilEndOfLine: Boolean = false,
|
||||||
|
): Int {
|
||||||
|
var counter = 0
|
||||||
|
val rb = RingByteArray(ByteArray(separator.size))
|
||||||
|
var separatorFound = false
|
||||||
|
takeWhile { buffer ->
|
||||||
|
while (buffer.canRead()) {
|
||||||
|
val byte = buffer.readByte()
|
||||||
|
counter++
|
||||||
|
if (counter >= atMost) error("Maximum number of bytes to be read $atMost reached.")
|
||||||
|
//If end-of-line-search is on, terminate
|
||||||
|
if (separatorFound) {
|
||||||
|
if (endOfInput || byte == '\n'.code.toByte()) {
|
||||||
|
return counter
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
rb.push(byte)
|
||||||
|
if (rb.compare(separator)) {
|
||||||
|
separatorFound = true
|
||||||
|
if (!skipUntilEndOfLine) {
|
||||||
|
return counter
|
||||||
|
}
|
||||||
|
} else if (rb.isFull()) {
|
||||||
|
output.writeByte(rb[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
!endOfInput
|
||||||
|
}
|
||||||
|
error("Read to the end of input without encountering ${separator.decodeToString()}")
|
||||||
|
}
|
||||||
|
|
||||||
|
public fun Input.discardWithSeparator(
|
||||||
|
separator: ByteArray,
|
||||||
|
atMost: Int = Int.MAX_VALUE,
|
||||||
|
skipUntilEndOfLine: Boolean = false,
|
||||||
|
): Int {
|
||||||
|
val dummy: Output = object :Output(ChunkBuffer.Pool){
|
||||||
|
override fun closeDestination() {
|
||||||
|
// Do nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun flush(source: Memory, offset: Int, length: Int) {
|
||||||
|
// Do nothing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return readBytesWithSeparatorTo(dummy, separator, atMost, skipUntilEndOfLine)
|
||||||
|
}
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
package space.kscience.dataforge.io
|
package space.kscience.dataforge.io
|
||||||
|
|
||||||
|
import io.ktor.utils.io.core.ByteReadPacket
|
||||||
import io.ktor.utils.io.core.readDouble
|
import io.ktor.utils.io.core.readDouble
|
||||||
import io.ktor.utils.io.core.writeDouble
|
import io.ktor.utils.io.core.writeDouble
|
||||||
import kotlin.test.Test
|
import kotlin.test.Test
|
||||||
@ -9,10 +10,10 @@ import kotlin.test.assertEquals
|
|||||||
class EnvelopeFormatTest {
|
class EnvelopeFormatTest {
|
||||||
val envelope = Envelope {
|
val envelope = Envelope {
|
||||||
type = "test.format"
|
type = "test.format"
|
||||||
meta{
|
meta {
|
||||||
"d" put 22.2
|
"d" put 22.2
|
||||||
}
|
}
|
||||||
data{
|
data {
|
||||||
writeDouble(22.2)
|
writeDouble(22.2)
|
||||||
// repeat(2000){
|
// repeat(2000){
|
||||||
// writeInt(it)
|
// writeInt(it)
|
||||||
@ -21,12 +22,12 @@ class EnvelopeFormatTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun testTaggedFormat(){
|
fun testTaggedFormat() {
|
||||||
TaggedEnvelopeFormat.run {
|
TaggedEnvelopeFormat.run {
|
||||||
val byteArray = writeToByteArray(envelope)
|
val byteArray = writeToByteArray(envelope)
|
||||||
//println(byteArray.decodeToString())
|
//println(byteArray.decodeToString())
|
||||||
val res = readFromByteArray(byteArray)
|
val res = readFromByteArray(byteArray)
|
||||||
assertEquals(envelope.meta,res.meta)
|
assertEquals(envelope.meta, res.meta)
|
||||||
val double = res.data?.read {
|
val double = res.data?.read {
|
||||||
readDouble()
|
readDouble()
|
||||||
}
|
}
|
||||||
@ -35,12 +36,14 @@ class EnvelopeFormatTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun testTaglessFormat(){
|
fun testTaglessFormat() {
|
||||||
TaglessEnvelopeFormat.run {
|
TaglessEnvelopeFormat.run {
|
||||||
val byteArray = writeToByteArray(envelope)
|
val byteArray = writeToByteArray(envelope)
|
||||||
//println(byteArray.decodeToString())
|
//println(byteArray.decodeToString())
|
||||||
|
val partial = readPartial(ByteReadPacket(byteArray))
|
||||||
|
assertEquals(8, partial.dataSize?.toInt())
|
||||||
val res = readFromByteArray(byteArray)
|
val res = readFromByteArray(byteArray)
|
||||||
assertEquals(envelope.meta,res.meta)
|
assertEquals(envelope.meta, res.meta)
|
||||||
val double = res.data?.read {
|
val double = res.data?.read {
|
||||||
readDouble()
|
readDouble()
|
||||||
}
|
}
|
||||||
|
@ -2,8 +2,10 @@ package space.kscience.dataforge.io
|
|||||||
|
|
||||||
import io.ktor.utils.io.core.ByteReadPacket
|
import io.ktor.utils.io.core.ByteReadPacket
|
||||||
import io.ktor.utils.io.core.readBytes
|
import io.ktor.utils.io.core.readBytes
|
||||||
|
import io.ktor.utils.io.core.readUTF8Line
|
||||||
import kotlin.test.Test
|
import kotlin.test.Test
|
||||||
import kotlin.test.assertEquals
|
import kotlin.test.assertEquals
|
||||||
|
import kotlin.test.assertFails
|
||||||
|
|
||||||
class IOTest {
|
class IOTest {
|
||||||
@Test
|
@Test
|
||||||
@ -14,4 +16,42 @@ class IOTest {
|
|||||||
val second = input.readBytes(4)
|
val second = input.readBytes(4)
|
||||||
assertEquals(4.toByte(), second[0])
|
assertEquals(4.toByte(), second[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun readUntilSeparator() {
|
||||||
|
val source = """
|
||||||
|
aaa
|
||||||
|
bbb
|
||||||
|
---
|
||||||
|
ccc
|
||||||
|
ddd
|
||||||
|
""".trimIndent()
|
||||||
|
|
||||||
|
val binary = source.encodeToByteArray().asBinary()
|
||||||
|
|
||||||
|
binary.read {
|
||||||
|
val array = ByteArray {
|
||||||
|
val read = readBytesWithSeparatorTo(this, "---".encodeToByteArray(), skipUntilEndOfLine = true)
|
||||||
|
assertEquals(12, read)
|
||||||
|
}
|
||||||
|
assertEquals("""
|
||||||
|
aaa
|
||||||
|
bbb
|
||||||
|
""".trimIndent(),array.decodeToString().trim())
|
||||||
|
assertEquals("ccc", readUTF8Line()?.trim())
|
||||||
|
}
|
||||||
|
|
||||||
|
assertFails {
|
||||||
|
binary.read {
|
||||||
|
discardWithSeparator("---".encodeToByteArray(), atMost = 3)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assertFails {
|
||||||
|
binary.read{
|
||||||
|
discardWithSeparator("-+-".encodeToByteArray())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
@ -1,6 +1,7 @@
|
|||||||
package space.kscience.dataforge.workspace
|
package space.kscience.dataforge.workspace
|
||||||
|
|
||||||
import space.kscience.dataforge.context.ContextAware
|
import space.kscience.dataforge.context.ContextAware
|
||||||
|
import space.kscience.dataforge.data.Data
|
||||||
import space.kscience.dataforge.data.DataSet
|
import space.kscience.dataforge.data.DataSet
|
||||||
import space.kscience.dataforge.meta.Meta
|
import space.kscience.dataforge.meta.Meta
|
||||||
import space.kscience.dataforge.meta.MutableMeta
|
import space.kscience.dataforge.meta.MutableMeta
|
||||||
@ -34,7 +35,7 @@ public interface Workspace : ContextAware, Provider {
|
|||||||
return when (target) {
|
return when (target) {
|
||||||
"target", Meta.TYPE -> targets.mapKeys { Name.parse(it.key)}
|
"target", Meta.TYPE -> targets.mapKeys { Name.parse(it.key)}
|
||||||
Task.TYPE -> tasks
|
Task.TYPE -> tasks
|
||||||
//Data.TYPE -> data.flow().toMap()
|
Data.TYPE -> data.dataSequence().associateBy { it.name }
|
||||||
else -> emptyMap()
|
else -> emptyMap()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -46,7 +47,7 @@ public interface Workspace : ContextAware, Provider {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public suspend fun produceData(taskName: Name, taskMeta: Meta, name: Name): TaskData<*>? =
|
public suspend fun produceData(taskName: Name, taskMeta: Meta, name: Name): TaskData<*>? =
|
||||||
produce(taskName, taskMeta).get(name)
|
produce(taskName, taskMeta)[name]
|
||||||
|
|
||||||
public companion object {
|
public companion object {
|
||||||
public const val TYPE: String = "workspace"
|
public const val TYPE: String = "workspace"
|
||||||
|
Loading…
Reference in New Issue
Block a user