Explicit postprocessor

This commit is contained in:
Alexander Nozik 2023-11-28 19:45:16 +03:00
parent c986ede110
commit eeaa080a88
7 changed files with 132 additions and 87 deletions

View File

@ -1,10 +1,23 @@
package space.kscience.snark package space.kscience.snark
import kotlinx.io.Source
import space.kscience.dataforge.io.IOReader import space.kscience.dataforge.io.IOReader
import space.kscience.dataforge.io.asBinary
import space.kscience.dataforge.misc.DfId import space.kscience.dataforge.misc.DfId
import space.kscience.snark.SnarkIOReader.Companion.DEFAULT_PRIORITY
import space.kscience.snark.SnarkIOReader.Companion.DF_TYPE import space.kscience.snark.SnarkIOReader.Companion.DF_TYPE
/**
 * An [IOReader] that additionally declares the set of [types] it supports, a [priority]
 * and the ability to read a value directly from a string.
 */
@DfId(DF_TYPE)
public interface SnarkIOReader<out T> : IOReader<T> {

    /** The set of type identifiers supported by this reader. */
    public val types: Set<String>

    /** The priority of this reader; [DEFAULT_PRIORITY] unless an implementation overrides it. */
    public val priority: Int
        get() = DEFAULT_PRIORITY

    /** Reads a value of type [T] from the text [source]. */
    public fun readFrom(source: String): T

    public companion object {
        public const val DF_TYPE: String = "snark.reader"
        public const val DEFAULT_PRIORITY: Int = 10
    }
}
/** /**
* A wrapper class for IOReader that adds priority and MIME type handling. * A wrapper class for IOReader that adds priority and MIME type handling.
* *
@ -13,24 +26,18 @@ import space.kscience.snark.SnarkIOReader.Companion.DF_TYPE
* @property types The set of supported types that can be read by the SnarkIOReader. * @property types The set of supported types that can be read by the SnarkIOReader.
* @property priority The priority of the SnarkIOReader. Higher priority SnarkIOReader instances will be preferred over lower priority ones. * @property priority The priority of the SnarkIOReader. Higher priority SnarkIOReader instances will be preferred over lower priority ones.
*/ */
@DfId(DF_TYPE)
public class SnarkIOReader<out T>( private class SnarkIOReaderWrapper<out T>(
private val reader: IOReader<T>, private val reader: IOReader<T>,
public val types: Set<String>, override val types: Set<String>,
public val priority: Int = DEFAULT_PRIORITY, override val priority: Int = DEFAULT_PRIORITY,
) : IOReader<T> by reader { ) : IOReader<T> by reader, SnarkIOReader<T> {
public fun readFrom(source: String): T{ override fun readFrom(source: String): T = readFrom(source.encodeToByteArray().asBinary())
}
public companion object {
public const val DF_TYPE: String = "snark.reader"
public const val DEFAULT_PRIORITY: Int = 10
}
} }
public fun <T : Any> SnarkIOReader( public fun <T : Any> SnarkIOReader(
reader: IOReader<T>, reader: IOReader<T>,
vararg types: String, vararg types: String,
): SnarkIOReader<T> = SnarkIOReader(reader, types.toSet()) priority: Int = DEFAULT_PRIORITY
): SnarkIOReader<T> = SnarkIOReaderWrapper(reader, types.toSet(), priority)

View File

@ -9,7 +9,7 @@ import space.kscience.dataforge.names.NameToken
@DfId(TextProcessor.DF_TYPE) @DfId(TextProcessor.DF_TYPE)
public fun interface TextProcessor { public fun interface TextProcessor {
public fun process(text: String): String public fun process(text: CharSequence): String
public companion object { public companion object {
public const val DF_TYPE: String = "snark.textTransformation" public const val DF_TYPE: String = "snark.textTransformation"

View File

@ -13,22 +13,18 @@ import space.kscience.snark.SnarkContext
//TODO replace by VisionForge type //TODO replace by VisionForge type
//typealias HtmlFragment = context(PageBuilder, TagConsumer<*>) () -> Unit
public fun interface HtmlFragment { public fun interface HtmlFragment {
public fun TagConsumer<*>.renderFragment(page: WebPage) public fun TagConsumer<*>.renderFragment()
//TODO move pageBuilder to a context receiver after KT-52967 is fixed
} }
public typealias HtmlData = Data<HtmlFragment> public typealias HtmlData = Data<HtmlFragment>
//fun HtmlData(meta: Meta, content: context(PageBuilder) TagConsumer<*>.() -> Unit): HtmlData =
// Data(HtmlFragment(content), meta)
context(WebPage) context(WebPage)
public fun FlowContent.htmlData(data: HtmlData): Unit = runBlocking(Dispatchers.IO) { public fun FlowContent.htmlData(data: HtmlData): Unit = runBlocking(Dispatchers.IO) {
with(data.await()) { consumer.renderFragment(page) } withSnarkPage(page) {
with(data.await()) { consumer.renderFragment() }
}
} }
context(SnarkContext) context(SnarkContext)

View File

@ -6,12 +6,15 @@ import io.ktor.http.ContentType
import kotlinx.io.readByteArray import kotlinx.io.readByteArray
import space.kscience.dataforge.context.* import space.kscience.dataforge.context.*
import space.kscience.dataforge.data.* import space.kscience.dataforge.data.*
import space.kscience.dataforge.io.Binary
import space.kscience.dataforge.io.IOPlugin import space.kscience.dataforge.io.IOPlugin
import space.kscience.dataforge.io.IOReader import space.kscience.dataforge.io.IOReader
import space.kscience.dataforge.io.JsonMetaFormat import space.kscience.dataforge.io.JsonMetaFormat
import space.kscience.dataforge.io.yaml.YamlMetaFormat import space.kscience.dataforge.io.yaml.YamlMetaFormat
import space.kscience.dataforge.io.yaml.YamlPlugin import space.kscience.dataforge.io.yaml.YamlPlugin
import space.kscience.dataforge.meta.* import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.get
import space.kscience.dataforge.meta.string
import space.kscience.dataforge.misc.DFExperimental import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.* import space.kscience.dataforge.names.*
import space.kscience.dataforge.provider.dfId import space.kscience.dataforge.provider.dfId
@ -28,7 +31,7 @@ public fun <T : Any> SnarkIOReader(
reader: IOReader<T>, reader: IOReader<T>,
vararg types: ContentType, vararg types: ContentType,
priority: Int = SnarkIOReader.DEFAULT_PRIORITY, priority: Int = SnarkIOReader.DEFAULT_PRIORITY,
): SnarkIOReader<T> = SnarkIOReader(reader, types.map { it.toString() }.toSet(), priority) ): SnarkIOReader<T> = SnarkIOReader(reader, *types.map { it.toString() }.toTypedArray(), priority = priority)
/** /**
@ -59,10 +62,10 @@ public class SnarkHtml : WorkspacePlugin() {
override fun content(target: String): Map<Name, Any> = when (target) { override fun content(target: String): Map<Name, Any> = when (target) {
SnarkIOReader::class.dfId -> mapOf( SnarkIOReader::class.dfId -> mapOf(
"html".asName() to HtmlIOFormat.snarkReader, "html".asName() to HtmlReader,
"markdown".asName() to MarkdownIOFormat.snarkReader, "markdown".asName() to MarkdownReader,
"json".asName() to SnarkIOReader(JsonMetaFormat, ContentType.Application.Json), "json".asName() to SnarkIOReader(JsonMetaFormat, ContentType.Application.Json),
"yaml".asName() to SnarkIOReader(YamlMetaFormat, "text/yaml"), "yaml".asName() to SnarkIOReader(YamlMetaFormat, "text/yaml", "yaml"),
"png".asName() to SnarkIOReader(ImageIOReader, ContentType.Image.PNG), "png".asName() to SnarkIOReader(ImageIOReader, ContentType.Image.PNG),
"jpg".asName() to SnarkIOReader(ImageIOReader, ContentType.Image.JPEG), "jpg".asName() to SnarkIOReader(ImageIOReader, ContentType.Image.JPEG),
"gif".asName() to SnarkIOReader(ImageIOReader, ContentType.Image.GIF), "gif".asName() to SnarkIOReader(ImageIOReader, ContentType.Image.GIF),
@ -83,9 +86,15 @@ public class SnarkHtml : WorkspacePlugin() {
else -> super.content(target) else -> super.content(target)
} }
// public val assets: TaskReference<Binary> by task<Binary> {
// node(Name.EMPTY, from(allData).filter { name, meta ->
//
// })
// }
public val preprocess: TaskReference<String> by task<String> { public val preprocess: TaskReference<String> by task<String> {
pipeFrom<String,String>(dataByType<String>()) { text, _, meta -> pipeFrom<String, String>(dataByType<String>()) { text, _, meta ->
meta[TextProcessor.TEXT_TRANSFORMATION_KEY]?.let { meta[TextProcessor.TEXT_TRANSFORMATION_KEY]?.let {
snark.textProcessor(it).process(text) snark.textProcessor(it).process(text)
} ?: text } ?: text

View File

@ -0,0 +1,74 @@
package space.kscience.snark.html
import kotlinx.html.A
import kotlinx.html.FlowContent
import kotlinx.html.Tag
import kotlinx.html.TagConsumer
import space.kscience.dataforge.meta.string
import space.kscience.dataforge.names.parseAsName
import space.kscience.snark.TextProcessor
/**
 * A basic [TextProcessor] that replaces `${...}` expressions in text using the given [page].
 * The following expressions are recognised:
 * * `homeRef` -> [WebPage.homeRef]
 * * `resolveRef("...")` -> [WebPage.resolveRef]
 * * `resolvePageRef("...")` -> [WebPage.localisedPageRef] applied to the argument parsed as a name
 * * `pageMeta.get("...")` -> string value of the [WebPage.pageMeta] item with the given name,
 *   or `@null` when no string value is available
 *
 * Any other `${...}` expression is left unchanged.
 */
public class WebPageTextProcessor(private val page: WebPage) : TextProcessor {

    // The quoted argument must not contain `"`. A greedy `.*` here would merge several
    // expressions located on the same line into a single match with a garbage argument.
    private val regex = """\$\{([\w.]*)(?>\("([^"]*)"\))?}""".toRegex()

    /**
     * Returns [text] with every recognised `${...}` expression substituted.
     *
     * @throws IllegalStateException if an expression that needs a quoted argument is used without one.
     */
    override fun process(text: CharSequence): String = text.replace(regex) { match ->
        // Group 1 always participates in a match (it may be empty), so no null check is needed.
        val expression = match.groupValues[1]

        // Group 2 is absent when the expression has no ("...") argument.
        fun argument(): String = match.groups[2]?.value
            ?: error("$expression requires a string (quoted) argument")

        when (expression) {
            "homeRef" -> page.homeRef
            "resolveRef" -> page.resolveRef(argument())
            "resolvePageRef" -> page.localisedPageRef(argument().parseAsName())
            "pageMeta.get" -> page.pageMeta[argument().parseAsName()].string ?: "@null"
            else -> match.value
        }
    }
}
/**
 * A [TagConsumer] decorator that runs text through a [WebPageTextProcessor] bound to [page]
 * before forwarding it to the wrapped [consumer].
 *
 * Only changes of the `href` attribute on [A] tags and regular tag content are processed;
 * every other call is delegated to [consumer] unchanged.
 */
public class WebPagePostprocessor<out R>(
    public val page: WebPage,
    private val consumer: TagConsumer<R>,
) : TagConsumer<R> by consumer {

    private val processor = WebPageTextProcessor(page)

    override fun onTagAttributeChange(tag: Tag, attribute: String, value: String?) {
        val isAnchorHref = tag is A && attribute == "href"
        // A null value is forwarded as is, without processing.
        val forwarded = if (isAnchorHref && value != null) processor.process(value) else value
        consumer.onTagAttributeChange(tag, attribute, forwarded)
    }

    override fun onTagContent(content: CharSequence) {
        val processed = processor.process(content)
        consumer.onTagContent(processed)
    }
}
/**
 * Runs [block] on a [FlowContent] that delegates to the receiver but exposes a consumer
 * wrapped into a [WebPagePostprocessor] for the given [page].
 */
public inline fun FlowContent.withSnarkPage(page: WebPage, block: FlowContent.() -> Unit) {
    val original = this
    val wrapped = object : FlowContent by original {
        // Everything emitted through this consumer is routed via the postprocessor first.
        override val consumer: TagConsumer<*> = WebPagePostprocessor(page, original.consumer)
    }
    wrapped.block()
}

View File

@ -1,43 +0,0 @@
package space.kscience.snark.html
import space.kscience.dataforge.meta.string
import space.kscience.dataforge.names.parseAsName
import space.kscience.snark.TextProcessor
/**
 * A basic [TextProcessor] that replaces `${...}` expressions in text. The following expressions are recognised:
 * * `homeRef` -> [WebPage.homeRef]
 * * `resolveRef("...")` -> [WebPage.resolveRef]
 * * `resolvePageRef("...")` -> [WebPage.localisedPageRef] applied to the argument parsed as a name
 * * `pageMeta.get("...")` -> string value of the [WebPage.pageMeta] item with the given name,
 *   or `@null` when no string value is available
 *
 * Any other `${...}` expression is left unchanged.
 */
public class WebPagePreprocessor(public val page: WebPage) : TextProcessor {

    // The quoted argument must not contain `"`. A greedy `.*` here would merge several
    // expressions located on the same line into a single match with a garbage argument.
    private val regex = """\$\{([\w.]*)(?>\("([^"]*)"\))?}""".toRegex()

    /**
     * Returns [text] with every recognised `${...}` expression substituted.
     *
     * @throws IllegalStateException if an expression that needs a quoted argument is used without one.
     */
    override fun process(text: String): String = text.replace(regex) { match ->
        // Group 1 always participates in a match (it may be empty), so no null check is needed.
        val expression = match.groupValues[1]

        // Group 2 is absent when the expression has no ("...") argument.
        fun argument(): String = match.groups[2]?.value
            ?: error("$expression requires a string (quoted) argument")

        when (expression) {
            "homeRef" -> page.homeRef
            "resolveRef" -> page.resolveRef(argument())
            "resolvePageRef" -> page.localisedPageRef(argument().parseAsName())
            "pageMeta.get" -> page.pageMeta[argument().parseAsName()].string ?: "@null"
            else -> match.value
        }
    }
}

View File

@ -8,34 +8,31 @@ import kotlinx.io.readString
import org.intellij.markdown.flavours.commonmark.CommonMarkFlavourDescriptor import org.intellij.markdown.flavours.commonmark.CommonMarkFlavourDescriptor
import org.intellij.markdown.html.HtmlGenerator import org.intellij.markdown.html.HtmlGenerator
import org.intellij.markdown.parser.MarkdownParser import org.intellij.markdown.parser.MarkdownParser
import space.kscience.dataforge.io.IOReader
import space.kscience.snark.SnarkIOReader import space.kscience.snark.SnarkIOReader
import kotlin.reflect.KType import kotlin.reflect.KType
import kotlin.reflect.typeOf import kotlin.reflect.typeOf
public object HtmlIOFormat : IOReader<HtmlFragment> { public object HtmlReader : SnarkIOReader<HtmlFragment> {
override val type: KType = typeOf<HtmlFragment>() override val types: Set<String> = setOf("html")
override fun readFrom(source: Source): HtmlFragment = HtmlFragment { page -> override fun readFrom(source: String): HtmlFragment = HtmlFragment {
div { div {
unsafe { +source.readString() } unsafe { +source }
} }
} }
public val snarkReader: SnarkIOReader<HtmlFragment> = SnarkIOReader(this, ContentType.Text.Html) override fun readFrom(source: Source): HtmlFragment = readFrom(source.readString())
override val type: KType = typeOf<HtmlFragment>()
} }
public object MarkdownIOFormat : IOReader<HtmlFragment> { public object MarkdownReader : SnarkIOReader<HtmlFragment> {
override val type: KType = typeOf<HtmlFragment>() override val type: KType = typeOf<HtmlFragment>()
private val markdownFlavor = CommonMarkFlavourDescriptor() override val types: Set<String> = setOf("text/markdown", "md", "markdown")
private val markdownParser = MarkdownParser(markdownFlavor)
override fun readFrom(source: Source): HtmlFragment = HtmlFragment { page -> override fun readFrom(source: String): HtmlFragment = HtmlFragment {
val transformedText = source.readString() val parsedTree = markdownParser.buildMarkdownTreeFromString(source)
val parsedTree = markdownParser.buildMarkdownTreeFromString(transformedText) val htmlString = HtmlGenerator(source, parsedTree, markdownFlavor).generateHtml()
val htmlString = HtmlGenerator(transformedText, parsedTree, markdownFlavor).generateHtml()
div { div {
unsafe { unsafe {
@ -44,6 +41,11 @@ public object MarkdownIOFormat : IOReader<HtmlFragment> {
} }
} }
private val markdownFlavor = CommonMarkFlavourDescriptor()
private val markdownParser = MarkdownParser(markdownFlavor)
override fun readFrom(source: Source): HtmlFragment = readFrom(source.readString())
public val snarkReader: SnarkIOReader<HtmlFragment> = SnarkIOReader(this, ContentType.parse("text/markdown")) public val snarkReader: SnarkIOReader<HtmlFragment> = SnarkIOReader(this, ContentType.parse("text/markdown"))
} }