Move parse action into snark html

This commit is contained in:
Alexander Nozik 2024-10-12 11:51:11 +03:00
parent 52da9fe52e
commit 440f6e3b83
17 changed files with 155 additions and 127 deletions

View File

@ -58,5 +58,14 @@ Postprocessors are functions that transform fragments of HTML wrapped in them ac
Other details on HTML rendering could be found in [snark-html](./snark-html) module Other details on HTML rendering could be found in [snark-html](./snark-html) module
## Examples
### Scientific document builder
The idea of [the project](examples/document) is to produce a tree of scientific documents or papers. It does so in the following steps:
1. Read data tree from `data` directory (data path could be overridden by either ktor configuration or manually).
2. Search all directories for files called `document.yaml`, or the same descriptor in any other format that can be treated as a value tree (for example `document.json`). Use that file as a document descriptor that defines the linear document structure.
3.
${modules} ${modules}

4
examples/README.md Normal file
View File

@ -0,0 +1,4 @@
# Module examples

View File

@ -0,0 +1,4 @@
# Module document

View File

@ -2,7 +2,7 @@ route: lorem.ipsum
title: Lorem Ipsum title: Lorem Ipsum
authors: authors:
- name: Alexander Nozik - name: Alexander Nozik
affiliation: MIPT affiliation: SPC
fragments: fragments:
- type: image - type: image
ref: SPC-logo.png ref: SPC-logo.png

View File

@ -0,0 +1,2 @@
This is a document body for a simple document

View File

@ -0,0 +1,9 @@
{
"title": "A simple document",
"fragments": [
{
"type": "data",
"name": "body"
}
]
}

View File

@ -0,0 +1,4 @@
<p>
<strong>This is HTML footer</strong>
</p>

View File

@ -1,8 +1,12 @@
package center.sciprog.snark.documents package center.sciprog.snark.documents
import io.ktor.server.application.Application import io.ktor.server.application.Application
import io.ktor.server.application.call
import io.ktor.server.cio.CIO import io.ktor.server.cio.CIO
import io.ktor.server.engine.embeddedServer import io.ktor.server.engine.embeddedServer
import io.ktor.server.response.respondRedirect
import io.ktor.server.routing.get
import io.ktor.server.routing.routing
import kotlinx.html.ScriptCrossorigin import kotlinx.html.ScriptCrossorigin
import kotlinx.html.link import kotlinx.html.link
import kotlinx.html.script import kotlinx.html.script
@ -14,6 +18,7 @@ import space.kscience.snark.ktor.snarkApplication
fun Application.renderAllDocuments() = snarkApplication { fun Application.renderAllDocuments() = snarkApplication {
allDocuments( allDocuments(
headers = { headers = {
//add katex headers
link { link {
rel = "stylesheet" rel = "stylesheet"
href = "https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.css" href = "https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.css"
@ -33,6 +38,7 @@ fun Application.renderAllDocuments() = snarkApplication {
crossorigin = ScriptCrossorigin.anonymous crossorigin = ScriptCrossorigin.anonymous
attributes["onload"] = "renderMathInElement(document.body);" attributes["onload"] = "renderMathInElement(document.body);"
} }
// Auto-render latex expressions with katex
script { script {
unsafe { unsafe {
+""" +"""
@ -51,6 +57,12 @@ fun Application.renderAllDocuments() = snarkApplication {
} }
} }
) )
routing {
get("/"){
call.respondRedirect("lorem/ipsum")
}
}
} }

21
snark-core/README.md Normal file
View File

@ -0,0 +1,21 @@
# Module snark-core
## Usage
## Artifact:
The Maven coordinates of this project are `space.kscience:snark-core:0.2.0-dev-1`.
**Gradle Kotlin DSL:**
```kotlin
repositories {
maven("https://repo.kotlin.link")
mavenCentral()
}
dependencies {
implementation("space.kscience:snark-core:0.2.0-dev-1")
}
```

View File

@ -0,0 +1,21 @@
# Module snark-gradle-plugin
## Usage
## Artifact:
The Maven coordinates of this project are `space.kscience:snark-gradle-plugin:0.2.0-dev-1`.
**Gradle Kotlin DSL:**
```kotlin
repositories {
maven("https://repo.kotlin.link")
mavenCentral()
}
dependencies {
implementation("space.kscience:snark-gradle-plugin:0.2.0-dev-1")
}
```

View File

@ -1,68 +0,0 @@
package space.kscience.snark.html
import space.kscience.dataforge.actions.AbstractAction
import space.kscience.dataforge.data.*
import space.kscience.dataforge.io.Binary
import space.kscience.dataforge.io.toByteArray
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.get
import space.kscience.dataforge.misc.DFInternal
import space.kscience.snark.SnarkReader
import space.kscience.snark.TextProcessor
import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.EmptyCoroutineContext
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
 * Create a new [Data] that depends on this one and produces its value by applying [block]
 * to the awaited value of the receiver.
 *
 * @param type the type token passed to the resulting [Data].
 * @param meta the meta passed to the resulting [Data]; defaults to the meta of the receiver.
 * @param coroutineContext the coroutine context passed to the resulting [Data].
 * @param block the suspending transformation applied to the receiver's value.
 * @return a [Data] that lists the receiver as its only dependency.
 */
@OptIn(DFInternal::class)
internal fun <T, R> Data<T>.transform(
    type: KType,
    meta: Meta = this.meta,
    coroutineContext: CoroutineContext = EmptyCoroutineContext,
    block: suspend (T) -> R,
): Data<R> = Data(type, meta, coroutineContext, listOf(this)) {
    // The receiver's value is awaited only when the derived data is computed
    block(await())
}
/**
 * An action that converts binary data items into parsed objects using the readers
 * registered in [snarkHtml].
 *
 * For every item the content type is resolved from the item's name and meta, and the matching
 * reader with the highest priority is used. Items without a matching reader are not put
 * into the result.
 */
public class ParseAction(private val snarkHtml: SnarkHtml) :
    AbstractAction<Binary, Any>(typeOf<PageFragment>()) {

    /**
     * Build the parsed counterpart of [data], or return `null` when no reader accepts its content type.
     */
    private fun parseOne(data: NamedData<Binary>): NamedData<Any>? = with(snarkHtml) {
        val contentType = getContentType(data.name, data.meta)

        // Select the highest-priority reader accepting this content type; skip the item if there is none
        val reader: SnarkReader<Any> = snark.readers.values
            .filter { candidate -> contentType in candidate.inputContentTypes }
            .maxByOrNull { candidate -> candidate.priority }
            ?: return@with null

        // NOTE(review): `meta` here is not `data.meta`; it resolves through the enclosing receivers
        // (presumably the plugin's own meta). Confirm that this is the intended source of the key.
        val preprocessor = meta[TextProcessor.TEXT_PREPROCESSOR_KEY]?.let { preprocessorMeta ->
            snark.preprocessor(preprocessorMeta)
        }

        data.transform(reader.outputType) { binary ->
            if (preprocessor == null) {
                reader.readFrom(binary)
            } else {
                //TODO provide encoding
                val text = binary.toByteArray().decodeToString()
                reader.readFrom(preprocessor.process(text))
            }
        }.named(data.name)
    }

    /**
     * Put the parsed form of every item in [data] for which a reader is found.
     */
    override fun DataSink<Any>.generate(data: DataTree<Binary>, meta: Meta) {
        data.forEach { item ->
            val parsed = parseOne(item)
            if (parsed != null) put(parsed)
        }
    }

    /**
     * Put the parsed form of the single item [namedData], if a reader is found for it.
     */
    override fun DataSink<Any>.update(source: DataTree<Binary>, meta: Meta, namedData: NamedData<Binary>) {
        val parsed = parseOne(namedData) ?: return
        put(parsed)
    }
}

View File

@ -3,7 +3,6 @@
package space.kscience.snark.html package space.kscience.snark.html
import io.ktor.http.ContentType import io.ktor.http.ContentType
import kotlinx.coroutines.CoroutineScope
import kotlinx.io.readByteArray import kotlinx.io.readByteArray
import space.kscience.dataforge.actions.Action import space.kscience.dataforge.actions.Action
import space.kscience.dataforge.actions.mapping import space.kscience.dataforge.actions.mapping
@ -11,11 +10,11 @@ import space.kscience.dataforge.actions.transform
import space.kscience.dataforge.context.Context import space.kscience.dataforge.context.Context
import space.kscience.dataforge.context.PluginFactory import space.kscience.dataforge.context.PluginFactory
import space.kscience.dataforge.context.PluginTag import space.kscience.dataforge.context.PluginTag
import space.kscience.dataforge.data.* import space.kscience.dataforge.data.DataSink
import space.kscience.dataforge.io.Binary import space.kscience.dataforge.data.DataTree
import space.kscience.dataforge.io.IOPlugin import space.kscience.dataforge.data.filterByType
import space.kscience.dataforge.io.IOReader import space.kscience.dataforge.data.putAll
import space.kscience.dataforge.io.JsonMetaFormat import space.kscience.dataforge.io.*
import space.kscience.dataforge.io.yaml.YamlMetaFormat import space.kscience.dataforge.io.yaml.YamlMetaFormat
import space.kscience.dataforge.io.yaml.YamlPlugin import space.kscience.dataforge.io.yaml.YamlPlugin
import space.kscience.dataforge.meta.Meta import space.kscience.dataforge.meta.Meta
@ -106,7 +105,34 @@ public class SnarkHtml : WorkspacePlugin() {
} }
} }
public val parseAction: Action<Binary, Any> = ParseAction(this) public val parseAction: Action<Binary, Any> = Action.mapping {
val contentType = getContentType(name, meta)
val parser: SnarkReader<Any>? = snark.readers.values.filter { parser ->
contentType in parser.inputContentTypes
}.maxByOrNull {
it.priority
}
result(parser?.outputType ?: typeOf<Binary>()) { data ->
//ignore data for which parser is not found
if (parser != null) {
val preprocessor =
meta[TextProcessor.TEXT_PREPROCESSOR_KEY]?.let { snark.preprocessor(it) }
if (preprocessor == null) {
parser.readFrom(data)
} else {
//TODO provide encoding
val string = data.toByteArray().decodeToString()
parser.readFrom(preprocessor.process(string))
}
} else {
data
}
}
}
public val layoutAction: Action<Any, Any> = Action.mapping { public val layoutAction: Action<Any, Any> = Action.mapping {
@ -143,21 +169,24 @@ public class SnarkHtml : WorkspacePlugin() {
} }
} }
/**
public fun SnarkHtml.readSiteData( * Parse raw data tree into html primitives
*/
public fun SnarkHtml.parseDataTree(
binaries: DataTree<Binary>, binaries: DataTree<Binary>,
meta: Meta = Meta.EMPTY, meta: Meta = Meta.EMPTY,
): DataTree<Any> = ObservableDataTree(context) { ): DataTree<Any> = DataTree {
//put all binaries //put all binaries
putAll(binaries) putAll(binaries)
//override ones which could be parsed //override ones which could be parsed
putAll(binaries.transform(parseAction, meta)) putAll(binaries.transform(parseAction, meta))
}.transform(prepareHeaderAction, meta).transform(removeIndexAction, meta) }.transform(prepareHeaderAction, meta).transform(removeIndexAction, meta)
/**
public fun SnarkHtml.readSiteData( * Read the parsed data tree by providing [builder] for raw binary data tree
coroutineScope: CoroutineScope, */
public fun SnarkHtml.parseDataTree(
meta: Meta = Meta.EMPTY, meta: Meta = Meta.EMPTY,
//TODO add IO plugin as a context parameter //TODO add IO plugin as a context parameter
builder: DataSink<Binary>.() -> Unit, builder: DataSink<Binary>.() -> Unit,
): DataTree<Any> = readSiteData(ObservableDataTree(coroutineScope) { builder() }, meta) ): DataTree<Any> = parseDataTree(DataTree { builder() }, meta)

View File

@ -1,12 +0,0 @@
ktor {
application {
modules = [ ru.mipt.spc.ApplicationKt.spcModule ]
}
deployment {
port = 7080
watch = ["classes", "data/"]
}
development = true
}

View File

@ -1,29 +0,0 @@
<configuration>
<timestamp key="bySecond" datePattern="yyyyMMdd'T'HHmmss"/>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{YYYY-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>
<root level="trace">
<appender-ref ref="STDOUT"/>
</root>
<appender name="FILE" class="ch.qos.logback.core.FileAppender">
<!-- use the previously created timestamp to create a uniquely
named log file -->
<file>logs/${bySecond}.txt</file>
<encoder>
<pattern>%d{YYYY-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>
<root level="DEBUG">
<appender-ref ref="FILE" />
</root>
<logger name="org.eclipse.jetty" level="INFO"/>
<logger name="io.netty" level="INFO"/>
</configuration>

21
snark-ktor/README.md Normal file
View File

@ -0,0 +1,21 @@
# Module snark-ktor
## Usage
## Artifact:
The Maven coordinates of this project are `space.kscience:snark-ktor:0.2.0-dev-1`.
**Gradle Kotlin DSL:**
```kotlin
repositories {
maven("https://repo.kotlin.link")
mavenCentral()
}
dependencies {
implementation("space.kscience:snark-ktor:0.2.0-dev-1")
}
```

View File

@ -18,6 +18,7 @@ private const val BUILD_DATE_FILE = "/buildDate"
* *
* @return true if cache is valid and false if it is reset * @return true if cache is valid and false if it is reset
*/ */
@Deprecated("To be removed")
fun Application.prepareSnarkDataCacheDirectory(dataPath: Path): Boolean { fun Application.prepareSnarkDataCacheDirectory(dataPath: Path): Boolean {
// Clear data directory if it is outdated // Clear data directory if it is outdated

View File

@ -15,7 +15,7 @@ import space.kscience.dataforge.workspace.FileData
import space.kscience.dataforge.workspace.directory import space.kscience.dataforge.workspace.directory
import space.kscience.snark.html.HtmlSite import space.kscience.snark.html.HtmlSite
import space.kscience.snark.html.SnarkHtml import space.kscience.snark.html.SnarkHtml
import space.kscience.snark.html.readSiteData import space.kscience.snark.html.parseDataTree
import kotlin.io.path.Path import kotlin.io.path.Path
import kotlin.io.path.exists import kotlin.io.path.exists
@ -47,7 +47,7 @@ public fun Route.site(
error("Data directory at $dataDirectory is not resolved") error("Data directory at $dataDirectory is not resolved")
} }
val siteData = snark.readSiteData(context) { val siteData = snark.parseDataTree {
directory(snark.io, Name.EMPTY, dataDirectory) directory(snark.io, Name.EMPTY, dataDirectory)
} }