Move parse action into snark html

This commit is contained in:
Alexander Nozik 2024-10-12 11:51:11 +03:00
parent 52da9fe52e
commit 440f6e3b83
17 changed files with 155 additions and 127 deletions

View File

@ -58,5 +58,14 @@ Postprocessors are functions that transform fragments of HTML wrapped in them ac
Other details on HTML rendering could be found in [snark-html](./snark-html) module
## Examples
### Scientific document builder
The idea of [the project](examples/document) is to produce a tree of scientific documents or papers. It does that in following steps:
1. Read data tree from `data` directory (data path could be overridden by either ktor configuration or manually).
2. Search all directories for a files called `document.yaml` or any other format that could be treated as value-tree (for example `document.json`). Use that file as a document descriptor that defines linear document structure.
3.
${modules}

4
examples/README.md Normal file
View File

@ -0,0 +1,4 @@
# Module examples

View File

@ -0,0 +1,4 @@
# Module document

View File

@ -2,7 +2,7 @@ route: lorem.ipsum
title: Lorem Ipsum
authors:
- name: Alexander Nozik
affiliation: MIPT
affiliation: SPC
fragments:
- type: image
ref: SPC-logo.png

View File

@ -0,0 +1,2 @@
This is a document body for a simple document

View File

@ -0,0 +1,9 @@
{
"title": "A simple document",
"fragments": [
{
"type": "data",
"name": "body"
}
]
}

View File

@ -0,0 +1,4 @@
<p>
<strong>This is HTML footer</strong>
</p>

View File

@ -1,8 +1,12 @@
package center.sciprog.snark.documents
import io.ktor.server.application.Application
import io.ktor.server.application.call
import io.ktor.server.cio.CIO
import io.ktor.server.engine.embeddedServer
import io.ktor.server.response.respondRedirect
import io.ktor.server.routing.get
import io.ktor.server.routing.routing
import kotlinx.html.ScriptCrossorigin
import kotlinx.html.link
import kotlinx.html.script
@ -14,6 +18,7 @@ import space.kscience.snark.ktor.snarkApplication
fun Application.renderAllDocuments() = snarkApplication {
allDocuments(
headers = {
//add katex headers
link {
rel = "stylesheet"
href = "https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.css"
@ -33,6 +38,7 @@ fun Application.renderAllDocuments() = snarkApplication {
crossorigin = ScriptCrossorigin.anonymous
attributes["onload"] = "renderMathInElement(document.body);"
}
// Auto-render latex expressions with katex
script {
unsafe {
+"""
@ -51,6 +57,12 @@ fun Application.renderAllDocuments() = snarkApplication {
}
}
)
routing {
get("/"){
call.respondRedirect("lorem/ipsum")
}
}
}

21
snark-core/README.md Normal file
View File

@ -0,0 +1,21 @@
# Module snark-core
## Usage
## Artifact:
The Maven coordinates of this project are `space.kscience:snark-core:0.2.0-dev-1`.
**Gradle Kotlin DSL:**
```kotlin
repositories {
maven("https://repo.kotlin.link")
mavenCentral()
}
dependencies {
implementation("space.kscience:snark-core:0.2.0-dev-1")
}
```

View File

@ -0,0 +1,21 @@
# Module snark-gradle-plugin
## Usage
## Artifact:
The Maven coordinates of this project are `space.kscience:snark-gradle-plugin:0.2.0-dev-1`.
**Gradle Kotlin DSL:**
```kotlin
repositories {
maven("https://repo.kotlin.link")
mavenCentral()
}
dependencies {
implementation("space.kscience:snark-gradle-plugin:0.2.0-dev-1")
}
```

View File

@ -1,68 +0,0 @@
package space.kscience.snark.html
import space.kscience.dataforge.actions.AbstractAction
import space.kscience.dataforge.data.*
import space.kscience.dataforge.io.Binary
import space.kscience.dataforge.io.toByteArray
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.get
import space.kscience.dataforge.misc.DFInternal
import space.kscience.snark.SnarkReader
import space.kscience.snark.TextProcessor
import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.EmptyCoroutineContext
import kotlin.reflect.KType
import kotlin.reflect.typeOf
@OptIn(DFInternal::class)
internal fun <T, R> Data<T>.transform(
type: KType,
meta: Meta = this.meta,
coroutineContext: CoroutineContext = EmptyCoroutineContext,
block: suspend (T) -> R,
): Data<R> {
val data = Data(type, meta, coroutineContext, listOf(this)) {
block(await())
}
return data
}
public class ParseAction(private val snarkHtml: SnarkHtml) :
AbstractAction<Binary, Any>(typeOf<PageFragment>()) {
private fun parseOne(data: NamedData<Binary>): NamedData<Any>? = with(snarkHtml) {
val contentType = getContentType(data.name, data.meta)
val parser: SnarkReader<Any>? = snark.readers.values.filter { parser ->
contentType in parser.inputContentTypes
}.maxByOrNull {
it.priority
}
//ignore data for which parser is not found
if (parser != null) {
val preprocessor = meta[TextProcessor.TEXT_PREPROCESSOR_KEY]?.let { snark.preprocessor(it) }
data.transform(parser.outputType) {
if (preprocessor == null) {
parser.readFrom(it)
} else {
//TODO provide encoding
val string = it.toByteArray().decodeToString()
parser.readFrom(preprocessor.process(string))
}
}.named(data.name)
} else {
null
}
}
override fun DataSink<Any>.generate(data: DataTree<Binary>, meta: Meta) {
data.forEach {
parseOne(it)?.let { put(it) }
}
}
override fun DataSink<Any>.update(source: DataTree<Binary>, meta: Meta, namedData: NamedData<Binary>) {
parseOne(namedData)?.let { put(it) }
}
}

View File

@ -3,7 +3,6 @@
package space.kscience.snark.html
import io.ktor.http.ContentType
import kotlinx.coroutines.CoroutineScope
import kotlinx.io.readByteArray
import space.kscience.dataforge.actions.Action
import space.kscience.dataforge.actions.mapping
@ -11,11 +10,11 @@ import space.kscience.dataforge.actions.transform
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.context.PluginFactory
import space.kscience.dataforge.context.PluginTag
import space.kscience.dataforge.data.*
import space.kscience.dataforge.io.Binary
import space.kscience.dataforge.io.IOPlugin
import space.kscience.dataforge.io.IOReader
import space.kscience.dataforge.io.JsonMetaFormat
import space.kscience.dataforge.data.DataSink
import space.kscience.dataforge.data.DataTree
import space.kscience.dataforge.data.filterByType
import space.kscience.dataforge.data.putAll
import space.kscience.dataforge.io.*
import space.kscience.dataforge.io.yaml.YamlMetaFormat
import space.kscience.dataforge.io.yaml.YamlPlugin
import space.kscience.dataforge.meta.Meta
@ -106,7 +105,34 @@ public class SnarkHtml : WorkspacePlugin() {
}
}
public val parseAction: Action<Binary, Any> = ParseAction(this)
public val parseAction: Action<Binary, Any> = Action.mapping {
val contentType = getContentType(name, meta)
val parser: SnarkReader<Any>? = snark.readers.values.filter { parser ->
contentType in parser.inputContentTypes
}.maxByOrNull {
it.priority
}
result(parser?.outputType ?: typeOf<Binary>()) { data ->
//ignore data for which parser is not found
if (parser != null) {
val preprocessor =
meta[TextProcessor.TEXT_PREPROCESSOR_KEY]?.let { snark.preprocessor(it) }
if (preprocessor == null) {
parser.readFrom(data)
} else {
//TODO provide encoding
val string = data.toByteArray().decodeToString()
parser.readFrom(preprocessor.process(string))
}
} else {
data
}
}
}
public val layoutAction: Action<Any, Any> = Action.mapping {
@ -143,21 +169,24 @@ public class SnarkHtml : WorkspacePlugin() {
}
}
public fun SnarkHtml.readSiteData(
/**
* Parse raw data tree into html primitives
*/
public fun SnarkHtml.parseDataTree(
binaries: DataTree<Binary>,
meta: Meta = Meta.EMPTY,
): DataTree<Any> = ObservableDataTree(context) {
): DataTree<Any> = DataTree {
//put all binaries
putAll(binaries)
//override ones which could be parsed
putAll(binaries.transform(parseAction, meta))
}.transform(prepareHeaderAction, meta).transform(removeIndexAction, meta)
public fun SnarkHtml.readSiteData(
coroutineScope: CoroutineScope,
/**
* Read the parsed data tree by providing [builder] for raw binary data tree
*/
public fun SnarkHtml.parseDataTree(
meta: Meta = Meta.EMPTY,
//TODO add IO plugin as a context parameter
builder: DataSink<Binary>.() -> Unit,
): DataTree<Any> = readSiteData(ObservableDataTree(coroutineScope) { builder() }, meta)
): DataTree<Any> = parseDataTree(DataTree { builder() }, meta)

View File

@ -1,12 +0,0 @@
ktor {
application {
modules = [ ru.mipt.spc.ApplicationKt.spcModule ]
}
deployment {
port = 7080
watch = ["classes", "data/"]
}
development = true
}

View File

@ -1,29 +0,0 @@
<configuration>
<timestamp key="bySecond" datePattern="yyyyMMdd'T'HHmmss"/>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{YYYY-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>
<root level="trace">
<appender-ref ref="STDOUT"/>
</root>
<appender name="FILE" class="ch.qos.logback.core.FileAppender">
<!-- use the previously created timestamp to create a uniquely
named log file -->
<file>logs/${bySecond}.txt</file>
<encoder>
<pattern>%d{YYYY-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>
<root level="DEBUG">
<appender-ref ref="FILE" />
</root>
<logger name="org.eclipse.jetty" level="INFO"/>
<logger name="io.netty" level="INFO"/>
</configuration>

21
snark-ktor/README.md Normal file
View File

@ -0,0 +1,21 @@
# Module snark-ktor
## Usage
## Artifact:
The Maven coordinates of this project are `space.kscience:snark-ktor:0.2.0-dev-1`.
**Gradle Kotlin DSL:**
```kotlin
repositories {
maven("https://repo.kotlin.link")
mavenCentral()
}
dependencies {
implementation("space.kscience:snark-ktor:0.2.0-dev-1")
}
```

View File

@ -18,6 +18,7 @@ private const val BUILD_DATE_FILE = "/buildDate"
*
* @return true if cache is valid and false if it is reset
*/
@Deprecated("To be removed")
fun Application.prepareSnarkDataCacheDirectory(dataPath: Path): Boolean {
// Clear data directory if it is outdated

View File

@ -15,7 +15,7 @@ import space.kscience.dataforge.workspace.FileData
import space.kscience.dataforge.workspace.directory
import space.kscience.snark.html.HtmlSite
import space.kscience.snark.html.SnarkHtml
import space.kscience.snark.html.readSiteData
import space.kscience.snark.html.parseDataTree
import kotlin.io.path.Path
import kotlin.io.path.exists
@ -47,7 +47,7 @@ public fun Route.site(
error("Data directory at $dataDirectory is not resolved")
}
val siteData = snark.readSiteData(context) {
val siteData = snark.parseDataTree {
directory(snark.io, Name.EMPTY, dataDirectory)
}