[WIP] refactor in progress

This commit is contained in:
Alexander Nozik 2024-01-19 22:25:26 +03:00
parent 018b52aaff
commit 3b318c3a8b
9 changed files with 130 additions and 51 deletions

View File

@ -14,7 +14,7 @@ allprojects {
}
}
val dataforgeVersion by extra("0.7.1")
val dataforgeVersion by extra("0.8.0-dev-1")
ksciencePublish {
pom("https://github.com/SciProgCentre/snark") {

View File

@ -0,0 +1,60 @@
package space.kscience.snark
import space.kscience.dataforge.actions.AbstractAction
import space.kscience.dataforge.data.*
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MutableMeta
import space.kscience.dataforge.meta.copy
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.NameToken
import space.kscience.dataforge.names.replaceLast
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
 * An action that changes the header of each data item (its name and meta) without changing the data itself
 * or its computation state.
 *
 * @param R the type of the data; the action does not change it.
 * @param type the runtime type of [R], forwarded to [AbstractAction].
 * @param newMeta a mutation applied to a copy of the item meta. It receives the original (not yet renamed) name.
 * @param newName computes the new item name from the original name and the original meta.
 * The meta is `null` when [update] is called for a key that is missing from the data set.
 */
public class ReWrapAction<R : Any>(
    type: KType,
    private val newMeta: MutableMeta.(name: Name) -> Unit = {},
    private val newName: (name: Name, meta: Meta?) -> Name,
) : AbstractAction<R, R>(type) {

    /**
     * Re-publish every item of [data] under the name produced by [newName] and with the meta produced by [newMeta].
     */
    override fun DataSetBuilder<R>.generate(data: DataSet<R>, meta: Meta) {
        data.forEach { namedData ->
            // `data(...)` here is the builder function, not the `data` parameter
            data(
                newName(namedData.name, namedData.meta),
                namedData.data.withMeta(namedData.meta.copy { newMeta(namedData.name) })
            )
        }
    }

    /**
     * Re-publish a single item identified by [updateKey] using the same renaming and meta rules as [generate].
     */
    override fun DataSourceBuilder<R>.update(dataSet: DataSet<R>, meta: Meta, updateKey: Name) {
        val datum = dataSet[updateKey]
        // A missing datum is forwarded as `null` under the renamed key
        data(
            newName(updateKey, datum?.meta),
            datum?.withMeta(datum.meta.copy { newMeta(updateKey) })
        )
    }

    public companion object {

        /**
         * Create a [ReWrapAction] that strips a file-like extension from the last token of the item name.
         *
         * The extension is the part of the token body after its last `.`. It is removed only when it is listed
         * in [bypassExtensions]. The token index is preserved. Tokens without a `.` and tokens that would be left
         * with an empty body (for example `.html`) are returned unchanged.
         *
         * @param bypassExtensions extensions (without the leading dot) to strip from names.
         * @param newMeta a mutation applied to a copy of the item meta; see the class constructor.
         */
        public inline fun <reified R : Any> removeExtensions(
            vararg bypassExtensions: String,
            noinline newMeta: MutableMeta.(name: Name) -> Unit = {},
        ): ReWrapAction<R> = ReWrapAction(typeOf<R>(), newMeta = newMeta) { name, _ ->
            name.replaceLast { token ->
                val body = token.body
                val dotIndex = body.lastIndexOf('.')
                // dotIndex == -1: no extension at all; dotIndex == 0: stripping would leave an empty body
                if (dotIndex > 0 && body.substring(dotIndex + 1) in bypassExtensions) {
                    // keep the index so indexed items do not collapse into the same name
                    NameToken(body.substring(0, dotIndex), token.index)
                } else {
                    token
                }
            }
        }
    }
}
/**
 * Create a [ReWrapAction] whose data type is taken from the reified type parameter [R].
 *
 * @param newMeta a mutation applied to a copy of the item meta; it receives the item name.
 * @param newName computes the new item name from the current name and meta.
 */
public inline fun <reified R : Any> ReWrapAction(
    noinline newMeta: MutableMeta.(name: Name) -> Unit = {},
    noinline newName: (Name, Meta?) -> Name
): ReWrapAction<R> {
    val dataType: KType = typeOf<R>()
    return ReWrapAction<R>(dataType, newMeta, newName)
}

View File

@ -14,6 +14,7 @@ public fun interface TextProcessor {
public companion object {
public const val DF_TYPE: String = "snark.textTransformation"
public val TEXT_TRANSFORMATION_KEY: NameToken = NameToken("transformation")
public val TEXT_PREPROCESSOR_KEY: NameToken = NameToken("preprocessor")
}
}

View File

@ -2,14 +2,12 @@
package space.kscience.snark
import space.kscience.dataforge.data.DataSet
import space.kscience.dataforge.data.DataTree
import space.kscience.dataforge.data.node
import space.kscience.dataforge.io.Binary
import space.kscience.dataforge.io.IOPlugin
import space.kscience.dataforge.meta.*
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.workspace.Workspace
import space.kscience.dataforge.workspace.WorkspaceBuilder
import space.kscience.dataforge.workspace.readRawDirectory
@ -39,13 +37,11 @@ private fun IOPlugin.readResources(
public fun Snark.workspace(
meta: Meta,
customData: DataSet<*> = DataSet.EMPTY,
workspaceBuilder: WorkspaceBuilder.() -> Unit = {},
): Workspace = Workspace {
data {
node(Name.EMPTY, customData)
meta.getIndexed("directory").forEach { (index, directoryMeta) ->
val dataDirectory = directoryMeta["path"].string ?: error("Directory path not defined")
val nodeName = directoryMeta["name"].string ?: directoryMeta.string ?: index ?: ""

View File

@ -11,8 +11,8 @@ import space.kscience.dataforge.names.Name
public fun interface HtmlPage {
context(PageContextWithData)
public fun HTML.renderPage()
context(PageContextWithData, HTML)
public fun renderPage()
public companion object {
public fun createHtmlString(

View File

@ -3,8 +3,10 @@
package space.kscience.snark.html
import io.ktor.http.ContentType
import kotlinx.coroutines.CoroutineScope
import kotlinx.io.readByteArray
import space.kscience.dataforge.actions.Action
import space.kscience.dataforge.actions.mapping
import space.kscience.dataforge.context.Context
import space.kscience.dataforge.context.PluginFactory
import space.kscience.dataforge.context.PluginTag
@ -13,27 +15,27 @@ import space.kscience.dataforge.io.*
import space.kscience.dataforge.io.yaml.YamlMetaFormat
import space.kscience.dataforge.io.yaml.YamlPlugin
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.copy
import space.kscience.dataforge.meta.get
import space.kscience.dataforge.meta.set
import space.kscience.dataforge.meta.string
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.dataforge.names.Name
import space.kscience.dataforge.names.asName
import space.kscience.dataforge.provider.dfId
import space.kscience.dataforge.provider.dfType
import space.kscience.dataforge.workspace.*
import space.kscience.snark.ReWrapAction
import space.kscience.snark.Snark
import space.kscience.snark.SnarkReader
import space.kscience.snark.TextProcessor
import java.net.URLConnection
import kotlin.io.path.Path
import kotlin.io.path.extension
import kotlin.reflect.typeOf
/**
 * Execute [action] on this data set with the given [meta] and return the resulting data set.
 */
public fun <T : Any, R : Any> DataSet<T>.transform(action: Action<T, R>, meta: Meta = Meta.EMPTY): DataSet<R> {
    val source: DataSet<T> = this
    return action.execute(source, meta)
}
public fun <T : Any> TaskResultBuilder<T>.fill(dataSet: DataSet<T>) {
public fun <T : Any> DataSetBuilder<T>.fill(dataSet: DataSet<T>) {
node(Name.EMPTY, dataSet)
}
@ -48,7 +50,7 @@ public class SnarkHtml : WorkspacePlugin() {
override val tag: PluginTag get() = Companion.tag
override fun content(target: String): Map<Name, Any> = when (target) {
SnarkReader::class.dfId -> mapOf(
SnarkReader::class.dfType -> mapOf(
"html".asName() to HtmlReader,
"markdown".asName() to MarkdownReader,
"json".asName() to SnarkReader(JsonMetaFormat, ContentType.Application.Json.toString()),
@ -64,52 +66,55 @@ public class SnarkHtml : WorkspacePlugin() {
}
public val parse: TaskReference<Any> by task<Any> {
from(allData).forEach { (dataName, data) ->
val contentType = getContentType(dataName, data.meta)
/**
 * An action that removes the `html` and `md` extensions from data names and writes the content type
 * resolved by `getContentType` into the item meta under `CONTENT_TYPE_KEY`.
 */
public val prepareHeaderAction: Action<Any, Any> = ReWrapAction.removeExtensions("html", "md") { name ->
    // `this` is the mutable copy of the item meta; the content type is resolved from it and the original name
    set(CONTENT_TYPE_KEY, getContentType(name, this))
}
public val parseAction: Action<Any, Any> = Action.mapping {
val contentType = getContentType(name, meta)
val parser = snark.readers.values.filter { parser ->
contentType in parser.types
}.maxByOrNull {
it.priority
} ?: return@forEach //ignore data for which parser is not found
val preprocessor = meta[TextProcessor.TEXT_TRANSFORMATION_KEY]?.let { snark.preprocessor(it) }
val newMeta = data.meta.copy {
CONTENT_TYPE_KEY put contentType
}
//pass data for which parser is not found
if (parser == null) {
result { it }
} else {
when (data.type) {
typeOf<String>() -> {
data(dataName, data.map { content ->
val string = content as String
val preprocessor = meta[TextProcessor.TEXT_PREPROCESSOR_KEY]?.let { snark.preprocessor(it) }
result {
when (it) {
is CharSequence -> {
val string = it.toString()
val preprocessed = preprocessor?.process(string) ?: string
parser.readFrom(preprocessed)
})
}
typeOf<Binary>() -> {
data(dataName, data.map(meta = newMeta) { content ->
val binary = content as Binary
is Binary -> {
if (preprocessor == null) {
parser.readFrom(binary)
parser.readFrom(it)
} else {
//TODO provide encoding
val string = binary.toByteArray().decodeToString()
val string = it.toByteArray().decodeToString()
parser.readFrom(preprocessor.process(string))
}
})
}
// bypass for non textual-data
else -> data(dataName, data.withMeta(newMeta))
else -> it
}
}
}
}
public val site: TaskReference<Any> by task<Any> {
fill(from(allData))
fill(from(parse))
public val parse: TaskReference<Any> by task<Any>({
description = "Parse all data for which reader is resolved"
}) {
fill(from(allData).transform(parseAction, taskMeta))
}
@ -128,3 +133,18 @@ public class SnarkHtml : WorkspacePlugin() {
}
}
/**
 * Run the plugin's `parseAction` over [binaries] using [meta] as the action meta.
 *
 * @return the data set produced by the action.
 */
public fun SnarkHtml.readSiteData(
    binaries: DataSource<Binary>,
    meta: Meta = Meta.EMPTY,
): DataSet<Any> {
    val parsed: DataSet<Any> = binaries.transform(parseAction, meta)
    return parsed
}
/**
 * Build a data source in [coroutineScope] with [builder], then apply `prepareHeaderAction` followed by
 * `parseAction`, both with [meta] as the action meta.
 *
 * @param builder fills the data source; it is invoked with the plugin's `io` as the context receiver.
 * @return the data set produced by the second action.
 */
public fun SnarkHtml.readSiteData(
    coroutineScope: CoroutineScope,
    meta: Meta = Meta.EMPTY,
    builder: context(IOPlugin) DataSourceBuilder<Any>.() -> Unit,
): DataSet<Any> {
    val rawData = DataSource(coroutineScope) { builder(io, this) }
    val withHeaders = rawData.transform(prepareHeaderAction, meta)
    return withHeaders.transform(parseAction, meta)
}

View File

@ -1,5 +1,5 @@
plugins {
id("space.kscience.gradle.jvm")
id("space.kscience.gradle.mpp")
`maven-publish`
}
@ -7,15 +7,17 @@ val dataforgeVersion: String by rootProject.extra
val ktorVersion = space.kscience.gradle.KScienceVersions.ktorVersion
kscience{
jvm()
useContextReceivers()
}
dependencies {
jvmMain{
api(projects.snarkHtml)
api("io.ktor:ktor-server-core:$ktorVersion")
api("io.ktor:ktor-server-html-builder:$ktorVersion")
api("io.ktor:ktor-server-host-common:$ktorVersion")
testApi("io.ktor:ktor-server-tests:$ktorVersion")
}
jvmTest{
api("io.ktor:ktor-server-tests:$ktorVersion")
}
}