Merge branch 'feature/SNRK-71/document-builder-implementation' into release/SNRK-86/assembly

This commit is contained in:
Kirill Grachev 2023-05-06 18:14:36 +03:00
commit 363f63b8da
12 changed files with 4489 additions and 0 deletions

View File

@ -43,4 +43,5 @@ include(
":snark-html",
":snark-ktor",
":snark-storage-driver",
":snark-document-builder",
)

View File

@ -0,0 +1,17 @@
// Gradle build script of the document-builder module: applies the KScience JVM plugin,
// Maven publishing and the kotlinx-serialization plugin, then declares the module's dependencies.
plugins {
id("space.kscience.gradle.jvm")
`maven-publish`
id("kotlinx-serialization")
}
// The coroutines version is read from the KScience Gradle plugin's version constants.
val coroutinesVersion = space.kscience.gradle.KScienceVersions.coroutinesVersion
// The Jackson version is pinned locally in this script.
val jacksonVersion = "2.14.2"
dependencies {
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:$coroutinesVersion")
// Sibling project that provides the storage abstraction.
implementation(project(":snark-storage-driver"))
implementation("com.fasterxml.jackson.module:jackson-module-kotlin:$jacksonVersion")
// NOTE(review): this version is hard-coded, unlike the two versions above - confirm it is intentional.
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.3.3")
}

View File

@ -0,0 +1,44 @@
package documentBuilder
/** Name of a file, kept as a plain string; used as the key type of [DependencyGraph.nodes]. */
public typealias FileName = String
/**
 * Node of the dependency graph.
 *
 * One node represents one file together with its dependencies.
 *
 * @property mdAst AST of the file represented by this node.
 * @property dependencies edges leaving this node, i.e. the dependencies of the file
 * that are still to be resolved.
 */
public data class DependencyGraphNode(
val mdAst: MdAstRoot,
val dependencies: List<DependencyGraphEdge>
)
/**
 * Marker type implemented by every kind of edge of the dependency graph.
 *
 * The interface is sealed, so a `when` over an edge can be checked for exhaustiveness.
 */
public sealed interface DependencyGraphEdge
/**
 * Dependency edge produced by include commands.
 *
 * @property parentNode node of the AST that is the parent of [dependentNode].
 * @property dependentNode the dependent node, i.e. the part of the document that holds the include commands.
 * @property includeList names of the files to be included.
 */
public data class IncludeDependency(
val parentNode: MdAstParent,
val dependentNode: MdAstElement,
val includeList: List<FileName>
) : DependencyGraphEdge
/**
 * The whole dependency graph.
 *
 * @property nodes map from a file name to the [DependencyGraphNode] built for that file.
 */
public data class DependencyGraph(
val nodes: Map<FileName, DependencyGraphNode>
)

View File

@ -0,0 +1,50 @@
package documentBuilder
import com.fasterxml.jackson.core.io.BigDecimalParser
import space.kscience.snark.storage.*
import java.nio.file.Path
import java.nio.file.Paths
/** Name of the markdown file that is read from every directory visited while building the graph. */
private val DEFAULT_DOCUMENT_ROOT = "main.md"
/**
 * Entry point for building the document stored in [documentDirectory].
 *
 * The implementation is unfinished: it builds the dependency graph and then reaches [TODO],
 * so a call that gets that far always ends with [NotImplementedError].
 *
 * @param documentDirectory directory that holds the document to build.
 */
public suspend fun buildDocument(documentDirectory: Directory) {
// The graph is computed but not consumed yet; dependency resolution is still to be written.
val dependencyGraph = buildDependencyGraph(documentDirectory)
TODO() /*resolving of dependencies*/
}
/**
 * Builds the dependency graph of the document stored in [root].
 *
 * Starts from [root] and lets [buildNodes] add one node per visited directory.
 *
 * @param root directory the traversal starts from.
 * @return graph holding every node collected during the traversal.
 */
public suspend fun buildDependencyGraph(root: Directory): DependencyGraph =
    DependencyGraph(HashMap<FileName, DependencyGraphNode>().also { collected -> buildNodes(root, collected) })
/**
 * Adds the node for [folder] to [nodes] and recursively does the same for every dependency
 * of that node which is not registered yet.
 *
 * @param folder directory whose [DEFAULT_DOCUMENT_ROOT] file is parsed.
 * @param nodes accumulator shared by the whole traversal, keyed by the string form of the directory path.
 * @throws IllegalStateException if [folder] has already been registered in [nodes].
 */
private suspend fun buildNodes(folder: Directory, nodes: HashMap<FileName, DependencyGraphNode>) {
    val pathString = folder.path.toString()
    // `check` is always evaluated; the JVM `assert` used before is skipped unless the JVM runs with -ea.
    check(pathString !in nodes) { "Directory '$pathString' is already present in the dependency graph" }

    val rootDocument = folder.get(DEFAULT_DOCUMENT_ROOT)
    val node = buildDependencyGraphNode(rootDocument.readAll(), folder.path)
    // Register the node before descending so that nested calls can see it.
    nodes[pathString] = node

    for (dependency in getDependencies(node)) {
        if (dependency !in nodes) {
            // NOTE(review): `dependency` already starts with this folder's path and names an
            // included file, yet it is resolved as a sub-directory of `folder` - confirm this
            // against the semantics of Directory.getSubdir.
            buildNodes(folder.getSubdir(Paths.get(dependency)), nodes)
        }
    }
}
/**
 * Collects the names of all files that [node] depends on.
 *
 * @param node graph node whose outgoing edges are inspected.
 * @return the distinct file names referenced by the edges of [node].
 */
public suspend fun getDependencies(node: DependencyGraphNode): Set<FileName> =
    node.dependencies
        .flatMap { edge ->
            // Deliberately no `else` branch: a new edge type must be handled here explicitly.
            when (edge) {
                is IncludeDependency -> edge.includeList
            }
        }
        .toSet()

View File

@ -0,0 +1,84 @@
package documentBuilder
import kotlinx.serialization.Serializable
import kotlinx.serialization.SerialName
import com.fasterxml.jackson.annotation.JsonSubTypes
import com.fasterxml.jackson.annotation.JsonTypeInfo
/**
 * A single location described by [line], [column] and [offset].
 *
 * NOTE(review): presumably mirrors the `Point` of the unist/mdast position model - confirm the indexing base there.
 */
@Serializable
public data class Point(val line: Int, val column: Int, val offset: Int)
/** A span delimited by a [start] and an [end] [Point]. */
@Serializable
public data class Position(val start: Point, val end: Point)
/**
 * Common supertype of all markdown AST nodes.
 *
 * For Jackson the concrete class is selected by the `type` property of the JSON object,
 * using the names registered in [JsonSubTypes] below.
 */
@JsonTypeInfo(
    use = JsonTypeInfo.Id.NAME,
    include = JsonTypeInfo.As.PROPERTY,
    property = "type"
)
@JsonSubTypes(
    JsonSubTypes.Type(value = MdAstRoot::class, name = "root"),
    JsonSubTypes.Type(value = MdAstParagraph::class, name = "paragraph"),
    JsonSubTypes.Type(value = MdAstText::class, name = "text"),
    JsonSubTypes.Type(value = MdAstHeading::class, name = "heading"),
    JsonSubTypes.Type(value = MdAstCode::class, name = "code"),
    JsonSubTypes.Type(value = MdAstBlockquote::class, name = "blockquote")
)
@Serializable
public sealed interface MdAstElement {
    /** Position attached to this node. */
    public var position: Position
}
/** An AST node that contains other nodes as [children]. */
@Serializable
public sealed interface MdAstParent: MdAstElement{
public var children: List<MdAstElement>
}
/** Parent node serialized with the type name `root`; it is the type returned by the markdown parser wrapper. */
@Serializable
@SerialName("root")
public data class MdAstRoot(
override var children: List<MdAstElement>,
override var position: Position
): MdAstParent
/** Parent node serialized with the type name `paragraph`. */
@Serializable
@SerialName("paragraph")
public data class MdAstParagraph(
override var children: List<MdAstElement>,
override var position: Position
): MdAstParent
/**
 * Leaf node serialized with the type name `text`.
 *
 * @property value text carried by the node.
 */
@Serializable
@SerialName("text")
public data class MdAstText(
val value: String,
override var position: Position
): MdAstElement
/**
 * Parent node serialized with the type name `heading`.
 *
 * @property depth NOTE(review): presumably the heading level as defined by mdast - confirm the valid range.
 */
@Serializable
@SerialName("heading")
public data class MdAstHeading(
val depth: Int,
override var children: List<MdAstElement>,
override var position: Position
): MdAstParent
/**
 * Leaf node serialized with the type name `code`.
 *
 * @property lang optional; defaults to `null`. Presumably the language tag of the code block - confirm against mdast.
 * @property meta optional; defaults to `null`. Presumably the text following the language tag - confirm against mdast.
 * @property value content carried by the node.
 */
@Serializable
@SerialName("code")
public data class MdAstCode(
var lang: String? = null,
var meta: String? = null,
var value: String,
override var position: Position,
) : MdAstElement
/** Parent node serialized with the type name `blockquote`. */
@Serializable
@SerialName("blockquote")
public data class MdAstBlockquote(
override var children: List<MdAstElement>,
override var position: Position
): MdAstParent

View File

@ -0,0 +1,57 @@
package documentBuilder
import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
import com.fasterxml.jackson.module.kotlin.readValue
import java.nio.file.Path
// Script paths handed to the external interpreters below. Both are relative paths.
// NOTE(review): they are presumably resolved against the working directory of the JVM - confirm.
/** Script passed to `node` to turn markdown text into a JSON AST. */
private val MARKDOWN_PARSER = "../nodejs/MarkdownParser.js"
/** Script passed to `python3` to extract include file names from a string. */
private val SNARK_PARSER = "../python/SnarkParse.py"
/**
 * Parses a markdown document into its AST by running the external [MARKDOWN_PARSER] script
 * with `node` and deserializing the JSON the script prints to its standard output.
 *
 * The child's standard error is inherited, so parser diagnostics appear in this process's
 * error stream.
 *
 * @param mdFile content of the markdown file; decoded as UTF-8 before it is handed to the parser.
 * @return root of the parsed AST.
 */
public suspend fun parseMd(mdFile: ByteArray): MdAstRoot {
    // ByteArray.toString() returns the array's identity string ("[B@..."), not its content,
    // so the bytes have to be decoded explicitly.
    val markdown = mdFile.decodeToString()
    val parser = ProcessBuilder("node", MARKDOWN_PARSER, markdown)
        .redirectOutput(ProcessBuilder.Redirect.PIPE)
        .redirectError(ProcessBuilder.Redirect.INHERIT)
        .start()
    // `use` closes the pipe to the child process once its output has been read.
    val astJson = parser.inputStream.bufferedReader().use { it.readText() }
    return jacksonObjectMapper().readValue<MdAstRoot>(astJson)
}
/**
 * Builds the graph node of a single markdown file.
 *
 * @param mdFile content of the markdown file.
 * @param path path that is prepended to every include found in the file.
 * @return node holding the parsed AST together with the dependency edges found in it.
 */
public suspend fun buildDependencyGraphNode(mdFile: ByteArray, path: Path): DependencyGraphNode {
    val ast = parseMd(mdFile)
    val edges = mutableListOf<DependencyGraphEdge>().also { found -> fillDependencies(ast, found, path) }
    return DependencyGraphNode(ast, edges)
}
/**
 * Walks the AST below [currentNode] and appends an [IncludeDependency] to [dependencies]
 * for every text node for which [getIncludeFiles] reports at least one file.
 *
 * Nodes that are not [MdAstParent] have no children and are ignored.
 *
 * @param currentNode node whose children are inspected.
 * @param dependencies accumulator that receives the discovered edges.
 * @param path path joined with `/` in front of every reported file name.
 */
internal suspend fun fillDependencies(
    currentNode: MdAstElement,
    dependencies: MutableList<DependencyGraphEdge>,
    path: Path
) {
    if (currentNode !is MdAstParent) return

    for (child in currentNode.children) {
        if (child !is MdAstText) {
            // Only text nodes are examined for includes; anything else is descended into.
            fillDependencies(child, dependencies, path)
            continue
        }
        val included = getIncludeFiles(child.value).map { "$path/$it" }
        if (included.isNotEmpty()) {
            dependencies += IncludeDependency(currentNode, child, included)
        }
    }
}
/**
 * Extracts the file names referenced by include commands in [string] by running the external
 * [SNARK_PARSER] script with `python3` and deserializing the JSON list it prints to its
 * standard output.
 *
 * The child's standard error is inherited, so parser diagnostics appear in this process's
 * error stream.
 *
 * @param string text to scan for include commands.
 * @return the file names reported by the script, in the order it printed them.
 */
public suspend fun getIncludeFiles(string: String): List<FileName> {
    val parser = ProcessBuilder("python3", SNARK_PARSER, string)
        .redirectOutput(ProcessBuilder.Redirect.PIPE)
        .redirectError(ProcessBuilder.Redirect.INHERIT)
        .start()
    // `use` closes the pipe to the child process once its output has been read;
    // previously the reader was left open.
    val filesJson = parser.inputStream.bufferedReader().use { it.readText() }
    return jacksonObjectMapper().readValue<List<FileName>>(filesJson)
}

View File

@ -0,0 +1,14 @@
import {fromMarkdown} from 'mdast-util-from-markdown'
main()

/**
 * Reads markdown text from the first command-line argument, parses it with `fromMarkdown`
 * and prints the resulting tree as a single line of JSON on standard output.
 *
 * @throws {Error} when no markdown argument was passed.
 */
function main() {
    if (process.argv.length < 3) {
        // Throw an Error object rather than a bare string so the failure carries a stack trace.
        throw new Error("No input")
    }
    const markdownSource = process.argv[2]
    const tree = fromMarkdown(markdownSource)
    console.log(JSON.stringify(tree))
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,30 @@
{
"type": "module",
"dependencies": {
"fs": "^0.0.1-security",
"hast-util-to-html": "^8.0.4",
"mdast-util-to-hast": "^12.3.0",
"node-fetch": "^3.3.1",
"remark-html": "^15.0.2",
"remark-parse": "^10.0.1",
"require": "^2.4.20",
"to-vfile": "^7.2.4",
"unified": "^10.1.2",
"whatwg-fetch": "^3.6.2"
}
}

View File

@ -0,0 +1,26 @@
import sys
import re
import json
# A string is well formed when it consists only of include commands separated by
# newlines, tabs or spaces. Inside [...] a '|' is a literal character, not an
# alternation, so the classes list their members directly; the dot of the ".md"
# suffix is escaped so that it matches only a dot. Leading whitespace is matched
# once, outside the repeated group, so that a run of whitespace between two
# commands can be matched in only one way.
INCLUDE_ONLY_PATTERN = re.compile(r'^[\n\t ]*(@include\([a-z0-9._]*\.md\)[\n\t ]*)*$')
INCLUDE_NAME_PATTERN = re.compile(r'@include\((.*?)\)')


def find_includes(string):
    """Return the file names referenced by the include commands in ``string``.

    A string that does not mention ``@include`` (in any letter case) yields an
    empty list. A string that mentions it must consist solely of well-formed
    ``@include(<name>.md)`` commands separated by whitespace.

    Raises:
        ValueError: if ``string`` mentions ``@include`` but is not well formed.
    """
    if not re.search("@include", string, re.IGNORECASE):
        return []
    if not INCLUDE_ONLY_PATTERN.match(string):
        raise ValueError("Illformed string")
    return INCLUDE_NAME_PATTERN.findall(string)


def main(argv):
    """Command-line entry point: ``SnarkParse.py STRING [OUTPUT_FILE]``.

    Prints the JSON list of included files to standard output, or writes it to
    OUTPUT_FILE when that argument is given. Exits with a message on standard
    error when the input string is missing or ill-formed.
    """
    # An explicit check instead of `assert`, which is removed when Python runs with -O.
    if len(argv) < 2:
        sys.exit("Usage: SnarkParse.py STRING [OUTPUT_FILE]")
    try:
        files = find_includes(argv[1])
    except ValueError as error:
        sys.exit(str(error))
    outputfile = argv[2] if len(argv) >= 3 else None
    if outputfile is None:
        print(json.dumps(files))
    else:
        with open(outputfile, 'w+') as f:
            json.dump(files, f)


if __name__ == "__main__":
    main(sys.argv)

View File

@ -0,0 +1,11 @@
package documentBuilder
import org.junit.jupiter.api.Test
import kotlinx.coroutines.runBlocking
/**
 * Placeholder test: the only call in its body is commented out, so it passes without
 * exercising any code of the module.
 */
class SomeTest {
    // The explicit Unit return type keeps the test method's return type independent of
    // the value of the last expression inside runBlocking.
    @Test
    fun justWorks(): Unit = runBlocking {
        // buildDocument(Directory("../example"))
    }
}

View File

@ -0,0 +1,3 @@
# Hello
I'm almost empty test document without any dependencies