diff --git a/src/main/kotlin/html.kt b/src/main/kotlin/html.kt
new file mode 100644
index 0000000..1617fdf
--- /dev/null
+++ b/src/main/kotlin/html.kt
@@ -0,0 +1,53 @@
+package ru.mipt.npm.space.documentextractor
+
+import java.nio.file.Path
+import java.nio.file.StandardOpenOption
+import java.nio.file.attribute.FileAttribute
+import kotlin.io.path.*
+
+/**
+ * Mirrors the [inputPath] tree into [outputPath], rendering every `*.md` file to standalone HTML with pandoc.
+ *
+ * The bundled `links-to-html.lua` filter is unpacked into a `scripts` directory next to [inputPath] if it is
+ * not there yet. Entries that are not markdown files are copied as they are.
+ *
+ * @param inputPath directory tree whose content is converted
+ * @param outputPath directory that receives the generated HTML and the copied files
+ * @throws IllegalStateException if the bundled Lua filter cannot be found on the classpath
+ */
+@OptIn(ExperimentalPathApi::class)
+fun generateHtml(inputPath: Path, outputPath: Path) {
+    val scriptPath = inputPath.resolveSibling("scripts").resolve("links-to-html.lua")
+    if (!scriptPath.exists()) {
+        val script = object {}.javaClass.getResource("/links-to-html.lua")
+            ?: error("Bundled resource /links-to-html.lua is missing from the classpath")
+        scriptPath.parent.createDirectories()
+        scriptPath.writeText(script.readText(), Charsets.UTF_8, StandardOpenOption.CREATE)
+    }
+    inputPath.copyToRecursively(outputPath, followLinks = false) { source: Path, target: Path ->
+        if (source.isRegularFile() && source.extension == "md") {
+            // Resolve next to the mirrored target rather than in the output root: nested documents keep their
+            // relative location, so same-named files do not overwrite each other and relative links keep working.
+            val targetPath = target.resolveSibling(source.nameWithoutExtension + ".html")
+            val exitCode = ProcessBuilder(
+                "pandoc",
+                "--standalone",
+                "--mathjax",
+                "--metadata=title: ${source.nameWithoutExtension}",
+                "--from=markdown",
+                "--to=html5",
+                "--lua-filter=${scriptPath.absolute()}",
+                "--output=${targetPath.absolute()}",
+                "${source.absolute()}",
+            ).also {
+                logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
+            }.inheritIO().start().waitFor()
+            // A failed conversion is reported but does not abort the remaining documents.
+            if (exitCode != 0) logger.error("pandoc exited with code $exitCode for $source")
+            CopyActionResult.CONTINUE
+        } else {
+            source.copyToIgnoringExistingDirectory(target, false)
+        }
+    }
+    // TODO: packing the generated [outputPath] directory into a zip archive is not implemented yet.
+}
\ No newline at end of file
diff --git a/src/main/kotlin/main.kt b/src/main/kotlin/main.kt
index c638508..cd4db6d 100644
--- a/src/main/kotlin/main.kt
+++ b/src/main/kotlin/main.kt
@@ -3,6 +3,7 @@ package ru.mipt.npm.space.documentextractor
import io.ktor.client.engine.cio.CIO
import kotlinx.cli.ArgParser
import kotlinx.cli.ArgType
+import kotlinx.cli.default
import kotlinx.cli.required
import kotlinx.coroutines.coroutineScope
import space.jetbrains.api.runtime.SpaceAppInstance
@@ -29,13 +30,26 @@ suspend fun main(args: Array<String>) {
description = "The key of the exported project"
).required()
- val path: String? by parser.option(ArgType.String, description = "Target directory. Default is './output/project-key'.")
+ val path: String? by parser.option(
+ ArgType.String,
+ description = "Target directory. Default is './output/project-key'."
+ )
val folderId: String? by parser.option(
ArgType.String,
description = "FolderId for the folder to export. By default uses project root."
)
+ val generateHtml by parser.option(
+ ArgType.Boolean,
+ description = "If defined, generate HTML directory and zip"
+ ).default(false)
+
+ val htmlOutputPath by parser.option(
+ ArgType.String,
+ description = "Path for html output directory sibling to 'output' directory"
+ ).default("html")
+
val clientId by parser.option(
ArgType.String,
description = "Space application client ID (if not defined, use environment value 'space.clientId')"
@@ -48,7 +62,7 @@ suspend fun main(args: Array<String>) {
parser.parse(args)
- val target: Path = path?.let { Path(it) } ?: Path.of("output/$project")
+ val target: Path = path?.let { Path(it) } ?: folderId?.let { Path("output") } ?: Path("output/$project")
Files.createDirectories(target)
@@ -69,5 +83,8 @@ suspend fun main(args: Array) {
ProjectIdentifier.Key(project),
folderId?.let { FolderIdentifier.Id(it) } ?: FolderIdentifier.Root
)
+ if (generateHtml) {
+ generateHtml(target, target.resolveSibling(htmlOutputPath))
+ }
}
}
\ No newline at end of file
diff --git a/src/main/kotlin/process.kt b/src/main/kotlin/process.kt
index 2b8fcb0..a29f72b 100644
--- a/src/main/kotlin/process.kt
+++ b/src/main/kotlin/process.kt
@@ -3,7 +3,6 @@ package ru.mipt.npm.space.documentextractor
import io.ktor.client.request.header
import io.ktor.client.request.request
import io.ktor.client.request.url
-import io.ktor.client.statement.HttpResponse
import io.ktor.client.statement.bodyAsChannel
import io.ktor.client.statement.readBytes
import io.ktor.http.HttpHeaders
@@ -23,7 +22,7 @@ import java.nio.file.Path
import kotlin.io.path.*
import kotlin.streams.toList
-private val logger by lazy { LoggerFactory.getLogger("space-extractor") }
+internal val logger by lazy { LoggerFactory.getLogger("space-extractor") }
/**
* Extract single attachment image
@@ -70,18 +69,19 @@ private val regex = """!\[(?<alt>.*)]\(/d/(?<id>.*)\?f=0""".toRegex()
/**
* Post-process a markdown document by downloading images and replacing links
*/
-internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutineScope{
+internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutineScope {
val documentBody = path.readText()
val logger = LoggerFactory.getLogger("space-document-extractor")
logger.info("Processing file $path...")
val newText = documentBody.replace(regex) {
val id = it.groups["id"]?.value ?: error("Unexpected reference format: ${it.value}")
val alt = it.groups["alt"]?.value
- logger.info("Downloading image $id as images/$id")
+ val imageName = alt?.takeIf(String::isNotBlank) ?: id // an empty alt is "" rather than null, so fall back to the id explicitly
+ logger.info("Downloading image $id as images/$imageName")
launch(Dispatchers.IO) {
- extractImage(path.parent, id, id)
+ extractImage(path.parent, id, imageName)
}
- "![$alt](images/$id"
+ "![$alt](images/$imageName"
}
path.writeText(newText)
}
@@ -117,10 +117,12 @@ internal suspend fun SpaceClient.downloadDocument(
extractFile(directory, document.id, document.title)
}
}
+
is TextDocument -> {
val markdownFilePath = directory.resolve(document.title + ".md")
markdownFilePath.writeText(body.text, Charsets.UTF_8)
}
+
else -> {
LoggerFactory.getLogger("space-extractor")
.warn("Can't extract document ${document.title} with type ${document.bodyType}")
@@ -165,7 +167,7 @@ suspend fun SpaceClient.downloadAndProcessDocumentsInProject(
directory: Path,
projectId: ProjectIdentifier,
rootFolder: FolderIdentifier = FolderIdentifier.Root,
-) = withContext(Dispatchers.IO){
+) = withContext(Dispatchers.IO) {
logger.info("Processing project ${projectId.compactId} to $directory")
downloadDocumentFolder(directory, projectId, rootFolder)
processMarkdownInDirectory(directory)
diff --git a/src/main/resources/links-to-html.lua b/src/main/resources/links-to-html.lua
new file mode 100644
index 0000000..0e203ea
--- /dev/null
+++ b/src/main/resources/links-to-html.lua
@@ -0,0 +1,5 @@
+-- links-to-html.lua: pandoc Lua filter that retargets links from markdown sources to the generated HTML files.
+function Link(el)
+  el.target = el.target:gsub("%.md$", ".html"):gsub("%.md([#?])", ".html%1") -- only a path-ending ".md"
+  return el
+end
\ No newline at end of file