From 3c1a1bd99d72e317123b462f94e463aba069b871 Mon Sep 17 00:00:00 2001
From: Alexander Nozik
Date: Fri, 16 Jun 2023 20:16:22 +0300
Subject: [PATCH] Add pandoc conversion

---
Review notes (text placed here, before the first "diff --git", is not part of the commit):
 * html.kt: each converted page is written to the mirrored location of its markdown source
   instead of being flattened into the output root; a non-zero pandoc exit code is logged.
 * process.kt: a blank alt text no longer produces an empty image file name.
 * links-to-html.lua: regular Lua comment header; only document-relative ".md" links are rewritten.
 * Line breaks and indentation were restored by hand, hunk sizes and the diffstat were recomputed;
   the "index" blob ids of the modified files are stale.

 src/main/kotlin/html.kt              | 68 ++++++++++++++++++++++++++++
 src/main/kotlin/main.kt              | 21 +++++++++--
 src/main/kotlin/process.kt           | 17 +++++----
 src/main/resources/links-to-html.lua | 15 +++++++
 4 files changed, 112 insertions(+), 9 deletions(-)
 create mode 100644 src/main/kotlin/html.kt
 create mode 100644 src/main/resources/links-to-html.lua

diff --git a/src/main/kotlin/html.kt b/src/main/kotlin/html.kt
new file mode 100644
index 0000000..1617fdf
--- /dev/null
+++ b/src/main/kotlin/html.kt
@@ -0,0 +1,68 @@
+package ru.mipt.npm.space.documentextractor
+
+import java.nio.file.Path
+import java.nio.file.StandardOpenOption
+import java.nio.file.attribute.FileAttribute
+import kotlin.io.path.*
+
+/**
+ * Mirror [inputPath] into [outputPath], rendering every Markdown file to standalone HTML with pandoc.
+ *
+ * Files with the `md` extension are converted to an `.html` file at the same relative location in the
+ * output tree; every other entry is copied unchanged. The `links-to-html.lua` pandoc filter is unpacked
+ * from the application resources into a `scripts` directory next to [inputPath] unless it already exists.
+ *
+ * The `pandoc` executable must be available on the `PATH`. A non-zero pandoc exit code is logged.
+ *
+ * @param inputPath root of the exported document tree.
+ * @param outputPath root of the generated HTML tree.
+ */
+@OptIn(ExperimentalPathApi::class)
+fun generateHtml(inputPath: Path, outputPath: Path) {
+    val scriptPath = inputPath.resolveSibling("scripts").resolve("links-to-html.lua")
+    if (!scriptPath.exists()) {
+        val scriptResource = checkNotNull({}.javaClass.getResource("/links-to-html.lua")) {
+            "Resource '/links-to-html.lua' is missing from the application classpath"
+        }
+        scriptPath.parent.createDirectories()
+        scriptPath.writeText(scriptResource.readText(), Charsets.UTF_8, StandardOpenOption.CREATE)
+    }
+    inputPath.copyToRecursively(outputPath, followLinks = false) { source: Path, target: Path ->
+        if (source.isRegularFile() && source.extension == "md") {
+            // Write the page where the markdown file would have been copied: this keeps nested folders
+            // intact and keeps the relative links rewritten by the lua filter valid.
+            val targetPath = target.resolveSibling(source.nameWithoutExtension + ".html")
+
+            val exitCode = ProcessBuilder(
+                "pandoc",
+                "--standalone",
+                "--mathjax",
+                "--metadata=title: ${source.nameWithoutExtension}",
+                "--from=markdown",
+                "--to=html5",
+                "--lua-filter=${scriptPath.absolute()}",
+                "--output=${targetPath.absolute()}",
+                "${source.absolute()}",
+            ).also {
+                logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
+            }.inheritIO().start().waitFor()
+            if (exitCode != 0) {
+                logger.warn("pandoc exited with code $exitCode while converting $source")
+            }
+            CopyActionResult.CONTINUE
+        } else {
+            source.copyToIgnoringExistingDirectory(target, false)
+        }
+    }
+
+//    ZipOutputStream(zipFileName.outputStream().buffered()).use { zipStream ->
+//        outputPath.walk().forEach { file ->
+//            val zipEntryPath = file.absolute().relativize(inputPath.absolute())
+//            val entry = ZipEntry("$zipEntryPath${(if (file.isDirectory()) "/" else "")}")
+//            zipStream.putNextEntry(entry)
+//            if (file.isRegularFile()) {
+//                file.inputStream().copyTo(zipStream)
+//            }
+//        }
+//    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/main.kt b/src/main/kotlin/main.kt
index c638508..cd4db6d 100644
--- a/src/main/kotlin/main.kt
+++ b/src/main/kotlin/main.kt
@@ -3,6 +3,7 @@ package ru.mipt.npm.space.documentextractor
 import io.ktor.client.engine.cio.CIO
 import kotlinx.cli.ArgParser
 import kotlinx.cli.ArgType
+import kotlinx.cli.default
 import kotlinx.cli.required
 import kotlinx.coroutines.coroutineScope
 import space.jetbrains.api.runtime.SpaceAppInstance
@@ -29,13 +30,26 @@ suspend fun main(args: Array) {
         description = "The key of the exported project"
     ).required()
 
-    val path: String? by parser.option(ArgType.String, description = "Target directory. Default is './output/project-key'.")
+    val path: String? by parser.option(
+        ArgType.String,
+        description = "Target directory. Default is './output/project-key'."
+    )
 
     val folderId: String? by parser.option(
         ArgType.String,
         description = "FolderId for the folder to export. By default uses project root."
     )
 
+    val generateHtml by parser.option(
+        ArgType.Boolean,
+        description = "If defined, generate HTML directory and zip"
+    ).default(false)
+
+    val htmlOutputPath by parser.option(
+        ArgType.String,
+        description = "Path for html output directory sibling to 'output' directory"
+    ).default("html")
+
     val clientId by parser.option(
         ArgType.String,
         description = "Space application client ID (if not defined, use environment value 'space.clientId')"
@@ -48,7 +62,7 @@ suspend fun main(args: Array) {
 
     parser.parse(args)
 
-    val target: Path = path?.let { Path(it) } ?: Path.of("output/$project")
+    val target: Path = path?.let { Path(it) } ?: folderId?.let { Path("output") } ?: Path("output/$project")
 
     Files.createDirectories(target)
 
@@ -69,5 +83,8 @@ suspend fun main(args: Array) {
             ProjectIdentifier.Key(project),
             folderId?.let { FolderIdentifier.Id(it) } ?: FolderIdentifier.Root
         )
+        if (generateHtml) {
+            generateHtml(target, target.resolveSibling(htmlOutputPath))
+        }
     }
 }
\ No newline at end of file
diff --git a/src/main/kotlin/process.kt b/src/main/kotlin/process.kt
index 2b8fcb0..a29f72b 100644
--- a/src/main/kotlin/process.kt
+++ b/src/main/kotlin/process.kt
@@ -3,7 +3,6 @@ package ru.mipt.npm.space.documentextractor
 import io.ktor.client.request.header
 import io.ktor.client.request.request
 import io.ktor.client.request.url
-import io.ktor.client.statement.HttpResponse
 import io.ktor.client.statement.bodyAsChannel
 import io.ktor.client.statement.readBytes
 import io.ktor.http.HttpHeaders
@@ -23,7 +22,7 @@ import java.nio.file.Path
 import kotlin.io.path.*
 import kotlin.streams.toList
 
-private val logger by lazy { LoggerFactory.getLogger("space-extractor") }
+internal val logger by lazy { LoggerFactory.getLogger("space-extractor") }
 
 /**
  * Extract single attachment image
@@ -70,18 +69,20 @@ private val regex = """!\[(?.*)]\(/d/(?.*)\?f=0""".toRegex()
 /**
  * Post-process a markdown document by downloading images and replacing links
  */
-internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutineScope{
+internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutineScope {
     val documentBody = path.readText()
     val logger = LoggerFactory.getLogger("space-document-extractor")
     logger.info("Processing file $path...")
     val newText = documentBody.replace(regex) {
         val id = it.groups["id"]?.value ?: error("Unexpected reference format: ${it.value}")
         val alt = it.groups["alt"]?.value
-        logger.info("Downloading image $id as images/$id")
+        // A blank alt text cannot serve as a file name, so fall back to the attachment id
+        val imageName = alt?.takeIf { it.isNotBlank() } ?: id
+        logger.info("Downloading image $id as images/$imageName")
         launch(Dispatchers.IO) {
-            extractImage(path.parent, id, id)
+            extractImage(path.parent, id, imageName)
         }
-        "![$alt](images/$id"
+        "![$alt](images/$imageName"
     }
     path.writeText(newText)
 }
@@ -117,10 +118,12 @@ internal suspend fun SpaceClient.downloadDocument(
                 extractFile(directory, document.id, document.title)
             }
         }
+
         is TextDocument -> {
             val markdownFilePath = directory.resolve(document.title + ".md")
             markdownFilePath.writeText(body.text, Charsets.UTF_8)
         }
+
         else -> {
             LoggerFactory.getLogger("space-extractor")
                 .warn("Can't extract document ${document.title} with type ${document.bodyType}")
@@ -165,7 +168,7 @@ suspend fun SpaceClient.downloadAndProcessDocumentsInProject(
     directory: Path,
     projectId: ProjectIdentifier,
     rootFolder: FolderIdentifier = FolderIdentifier.Root,
-) = withContext(Dispatchers.IO){
+) = withContext(Dispatchers.IO) {
     logger.info("Processing project ${projectId.compactId} to $directory")
     downloadDocumentFolder(directory, projectId, rootFolder)
     processMarkdownInDirectory(directory)
diff --git a/src/main/resources/links-to-html.lua b/src/main/resources/links-to-html.lua
new file mode 100644
index 0000000..0e203ea
--- /dev/null
+++ b/src/main/resources/links-to-html.lua
@@ -0,0 +1,15 @@
+-- links-to-html.lua
+-- Pandoc filter: make links between exported markdown documents point at the generated HTML pages.
+function Link(el)
+  local target = el.target
+  -- Leave absolute URLs (anything with a scheme, e.g. "https:") alone: they are not exported documents.
+  if target:match("^%a[%w+.-]*:") then
+    return el
+  end
+  -- Rewrite ".md" only where it ends the path: at the end of the target or right before "#"/"?".
+  -- Single-variable assignment keeps only the first result of gsub (the rewritten string).
+  target = target:gsub("%.md$", ".html")
+  target = target:gsub("%.md([#?])", ".html%1", 1)
+  el.target = target
+  return el
+end
\ No newline at end of file