Add pandoc conversion
This commit is contained in:
parent
62f6436cf6
commit
3c1a1bd99d
53
src/main/kotlin/html.kt
Normal file
53
src/main/kotlin/html.kt
Normal file
@ -0,0 +1,53 @@
|
||||
package ru.mipt.npm.space.documentextractor
|
||||
|
||||
import java.nio.file.Path
|
||||
import java.nio.file.StandardOpenOption
|
||||
import java.nio.file.attribute.FileAttribute
|
||||
import kotlin.io.path.*
|
||||
|
||||
/**
 * Mirrors the directory tree at [inputPath] into [outputPath], converting every Markdown file
 * (`*.md`) into a standalone HTML page with the external `pandoc` executable.
 *
 * Files that are not Markdown, as well as directories, are copied unchanged. Each generated HTML
 * file is placed at the same relative location as its Markdown source, so relative links between
 * documents (rewritten from `.md` to `.html` by the `links-to-html.lua` filter) keep resolving.
 *
 * The Lua filter is read from the classpath resource `/links-to-html.lua` and written to
 * `scripts/links-to-html.lua` next to [inputPath] if that file does not exist yet.
 *
 * A non-zero pandoc exit code is logged as an error; the remaining files are still processed.
 *
 * @param inputPath root of the tree with the exported documents.
 * @param outputPath root of the tree that receives the HTML pages and copied files.
 * @throws IllegalStateException if the bundled Lua filter resource cannot be found.
 * @throws java.io.IOException if the `pandoc` process cannot be started.
 */
@OptIn(ExperimentalPathApi::class)
fun generateHtml(inputPath: Path, outputPath: Path) {
    val scriptPath = inputPath.resolveSibling("scripts").resolve("links-to-html.lua")
    if (!scriptPath.exists()) {
        // `{}.javaClass` gives a class from this module, which is enough to reach the classpath resources.
        val bundledFilter = {}.javaClass.getResource("/links-to-html.lua")
            ?: error("Resource /links-to-html.lua is missing from the classpath")
        scriptPath.parent.createDirectories()
        scriptPath.writeText(bundledFilter.readText(), Charsets.UTF_8, StandardOpenOption.CREATE)
    }

    inputPath.copyToRecursively(outputPath, followLinks = false) { source: Path, target: Path ->
        if (source.isRegularFile() && source.extension == "md") {
            // Keep the sub-directory of the source inside the output tree. Resolving the file name
            // against outputPath directly would flatten the tree and let same-named documents
            // from different folders overwrite each other.
            val relativeParent = inputPath.relativize(source).parent
            val targetDirectory = relativeParent?.let { outputPath.resolve(it) } ?: outputPath
            targetDirectory.createDirectories()
            val targetPath = targetDirectory.resolve(source.nameWithoutExtension + ".html")

            val exitCode = ProcessBuilder(
                "pandoc",
                "--standalone",
                "--mathjax",
                "--metadata=title: ${source.nameWithoutExtension}",
                "--from=markdown",
                "--to=html5",
                "--lua-filter=${scriptPath.absolute()}",
                "--output=${targetPath.absolute()}",
                "${source.absolute()}",
            ).also {
                logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
            }.inheritIO().start().waitFor()

            // Report failed conversions instead of dropping the exit code silently.
            if (exitCode != 0) {
                logger.error("pandoc exited with code $exitCode while converting $source")
            }
            CopyActionResult.CONTINUE
        } else {
            source.copyToIgnoringExistingDirectory(target, false)
        }
    }

    // NOTE(review): disabled draft of zip packaging for the generated tree, kept for reference.
//    ZipOutputStream(zipFileName.outputStream().buffered()).use { zipStream ->
//        outputPath.walk().forEach { file ->
//            val zipEntryPath = file.absolute().relativize(inputPath.absolute())
//            val entry = ZipEntry("$zipEntryPath${(if (file.isDirectory()) "/" else "")}")
//            zipStream.putNextEntry(entry)
//            if (file.isRegularFile()) {
//                file.inputStream().copyTo(zipStream)
//            }
//        }
//    }
}
|
@ -3,6 +3,7 @@ package ru.mipt.npm.space.documentextractor
|
||||
import io.ktor.client.engine.cio.CIO
|
||||
import kotlinx.cli.ArgParser
|
||||
import kotlinx.cli.ArgType
|
||||
import kotlinx.cli.default
|
||||
import kotlinx.cli.required
|
||||
import kotlinx.coroutines.coroutineScope
|
||||
import space.jetbrains.api.runtime.SpaceAppInstance
|
||||
@ -29,13 +30,26 @@ suspend fun main(args: Array<String>) {
|
||||
description = "The key of the exported project"
|
||||
).required()
|
||||
|
||||
val path: String? by parser.option(ArgType.String, description = "Target directory. Default is './output/project-key'.")
|
||||
val path: String? by parser.option(
|
||||
ArgType.String,
|
||||
description = "Target directory. Default is './output/project-key'."
|
||||
)
|
||||
|
||||
val folderId: String? by parser.option(
|
||||
ArgType.String,
|
||||
description = "FolderId for the folder to export. By default uses project root."
|
||||
)
|
||||
|
||||
val generateHtml by parser.option(
|
||||
ArgType.Boolean,
|
||||
description = "If defined, generate HTML directory and zip"
|
||||
).default(false)
|
||||
|
||||
val htmlOutputPath by parser.option(
|
||||
ArgType.String,
|
||||
description = "Path for html output directory sibling to 'output' directory"
|
||||
).default("html")
|
||||
|
||||
val clientId by parser.option(
|
||||
ArgType.String,
|
||||
description = "Space application client ID (if not defined, use environment value 'space.clientId')"
|
||||
@ -48,7 +62,7 @@ suspend fun main(args: Array<String>) {
|
||||
|
||||
parser.parse(args)
|
||||
|
||||
val target: Path = path?.let { Path(it) } ?: Path.of("output/$project")
|
||||
val target: Path = path?.let { Path(it) } ?: folderId?.let { Path("output") } ?: Path("output/$project")
|
||||
|
||||
Files.createDirectories(target)
|
||||
|
||||
@ -69,5 +83,8 @@ suspend fun main(args: Array<String>) {
|
||||
ProjectIdentifier.Key(project),
|
||||
folderId?.let { FolderIdentifier.Id(it) } ?: FolderIdentifier.Root
|
||||
)
|
||||
if (generateHtml) {
|
||||
generateHtml(target, target.resolveSibling(htmlOutputPath))
|
||||
}
|
||||
}
|
||||
}
|
@ -3,7 +3,6 @@ package ru.mipt.npm.space.documentextractor
|
||||
import io.ktor.client.request.header
|
||||
import io.ktor.client.request.request
|
||||
import io.ktor.client.request.url
|
||||
import io.ktor.client.statement.HttpResponse
|
||||
import io.ktor.client.statement.bodyAsChannel
|
||||
import io.ktor.client.statement.readBytes
|
||||
import io.ktor.http.HttpHeaders
|
||||
@ -23,7 +22,7 @@ import java.nio.file.Path
|
||||
import kotlin.io.path.*
|
||||
import kotlin.streams.toList
|
||||
|
||||
private val logger by lazy { LoggerFactory.getLogger("space-extractor") }
|
||||
internal val logger by lazy { LoggerFactory.getLogger("space-extractor") }
|
||||
|
||||
/**
|
||||
* Extract single attachment image
|
||||
@ -77,11 +76,12 @@ internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutin
|
||||
val newText = documentBody.replace(regex) {
|
||||
val id = it.groups["id"]?.value ?: error("Unexpected reference format: ${it.value}")
|
||||
val alt = it.groups["alt"]?.value
|
||||
logger.info("Downloading image $id as images/$id")
|
||||
val imageName = alt ?: id
|
||||
logger.info("Downloading image $id as images/$imageName")
|
||||
launch(Dispatchers.IO) {
|
||||
extractImage(path.parent, id, id)
|
||||
extractImage(path.parent, id, imageName)
|
||||
}
|
||||
"![$alt](images/$id"
|
||||
"![$alt](images/$imageName"
|
||||
}
|
||||
path.writeText(newText)
|
||||
}
|
||||
@ -117,10 +117,12 @@ internal suspend fun SpaceClient.downloadDocument(
|
||||
extractFile(directory, document.id, document.title)
|
||||
}
|
||||
}
|
||||
|
||||
is TextDocument -> {
|
||||
val markdownFilePath = directory.resolve(document.title + ".md")
|
||||
markdownFilePath.writeText(body.text, Charsets.UTF_8)
|
||||
}
|
||||
|
||||
else -> {
|
||||
LoggerFactory.getLogger("space-extractor")
|
||||
.warn("Can't extract document ${document.title} with type ${document.bodyType}")
|
||||
|
5
src/main/resources/links-to-html.lua
Normal file
5
src/main/resources/links-to-html.lua
Normal file
@ -0,0 +1,5 @@
|
||||
-- links-to-html.lua
-- Pandoc Lua filter: point links to Markdown documents at the generated HTML pages.

--- Rewrite the target of a link from `*.md` to `*.html`.
-- Only a `.md` extension that ends the path part is replaced; a trailing query
-- string or fragment (`doc.md#section`) is preserved. Targets that start with a
-- URL scheme (`https:`, `mailto:`, ...) are returned unchanged, because they do
-- not refer to documents converted by this tool.
-- @param el link element; its `target` field is read and updated
-- @return the same element
function Link(el)
  local target = el.target

  -- Absolute URLs point outside of the exported tree; leave them alone.
  if target:match("^%a[%w+.%-]*:") then
    return el
  end

  -- Split "path?query#fragment" so the extension is matched at the end of the path only.
  local path, suffix = target:match("^([^?#]*)(.*)$")
  -- gsub also returns the substitution count; keep just the resulting string.
  local converted = path:gsub("%.md$", ".html")
  el.target = converted .. suffix
  return el
end
|
Loading…
Reference in New Issue
Block a user