Add pandoc conversion
This commit is contained in:
parent
62f6436cf6
commit
3c1a1bd99d
53
src/main/kotlin/html.kt
Normal file
53
src/main/kotlin/html.kt
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
package ru.mipt.npm.space.documentextractor
|
||||||
|
|
||||||
|
import java.nio.file.Path
|
||||||
|
import java.nio.file.StandardOpenOption
|
||||||
|
import java.nio.file.attribute.FileAttribute
|
||||||
|
import kotlin.io.path.*
|
||||||
|
|
||||||
|
/**
 * Mirror [inputPath] into [outputPath], converting every Markdown file to HTML with pandoc.
 *
 * The `links-to-html.lua` filter is read from the classpath resource `/links-to-html.lua` and
 * written to `scripts/links-to-html.lua` next to [inputPath] if that file does not exist yet.
 * Every regular file with the `md` extension is passed to the external `pandoc` executable;
 * all other entries (directories, images, ...) are copied unchanged.
 *
 * The generated HTML file is placed at the same relative location as its Markdown source, so
 * documents in nested folders keep their folder structure in the output tree.
 *
 * A non-zero pandoc exit code is logged as a warning and does not abort the traversal.
 *
 * @param inputPath root of the tree containing the Markdown documents
 * @param outputPath root of the tree that receives the HTML files and copied resources
 * @throws IllegalStateException if the Lua filter resource is missing from the classpath
 */
@OptIn(ExperimentalPathApi::class)
fun generateHtml(inputPath: Path, outputPath: Path) {
    val scriptPath = inputPath.resolveSibling("scripts").resolve("links-to-html.lua")
    if (!scriptPath.exists()) {
        scriptPath.parent.createDirectories()
        // The anonymous lambda is only used to obtain a class whose loader can see the resource
        val scriptResource = checkNotNull({}.javaClass.getResource("/links-to-html.lua")) {
            "Resource /links-to-html.lua is not found on the classpath"
        }
        scriptPath.writeText(
            scriptResource.readText(),
            Charsets.UTF_8,
            StandardOpenOption.CREATE
        )
    }

    inputPath.copyToRecursively(outputPath, followLinks = false) { source: Path, target: Path ->
        if (source.isRegularFile() && source.extension == "md") {
            // Resolve against the per-file target (not the output root) so nested documents
            // are not flattened into a single directory and do not overwrite each other
            val targetPath = target.resolveSibling(source.nameWithoutExtension + ".html")

            val exitCode = ProcessBuilder(
                "pandoc",
                "--standalone",
                "--mathjax",
                "--metadata=title: ${source.nameWithoutExtension}",
                "--from=markdown",
                "--to=html5",
                "--lua-filter=${scriptPath.absolute()}",
                "--output=${targetPath.absolute()}",
                "${source.absolute()}",
            ).also {
                logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
            }.inheritIO().start().waitFor()

            if (exitCode != 0) {
                // Keep converting the remaining documents, but do not hide the failure
                logger.warn("pandoc finished with exit code $exitCode for $source")
            }
            CopyActionResult.CONTINUE
        } else {
            source.copyToIgnoringExistingDirectory(target, false)
        }
    }

//    ZipOutputStream(zipFileName.outputStream().buffered()).use { zipStream ->
//        outputPath.walk().forEach { file ->
//            val zipEntryPath = file.absolute().relativize(inputPath.absolute())
//            val entry = ZipEntry("$zipEntryPath${(if (file.isDirectory()) "/" else "")}")
//            zipStream.putNextEntry(entry)
//            if (file.isRegularFile()) {
//                file.inputStream().copyTo(zipStream)
//            }
//        }
//    }
}
|
@ -3,6 +3,7 @@ package ru.mipt.npm.space.documentextractor
|
|||||||
import io.ktor.client.engine.cio.CIO
|
import io.ktor.client.engine.cio.CIO
|
||||||
import kotlinx.cli.ArgParser
|
import kotlinx.cli.ArgParser
|
||||||
import kotlinx.cli.ArgType
|
import kotlinx.cli.ArgType
|
||||||
|
import kotlinx.cli.default
|
||||||
import kotlinx.cli.required
|
import kotlinx.cli.required
|
||||||
import kotlinx.coroutines.coroutineScope
|
import kotlinx.coroutines.coroutineScope
|
||||||
import space.jetbrains.api.runtime.SpaceAppInstance
|
import space.jetbrains.api.runtime.SpaceAppInstance
|
||||||
@ -29,13 +30,26 @@ suspend fun main(args: Array<String>) {
|
|||||||
description = "The key of the exported project"
|
description = "The key of the exported project"
|
||||||
).required()
|
).required()
|
||||||
|
|
||||||
val path: String? by parser.option(ArgType.String, description = "Target directory. Default is './output/project-key'.")
|
val path: String? by parser.option(
|
||||||
|
ArgType.String,
|
||||||
|
description = "Target directory. Default is './output/project-key'."
|
||||||
|
)
|
||||||
|
|
||||||
val folderId: String? by parser.option(
|
val folderId: String? by parser.option(
|
||||||
ArgType.String,
|
ArgType.String,
|
||||||
description = "FolderId for the folder to export. By default uses project root."
|
description = "FolderId for the folder to export. By default uses project root."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
val generateHtml by parser.option(
|
||||||
|
ArgType.Boolean,
|
||||||
|
description = "If defined, generate HTML directory and zip"
|
||||||
|
).default(false)
|
||||||
|
|
||||||
|
val htmlOutputPath by parser.option(
|
||||||
|
ArgType.String,
|
||||||
|
description = "Path for html output directory sibling to 'output' directory"
|
||||||
|
).default("html")
|
||||||
|
|
||||||
val clientId by parser.option(
|
val clientId by parser.option(
|
||||||
ArgType.String,
|
ArgType.String,
|
||||||
description = "Space application client ID (if not defined, use environment value 'space.clientId')"
|
description = "Space application client ID (if not defined, use environment value 'space.clientId')"
|
||||||
@ -48,7 +62,7 @@ suspend fun main(args: Array<String>) {
|
|||||||
|
|
||||||
parser.parse(args)
|
parser.parse(args)
|
||||||
|
|
||||||
val target: Path = path?.let { Path(it) } ?: Path.of("output/$project")
|
val target: Path = path?.let { Path(it) } ?: folderId?.let { Path("output") } ?: Path("output/$project")
|
||||||
|
|
||||||
Files.createDirectories(target)
|
Files.createDirectories(target)
|
||||||
|
|
||||||
@ -69,5 +83,8 @@ suspend fun main(args: Array<String>) {
|
|||||||
ProjectIdentifier.Key(project),
|
ProjectIdentifier.Key(project),
|
||||||
folderId?.let { FolderIdentifier.Id(it) } ?: FolderIdentifier.Root
|
folderId?.let { FolderIdentifier.Id(it) } ?: FolderIdentifier.Root
|
||||||
)
|
)
|
||||||
|
if (generateHtml) {
|
||||||
|
generateHtml(target, target.resolveSibling(htmlOutputPath))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -3,7 +3,6 @@ package ru.mipt.npm.space.documentextractor
|
|||||||
import io.ktor.client.request.header
|
import io.ktor.client.request.header
|
||||||
import io.ktor.client.request.request
|
import io.ktor.client.request.request
|
||||||
import io.ktor.client.request.url
|
import io.ktor.client.request.url
|
||||||
import io.ktor.client.statement.HttpResponse
|
|
||||||
import io.ktor.client.statement.bodyAsChannel
|
import io.ktor.client.statement.bodyAsChannel
|
||||||
import io.ktor.client.statement.readBytes
|
import io.ktor.client.statement.readBytes
|
||||||
import io.ktor.http.HttpHeaders
|
import io.ktor.http.HttpHeaders
|
||||||
@ -23,7 +22,7 @@ import java.nio.file.Path
|
|||||||
import kotlin.io.path.*
|
import kotlin.io.path.*
|
||||||
import kotlin.streams.toList
|
import kotlin.streams.toList
|
||||||
|
|
||||||
private val logger by lazy { LoggerFactory.getLogger("space-extractor") }
|
internal val logger by lazy { LoggerFactory.getLogger("space-extractor") }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract single attachment image
|
* Extract single attachment image
|
||||||
@ -70,18 +69,19 @@ private val regex = """!\[(?<alt>.*)]\(/d/(?<id>.*)\?f=0""".toRegex()
|
|||||||
/**
 * Post-process a markdown document by downloading images and replacing links.
 *
 * Every match of the file-level image `regex` in the file at [path] is rewritten to point to
 * `images/<name>`, and the referenced image is downloaded concurrently via `extractImage`.
 * The image name is the alt text of the reference, or the attachment id when the alt text is blank.
 * The rewritten text is written back to [path].
 *
 * @param path the markdown file to process in place
 */
internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutineScope {
    val documentBody = path.readText()
    val logger = LoggerFactory.getLogger("space-document-extractor")
    logger.info("Processing file $path...")
    val newText = documentBody.replace(regex) {
        val id = it.groups["id"]?.value ?: error("Unexpected reference format: ${it.value}")
        val alt = it.groups["alt"]?.value
        // The alt group matches an empty string for `![](...)`, so a plain null check is not
        // enough: fall back to the id to avoid an empty image file name
        val imageName = alt?.takeUnless { altText -> altText.isBlank() } ?: id
        logger.info("Downloading image $id as images/$imageName")
        launch(Dispatchers.IO) {
            extractImage(path.parent, id, imageName)
        }
        // The closing parenthesis is not part of the match and stays in the document
        "![$alt](images/$imageName"
    }
    path.writeText(newText)
}
|
||||||
@ -117,10 +117,12 @@ internal suspend fun SpaceClient.downloadDocument(
|
|||||||
extractFile(directory, document.id, document.title)
|
extractFile(directory, document.id, document.title)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
is TextDocument -> {
|
is TextDocument -> {
|
||||||
val markdownFilePath = directory.resolve(document.title + ".md")
|
val markdownFilePath = directory.resolve(document.title + ".md")
|
||||||
markdownFilePath.writeText(body.text, Charsets.UTF_8)
|
markdownFilePath.writeText(body.text, Charsets.UTF_8)
|
||||||
}
|
}
|
||||||
|
|
||||||
else -> {
|
else -> {
|
||||||
LoggerFactory.getLogger("space-extractor")
|
LoggerFactory.getLogger("space-extractor")
|
||||||
.warn("Can't extract document ${document.title} with type ${document.bodyType}")
|
.warn("Can't extract document ${document.title} with type ${document.bodyType}")
|
||||||
@ -165,7 +167,7 @@ suspend fun SpaceClient.downloadAndProcessDocumentsInProject(
|
|||||||
directory: Path,
|
directory: Path,
|
||||||
projectId: ProjectIdentifier,
|
projectId: ProjectIdentifier,
|
||||||
rootFolder: FolderIdentifier = FolderIdentifier.Root,
|
rootFolder: FolderIdentifier = FolderIdentifier.Root,
|
||||||
) = withContext(Dispatchers.IO){
|
) = withContext(Dispatchers.IO) {
|
||||||
logger.info("Processing project ${projectId.compactId} to $directory")
|
logger.info("Processing project ${projectId.compactId} to $directory")
|
||||||
downloadDocumentFolder(directory, projectId, rootFolder)
|
downloadDocumentFolder(directory, projectId, rootFolder)
|
||||||
processMarkdownInDirectory(directory)
|
processMarkdownInDirectory(directory)
|
||||||
|
5
src/main/resources/links-to-html.lua
Normal file
5
src/main/resources/links-to-html.lua
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
-- links-to-html.lua
|
||||||
|
-- Pandoc filter for Link elements: point links to Markdown documents at the
-- generated HTML files instead.
--
-- Only relative targets whose path ends in ".md" are rewritten; a trailing
-- "#fragment" or "?query" is preserved. Targets with a URI scheme
-- (https:, mailto:, ...) are returned unchanged, so hosts in the ".md"
-- top-level domain or other names merely containing ".md" are not mangled.
function Link(el)
  -- A leading "scheme:" marks an absolute URI, which is left untouched
  if not string.match(el.target, "^%a[%w+.%-]*:") then
    -- Split the path from the optional "#fragment" / "?query" suffix
    local path, suffix = string.match(el.target, "^([^#?]*)(.*)$")
    -- Parentheses drop the substitution count returned by gsub
    el.target = (string.gsub(path, "%.md$", ".html")) .. suffix
  end
  return el
end
|
Loading…
Reference in New Issue
Block a user