From da6ba9ab0193e2b58ab286f426d4a5fcc932f8df Mon Sep 17 00:00:00 2001 From: Alexander Nozik Date: Mon, 3 Jul 2023 10:03:36 +0300 Subject: [PATCH] Use full link instead of fragments --- build.gradle.kts | 12 ++-- src/main/kotlin/converters.kt | 79 ++++++++++++------------ src/main/kotlin/main.kt | 90 ++++++++++++++++++---------- src/main/kotlin/process.kt | 12 +++- src/main/kotlin/server.kt | 76 +++++++++++++++++++++++ src/main/resources/links-to-html.lua | 1 - 6 files changed, 190 insertions(+), 80 deletions(-) create mode 100644 src/main/kotlin/server.kt diff --git a/build.gradle.kts b/build.gradle.kts index 706861a..459451a 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -7,7 +7,7 @@ plugins { id("com.github.johnrengelman.shadow") version "8.1.1" } -group = "ru.mipt.npm" +group = "center.sciprog" version = "1.0-SNAPSHOT" repositories { @@ -15,9 +15,13 @@ repositories { maven("https://maven.pkg.jetbrains.space/public/p/space/maven") } +val ktorVersion = "2.3.1" + dependencies { - implementation("org.jetbrains:space-sdk-jvm:159302-beta") - implementation("io.ktor:ktor-client-cio-jvm:2.3.1") + implementation("org.jetbrains:space-sdk-jvm:163093-beta") + implementation("io.ktor:ktor-client-cio-jvm:$ktorVersion") + implementation("io.ktor:ktor-server-core-jvm:$ktorVersion") + implementation("io.ktor:ktor-server-cio-jvm:$ktorVersion") implementation("org.jetbrains.kotlinx:kotlinx-cli:0.3.5") implementation("ch.qos.logback:logback-classic:1.4.8") testImplementation(kotlin("test")) @@ -32,7 +36,7 @@ kotlin { } application { - mainClass.set("ru.mipt.npm.space.documentextractor.MainKt") + mainClass.set("center.sciprog.space.documentextractor.MainKt") } tasks.withType{ diff --git a/src/main/kotlin/converters.kt b/src/main/kotlin/converters.kt index 6f44554..9f65c42 100644 --- a/src/main/kotlin/converters.kt +++ b/src/main/kotlin/converters.kt @@ -1,11 +1,11 @@ -package ru.mipt.npm.space.documentextractor +package center.sciprog.space.documentextractor import 
java.nio.file.Path import java.nio.file.StandardOpenOption import kotlin.io.path.* -internal fun prepareScripts(inputPath: Path): Path { - val scriptPath = inputPath.resolveSibling("scripts").resolve("links-to-html.lua") +internal fun prepareScripts(outputPath: Path): Path { + val scriptPath = outputPath.resolveSibling("@scripts").resolve("links-to-html.lua") if (!scriptPath.exists()) { scriptPath.parent.createDirectories() scriptPath.writeText( @@ -18,27 +18,20 @@ internal fun prepareScripts(inputPath: Path): Path { } +/** + * Convert a directory of markdown files to HTML, copying other files as is. + */ @OptIn(ExperimentalPathApi::class) -fun convert(inputPath: Path, outputPath: Path) { - val scriptPath = prepareScripts(inputPath) +fun convertToHtml(inputPath: Path, outputPath: Path, indexFileName: String = "index") { + val scriptPath = prepareScripts(outputPath) inputPath.copyToRecursively(outputPath, followLinks = false) { source: Path, target: Path -> if (source.isRegularFile() && source.extension == "md") { - val docxPath = target.parent.resolve(source.fileName.nameWithoutExtension + ".docx") + val htmlFileName = source.fileName.nameWithoutExtension.let { + if (it == indexFileName) "index" else it + } - ProcessBuilder( - "pandoc", - "--from=markdown", - "--to=docx", - "--lua-filter=${scriptPath.absolute()}", - "--output=${docxPath.absolute()}", - "${source.absolute()}", - - ).also { - logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}") - }.directory(source.parent.toFile()).inheritIO().start().waitFor() - - val htmlPath = target.parent.resolve(source.fileName.nameWithoutExtension + ".html") + val htmlPath = target.parent.resolve("$htmlFileName.html") ProcessBuilder( "pandoc", @@ -50,10 +43,9 @@ fun convert(inputPath: Path, outputPath: Path) { "--lua-filter=${scriptPath.absolute()}", "--output=${htmlPath.absolute()}", "${source.absolute()}", - - ).also { + ).also { logger.info("Running pandoc: ${it.command().joinToString(separator = " 
")}") - }.inheritIO().start().waitFor() + }.directory(source.parent.toFile()).inheritIO().start().waitFor() CopyActionResult.CONTINUE @@ -61,24 +53,33 @@ fun convert(inputPath: Path, outputPath: Path) { source.copyToIgnoringExistingDirectory(target, false) } } +} -// inputPath.walk().filter { it.extension == "md" }.forEach { source -> -// -// val docxPath = -// (outputPath / source.relativize(inputPath)).resolveSibling(source.fileName.nameWithoutExtension + ".docx") -// -// ProcessBuilder( -// "pandoc", -// "--from=markdown", -// "--to=docx", -// "--lua-filter=${scriptPath.absolute()}", -// "--output=${docxPath.absolute()}", -// "${source.absolute()}", -// -// ).also { -// logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}") -// }.inheritIO().start().waitFor() -// } +/** + * Convert a directory of markdown files to docx files, ignoring other files + */ +@OptIn(ExperimentalPathApi::class) +fun convertToDocX(inputPath: Path, outputPath: Path) { + inputPath.copyToRecursively(outputPath, followLinks = false) { source: Path, target: Path -> + if (source.isRegularFile() && source.extension == "md") { + + val docxPath = target.parent.resolve(source.fileName.nameWithoutExtension + ".docx") + + ProcessBuilder( + "pandoc", + "--standalone", + "--from=markdown", + "--to=docx", + "--output=${docxPath.absolute()}", + "${source.absolute()}", + ).also { + logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}") + }.directory(source.parent.toFile()).inheritIO().start().waitFor() + CopyActionResult.CONTINUE + } else { + source.copyToIgnoringExistingDirectory(target, false) + } + } } diff --git a/src/main/kotlin/main.kt b/src/main/kotlin/main.kt index e94562d..612a855 100644 --- a/src/main/kotlin/main.kt +++ b/src/main/kotlin/main.kt @@ -1,4 +1,4 @@ -package ru.mipt.npm.space.documentextractor +package center.sciprog.space.documentextractor import io.ktor.client.engine.cio.CIO import kotlinx.cli.ArgParser @@ -16,39 +16,43 @@ import 
space.jetbrains.api.runtime.types.ProjectIdentifier import java.nio.file.Files import java.nio.file.Path import kotlin.io.path.Path +import kotlin.io.path.createDirectories + +internal val urlRegex = + """(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)\/.*-(?<folderId>.*)${'$'}""".toRegex() suspend fun main(args: Array<String>) { val parser = ArgParser("space-document-extractor") - val spaceUrl by parser.option( + val url by parser.option( ArgType.String, - description = "Url of the space instance like 'https://mipt-npm.jetbrains.space'" - ).required() - - val project by parser.option( - ArgType.String, - description = "The key of the exported project" + description = "Url of the folder like 'https://spc.jetbrains.space/p/mipt-npm/documents/folders?f=SPC-qn7al1VorKp' or 'https://spc.jetbrains.space/p/mipt-npm/documents/SPC/f/SPC-qn7al1VorKp?f=SPC-qn7al1VorKp'" ).required() val path: String? by parser.option( ArgType.String, - description = "Target directory. Default is './output/project-key'." + description = "Target directory. Default is './markdown/'." ) - val folderId: String? by parser.option( - ArgType.String, - description = "FolderId for the folder to export. By default uses project root." - ) - - val convert by parser.option( + val html by parser.option( ArgType.Boolean, - description = "If defined, convert result to HTML and DOCX on download" + description = "Convert Markdown to HTML via pandoc" ).default(false) - val convertOutputPath by parser.option( + val htmlPath by parser.option( ArgType.String, - description = "Path for html and docx output directory sibling to 'output' directory" - ).default("converted") + description = "Path for html output. Default is './html/" + ) + + val docx by parser.option( + ArgType.Boolean, + description = "Convert Markdown to DOCX via pandoc" + ).default(false) + + val docxPath by parser.option( + ArgType.String, + description = "Path for docx output. 
Default is './docx/" + ) val clientId by parser.option( ArgType.String, @@ -62,29 +66,49 @@ suspend fun main(args: Array) { parser.parse(args) - val target: Path = path?.let { Path(it) } ?: folderId?.let { Path("output") } ?: Path("output/$project") + val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern") - Files.createDirectories(target) + val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized") - val space: SpaceClient = SpaceClient( + val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized") + + val folderId = urlMatch.groups["folderId"]?.value ?: error("Folder or document token not recognized") + + val markdownPath: Path = path?.let { Path(it) } ?: Path("markdown/$folderId") + + Files.createDirectories(markdownPath) + + val appInstance = SpaceAppInstance( + clientId ?: System.getProperty("space.clientId"), + clientSecret ?: System.getProperty("space.clientSecret"), + spaceUrl + ) + + + val spaceClient: SpaceClient = SpaceClient( ktorClientForSpace(CIO), - SpaceAppInstance( - clientId ?: System.getProperty("space.clientId"), - clientSecret ?: System.getProperty("space.clientSecret"), - spaceUrl - ), + appInstance, SpaceAuth.ClientCredentials() ) coroutineScope { - println("Processing project \"${space.projects.getProject(ProjectIdentifier.Key(project)).name}\"") - space.downloadAndProcessDocumentsInProject( - target, + println("Processing project \"${spaceClient.projects.getProject(ProjectIdentifier.Key(project)).name}\"") + spaceClient.downloadAndProcessDocumentsInProject( + markdownPath, ProjectIdentifier.Key(project), - folderId?.let { FolderIdentifier.Id(it) } ?: FolderIdentifier.Root + FolderIdentifier.Id(folderId) ) - if (convert) { - convert(target, target.resolveSibling(convertOutputPath)) + if (html) { + val htmlTargetPath = path?.let { Path(it) }?.resolve(htmlPath ?: "html") + ?: Path(htmlPath ?: "html/$folderId") + 
htmlTargetPath.createDirectories() + convertToHtml(markdownPath, htmlTargetPath) + } + if (docx) { + val docxTargetPath = path?.let { Path(it) }?.resolve(docxPath ?: "docx") + ?: Path(docxPath ?: "docx/$folderId") + docxTargetPath.createDirectories() + convertToDocX(markdownPath, docxTargetPath) } } } \ No newline at end of file diff --git a/src/main/kotlin/process.kt b/src/main/kotlin/process.kt index a29f72b..a676da1 100644 --- a/src/main/kotlin/process.kt +++ b/src/main/kotlin/process.kt @@ -1,4 +1,4 @@ -package ru.mipt.npm.space.documentextractor +package center.sciprog.space.documentextractor import io.ktor.client.request.header import io.ktor.client.request.request @@ -67,7 +67,7 @@ internal suspend fun SpaceClient.extractFile( private val regex = """!\[(?.*)]\(/d/(?.*)\?f=0""".toRegex() /** - * Post-process a markdown document by downloading images and replacing links + * Post-process a Markdown document by downloading images and replacing links */ internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutineScope { val documentBody = path.readText() @@ -87,7 +87,11 @@ internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutine } /** - * Download images for markdown documents in the directory + * Download images for Markdown documents in the directory + * + * Images are always stored in the same directory as files themselves + * + * @param recursive turn recursive mode on or off */ internal suspend fun SpaceClient.processMarkdownInDirectory( path: Path, @@ -162,6 +166,8 @@ internal suspend fun SpaceClient.downloadDocumentFolder( /** * Download all documents in a project or a folder with given [rootFolder] and postprocess files + * + * @param directory target directory */ suspend fun SpaceClient.downloadAndProcessDocumentsInProject( directory: Path, diff --git a/src/main/kotlin/server.kt b/src/main/kotlin/server.kt new file mode 100644 index 0000000..63b637e --- /dev/null +++ b/src/main/kotlin/server.kt @@ -0,0 +1,76 @@ 
+package center.sciprog.space.documentextractor +// +//import io.ktor.http.HttpStatusCode +//import io.ktor.server.application.Application +//import io.ktor.server.application.call +//import io.ktor.server.cio.CIO +//import io.ktor.server.engine.embeddedServer +//import io.ktor.server.request.* +//import io.ktor.server.response.respond +//import io.ktor.server.routing.* +//import space.jetbrains.api.runtime.SpaceAppInstance +//import space.jetbrains.api.runtime.SpaceAuth +//import space.jetbrains.api.runtime.SpaceClient +//import space.jetbrains.api.runtime.helpers.readPayload +//import space.jetbrains.api.runtime.helpers.verifyWithPublicKey +//import space.jetbrains.api.runtime.ktorClientForSpace +//import space.jetbrains.api.runtime.types.ListCommandsPayload +//import space.jetbrains.api.runtime.types.MessagePayload +// +//fun Application.configureRouting(spaceClient: SpaceClient) { +// val appInstance = SpaceAppInstance( +// environment.config.property("space.clientId"), +// +// clientSecret ?: System.getProperty("space.clientSecret"), +// spaceUrl +// ) +// +// +// val spaceClient: SpaceClient = SpaceClient( +// ktorClientForSpace(io.ktor.client.engine.cio.CIO), +// appInstance, +// SpaceAuth.ClientCredentials() +// ) +// +// routing { +// post("api/space") { +// // read request body +// val body = call.receiveText() +// +// // read headers required for Space verification +// val signature = call.request.header("X-Space-Public-Key-Signature") +// val timestamp = call.request.header("X-Space-Timestamp")?.toLongOrNull() +// // verifyWithPublicKey gets a key from Space, uses it to generate message hash +// // and compares the generated hash to the hash in a message +// if (signature.isNullOrBlank() || timestamp == null || !spaceClient.verifyWithPublicKey( +// body, timestamp, signature +// ) +// ) { +// call.respond(HttpStatusCode.Unauthorized) +// return@post +// } +// +// // analyze the message payload +// // MessagePayload = user sends a command +// // 
ListCommandsPayload = user types a slash or a char +// when (val payload = readPayload(body)) { +// is MessagePayload -> { +// runHelpCommand(payload) +// call.respond(HttpStatusCode.OK, "") +// } +// +// is ListCommandsPayload -> { +// +// } +// } +// } +// } +//} +// +//fun main() { +// embeddedServer(CIO, port = 8080) { +// val +// +// configureRouting() +// }.start(wait = true) +//} \ No newline at end of file diff --git a/src/main/resources/links-to-html.lua b/src/main/resources/links-to-html.lua index 0e203ea..a02306a 100644 --- a/src/main/resources/links-to-html.lua +++ b/src/main/resources/links-to-html.lua @@ -1,4 +1,3 @@ -# links-to-html.lua function Link(el) el.target = string.gsub(el.target, "%.md", ".html") return el