Use full link instead of fragments

This commit is contained in:
Alexander Nozik 2023-07-03 10:03:36 +03:00
parent 0d158269a0
commit da6ba9ab01
6 changed files with 190 additions and 80 deletions

View File

@ -7,7 +7,7 @@ plugins {
id("com.github.johnrengelman.shadow") version "8.1.1"
}
group = "ru.mipt.npm"
group = "center.sciprog"
version = "1.0-SNAPSHOT"
repositories {
@ -15,9 +15,13 @@ repositories {
maven("https://maven.pkg.jetbrains.space/public/p/space/maven")
}
val ktorVersion = "2.3.1"
dependencies {
implementation("org.jetbrains:space-sdk-jvm:159302-beta")
implementation("io.ktor:ktor-client-cio-jvm:2.3.1")
implementation("org.jetbrains:space-sdk-jvm:163093-beta")
implementation("io.ktor:ktor-client-cio-jvm:$ktorVersion")
implementation("io.ktor:ktor-server-core-jvm:$ktorVersion")
implementation("io.ktor:ktor-server-cio-jvm:$ktorVersion")
implementation("org.jetbrains.kotlinx:kotlinx-cli:0.3.5")
implementation("ch.qos.logback:logback-classic:1.4.8")
testImplementation(kotlin("test"))
@ -32,7 +36,7 @@ kotlin {
}
application {
mainClass.set("ru.mipt.npm.space.documentextractor.MainKt")
mainClass.set("center.sciprog.space.documentextractor.MainKt")
}
tasks.withType<ShadowJar>{

View File

@ -1,11 +1,11 @@
package ru.mipt.npm.space.documentextractor
package center.sciprog.space.documentextractor
import java.nio.file.Path
import java.nio.file.StandardOpenOption
import kotlin.io.path.*
internal fun prepareScripts(inputPath: Path): Path {
val scriptPath = inputPath.resolveSibling("scripts").resolve("links-to-html.lua")
internal fun prepareScripts(outputPath: Path): Path {
val scriptPath = outputPath.resolveSibling("@scripts").resolve("links-to-html.lua")
if (!scriptPath.exists()) {
scriptPath.parent.createDirectories()
scriptPath.writeText(
@ -18,27 +18,20 @@ internal fun prepareScripts(inputPath: Path): Path {
}
/**
* Convert a directory of markdown files to HTML, copying other files as is.
*/
@OptIn(ExperimentalPathApi::class)
fun convert(inputPath: Path, outputPath: Path) {
val scriptPath = prepareScripts(inputPath)
fun convertToHtml(inputPath: Path, outputPath: Path, indexFileName: String = "index") {
val scriptPath = prepareScripts(outputPath)
inputPath.copyToRecursively(outputPath, followLinks = false) { source: Path, target: Path ->
if (source.isRegularFile() && source.extension == "md") {
val docxPath = target.parent.resolve(source.fileName.nameWithoutExtension + ".docx")
val htmlFileName = source.fileName.nameWithoutExtension.let {
if (it == indexFileName) "index" else it
}
ProcessBuilder(
"pandoc",
"--from=markdown",
"--to=docx",
"--lua-filter=${scriptPath.absolute()}",
"--output=${docxPath.absolute()}",
"${source.absolute()}",
).also {
logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
}.directory(source.parent.toFile()).inheritIO().start().waitFor()
val htmlPath = target.parent.resolve(source.fileName.nameWithoutExtension + ".html")
val htmlPath = target.parent.resolve("$htmlFileName.html")
ProcessBuilder(
"pandoc",
@ -50,10 +43,9 @@ fun convert(inputPath: Path, outputPath: Path) {
"--lua-filter=${scriptPath.absolute()}",
"--output=${htmlPath.absolute()}",
"${source.absolute()}",
).also {
logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
}.inheritIO().start().waitFor()
}.directory(source.parent.toFile()).inheritIO().start().waitFor()
CopyActionResult.CONTINUE
@ -61,24 +53,33 @@ fun convert(inputPath: Path, outputPath: Path) {
source.copyToIgnoringExistingDirectory(target, false)
}
}
}
// inputPath.walk().filter { it.extension == "md" }.forEach { source ->
//
// val docxPath =
// (outputPath / source.relativize(inputPath)).resolveSibling(source.fileName.nameWithoutExtension + ".docx")
//
// ProcessBuilder(
// "pandoc",
// "--from=markdown",
// "--to=docx",
// "--lua-filter=${scriptPath.absolute()}",
// "--output=${docxPath.absolute()}",
// "${source.absolute()}",
//
// ).also {
// logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
// }.inheritIO().start().waitFor()
// }
/**
 * Convert a directory tree of Markdown files to DOCX files via pandoc.
 *
 * The tree rooted at [inputPath] is traversed with `copyToRecursively`. Each regular file with the `md`
 * extension is handed to the external `pandoc` executable, which writes a `.docx` file with the same base
 * name next to the corresponding target location under [outputPath]; the Markdown source itself is not
 * copied. Every other entry is copied with `copyToIgnoringExistingDirectory`.
 *
 * A non-zero pandoc exit code is logged as a warning and the traversal continues with the remaining files.
 *
 * @param inputPath root directory containing the Markdown sources
 * @param outputPath root directory that receives the converted files
 */
@OptIn(ExperimentalPathApi::class)
fun convertToDocX(inputPath: Path, outputPath: Path) {
    inputPath.copyToRecursively(outputPath, followLinks = false) { source: Path, target: Path ->
        if (source.isRegularFile() && source.extension == "md") {
            val docxPath = target.parent.resolve(source.fileName.nameWithoutExtension + ".docx")

            val exitCode = ProcessBuilder(
                "pandoc",
                "--standalone",
                "--from=markdown",
                "--to=docx",
                "--output=${docxPath.absolute()}",
                "${source.absolute()}",
            ).also {
                logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
            }
                // pandoc runs with the source file's directory as its working directory
                .directory(source.parent.toFile())
                .inheritIO()
                .start()
                .waitFor()

            // Previously the exit code was discarded, so a failed conversion went unnoticed.
            if (exitCode != 0) {
                logger.warn("pandoc exited with code $exitCode while converting ${source.absolute()}")
            }

            CopyActionResult.CONTINUE
        } else {
            source.copyToIgnoringExistingDirectory(target, false)
        }
    }
}

View File

@ -1,4 +1,4 @@
package ru.mipt.npm.space.documentextractor
package center.sciprog.space.documentextractor
import io.ktor.client.engine.cio.CIO
import kotlinx.cli.ArgParser
@ -16,39 +16,43 @@ import space.jetbrains.api.runtime.types.ProjectIdentifier
import java.nio.file.Files
import java.nio.file.Path
import kotlin.io.path.Path
import kotlin.io.path.createDirectories
internal val urlRegex =
"""(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)\/.*-(?<folderId>.*)${'$'}""".toRegex()
suspend fun main(args: Array<String>) {
val parser = ArgParser("space-document-extractor")
val spaceUrl by parser.option(
val url by parser.option(
ArgType.String,
description = "Url of the space instance like 'https://mipt-npm.jetbrains.space'"
).required()
val project by parser.option(
ArgType.String,
description = "The key of the exported project"
description = "Url of the folder like 'https://spc.jetbrains.space/p/mipt-npm/documents/folders?f=SPC-qn7al1VorKp' or 'https://spc.jetbrains.space/p/mipt-npm/documents/SPC/f/SPC-qn7al1VorKp?f=SPC-qn7al1VorKp'"
).required()
val path: String? by parser.option(
ArgType.String,
description = "Target directory. Default is './output/project-key'."
description = "Target directory. Default is './markdown/<id>'."
)
val folderId: String? by parser.option(
ArgType.String,
description = "FolderId for the folder to export. By default uses project root."
)
val convert by parser.option(
val html by parser.option(
ArgType.Boolean,
description = "If defined, convert result to HTML and DOCX on download"
description = "Convert Markdown to HTML via pandoc"
).default(false)
val convertOutputPath by parser.option(
val htmlPath by parser.option(
ArgType.String,
description = "Path for html and docx output directory sibling to 'output' directory"
).default("converted")
description = "Path for html output. Default is './html/<id>"
)
val docx by parser.option(
ArgType.Boolean,
description = "Convert Markdown to DOCX via pandoc"
).default(false)
val docxPath by parser.option(
ArgType.String,
description = "Path for docx output. Default is './docx/<id>"
)
val clientId by parser.option(
ArgType.String,
@ -62,29 +66,49 @@ suspend fun main(args: Array<String>) {
parser.parse(args)
val target: Path = path?.let { Path(it) } ?: folderId?.let { Path("output") } ?: Path("output/$project")
val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern")
Files.createDirectories(target)
val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")
val space: SpaceClient = SpaceClient(
ktorClientForSpace(CIO),
SpaceAppInstance(
val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")
val folderId = urlMatch.groups["folderId"]?.value ?: error("Folder or document token not recognized")
val markdownPath: Path = path?.let { Path(it) } ?: Path("markdown/$folderId")
Files.createDirectories(markdownPath)
val appInstance = SpaceAppInstance(
clientId ?: System.getProperty("space.clientId"),
clientSecret ?: System.getProperty("space.clientSecret"),
spaceUrl
),
)
val spaceClient: SpaceClient = SpaceClient(
ktorClientForSpace(CIO),
appInstance,
SpaceAuth.ClientCredentials()
)
coroutineScope {
println("Processing project \"${space.projects.getProject(ProjectIdentifier.Key(project)).name}\"")
space.downloadAndProcessDocumentsInProject(
target,
println("Processing project \"${spaceClient.projects.getProject(ProjectIdentifier.Key(project)).name}\"")
spaceClient.downloadAndProcessDocumentsInProject(
markdownPath,
ProjectIdentifier.Key(project),
folderId?.let { FolderIdentifier.Id(it) } ?: FolderIdentifier.Root
FolderIdentifier.Id(folderId)
)
if (convert) {
convert(target, target.resolveSibling(convertOutputPath))
if (html) {
val htmlTargetPath = path?.let { Path(it) }?.resolve(htmlPath ?: "html")
?: Path(htmlPath ?: "html/$folderId")
htmlTargetPath.createDirectories()
convertToHtml(markdownPath, htmlTargetPath)
}
if (docx) {
val docxTargetPath = path?.let { Path(it) }?.resolve(docxPath ?: "docx")
?: Path(docxPath ?: "docx/$folderId")
docxTargetPath.createDirectories()
convertToDocX(markdownPath, docxTargetPath)
}
}
}

View File

@ -1,4 +1,4 @@
package ru.mipt.npm.space.documentextractor
package center.sciprog.space.documentextractor
import io.ktor.client.request.header
import io.ktor.client.request.request
@ -67,7 +67,7 @@ internal suspend fun SpaceClient.extractFile(
private val regex = """!\[(?<alt>.*)]\(/d/(?<id>.*)\?f=0""".toRegex()
/**
* Post-process a markdown document by downloading images and replacing links
* Post-process a Markdown document by downloading images and replacing links
*/
internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutineScope {
val documentBody = path.readText()
@ -87,7 +87,11 @@ internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutine
}
/**
* Download images for markdown documents in the directory
* Download images for Markdown documents in the directory
*
* Images are always stored in the same directory as files themselves
*
* @param recursive turn recursive mode on or off
*/
internal suspend fun SpaceClient.processMarkdownInDirectory(
path: Path,
@ -162,6 +166,8 @@ internal suspend fun SpaceClient.downloadDocumentFolder(
/**
* Download all documents in a project or a folder with given [rootFolder] and postprocess files
*
* @param directory target directory
*/
suspend fun SpaceClient.downloadAndProcessDocumentsInProject(
directory: Path,

76
src/main/kotlin/server.kt Normal file
View File

@ -0,0 +1,76 @@
package center.sciprog.space.documentextractor
//
//import io.ktor.http.HttpStatusCode
//import io.ktor.server.application.Application
//import io.ktor.server.application.call
//import io.ktor.server.cio.CIO
//import io.ktor.server.engine.embeddedServer
//import io.ktor.server.request.*
//import io.ktor.server.response.respond
//import io.ktor.server.routing.*
//import space.jetbrains.api.runtime.SpaceAppInstance
//import space.jetbrains.api.runtime.SpaceAuth
//import space.jetbrains.api.runtime.SpaceClient
//import space.jetbrains.api.runtime.helpers.readPayload
//import space.jetbrains.api.runtime.helpers.verifyWithPublicKey
//import space.jetbrains.api.runtime.ktorClientForSpace
//import space.jetbrains.api.runtime.types.ListCommandsPayload
//import space.jetbrains.api.runtime.types.MessagePayload
//
//fun Application.configureRouting(spaceClient: SpaceClient) {
// val appInstance = SpaceAppInstance(
// environment.config.property("space.clientId"),
//
// clientSecret ?: System.getProperty("space.clientSecret"),
// spaceUrl
// )
//
//
// val spaceClient: SpaceClient = SpaceClient(
// ktorClientForSpace(io.ktor.client.engine.cio.CIO),
// appInstance,
// SpaceAuth.ClientCredentials()
// )
//
// routing {
// post("api/space") {
// // read request body
// val body = call.receiveText()
//
// // read headers required for Space verification
// val signature = call.request.header("X-Space-Public-Key-Signature")
// val timestamp = call.request.header("X-Space-Timestamp")?.toLongOrNull()
// // verifyWithPublicKey gets a key from Space, uses it to generate message hash
// // and compares the generated hash to the hash in a message
// if (signature.isNullOrBlank() || timestamp == null || !spaceClient.verifyWithPublicKey(
// body, timestamp, signature
// )
// ) {
// call.respond(HttpStatusCode.Unauthorized)
// return@post
// }
//
// // analyze the message payload
// // MessagePayload = user sends a command
// // ListCommandsPayload = user types a slash or a char
// when (val payload = readPayload(body)) {
// is MessagePayload -> {
// runHelpCommand(payload)
// call.respond(HttpStatusCode.OK, "")
// }
//
// is ListCommandsPayload -> {
//
// }
// }
// }
// }
//}
//
//fun main() {
// embeddedServer(CIO, port = 8080) {
// val
//
// configureRouting()
// }.start(wait = true)
//}

View File

@ -1,4 +1,3 @@
# links-to-html.lua
function Link(el)
el.target = string.gsub(el.target, "%.md", ".html")
return el