Use full link instead of fragments
This commit is contained in:
parent
0d158269a0
commit
da6ba9ab01
@ -7,7 +7,7 @@ plugins {
|
||||
id("com.github.johnrengelman.shadow") version "8.1.1"
|
||||
}
|
||||
|
||||
group = "ru.mipt.npm"
|
||||
group = "center.sciprog"
|
||||
version = "1.0-SNAPSHOT"
|
||||
|
||||
repositories {
|
||||
@ -15,9 +15,13 @@ repositories {
|
||||
maven("https://maven.pkg.jetbrains.space/public/p/space/maven")
|
||||
}
|
||||
|
||||
val ktorVersion = "2.3.1"
|
||||
|
||||
dependencies {
|
||||
implementation("org.jetbrains:space-sdk-jvm:159302-beta")
|
||||
implementation("io.ktor:ktor-client-cio-jvm:2.3.1")
|
||||
implementation("org.jetbrains:space-sdk-jvm:163093-beta")
|
||||
implementation("io.ktor:ktor-client-cio-jvm:$ktorVersion")
|
||||
implementation("io.ktor:ktor-server-core-jvm:$ktorVersion")
|
||||
implementation("io.ktor:ktor-server-cio-jvm:$ktorVersion")
|
||||
implementation("org.jetbrains.kotlinx:kotlinx-cli:0.3.5")
|
||||
implementation("ch.qos.logback:logback-classic:1.4.8")
|
||||
testImplementation(kotlin("test"))
|
||||
@ -32,7 +36,7 @@ kotlin {
|
||||
}
|
||||
|
||||
application {
|
||||
mainClass.set("ru.mipt.npm.space.documentextractor.MainKt")
|
||||
mainClass.set("center.sciprog.space.documentextractor.MainKt")
|
||||
}
|
||||
|
||||
tasks.withType<ShadowJar>{
|
||||
|
@ -1,11 +1,11 @@
|
||||
package ru.mipt.npm.space.documentextractor
|
||||
package center.sciprog.space.documentextractor
|
||||
|
||||
import java.nio.file.Path
|
||||
import java.nio.file.StandardOpenOption
|
||||
import kotlin.io.path.*
|
||||
|
||||
internal fun prepareScripts(inputPath: Path): Path {
|
||||
val scriptPath = inputPath.resolveSibling("scripts").resolve("links-to-html.lua")
|
||||
internal fun prepareScripts(outputPath: Path): Path {
|
||||
val scriptPath = outputPath.resolveSibling("@scripts").resolve("links-to-html.lua")
|
||||
if (!scriptPath.exists()) {
|
||||
scriptPath.parent.createDirectories()
|
||||
scriptPath.writeText(
|
||||
@ -18,27 +18,20 @@ internal fun prepareScripts(inputPath: Path): Path {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a directory of Markdown files to HTML, copying other files as is.
|
||||
*/
|
||||
@OptIn(ExperimentalPathApi::class)
|
||||
fun convert(inputPath: Path, outputPath: Path) {
|
||||
val scriptPath = prepareScripts(inputPath)
|
||||
fun convertToHtml(inputPath: Path, outputPath: Path, indexFileName: String = "index") {
|
||||
val scriptPath = prepareScripts(outputPath)
|
||||
inputPath.copyToRecursively(outputPath, followLinks = false) { source: Path, target: Path ->
|
||||
if (source.isRegularFile() && source.extension == "md") {
|
||||
|
||||
val docxPath = target.parent.resolve(source.fileName.nameWithoutExtension + ".docx")
|
||||
val htmlFileName = source.fileName.nameWithoutExtension.let {
|
||||
if (it == indexFileName) "index" else it
|
||||
}
|
||||
|
||||
ProcessBuilder(
|
||||
"pandoc",
|
||||
"--from=markdown",
|
||||
"--to=docx",
|
||||
"--lua-filter=${scriptPath.absolute()}",
|
||||
"--output=${docxPath.absolute()}",
|
||||
"${source.absolute()}",
|
||||
|
||||
).also {
|
||||
logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
|
||||
}.directory(source.parent.toFile()).inheritIO().start().waitFor()
|
||||
|
||||
val htmlPath = target.parent.resolve(source.fileName.nameWithoutExtension + ".html")
|
||||
val htmlPath = target.parent.resolve("$htmlFileName.html")
|
||||
|
||||
ProcessBuilder(
|
||||
"pandoc",
|
||||
@ -50,10 +43,9 @@ fun convert(inputPath: Path, outputPath: Path) {
|
||||
"--lua-filter=${scriptPath.absolute()}",
|
||||
"--output=${htmlPath.absolute()}",
|
||||
"${source.absolute()}",
|
||||
|
||||
).also {
|
||||
logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
|
||||
}.inheritIO().start().waitFor()
|
||||
}.directory(source.parent.toFile()).inheritIO().start().waitFor()
|
||||
|
||||
|
||||
CopyActionResult.CONTINUE
|
||||
@ -61,24 +53,33 @@ fun convert(inputPath: Path, outputPath: Path) {
|
||||
source.copyToIgnoringExistingDirectory(target, false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// inputPath.walk().filter { it.extension == "md" }.forEach { source ->
|
||||
//
|
||||
// val docxPath =
|
||||
// (outputPath / source.relativize(inputPath)).resolveSibling(source.fileName.nameWithoutExtension + ".docx")
|
||||
//
|
||||
// ProcessBuilder(
|
||||
// "pandoc",
|
||||
// "--from=markdown",
|
||||
// "--to=docx",
|
||||
// "--lua-filter=${scriptPath.absolute()}",
|
||||
// "--output=${docxPath.absolute()}",
|
||||
// "${source.absolute()}",
|
||||
//
|
||||
// ).also {
|
||||
// logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
|
||||
// }.inheritIO().start().waitFor()
|
||||
// }
|
||||
/**
 * Convert every Markdown file under [inputPath] to a DOCX file under [outputPath] using pandoc.
 *
 * The tree is walked with `copyToRecursively` without following symbolic links. Each regular file with
 * the `md` extension is handed to pandoc and written into the corresponding target directory under the
 * same base name with the `.docx` extension. Every other entry is copied to the output as is.
 *
 * pandoc is started with the source file's directory as its working directory and inherits the
 * standard streams of this process. A non-zero pandoc exit code is logged and the walk continues
 * with the remaining files.
 *
 * @param inputPath path whose content is converted
 * @param outputPath path that receives the converted and copied files
 */
@OptIn(ExperimentalPathApi::class)
fun convertToDocX(inputPath: Path, outputPath: Path) {
    inputPath.copyToRecursively(outputPath, followLinks = false) { source: Path, target: Path ->
        if (source.isRegularFile() && source.extension == "md") {
            val docxPath = target.parent.resolve(source.fileName.nameWithoutExtension + ".docx")

            val exitCode = ProcessBuilder(
                "pandoc",
                "--standalone",
                "--from=markdown",
                "--to=docx",
                "--output=${docxPath.absolute()}",
                "${source.absolute()}",
            ).also {
                logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
            }.directory(source.parent.toFile()).inheritIO().start().waitFor()

            // Surface failed conversions instead of silently dropping the exit code.
            if (exitCode != 0) {
                logger.warn("pandoc exited with code $exitCode while converting ${source.absolute()}")
            }

            CopyActionResult.CONTINUE
        } else {
            source.copyToIgnoringExistingDirectory(target, false)
        }
    }
}
|
||||
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
package ru.mipt.npm.space.documentextractor
|
||||
package center.sciprog.space.documentextractor
|
||||
|
||||
import io.ktor.client.engine.cio.CIO
|
||||
import kotlinx.cli.ArgParser
|
||||
@ -16,39 +16,43 @@ import space.jetbrains.api.runtime.types.ProjectIdentifier
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.Path
|
||||
import kotlin.io.path.createDirectories
|
||||
|
||||
/**
 * Pattern of a Space document or folder URL.
 *
 * Named groups:
 * - `spaceUrl`: scheme and host, i.e. everything before the `/p/` segment;
 * - `projectName`: the path segment directly after `/p/`;
 * - `folderId`: everything after the last `-` in the remainder of the URL.
 */
internal val urlRegex = Regex(
    """(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)\/.*-(?<folderId>.*)${'$'}"""
)
|
||||
|
||||
suspend fun main(args: Array<String>) {
|
||||
val parser = ArgParser("space-document-extractor")
|
||||
|
||||
val spaceUrl by parser.option(
|
||||
val url by parser.option(
|
||||
ArgType.String,
|
||||
description = "Url of the space instance like 'https://mipt-npm.jetbrains.space'"
|
||||
).required()
|
||||
|
||||
val project by parser.option(
|
||||
ArgType.String,
|
||||
description = "The key of the exported project"
|
||||
description = "Url of the folder like 'https://spc.jetbrains.space/p/mipt-npm/documents/folders?f=SPC-qn7al1VorKp' or 'https://spc.jetbrains.space/p/mipt-npm/documents/SPC/f/SPC-qn7al1VorKp?f=SPC-qn7al1VorKp'"
|
||||
).required()
|
||||
|
||||
val path: String? by parser.option(
|
||||
ArgType.String,
|
||||
description = "Target directory. Default is './output/project-key'."
|
||||
description = "Target directory. Default is './markdown/<id>'."
|
||||
)
|
||||
|
||||
val folderId: String? by parser.option(
|
||||
ArgType.String,
|
||||
description = "FolderId for the folder to export. By default uses project root."
|
||||
)
|
||||
|
||||
val convert by parser.option(
|
||||
val html by parser.option(
|
||||
ArgType.Boolean,
|
||||
description = "If defined, convert result to HTML and DOCX on download"
|
||||
description = "Convert Markdown to HTML via pandoc"
|
||||
).default(false)
|
||||
|
||||
val convertOutputPath by parser.option(
|
||||
val htmlPath by parser.option(
|
||||
ArgType.String,
|
||||
description = "Path for html and docx output directory sibling to 'output' directory"
|
||||
).default("converted")
|
||||
description = "Path for html output. Default is './html/<id>"
|
||||
)
|
||||
|
||||
val docx by parser.option(
|
||||
ArgType.Boolean,
|
||||
description = "Convert Markdown to DOCX via pandoc"
|
||||
).default(false)
|
||||
|
||||
val docxPath by parser.option(
|
||||
ArgType.String,
|
||||
description = "Path for docx output. Default is './docx/<id>"
|
||||
)
|
||||
|
||||
val clientId by parser.option(
|
||||
ArgType.String,
|
||||
@ -62,29 +66,49 @@ suspend fun main(args: Array<String>) {
|
||||
|
||||
parser.parse(args)
|
||||
|
||||
val target: Path = path?.let { Path(it) } ?: folderId?.let { Path("output") } ?: Path("output/$project")
|
||||
val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern")
|
||||
|
||||
Files.createDirectories(target)
|
||||
val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")
|
||||
|
||||
val space: SpaceClient = SpaceClient(
|
||||
ktorClientForSpace(CIO),
|
||||
SpaceAppInstance(
|
||||
val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")
|
||||
|
||||
val folderId = urlMatch.groups["folderId"]?.value ?: error("Folder or document token not recognized")
|
||||
|
||||
val markdownPath: Path = path?.let { Path(it) } ?: Path("markdown/$folderId")
|
||||
|
||||
Files.createDirectories(markdownPath)
|
||||
|
||||
val appInstance = SpaceAppInstance(
|
||||
clientId ?: System.getProperty("space.clientId"),
|
||||
clientSecret ?: System.getProperty("space.clientSecret"),
|
||||
spaceUrl
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
val spaceClient: SpaceClient = SpaceClient(
|
||||
ktorClientForSpace(CIO),
|
||||
appInstance,
|
||||
SpaceAuth.ClientCredentials()
|
||||
)
|
||||
|
||||
coroutineScope {
|
||||
println("Processing project \"${space.projects.getProject(ProjectIdentifier.Key(project)).name}\"")
|
||||
space.downloadAndProcessDocumentsInProject(
|
||||
target,
|
||||
println("Processing project \"${spaceClient.projects.getProject(ProjectIdentifier.Key(project)).name}\"")
|
||||
spaceClient.downloadAndProcessDocumentsInProject(
|
||||
markdownPath,
|
||||
ProjectIdentifier.Key(project),
|
||||
folderId?.let { FolderIdentifier.Id(it) } ?: FolderIdentifier.Root
|
||||
FolderIdentifier.Id(folderId)
|
||||
)
|
||||
if (convert) {
|
||||
convert(target, target.resolveSibling(convertOutputPath))
|
||||
if (html) {
|
||||
val htmlTargetPath = path?.let { Path(it) }?.resolve(htmlPath ?: "html")
|
||||
?: Path(htmlPath ?: "html/$folderId")
|
||||
htmlTargetPath.createDirectories()
|
||||
convertToHtml(markdownPath, htmlTargetPath)
|
||||
}
|
||||
if (docx) {
|
||||
val docxTargetPath = path?.let { Path(it) }?.resolve(docxPath ?: "docx")
|
||||
?: Path(docxPath ?: "docx/$folderId")
|
||||
docxTargetPath.createDirectories()
|
||||
convertToDocX(markdownPath, docxTargetPath)
|
||||
}
|
||||
}
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package ru.mipt.npm.space.documentextractor
|
||||
package center.sciprog.space.documentextractor
|
||||
|
||||
import io.ktor.client.request.header
|
||||
import io.ktor.client.request.request
|
||||
@ -67,7 +67,7 @@ internal suspend fun SpaceClient.extractFile(
|
||||
private val regex = """!\[(?<alt>.*)]\(/d/(?<id>.*)\?f=0""".toRegex()
|
||||
|
||||
/**
|
||||
* Post-process a markdown document by downloading images and replacing links
|
||||
* Post-process a Markdown document by downloading images and replacing links
|
||||
*/
|
||||
internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutineScope {
|
||||
val documentBody = path.readText()
|
||||
@ -87,7 +87,11 @@ internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutine
|
||||
}
|
||||
|
||||
/**
|
||||
* Download images for markdown documents in the directory
|
||||
* Download images for Markdown documents in the directory
|
||||
*
|
||||
* Images are always stored in the same directory as files themselves
|
||||
*
|
||||
* @param recursive turn recursive mode on or off
|
||||
*/
|
||||
internal suspend fun SpaceClient.processMarkdownInDirectory(
|
||||
path: Path,
|
||||
@ -162,6 +166,8 @@ internal suspend fun SpaceClient.downloadDocumentFolder(
|
||||
|
||||
/**
|
||||
* Download all documents in a project or a folder with given [rootFolder] and postprocess files
|
||||
*
|
||||
* @param directory target directory
|
||||
*/
|
||||
suspend fun SpaceClient.downloadAndProcessDocumentsInProject(
|
||||
directory: Path,
|
||||
|
76
src/main/kotlin/server.kt
Normal file
76
src/main/kotlin/server.kt
Normal file
@ -0,0 +1,76 @@
|
||||
package center.sciprog.space.documentextractor
|
||||
//
|
||||
//import io.ktor.http.HttpStatusCode
|
||||
//import io.ktor.server.application.Application
|
||||
//import io.ktor.server.application.call
|
||||
//import io.ktor.server.cio.CIO
|
||||
//import io.ktor.server.engine.embeddedServer
|
||||
//import io.ktor.server.request.*
|
||||
//import io.ktor.server.response.respond
|
||||
//import io.ktor.server.routing.*
|
||||
//import space.jetbrains.api.runtime.SpaceAppInstance
|
||||
//import space.jetbrains.api.runtime.SpaceAuth
|
||||
//import space.jetbrains.api.runtime.SpaceClient
|
||||
//import space.jetbrains.api.runtime.helpers.readPayload
|
||||
//import space.jetbrains.api.runtime.helpers.verifyWithPublicKey
|
||||
//import space.jetbrains.api.runtime.ktorClientForSpace
|
||||
//import space.jetbrains.api.runtime.types.ListCommandsPayload
|
||||
//import space.jetbrains.api.runtime.types.MessagePayload
|
||||
//
|
||||
//fun Application.configureRouting(spaceClient: SpaceClient) {
|
||||
// val appInstance = SpaceAppInstance(
|
||||
// environment.config.property("space.clientId"),
|
||||
//
|
||||
// clientSecret ?: System.getProperty("space.clientSecret"),
|
||||
// spaceUrl
|
||||
// )
|
||||
//
|
||||
//
|
||||
// val spaceClient: SpaceClient = SpaceClient(
|
||||
// ktorClientForSpace(io.ktor.client.engine.cio.CIO),
|
||||
// appInstance,
|
||||
// SpaceAuth.ClientCredentials()
|
||||
// )
|
||||
//
|
||||
// routing {
|
||||
// post("api/space") {
|
||||
// // read request body
|
||||
// val body = call.receiveText()
|
||||
//
|
||||
// // read headers required for Space verification
|
||||
// val signature = call.request.header("X-Space-Public-Key-Signature")
|
||||
// val timestamp = call.request.header("X-Space-Timestamp")?.toLongOrNull()
|
||||
// // verifyWithPublicKey gets a key from Space, uses it to generate message hash
|
||||
// // and compares the generated hash to the hash in a message
|
||||
// if (signature.isNullOrBlank() || timestamp == null || !spaceClient.verifyWithPublicKey(
|
||||
// body, timestamp, signature
|
||||
// )
|
||||
// ) {
|
||||
// call.respond(HttpStatusCode.Unauthorized)
|
||||
// return@post
|
||||
// }
|
||||
//
|
||||
// // analyze the message payload
|
||||
// // MessagePayload = user sends a command
|
||||
// // ListCommandsPayload = user types a slash or a char
|
||||
// when (val payload = readPayload(body)) {
|
||||
// is MessagePayload -> {
|
||||
// runHelpCommand(payload)
|
||||
// call.respond(HttpStatusCode.OK, "")
|
||||
// }
|
||||
//
|
||||
// is ListCommandsPayload -> {
|
||||
//
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
//
|
||||
//fun main() {
|
||||
// embeddedServer(CIO, port = 8080) {
|
||||
// val
|
||||
//
|
||||
// configureRouting()
|
||||
// }.start(wait = true)
|
||||
//}
|
@ -1,4 +1,3 @@
|
||||
# links-to-html.lua
|
||||
function Link(el)
|
||||
el.target = string.gsub(el.target, "%.md", ".html")
|
||||
return el
|
||||
|
Loading…
Reference in New Issue
Block a user