Use full link instead of fragments
This commit is contained in:
parent
0d158269a0
commit
da6ba9ab01
@ -7,7 +7,7 @@ plugins {
|
|||||||
id("com.github.johnrengelman.shadow") version "8.1.1"
|
id("com.github.johnrengelman.shadow") version "8.1.1"
|
||||||
}
|
}
|
||||||
|
|
||||||
group = "ru.mipt.npm"
|
group = "center.sciprog"
|
||||||
version = "1.0-SNAPSHOT"
|
version = "1.0-SNAPSHOT"
|
||||||
|
|
||||||
repositories {
|
repositories {
|
||||||
@ -15,9 +15,13 @@ repositories {
|
|||||||
maven("https://maven.pkg.jetbrains.space/public/p/space/maven")
|
maven("https://maven.pkg.jetbrains.space/public/p/space/maven")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
val ktorVersion = "2.3.1"
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation("org.jetbrains:space-sdk-jvm:159302-beta")
|
implementation("org.jetbrains:space-sdk-jvm:163093-beta")
|
||||||
implementation("io.ktor:ktor-client-cio-jvm:2.3.1")
|
implementation("io.ktor:ktor-client-cio-jvm:$ktorVersion")
|
||||||
|
implementation("io.ktor:ktor-server-core-jvm:$ktorVersion")
|
||||||
|
implementation("io.ktor:ktor-server-cio-jvm:$ktorVersion")
|
||||||
implementation("org.jetbrains.kotlinx:kotlinx-cli:0.3.5")
|
implementation("org.jetbrains.kotlinx:kotlinx-cli:0.3.5")
|
||||||
implementation("ch.qos.logback:logback-classic:1.4.8")
|
implementation("ch.qos.logback:logback-classic:1.4.8")
|
||||||
testImplementation(kotlin("test"))
|
testImplementation(kotlin("test"))
|
||||||
@ -32,7 +36,7 @@ kotlin {
|
|||||||
}
|
}
|
||||||
|
|
||||||
application {
|
application {
|
||||||
mainClass.set("ru.mipt.npm.space.documentextractor.MainKt")
|
mainClass.set("center.sciprog.space.documentextractor.MainKt")
|
||||||
}
|
}
|
||||||
|
|
||||||
tasks.withType<ShadowJar>{
|
tasks.withType<ShadowJar>{
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
package ru.mipt.npm.space.documentextractor
|
package center.sciprog.space.documentextractor
|
||||||
|
|
||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
import java.nio.file.StandardOpenOption
|
import java.nio.file.StandardOpenOption
|
||||||
import kotlin.io.path.*
|
import kotlin.io.path.*
|
||||||
|
|
||||||
internal fun prepareScripts(inputPath: Path): Path {
|
internal fun prepareScripts(outputPath: Path): Path {
|
||||||
val scriptPath = inputPath.resolveSibling("scripts").resolve("links-to-html.lua")
|
val scriptPath = outputPath.resolveSibling("@scripts").resolve("links-to-html.lua")
|
||||||
if (!scriptPath.exists()) {
|
if (!scriptPath.exists()) {
|
||||||
scriptPath.parent.createDirectories()
|
scriptPath.parent.createDirectories()
|
||||||
scriptPath.writeText(
|
scriptPath.writeText(
|
||||||
@ -18,27 +18,20 @@ internal fun prepareScripts(inputPath: Path): Path {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a directory of markdown files to html, copying other files as is.
|
||||||
|
*/
|
||||||
@OptIn(ExperimentalPathApi::class)
|
@OptIn(ExperimentalPathApi::class)
|
||||||
fun convert(inputPath: Path, outputPath: Path) {
|
fun convertToHtml(inputPath: Path, outputPath: Path, indexFileName: String = "index") {
|
||||||
val scriptPath = prepareScripts(inputPath)
|
val scriptPath = prepareScripts(outputPath)
|
||||||
inputPath.copyToRecursively(outputPath, followLinks = false) { source: Path, target: Path ->
|
inputPath.copyToRecursively(outputPath, followLinks = false) { source: Path, target: Path ->
|
||||||
if (source.isRegularFile() && source.extension == "md") {
|
if (source.isRegularFile() && source.extension == "md") {
|
||||||
|
|
||||||
val docxPath = target.parent.resolve(source.fileName.nameWithoutExtension + ".docx")
|
val htmlFileName = source.fileName.nameWithoutExtension.let {
|
||||||
|
if (it == indexFileName) "index" else it
|
||||||
|
}
|
||||||
|
|
||||||
ProcessBuilder(
|
val htmlPath = target.parent.resolve("$htmlFileName.html")
|
||||||
"pandoc",
|
|
||||||
"--from=markdown",
|
|
||||||
"--to=docx",
|
|
||||||
"--lua-filter=${scriptPath.absolute()}",
|
|
||||||
"--output=${docxPath.absolute()}",
|
|
||||||
"${source.absolute()}",
|
|
||||||
|
|
||||||
).also {
|
|
||||||
logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
|
|
||||||
}.directory(source.parent.toFile()).inheritIO().start().waitFor()
|
|
||||||
|
|
||||||
val htmlPath = target.parent.resolve(source.fileName.nameWithoutExtension + ".html")
|
|
||||||
|
|
||||||
ProcessBuilder(
|
ProcessBuilder(
|
||||||
"pandoc",
|
"pandoc",
|
||||||
@ -50,10 +43,9 @@ fun convert(inputPath: Path, outputPath: Path) {
|
|||||||
"--lua-filter=${scriptPath.absolute()}",
|
"--lua-filter=${scriptPath.absolute()}",
|
||||||
"--output=${htmlPath.absolute()}",
|
"--output=${htmlPath.absolute()}",
|
||||||
"${source.absolute()}",
|
"${source.absolute()}",
|
||||||
|
|
||||||
).also {
|
).also {
|
||||||
logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
|
logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
|
||||||
}.inheritIO().start().waitFor()
|
}.directory(source.parent.toFile()).inheritIO().start().waitFor()
|
||||||
|
|
||||||
|
|
||||||
CopyActionResult.CONTINUE
|
CopyActionResult.CONTINUE
|
||||||
@ -61,24 +53,33 @@ fun convert(inputPath: Path, outputPath: Path) {
|
|||||||
source.copyToIgnoringExistingDirectory(target, false)
|
source.copyToIgnoringExistingDirectory(target, false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// inputPath.walk().filter { it.extension == "md" }.forEach { source ->
|
/**
|
||||||
//
|
* Convert a directory of markdown files to docx files, copying other files as is
|
||||||
// val docxPath =
|
*/
|
||||||
// (outputPath / source.relativize(inputPath)).resolveSibling(source.fileName.nameWithoutExtension + ".docx")
|
@OptIn(ExperimentalPathApi::class)
|
||||||
//
|
fun convertToDocX(inputPath: Path, outputPath: Path) {
|
||||||
// ProcessBuilder(
|
inputPath.copyToRecursively(outputPath, followLinks = false) { source: Path, target: Path ->
|
||||||
// "pandoc",
|
if (source.isRegularFile() && source.extension == "md") {
|
||||||
// "--from=markdown",
|
|
||||||
// "--to=docx",
|
val docxPath = target.parent.resolve(source.fileName.nameWithoutExtension + ".docx")
|
||||||
// "--lua-filter=${scriptPath.absolute()}",
|
|
||||||
// "--output=${docxPath.absolute()}",
|
ProcessBuilder(
|
||||||
// "${source.absolute()}",
|
"pandoc",
|
||||||
//
|
"--standalone",
|
||||||
// ).also {
|
"--from=markdown",
|
||||||
// logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
|
"--to=docx",
|
||||||
// }.inheritIO().start().waitFor()
|
"--output=${docxPath.absolute()}",
|
||||||
// }
|
"${source.absolute()}",
|
||||||
|
).also {
|
||||||
|
logger.info("Running pandoc: ${it.command().joinToString(separator = " ")}")
|
||||||
|
}.directory(source.parent.toFile()).inheritIO().start().waitFor()
|
||||||
|
CopyActionResult.CONTINUE
|
||||||
|
} else {
|
||||||
|
source.copyToIgnoringExistingDirectory(target, false)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
package ru.mipt.npm.space.documentextractor
|
package center.sciprog.space.documentextractor
|
||||||
|
|
||||||
import io.ktor.client.engine.cio.CIO
|
import io.ktor.client.engine.cio.CIO
|
||||||
import kotlinx.cli.ArgParser
|
import kotlinx.cli.ArgParser
|
||||||
@ -16,39 +16,43 @@ import space.jetbrains.api.runtime.types.ProjectIdentifier
|
|||||||
import java.nio.file.Files
|
import java.nio.file.Files
|
||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
import kotlin.io.path.Path
|
import kotlin.io.path.Path
|
||||||
|
import kotlin.io.path.createDirectories
|
||||||
|
|
||||||
|
internal val urlRegex =
|
||||||
|
"""(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)\/.*-(?<folderId>.*)${'$'}""".toRegex()
|
||||||
|
|
||||||
suspend fun main(args: Array<String>) {
|
suspend fun main(args: Array<String>) {
|
||||||
val parser = ArgParser("space-document-extractor")
|
val parser = ArgParser("space-document-extractor")
|
||||||
|
|
||||||
val spaceUrl by parser.option(
|
val url by parser.option(
|
||||||
ArgType.String,
|
ArgType.String,
|
||||||
description = "Url of the space instance like 'https://mipt-npm.jetbrains.space'"
|
description = "Url of the folder like 'https://spc.jetbrains.space/p/mipt-npm/documents/folders?f=SPC-qn7al1VorKp' or 'https://spc.jetbrains.space/p/mipt-npm/documents/SPC/f/SPC-qn7al1VorKp?f=SPC-qn7al1VorKp'"
|
||||||
).required()
|
|
||||||
|
|
||||||
val project by parser.option(
|
|
||||||
ArgType.String,
|
|
||||||
description = "The key of the exported project"
|
|
||||||
).required()
|
).required()
|
||||||
|
|
||||||
val path: String? by parser.option(
|
val path: String? by parser.option(
|
||||||
ArgType.String,
|
ArgType.String,
|
||||||
description = "Target directory. Default is './output/project-key'."
|
description = "Target directory. Default is './markdown/<id>'."
|
||||||
)
|
)
|
||||||
|
|
||||||
val folderId: String? by parser.option(
|
val html by parser.option(
|
||||||
ArgType.String,
|
|
||||||
description = "FolderId for the folder to export. By default uses project root."
|
|
||||||
)
|
|
||||||
|
|
||||||
val convert by parser.option(
|
|
||||||
ArgType.Boolean,
|
ArgType.Boolean,
|
||||||
description = "If defined, convert result to HTML and DOCX on download"
|
description = "Convert Markdown to HTML via pandoc"
|
||||||
).default(false)
|
).default(false)
|
||||||
|
|
||||||
val convertOutputPath by parser.option(
|
val htmlPath by parser.option(
|
||||||
ArgType.String,
|
ArgType.String,
|
||||||
description = "Path for html and docx output directory sibling to 'output' directory"
|
description = "Path for html output. Default is './html/<id>"
|
||||||
).default("converted")
|
)
|
||||||
|
|
||||||
|
val docx by parser.option(
|
||||||
|
ArgType.Boolean,
|
||||||
|
description = "Convert Markdown to DOCX via pandoc"
|
||||||
|
).default(false)
|
||||||
|
|
||||||
|
val docxPath by parser.option(
|
||||||
|
ArgType.String,
|
||||||
|
description = "Path for docx output. Default is './docx/<id>"
|
||||||
|
)
|
||||||
|
|
||||||
val clientId by parser.option(
|
val clientId by parser.option(
|
||||||
ArgType.String,
|
ArgType.String,
|
||||||
@ -62,29 +66,49 @@ suspend fun main(args: Array<String>) {
|
|||||||
|
|
||||||
parser.parse(args)
|
parser.parse(args)
|
||||||
|
|
||||||
val target: Path = path?.let { Path(it) } ?: folderId?.let { Path("output") } ?: Path("output/$project")
|
val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern")
|
||||||
|
|
||||||
Files.createDirectories(target)
|
val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")
|
||||||
|
|
||||||
val space: SpaceClient = SpaceClient(
|
val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")
|
||||||
ktorClientForSpace(CIO),
|
|
||||||
SpaceAppInstance(
|
val folderId = urlMatch.groups["folderId"]?.value ?: error("Folder or document token not recognized")
|
||||||
|
|
||||||
|
val markdownPath: Path = path?.let { Path(it) } ?: Path("markdown/$folderId")
|
||||||
|
|
||||||
|
Files.createDirectories(markdownPath)
|
||||||
|
|
||||||
|
val appInstance = SpaceAppInstance(
|
||||||
clientId ?: System.getProperty("space.clientId"),
|
clientId ?: System.getProperty("space.clientId"),
|
||||||
clientSecret ?: System.getProperty("space.clientSecret"),
|
clientSecret ?: System.getProperty("space.clientSecret"),
|
||||||
spaceUrl
|
spaceUrl
|
||||||
),
|
)
|
||||||
|
|
||||||
|
|
||||||
|
val spaceClient: SpaceClient = SpaceClient(
|
||||||
|
ktorClientForSpace(CIO),
|
||||||
|
appInstance,
|
||||||
SpaceAuth.ClientCredentials()
|
SpaceAuth.ClientCredentials()
|
||||||
)
|
)
|
||||||
|
|
||||||
coroutineScope {
|
coroutineScope {
|
||||||
println("Processing project \"${space.projects.getProject(ProjectIdentifier.Key(project)).name}\"")
|
println("Processing project \"${spaceClient.projects.getProject(ProjectIdentifier.Key(project)).name}\"")
|
||||||
space.downloadAndProcessDocumentsInProject(
|
spaceClient.downloadAndProcessDocumentsInProject(
|
||||||
target,
|
markdownPath,
|
||||||
ProjectIdentifier.Key(project),
|
ProjectIdentifier.Key(project),
|
||||||
folderId?.let { FolderIdentifier.Id(it) } ?: FolderIdentifier.Root
|
FolderIdentifier.Id(folderId)
|
||||||
)
|
)
|
||||||
if (convert) {
|
if (html) {
|
||||||
convert(target, target.resolveSibling(convertOutputPath))
|
val htmlTargetPath = path?.let { Path(it) }?.resolve(htmlPath ?: "html")
|
||||||
|
?: Path(htmlPath ?: "html/$folderId")
|
||||||
|
htmlTargetPath.createDirectories()
|
||||||
|
convertToHtml(markdownPath, htmlTargetPath)
|
||||||
|
}
|
||||||
|
if (docx) {
|
||||||
|
val docxTargetPath = path?.let { Path(it) }?.resolve(docxPath ?: "docx")
|
||||||
|
?: Path(docxPath ?: "docx/$folderId")
|
||||||
|
docxTargetPath.createDirectories()
|
||||||
|
convertToDocX(markdownPath, docxTargetPath)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,4 +1,4 @@
|
|||||||
package ru.mipt.npm.space.documentextractor
|
package center.sciprog.space.documentextractor
|
||||||
|
|
||||||
import io.ktor.client.request.header
|
import io.ktor.client.request.header
|
||||||
import io.ktor.client.request.request
|
import io.ktor.client.request.request
|
||||||
@ -67,7 +67,7 @@ internal suspend fun SpaceClient.extractFile(
|
|||||||
private val regex = """!\[(?<alt>.*)]\(/d/(?<id>.*)\?f=0""".toRegex()
|
private val regex = """!\[(?<alt>.*)]\(/d/(?<id>.*)\?f=0""".toRegex()
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Post-process a markdown document by downloading images and replacing links
|
* Post-process a Markdown document by downloading images and replacing links
|
||||||
*/
|
*/
|
||||||
internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutineScope {
|
internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutineScope {
|
||||||
val documentBody = path.readText()
|
val documentBody = path.readText()
|
||||||
@ -87,7 +87,11 @@ internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutine
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Download images for markdown documents in the directory
|
* Download images for Markdown documents in the directory
|
||||||
|
*
|
||||||
|
* Images are always stored in the same directory as files themselves
|
||||||
|
*
|
||||||
|
* @param recursive turn recursive mode on or off
|
||||||
*/
|
*/
|
||||||
internal suspend fun SpaceClient.processMarkdownInDirectory(
|
internal suspend fun SpaceClient.processMarkdownInDirectory(
|
||||||
path: Path,
|
path: Path,
|
||||||
@ -162,6 +166,8 @@ internal suspend fun SpaceClient.downloadDocumentFolder(
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Download all documents in a project or a folder with given [rootFolder] and postprocess files
|
* Download all documents in a project or a folder with given [rootFolder] and postprocess files
|
||||||
|
*
|
||||||
|
* @param directory target directory
|
||||||
*/
|
*/
|
||||||
suspend fun SpaceClient.downloadAndProcessDocumentsInProject(
|
suspend fun SpaceClient.downloadAndProcessDocumentsInProject(
|
||||||
directory: Path,
|
directory: Path,
|
||||||
|
76
src/main/kotlin/server.kt
Normal file
76
src/main/kotlin/server.kt
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
package center.sciprog.space.documentextractor
|
||||||
|
//
|
||||||
|
//import io.ktor.http.HttpStatusCode
|
||||||
|
//import io.ktor.server.application.Application
|
||||||
|
//import io.ktor.server.application.call
|
||||||
|
//import io.ktor.server.cio.CIO
|
||||||
|
//import io.ktor.server.engine.embeddedServer
|
||||||
|
//import io.ktor.server.request.*
|
||||||
|
//import io.ktor.server.response.respond
|
||||||
|
//import io.ktor.server.routing.*
|
||||||
|
//import space.jetbrains.api.runtime.SpaceAppInstance
|
||||||
|
//import space.jetbrains.api.runtime.SpaceAuth
|
||||||
|
//import space.jetbrains.api.runtime.SpaceClient
|
||||||
|
//import space.jetbrains.api.runtime.helpers.readPayload
|
||||||
|
//import space.jetbrains.api.runtime.helpers.verifyWithPublicKey
|
||||||
|
//import space.jetbrains.api.runtime.ktorClientForSpace
|
||||||
|
//import space.jetbrains.api.runtime.types.ListCommandsPayload
|
||||||
|
//import space.jetbrains.api.runtime.types.MessagePayload
|
||||||
|
//
|
||||||
|
//fun Application.configureRouting(spaceClient: SpaceClient) {
|
||||||
|
// val appInstance = SpaceAppInstance(
|
||||||
|
// environment.config.property("space.clientId"),
|
||||||
|
//
|
||||||
|
// clientSecret ?: System.getProperty("space.clientSecret"),
|
||||||
|
// spaceUrl
|
||||||
|
// )
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// val spaceClient: SpaceClient = SpaceClient(
|
||||||
|
// ktorClientForSpace(io.ktor.client.engine.cio.CIO),
|
||||||
|
// appInstance,
|
||||||
|
// SpaceAuth.ClientCredentials()
|
||||||
|
// )
|
||||||
|
//
|
||||||
|
// routing {
|
||||||
|
// post("api/space") {
|
||||||
|
// // read request body
|
||||||
|
// val body = call.receiveText()
|
||||||
|
//
|
||||||
|
// // read headers required for Space verification
|
||||||
|
// val signature = call.request.header("X-Space-Public-Key-Signature")
|
||||||
|
// val timestamp = call.request.header("X-Space-Timestamp")?.toLongOrNull()
|
||||||
|
// // verifyWithPublicKey gets a key from Space, uses it to generate message hash
|
||||||
|
// // and compares the generated hash to the hash in a message
|
||||||
|
// if (signature.isNullOrBlank() || timestamp == null || !spaceClient.verifyWithPublicKey(
|
||||||
|
// body, timestamp, signature
|
||||||
|
// )
|
||||||
|
// ) {
|
||||||
|
// call.respond(HttpStatusCode.Unauthorized)
|
||||||
|
// return@post
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // analyze the message payload
|
||||||
|
// // MessagePayload = user sends a command
|
||||||
|
// // ListCommandsPayload = user types a slash or a char
|
||||||
|
// when (val payload = readPayload(body)) {
|
||||||
|
// is MessagePayload -> {
|
||||||
|
// runHelpCommand(payload)
|
||||||
|
// call.respond(HttpStatusCode.OK, "")
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// is ListCommandsPayload -> {
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
//
|
||||||
|
//fun main() {
|
||||||
|
// embeddedServer(CIO, port = 8080) {
|
||||||
|
// val
|
||||||
|
//
|
||||||
|
// configureRouting()
|
||||||
|
// }.start(wait = true)
|
||||||
|
//}
|
@ -1,4 +1,3 @@
|
|||||||
# links-to-html.lua
|
|
||||||
function Link(el)
|
function Link(el)
|
||||||
el.target = string.gsub(el.target, "%.md", ".html")
|
el.target = string.gsub(el.target, "%.md", ".html")
|
||||||
return el
|
return el
|
||||||
|
Loading…
Reference in New Issue
Block a user