Add repositories export
This commit is contained in:
parent
683f46435f
commit
c2a5ace4c4
@ -15,15 +15,16 @@ repositories {
|
||||
maven("https://maven.pkg.jetbrains.space/public/p/space/maven")
|
||||
}
|
||||
|
||||
val ktorVersion = "2.3.1"
|
||||
val ktorVersion = "2.3.3"
|
||||
|
||||
dependencies {
|
||||
implementation("org.jetbrains:space-sdk-jvm:163093-beta")
|
||||
implementation("org.jetbrains:space-sdk-jvm:167818-beta")
|
||||
implementation("io.ktor:ktor-client-cio-jvm:$ktorVersion")
|
||||
implementation("io.ktor:ktor-server-core-jvm:$ktorVersion")
|
||||
implementation("io.ktor:ktor-server-cio-jvm:$ktorVersion")
|
||||
implementation("org.jetbrains.kotlinx:kotlinx-cli:0.3.5")
|
||||
implementation("ch.qos.logback:logback-classic:1.4.8")
|
||||
implementation("org.eclipse.jgit:org.eclipse.jgit:6.6.0.202305301015-r")
|
||||
testImplementation(kotlin("test"))
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,3 @@
|
||||
|
||||
rootProject.name = "space-document-extractor"
|
||||
rootProject.name = "space-export"
|
||||
|
||||
|
@ -115,16 +115,23 @@ internal suspend fun SpaceClient.downloadDocument(
|
||||
directory: Path,
|
||||
document: Document,
|
||||
) = coroutineScope {
|
||||
when (val body = document.documentBody) {
|
||||
is FileDocumentBody -> {
|
||||
when (val body = document.body) {
|
||||
is FileDocumentHttpBody -> {
|
||||
launch(Dispatchers.IO) {
|
||||
extractFile(directory, document.id, document.title)
|
||||
}
|
||||
}
|
||||
|
||||
is TextDocument -> {
|
||||
is TextDocumentHttpBody -> {
|
||||
val markdownFilePath = directory.resolve(document.title + ".md")
|
||||
markdownFilePath.writeText(body.text, Charsets.UTF_8)
|
||||
val content = body.docContent
|
||||
if (content is MdTextDocumentContent) {
|
||||
markdownFilePath.writeText(content.markdown, Charsets.UTF_8)
|
||||
} else {
|
||||
launch(Dispatchers.IO) {
|
||||
extractFile(directory, document.id, document.title)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
else -> {
|
||||
@ -151,7 +158,8 @@ internal suspend fun SpaceClient.downloadDocumentFolder(
|
||||
val document = projects.documents.getDocument(projectId, it.id) {
|
||||
id()
|
||||
title()
|
||||
documentBody()
|
||||
// documentBody()
|
||||
body()
|
||||
bodyType()
|
||||
}
|
||||
downloadDocument(directory, document)
|
49
src/main/kotlin/extractRepos.kt
Normal file
49
src/main/kotlin/extractRepos.kt
Normal file
@ -0,0 +1,49 @@
|
||||
package center.sciprog.space.documentextractor
|
||||
|
||||
import space.jetbrains.api.runtime.SpaceClient
|
||||
import space.jetbrains.api.runtime.resources.projects
|
||||
import space.jetbrains.api.runtime.types.ProjectIdentifier
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.createDirectories
|
||||
import kotlin.io.path.div
|
||||
|
||||
/**
 * Clones the git repository at [url] by running the external `git clone` command
 * with [parentDirectory] as its working directory.
 *
 * The child process inherits this process's standard streams, and the call blocks
 * the current thread until the child process exits.
 *
 * @param parentDirectory working directory in which `git clone` is started.
 * @param url repository URL passed to `git clone`.
 * @throws IllegalStateException if `git clone` exits with a non-zero code.
 */
private fun cloneRepo(
    parentDirectory: Path,
    url: String,
) {
    logger.info("Cloning $url to $parentDirectory")

    val exitCode = ProcessBuilder("git", "clone", url)
        .directory(parentDirectory.toAbsolutePath().toFile())
        .inheritIO()
        .start()
        .waitFor()

    // The exit code is the only failure signal the child process gives us;
    // surface it instead of silently treating a failed clone as success.
    check(exitCode == 0) { "git clone of $url exited with code $exitCode" }

    // Alternative kept for reference: in-process clone via JGit.
    //    Git.cloneRepository()
    //        .setURI(url)
    //        .setDirectory(parentDirectory.resolve(name).toFile())
    //        .call()
}
|
||||
|
||||
/**
 * Clones every repository of the project [projectId] into [directory].
 *
 * The repository list is requested from the project, then the SSH URL of each
 * repository is resolved and handed to [cloneRepo]. A failure for one repository
 * is logged and does not stop the remaining repositories from being processed.
 *
 * @param directory directory passed to [cloneRepo] as the parent directory.
 * @param projectId identifier of the project whose repositories are cloned.
 */
suspend fun SpaceClient.extractRepos(
    directory: Path,
    projectId: ProjectIdentifier,
) {
    val repos = projects.getProject(
        project = projectId
    ) {
        repos()
    }.repos

    repos.forEach { repo ->
        try {
            val url = projects.repositories.url(
                project = projectId,
                repository = repo.name
            ).sshUrl ?: error("Could not resolve sshUrl for ${repo.name}")
            cloneRepo(parentDirectory = directory, url = url)
        } catch (ex: kotlin.coroutines.cancellation.CancellationException) {
            // Cancellation must propagate; swallowing it would keep a cancelled coroutine running.
            throw ex
        } catch (ex: Exception) {
            // Best effort: report which repository failed and continue with the next one.
            logger.error("Failed to clone repository ${repo.name}", ex)
        }
    }
}
|
@ -1,11 +1,11 @@
|
||||
@file:OptIn(ExperimentalCli::class)
|
||||
|
||||
package center.sciprog.space.documentextractor
|
||||
|
||||
import io.ktor.client.engine.cio.CIO
|
||||
import kotlinx.cli.ArgParser
|
||||
import kotlinx.cli.ArgType
|
||||
import kotlinx.cli.default
|
||||
import kotlinx.cli.required
|
||||
import kotlinx.coroutines.coroutineScope
|
||||
import kotlinx.cli.*
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import space.jetbrains.api.runtime.SpaceAppInstance
|
||||
import space.jetbrains.api.runtime.SpaceAuth
|
||||
import space.jetbrains.api.runtime.SpaceClient
|
||||
@ -17,98 +17,225 @@ import java.nio.file.Files
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.Path
|
||||
import kotlin.io.path.createDirectories
|
||||
import kotlin.io.path.div
|
||||
|
||||
internal val urlRegex =
|
||||
"""(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)\/.*-(?<folderId>.*)${'$'}""".toRegex()
|
||||
private abstract class ExtractCommand(name: String, description: String) : Subcommand(name, description) {
|
||||
|
||||
suspend fun main(args: Array<String>) {
|
||||
val parser = ArgParser("space-document-extractor")
|
||||
|
||||
val url by parser.option(
|
||||
val url by argument(
|
||||
ArgType.String,
|
||||
description = "Url of the folder like 'https://spc.jetbrains.space/p/mipt-npm/documents/folders?f=SPC-qn7al1VorKp' or 'https://spc.jetbrains.space/p/mipt-npm/documents/SPC/f/SPC-qn7al1VorKp?f=SPC-qn7al1VorKp'"
|
||||
).required()
|
||||
|
||||
val path: String? by parser.option(
|
||||
ArgType.String,
|
||||
description = "Target directory. Default is './markdown/<id>'."
|
||||
)
|
||||
|
||||
val html by parser.option(
|
||||
ArgType.Boolean,
|
||||
description = "Convert Markdown to HTML via pandoc"
|
||||
).default(false)
|
||||
|
||||
val htmlPath by parser.option(
|
||||
ArgType.String,
|
||||
description = "Path for html output. Default is './html/<id>"
|
||||
)
|
||||
|
||||
val docx by parser.option(
|
||||
ArgType.Boolean,
|
||||
description = "Convert Markdown to DOCX via pandoc"
|
||||
).default(false)
|
||||
|
||||
val docxPath by parser.option(
|
||||
ArgType.String,
|
||||
description = "Path for docx output. Default is './docx/<id>"
|
||||
)
|
||||
|
||||
val clientId by parser.option(
|
||||
val clientId by option(
|
||||
ArgType.String,
|
||||
description = "Space application client ID (if not defined, use environment value 'space.clientId')"
|
||||
)
|
||||
|
||||
val clientSecret by parser.option(
|
||||
val clientSecret by option(
|
||||
ArgType.String,
|
||||
description = "Space application client secret (if not defined, use environment value 'space.clientSecret')"
|
||||
)
|
||||
}
|
||||
|
||||
parser.parse(args)
|
||||
private class ExtractDocumentsCommand : ExtractCommand("docs", "Extract documents") {
|
||||
|
||||
val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern")
|
||||
|
||||
val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")
|
||||
|
||||
val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")
|
||||
|
||||
val folderId = urlMatch.groups["folderId"]?.value ?: error("Folder or document token not recognized")
|
||||
|
||||
val markdownPath: Path = path?.let { Path(it) } ?: Path("markdown/$folderId")
|
||||
|
||||
Files.createDirectories(markdownPath)
|
||||
|
||||
val appInstance = SpaceAppInstance(
|
||||
clientId ?: System.getProperty("space.clientId"),
|
||||
clientSecret ?: System.getProperty("space.clientSecret"),
|
||||
spaceUrl
|
||||
val path: String? by option(
|
||||
ArgType.String,
|
||||
description = "Target directory. Default is './documents/<id>'."
|
||||
)
|
||||
|
||||
val html by option(
|
||||
ArgType.Boolean,
|
||||
description = "Convert Markdown to HTML via pandoc"
|
||||
).default(false)
|
||||
|
||||
val spaceClient: SpaceClient = SpaceClient(
|
||||
ktorClientForSpace(CIO),
|
||||
appInstance,
|
||||
SpaceAuth.ClientCredentials()
|
||||
val htmlPath by option(
|
||||
ArgType.String,
|
||||
description = "Path for html output. Default is './documents/@html/<id>"
|
||||
)
|
||||
|
||||
coroutineScope {
|
||||
println("Processing project \"${spaceClient.projects.getProject(ProjectIdentifier.Key(project)).name}\"")
|
||||
spaceClient.downloadAndProcessDocumentsInProject(
|
||||
markdownPath,
|
||||
ProjectIdentifier.Key(project),
|
||||
FolderIdentifier.Id(folderId)
|
||||
val docx by option(
|
||||
ArgType.Boolean,
|
||||
description = "Convert Markdown to DOCX via pandoc"
|
||||
).default(false)
|
||||
|
||||
val docxPath by option(
|
||||
ArgType.String,
|
||||
description = "Path for docx output. Default is './documents/@docx/<id>"
|
||||
)
|
||||
|
||||
val exportRepos by option(
|
||||
ArgType.Boolean,
|
||||
description = "Export all repositories in the project."
|
||||
).default(false)
|
||||
|
||||
|
||||
override fun execute() {
|
||||
val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern")
|
||||
|
||||
val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")
|
||||
|
||||
val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")
|
||||
|
||||
val folderId: String? = urlMatch.groups["folderId"]?.value
|
||||
|
||||
val markdownPath: Path = path?.let { Path(it) } ?: Path("markdown/$folderId")
|
||||
|
||||
Files.createDirectories(markdownPath)
|
||||
|
||||
val appInstance = SpaceAppInstance(
|
||||
clientId ?: System.getProperty("space.clientId"),
|
||||
clientSecret ?: System.getProperty("space.clientSecret"),
|
||||
spaceUrl
|
||||
)
|
||||
if (html) {
|
||||
val htmlTargetPath = path?.let { Path(it) }?.resolve(htmlPath ?: "html")
|
||||
?: Path(htmlPath ?: "html/$folderId")
|
||||
htmlTargetPath.createDirectories()
|
||||
convertToHtml(markdownPath, htmlTargetPath)
|
||||
}
|
||||
if (docx) {
|
||||
val docxTargetPath = path?.let { Path(it) }?.resolve(docxPath ?: "docx")
|
||||
?: Path(docxPath ?: "docx/$folderId")
|
||||
docxTargetPath.createDirectories()
|
||||
convertToDocX(markdownPath, docxTargetPath)
|
||||
|
||||
|
||||
val spaceClient: SpaceClient = SpaceClient(
|
||||
ktorClientForSpace(CIO),
|
||||
appInstance,
|
||||
SpaceAuth.ClientCredentials()
|
||||
)
|
||||
runBlocking {
|
||||
println("Processing project \"${spaceClient.projects.getProject(ProjectIdentifier.Key(project)).name}\"")
|
||||
spaceClient.downloadAndProcessDocumentsInProject(
|
||||
markdownPath,
|
||||
ProjectIdentifier.Key(project),
|
||||
folderId?.let { FolderIdentifier.Id(it) } ?: FolderIdentifier.Root
|
||||
)
|
||||
if (html) {
|
||||
val htmlTargetPath = path?.let { Path(it) }?.resolve(htmlPath ?: "html")
|
||||
?: Path(htmlPath ?: "html/$folderId")
|
||||
htmlTargetPath.createDirectories()
|
||||
convertToHtml(markdownPath, htmlTargetPath)
|
||||
}
|
||||
if (docx) {
|
||||
val docxTargetPath = path?.let { Path(it) }?.resolve(docxPath ?: "docx")
|
||||
?: Path(docxPath ?: "docx/$folderId")
|
||||
docxTargetPath.createDirectories()
|
||||
convertToDocX(markdownPath, docxTargetPath)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
companion object {
|
||||
private val urlRegex =
|
||||
"""(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)\/.*-(?<folderId>.*)${'$'}""".toRegex()
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * `repos` subcommand: clones all repositories of the Space project referenced by `url`
 * into the target directory given by [path].
 *
 * The Space server URL and the project key are parsed from `url`. Client credentials
 * come from the `clientId` / `clientSecret` options, falling back to the JVM system
 * properties `space.clientId` / `space.clientSecret`.
 */
private class ExtractRepositoriesCommand : ExtractCommand("repos", "Extract repositories") {

    val path: String by option(
        ArgType.String,
        description = "Target directory."
    ).default("./repositories")

    /**
     * Parses the URL, creates the target directory, builds the Space client and
     * blocks until all repositories have been processed.
     *
     * @throws IllegalStateException if the URL does not match the expected pattern
     *   or a credential is supplied neither as an option nor as a system property.
     */
    override fun execute() {
        val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space project url pattern")

        val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")

        val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")

        val repoPath: Path = Path(path)

        Files.createDirectories(repoPath)

        // Fail with an explicit message instead of passing a null system property on.
        val resolvedClientId = clientId ?: System.getProperty("space.clientId")
            ?: error("Space client ID is not defined: pass the 'clientId' option or set the 'space.clientId' system property")
        val resolvedClientSecret = clientSecret ?: System.getProperty("space.clientSecret")
            ?: error("Space client secret is not defined: pass the 'clientSecret' option or set the 'space.clientSecret' system property")

        val appInstance = SpaceAppInstance(
            resolvedClientId,
            resolvedClientSecret,
            spaceUrl
        )

        val spaceClient: SpaceClient = SpaceClient(
            ktorClientForSpace(CIO),
            appInstance,
            SpaceAuth.ClientCredentials()
        )

        runBlocking {
            val key = ProjectIdentifier.Key(project)
            logger.info("Extracting repositories from project \"${spaceClient.projects.getProject(key).name}\"")
            spaceClient.extractRepos(
                repoPath,
                key,
            )
        }
    }

    companion object {
        // Matches "<spaceUrl>/p/<projectName>" optionally followed by any further path, so that
        // folder/document URLs of the project are accepted as well as the bare project URL.
        private val urlRegex =
            """(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)(?:\/.*)?""".toRegex()
    }

}
|
||||
|
||||
/**
 * `all` subcommand: exports both the repositories and the documents of the Space
 * project referenced by `url`.
 *
 * Output is written below `<path>/projects/<project>`, into the `repositories`
 * and `documents` subdirectories. Client credentials come from the `clientId` /
 * `clientSecret` options, falling back to the JVM system properties
 * `space.clientId` / `space.clientSecret`.
 */
private class ExtractAllCommand : ExtractCommand("all", "Extract all data from a project") {

    val path: String by option(
        ArgType.String,
        description = "Target directory."
    ).default(".")

    /**
     * Parses the URL, creates the output directories, builds the Space client and
     * blocks until both the repository and the document export have finished.
     *
     * @throws IllegalStateException if the URL does not match the expected pattern
     *   or a credential is supplied neither as an option nor as a system property.
     */
    override fun execute() {
        val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space project url pattern")

        val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")

        val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")

        val rootPath = Path(path) / "projects" / project

        val documentsPath = rootPath / "documents"
        Files.createDirectories(documentsPath)

        val repoPath: Path = rootPath / "repositories"

        Files.createDirectories(repoPath)

        // Fail with an explicit message instead of passing a null system property on.
        val resolvedClientId = clientId ?: System.getProperty("space.clientId")
            ?: error("Space client ID is not defined: pass the 'clientId' option or set the 'space.clientId' system property")
        val resolvedClientSecret = clientSecret ?: System.getProperty("space.clientSecret")
            ?: error("Space client secret is not defined: pass the 'clientSecret' option or set the 'space.clientSecret' system property")

        val appInstance = SpaceAppInstance(
            resolvedClientId,
            resolvedClientSecret,
            spaceUrl
        )

        val spaceClient: SpaceClient = SpaceClient(
            ktorClientForSpace(CIO),
            appInstance,
            SpaceAuth.ClientCredentials()
        )

        runBlocking {
            val key = ProjectIdentifier.Key(project)
            logger.info("Extracting everything from project \"${spaceClient.projects.getProject(key).name}\"")
            // Both exports are launched as children of runBlocking, which returns once both complete.
            launch {
                spaceClient.extractRepos(
                    repoPath,
                    key,
                )
            }
            launch {
                spaceClient.downloadAndProcessDocumentsInProject(
                    documentsPath,
                    key,
                    FolderIdentifier.Root
                )
            }
        }
    }

    companion object {
        // Matches "<spaceUrl>/p/<projectName>" optionally followed by any further path, so that
        // folder/document URLs of the project are accepted as well as the bare project URL.
        private val urlRegex =
            """(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)(?:\/.*)?""".toRegex()
    }

}
|
||||
|
||||
|
||||
/**
 * Command-line entry point of `space-export`.
 *
 * Registers the `docs`, `repos` and `all` subcommands on the argument parser and
 * parses [args].
 */
fun main(args: Array<String>) {
    ArgParser("space-export").run {
        subcommands(
            ExtractDocumentsCommand(),
            ExtractRepositoriesCommand(),
            ExtractAllCommand(),
        )
        parse(args)
    }
}
|
Loading…
Reference in New Issue
Block a user