Add repositories export

This commit is contained in:
Alexander Nozik 2023-08-21 20:32:40 +03:00
parent 683f46435f
commit c2a5ace4c4
5 changed files with 269 additions and 84 deletions

View File

@ -15,15 +15,16 @@ repositories {
maven("https://maven.pkg.jetbrains.space/public/p/space/maven")
}
val ktorVersion = "2.3.1"
val ktorVersion = "2.3.3"
dependencies {
implementation("org.jetbrains:space-sdk-jvm:163093-beta")
implementation("org.jetbrains:space-sdk-jvm:167818-beta")
implementation("io.ktor:ktor-client-cio-jvm:$ktorVersion")
implementation("io.ktor:ktor-server-core-jvm:$ktorVersion")
implementation("io.ktor:ktor-server-cio-jvm:$ktorVersion")
implementation("org.jetbrains.kotlinx:kotlinx-cli:0.3.5")
implementation("ch.qos.logback:logback-classic:1.4.8")
implementation("org.eclipse.jgit:org.eclipse.jgit:6.6.0.202305301015-r")
testImplementation(kotlin("test"))
}

View File

@ -1,3 +1,3 @@
rootProject.name = "space-document-extractor"
rootProject.name = "space-export"

View File

@ -115,16 +115,23 @@ internal suspend fun SpaceClient.downloadDocument(
directory: Path,
document: Document,
) = coroutineScope {
when (val body = document.documentBody) {
is FileDocumentBody -> {
when (val body = document.body) {
is FileDocumentHttpBody -> {
launch(Dispatchers.IO) {
extractFile(directory, document.id, document.title)
}
}
is TextDocument -> {
is TextDocumentHttpBody -> {
val markdownFilePath = directory.resolve(document.title + ".md")
markdownFilePath.writeText(body.text, Charsets.UTF_8)
val content = body.docContent
if (content is MdTextDocumentContent) {
markdownFilePath.writeText(content.markdown, Charsets.UTF_8)
} else {
launch(Dispatchers.IO) {
extractFile(directory, document.id, document.title)
}
}
}
else -> {
@ -151,7 +158,8 @@ internal suspend fun SpaceClient.downloadDocumentFolder(
val document = projects.documents.getDocument(projectId, it.id) {
id()
title()
documentBody()
// documentBody()
body()
bodyType()
}
downloadDocument(directory, document)

View File

@ -0,0 +1,49 @@
package center.sciprog.space.documentextractor
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import space.jetbrains.api.runtime.SpaceClient
import space.jetbrains.api.runtime.resources.projects
import space.jetbrains.api.runtime.types.ProjectIdentifier
import java.nio.file.Path
import kotlin.coroutines.cancellation.CancellationException
import kotlin.io.path.createDirectories
import kotlin.io.path.div
/**
 * Runs `git clone <url>` as an external process, using [parentDirectory] as the working directory.
 *
 * The process inherits this program's standard streams, so git's own output and prompts are
 * shown directly to the user. The call blocks the current thread until git exits.
 *
 * @param parentDirectory directory in which `git clone` is executed.
 * @param url repository URL passed to `git clone`.
 * @throws IllegalStateException if git exits with a non-zero exit code.
 */
private fun cloneRepo(
    parentDirectory: Path,
    url: String,
) {
    logger.info("Cloning $url to $parentDirectory")
    val exitCode = ProcessBuilder("git", "clone", url)
        .directory(parentDirectory.toAbsolutePath().toFile())
        .inheritIO()
        .start()
        .waitFor()

    // A non-zero exit code means the clone failed; report it instead of silently continuing.
    check(exitCode == 0) { "git clone of $url failed with exit code $exitCode" }

//    Git.cloneRepository()
//        .setURI(url)
//        .setDirectory(parentDirectory.resolve(name).toFile())
//        .call()
}
/**
 * Clones every repository of the project [projectId] into [directory].
 *
 * Repositories are processed one by one using their SSH URL. A failure on one repository
 * (for example an unresolved SSH URL) is logged and does not stop the remaining ones.
 * Coroutine cancellation is not treated as a failure and is propagated to the caller.
 *
 * @param directory directory that receives the cloned repositories.
 * @param projectId identifier of the Space project whose repositories are exported.
 */
suspend fun SpaceClient.extractRepos(
    directory: Path,
    projectId: ProjectIdentifier,
) {
    val repos = projects.getProject(
        project = projectId
    ) {
        repos()
    }.repos

    repos.forEach { repo ->
        try {
            val url = projects.repositories.url(
                project = projectId,
                repository = repo.name
            ).sshUrl ?: error("Could not resolve sshUrl for ${repo.name}")

            // cloneRepo blocks its thread until git exits, so keep it off the caller's dispatcher.
            withContext(Dispatchers.IO) {
                cloneRepo(parentDirectory = directory, url)
            }
        } catch (ex: CancellationException) {
            // Never swallow cancellation: the enclosing scope relies on it to stop this coroutine.
            throw ex
        } catch (ex: Exception) {
            logger.error("Failed to export repository ${repo.name}", ex)
        }
    }
}

View File

@ -1,11 +1,11 @@
@file:OptIn(ExperimentalCli::class)
package center.sciprog.space.documentextractor
import io.ktor.client.engine.cio.CIO
import kotlinx.cli.ArgParser
import kotlinx.cli.ArgType
import kotlinx.cli.default
import kotlinx.cli.required
import kotlinx.coroutines.coroutineScope
import kotlinx.cli.*
import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
import space.jetbrains.api.runtime.SpaceAppInstance
import space.jetbrains.api.runtime.SpaceAuth
import space.jetbrains.api.runtime.SpaceClient
@ -17,98 +17,225 @@ import java.nio.file.Files
import java.nio.file.Path
import kotlin.io.path.Path
import kotlin.io.path.createDirectories
import kotlin.io.path.div
internal val urlRegex =
"""(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)\/.*-(?<folderId>.*)${'$'}""".toRegex()
private abstract class ExtractCommand(name: String, description: String) : Subcommand(name, description) {
suspend fun main(args: Array<String>) {
val parser = ArgParser("space-document-extractor")
val url by parser.option(
val url by argument(
ArgType.String,
description = "Url of the folder like 'https://spc.jetbrains.space/p/mipt-npm/documents/folders?f=SPC-qn7al1VorKp' or 'https://spc.jetbrains.space/p/mipt-npm/documents/SPC/f/SPC-qn7al1VorKp?f=SPC-qn7al1VorKp'"
).required()
val path: String? by parser.option(
ArgType.String,
description = "Target directory. Default is './markdown/<id>'."
)
val html by parser.option(
ArgType.Boolean,
description = "Convert Markdown to HTML via pandoc"
).default(false)
val htmlPath by parser.option(
ArgType.String,
description = "Path for html output. Default is './html/<id>"
)
val docx by parser.option(
ArgType.Boolean,
description = "Convert Markdown to DOCX via pandoc"
).default(false)
val docxPath by parser.option(
ArgType.String,
description = "Path for docx output. Default is './docx/<id>"
)
val clientId by parser.option(
val clientId by option(
ArgType.String,
description = "Space application client ID (if not defined, use environment value 'space.clientId')"
)
val clientSecret by parser.option(
val clientSecret by option(
ArgType.String,
description = "Space application client secret (if not defined, use environment value 'space.clientSecret')"
)
}
parser.parse(args)
private class ExtractDocumentsCommand : ExtractCommand("docs", "Extract documents") {
val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern")
val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")
val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")
val folderId = urlMatch.groups["folderId"]?.value ?: error("Folder or document token not recognized")
val markdownPath: Path = path?.let { Path(it) } ?: Path("markdown/$folderId")
Files.createDirectories(markdownPath)
val appInstance = SpaceAppInstance(
clientId ?: System.getProperty("space.clientId"),
clientSecret ?: System.getProperty("space.clientSecret"),
spaceUrl
val path: String? by option(
ArgType.String,
description = "Target directory. Default is './documents/<id>'."
)
val html by option(
ArgType.Boolean,
description = "Convert Markdown to HTML via pandoc"
).default(false)
val spaceClient: SpaceClient = SpaceClient(
ktorClientForSpace(CIO),
appInstance,
SpaceAuth.ClientCredentials()
val htmlPath by option(
ArgType.String,
description = "Path for html output. Default is './documents/@html/<id>"
)
coroutineScope {
println("Processing project \"${spaceClient.projects.getProject(ProjectIdentifier.Key(project)).name}\"")
spaceClient.downloadAndProcessDocumentsInProject(
markdownPath,
ProjectIdentifier.Key(project),
FolderIdentifier.Id(folderId)
val docx by option(
ArgType.Boolean,
description = "Convert Markdown to DOCX via pandoc"
).default(false)
val docxPath by option(
ArgType.String,
description = "Path for docx output. Default is './documents/@docx/<id>"
)
val exportRepos by option(
ArgType.Boolean,
description = "Export all repositories in the project."
).default(false)
override fun execute() {
val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern")
val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")
val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")
val folderId: String? = urlMatch.groups["folderId"]?.value
val markdownPath: Path = path?.let { Path(it) } ?: Path("markdown/$folderId")
Files.createDirectories(markdownPath)
val appInstance = SpaceAppInstance(
clientId ?: System.getProperty("space.clientId"),
clientSecret ?: System.getProperty("space.clientSecret"),
spaceUrl
)
if (html) {
val htmlTargetPath = path?.let { Path(it) }?.resolve(htmlPath ?: "html")
?: Path(htmlPath ?: "html/$folderId")
htmlTargetPath.createDirectories()
convertToHtml(markdownPath, htmlTargetPath)
}
if (docx) {
val docxTargetPath = path?.let { Path(it) }?.resolve(docxPath ?: "docx")
?: Path(docxPath ?: "docx/$folderId")
docxTargetPath.createDirectories()
convertToDocX(markdownPath, docxTargetPath)
val spaceClient: SpaceClient = SpaceClient(
ktorClientForSpace(CIO),
appInstance,
SpaceAuth.ClientCredentials()
)
runBlocking {
println("Processing project \"${spaceClient.projects.getProject(ProjectIdentifier.Key(project)).name}\"")
spaceClient.downloadAndProcessDocumentsInProject(
markdownPath,
ProjectIdentifier.Key(project),
folderId?.let { FolderIdentifier.Id(it) } ?: FolderIdentifier.Root
)
if (html) {
val htmlTargetPath = path?.let { Path(it) }?.resolve(htmlPath ?: "html")
?: Path(htmlPath ?: "html/$folderId")
htmlTargetPath.createDirectories()
convertToHtml(markdownPath, htmlTargetPath)
}
if (docx) {
val docxTargetPath = path?.let { Path(it) }?.resolve(docxPath ?: "docx")
?: Path(docxPath ?: "docx/$folderId")
docxTargetPath.createDirectories()
convertToDocX(markdownPath, docxTargetPath)
}
}
}
companion object {
private val urlRegex =
"""(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)\/.*-(?<folderId>.*)${'$'}""".toRegex()
}
}
/**
 * `repos` subcommand: exports all repositories of a Space project into a local directory.
 *
 * The project is taken from the `url` argument, which must have the form
 * `<spaceUrl>/p/<projectName>` with an optional trailing slash.
 */
private class ExtractRepositoriesCommand : ExtractCommand("repos", "Extract repositories") {

    val path: String by option(
        ArgType.String,
        description = "Target directory."
    ).default("./repositories")

    /**
     * Parses the project URL, creates the target directory and clones the project repositories into it.
     *
     * @throws IllegalStateException if the URL does not match the expected pattern or if the
     * client ID / client secret is given neither as an option nor as a system property.
     */
    override fun execute() {
        val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space project url pattern")
        val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")
        val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")

        val repoPath: Path = Path(path)
        Files.createDirectories(repoPath)

        // System.getProperty returns null for an unset property; fail with an explicit message
        // instead of handing a null credential to SpaceAppInstance.
        val appInstance = SpaceAppInstance(
            clientId ?: System.getProperty("space.clientId")
                ?: error("Space client ID is not defined: use the clientId option or the 'space.clientId' system property"),
            clientSecret ?: System.getProperty("space.clientSecret")
                ?: error("Space client secret is not defined: use the clientSecret option or the 'space.clientSecret' system property"),
            spaceUrl
        )

        val spaceClient: SpaceClient = SpaceClient(
            ktorClientForSpace(CIO),
            appInstance,
            SpaceAuth.ClientCredentials()
        )

        runBlocking {
            val key = ProjectIdentifier.Key(project)
            logger.info("Extracting repositories from project \"${spaceClient.projects.getProject(key).name}\"")
            spaceClient.extractRepos(
                repoPath,
                key,
            )
        }
    }

    companion object {
        // Matches "<spaceUrl>/p/<projectName>" with an optional trailing slash.
        private val urlRegex =
            """(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)\/?""".toRegex()
    }
}
/**
 * `all` subcommand: exports both the repositories and the documents of a Space project.
 *
 * Output goes to `<path>/projects/<projectName>/repositories` and
 * `<path>/projects/<projectName>/documents`. The project is taken from the `url` argument,
 * which must have the form `<spaceUrl>/p/<projectName>` with an optional trailing slash.
 */
private class ExtractAllCommand : ExtractCommand("all", "Extract all data from a project") {

    val path: String by option(
        ArgType.String,
        description = "Target directory."
    ).default(".")

    /**
     * Parses the project URL, prepares the output directories and launches the repository
     * and document exports as two coroutines, returning when both have finished.
     *
     * @throws IllegalStateException if the URL does not match the expected pattern or if the
     * client ID / client secret is given neither as an option nor as a system property.
     */
    override fun execute() {
        val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space project url pattern")
        val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")
        val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")

        val rootPath = Path(path) / "projects" / project

        val documentsPath = rootPath / "documents"
        Files.createDirectories(documentsPath)

        val repoPath: Path = rootPath / "repositories"
        Files.createDirectories(repoPath)

        // System.getProperty returns null for an unset property; fail with an explicit message
        // instead of handing a null credential to SpaceAppInstance.
        val appInstance = SpaceAppInstance(
            clientId ?: System.getProperty("space.clientId")
                ?: error("Space client ID is not defined: use the clientId option or the 'space.clientId' system property"),
            clientSecret ?: System.getProperty("space.clientSecret")
                ?: error("Space client secret is not defined: use the clientSecret option or the 'space.clientSecret' system property"),
            spaceUrl
        )

        val spaceClient: SpaceClient = SpaceClient(
            ktorClientForSpace(CIO),
            appInstance,
            SpaceAuth.ClientCredentials()
        )

        runBlocking {
            val key = ProjectIdentifier.Key(project)
            logger.info("Extracting everything from project \"${spaceClient.projects.getProject(key).name}\"")
            launch {
                spaceClient.extractRepos(
                    repoPath,
                    key,
                )
            }
            launch {
                spaceClient.downloadAndProcessDocumentsInProject(
                    documentsPath,
                    key,
                    FolderIdentifier.Root
                )
            }
        }
    }

    companion object {
        // Matches "<spaceUrl>/p/<projectName>" with an optional trailing slash.
        private val urlRegex =
            """(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)\/?""".toRegex()
    }
}
/**
 * Command-line entry point: registers the `docs`, `repos` and `all` subcommands
 * on the "space-export" parser and parses [args].
 */
fun main(args: Array<String>) {
    ArgParser("space-export").apply {
        subcommands(
            ExtractDocumentsCommand(),
            ExtractRepositoriesCommand(),
            ExtractAllCommand(),
        )
        parse(args)
    }
}