diff --git a/build.gradle.kts b/build.gradle.kts index 459451a..3a2ad75 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -15,15 +15,16 @@ repositories { maven("https://maven.pkg.jetbrains.space/public/p/space/maven") } -val ktorVersion = "2.3.1" +val ktorVersion = "2.3.3" dependencies { - implementation("org.jetbrains:space-sdk-jvm:163093-beta") + implementation("org.jetbrains:space-sdk-jvm:167818-beta") implementation("io.ktor:ktor-client-cio-jvm:$ktorVersion") implementation("io.ktor:ktor-server-core-jvm:$ktorVersion") implementation("io.ktor:ktor-server-cio-jvm:$ktorVersion") implementation("org.jetbrains.kotlinx:kotlinx-cli:0.3.5") implementation("ch.qos.logback:logback-classic:1.4.8") + implementation("org.eclipse.jgit:org.eclipse.jgit:6.6.0.202305301015-r") testImplementation(kotlin("test")) } diff --git a/settings.gradle.kts b/settings.gradle.kts index d10b0f9..1e2d6cf 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -1,3 +1,3 @@ -rootProject.name = "space-document-extractor" +rootProject.name = "space-export" diff --git a/src/main/kotlin/process.kt b/src/main/kotlin/extractDocuments.kt similarity index 91% rename from src/main/kotlin/process.kt rename to src/main/kotlin/extractDocuments.kt index 5ae6f67..0c16530 100644 --- a/src/main/kotlin/process.kt +++ b/src/main/kotlin/extractDocuments.kt @@ -115,16 +115,23 @@ internal suspend fun SpaceClient.downloadDocument( directory: Path, document: Document, ) = coroutineScope { - when (val body = document.documentBody) { - is FileDocumentBody -> { + when (val body = document.body) { + is FileDocumentHttpBody -> { launch(Dispatchers.IO) { extractFile(directory, document.id, document.title) } } - is TextDocument -> { + is TextDocumentHttpBody -> { val markdownFilePath = directory.resolve(document.title + ".md") - markdownFilePath.writeText(body.text, Charsets.UTF_8) + val content = body.docContent + if (content is MdTextDocumentContent) { + markdownFilePath.writeText(content.markdown, Charsets.UTF_8) + } else { + launch(Dispatchers.IO) { + extractFile(directory, document.id, document.title) + } + } } else -> { @@ -151,7 +158,8 @@ internal suspend fun SpaceClient.downloadDocumentFolder( val document = projects.documents.getDocument(projectId, it.id) { id() title() - documentBody() +// documentBody() + body() bodyType() } downloadDocument(directory, document) diff --git a/src/main/kotlin/extractRepos.kt b/src/main/kotlin/extractRepos.kt new file mode 100644 index 0000000..f114d2b --- /dev/null +++ b/src/main/kotlin/extractRepos.kt @@ -0,0 +1,49 @@ +package center.sciprog.space.documentextractor + +import space.jetbrains.api.runtime.SpaceClient +import space.jetbrains.api.runtime.resources.projects +import space.jetbrains.api.runtime.types.ProjectIdentifier +import java.nio.file.Path +import kotlin.io.path.createDirectories +import kotlin.io.path.div + +private fun cloneRepo( + parentDirectory: Path, + url: String, +) { + logger.info("Cloning $url to $parentDirectory") + + ProcessBuilder("git", "clone", url) + .directory(parentDirectory.toAbsolutePath().toFile()) + .inheritIO() + .start() + .waitFor() + +// Git.cloneRepository() +// .setURI(url) +// .setDirectory(parentDirectory.resolve(name).toFile()) +// .call() +} + +suspend fun SpaceClient.extractRepos( + directory: Path, + projectId: ProjectIdentifier, +) { + val repos = projects.getProject( + project = projectId + ) { + repos() + }.repos + + repos.forEach { repo -> + try { + val url = projects.repositories.url( + project = projectId, + repository = repo.name + ).sshUrl ?: error("Could not resolve sshUrl for ${repo.name}") + cloneRepo(parentDirectory = directory, url) + } catch (ex: Exception) { + logger.error("Failed ", ex) + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/main.kt b/src/main/kotlin/main.kt index 612a855..ad41ac2 100644 --- a/src/main/kotlin/main.kt +++ b/src/main/kotlin/main.kt @@ -1,11 +1,11 @@ +@file:OptIn(ExperimentalCli::class) + package center.sciprog.space.documentextractor import io.ktor.client.engine.cio.CIO -import kotlinx.cli.ArgParser -import kotlinx.cli.ArgType -import kotlinx.cli.default -import kotlinx.cli.required -import kotlinx.coroutines.coroutineScope +import kotlinx.cli.* +import kotlinx.coroutines.launch +import kotlinx.coroutines.runBlocking import space.jetbrains.api.runtime.SpaceAppInstance import space.jetbrains.api.runtime.SpaceAuth import space.jetbrains.api.runtime.SpaceClient @@ -17,98 +17,225 @@ import java.nio.file.Files import java.nio.file.Path import kotlin.io.path.Path import kotlin.io.path.createDirectories +import kotlin.io.path.div -internal val urlRegex = - """(?https?:\/\/[^\/]*)\/p\/(?[^\/]*)\/.*-(?.*)${'$'}""".toRegex() +private abstract class ExtractCommand(name: String, description: String) : Subcommand(name, description) { -suspend fun main(args: Array) { - val parser = ArgParser("space-document-extractor") - - val url by parser.option( + val url by argument( ArgType.String, description = "Url of the folder like 'https://spc.jetbrains.space/p/mipt-npm/documents/folders?f=SPC-qn7al1VorKp' or 'https://spc.jetbrains.space/p/mipt-npm/documents/SPC/f/SPC-qn7al1VorKp?f=SPC-qn7al1VorKp'" - ).required() - - val path: String? by parser.option( - ArgType.String, - description = "Target directory. Default is './markdown/'." ) - val html by parser.option( - ArgType.Boolean, - description = "Convert Markdown to HTML via pandoc" - ).default(false) - - val htmlPath by parser.option( - ArgType.String, - description = "Path for html output. Default is './html/" - ) - - val docx by parser.option( - ArgType.Boolean, - description = "Convert Markdown to DOCX via pandoc" - ).default(false) - - val docxPath by parser.option( - ArgType.String, - description = "Path for docx output. Default is './docx/" - ) - - val clientId by parser.option( + val clientId by option( ArgType.String, description = "Space application client ID (if not defined, use environment value 'space.clientId')" ) - val clientSecret by parser.option( + val clientSecret by option( ArgType.String, description = "Space application client secret (if not defined, use environment value 'space.clientSecret')" ) +} - parser.parse(args) +private class ExtractDocumentsCommand : ExtractCommand("docs", "Extract documents") { - val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern") - - val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized") - - val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized") - - val folderId = urlMatch.groups["folderId"]?.value ?: error("Folder or document token not recognized") - - val markdownPath: Path = path?.let { Path(it) } ?: Path("markdown/$folderId") - - Files.createDirectories(markdownPath) - - val appInstance = SpaceAppInstance( - clientId ?: System.getProperty("space.clientId"), - clientSecret ?: System.getProperty("space.clientSecret"), - spaceUrl + val path: String? by option( + ArgType.String, + description = "Target directory. Default is './documents/'." ) + val html by option( + ArgType.Boolean, + description = "Convert Markdown to HTML via pandoc" + ).default(false) - val spaceClient: SpaceClient = SpaceClient( - ktorClientForSpace(CIO), - appInstance, - SpaceAuth.ClientCredentials() + val htmlPath by option( + ArgType.String, + description = "Path for html output. Default is './documents/@html/" ) - coroutineScope { - println("Processing project \"${spaceClient.projects.getProject(ProjectIdentifier.Key(project)).name}\"") - spaceClient.downloadAndProcessDocumentsInProject( - markdownPath, - ProjectIdentifier.Key(project), - FolderIdentifier.Id(folderId) + val docx by option( + ArgType.Boolean, + description = "Convert Markdown to DOCX via pandoc" + ).default(false) + + val docxPath by option( + ArgType.String, + description = "Path for docx output. Default is './documents/@docx/" + ) + + val exportRepos by option( + ArgType.Boolean, + description = "Export all repositories in the project." + ).default(false) + + + override fun execute() { + val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern") + + val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized") + + val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized") + + val folderId: String? = urlMatch.groups["folderId"]?.value + + val markdownPath: Path = path?.let { Path(it) } ?: Path("markdown/$folderId") + + Files.createDirectories(markdownPath) + + val appInstance = SpaceAppInstance( + clientId ?: System.getProperty("space.clientId"), + clientSecret ?: System.getProperty("space.clientSecret"), + spaceUrl ) - if (html) { - val htmlTargetPath = path?.let { Path(it) }?.resolve(htmlPath ?: "html") - ?: Path(htmlPath ?: "html/$folderId") - htmlTargetPath.createDirectories() - convertToHtml(markdownPath, htmlTargetPath) - } - if (docx) { - val docxTargetPath = path?.let { Path(it) }?.resolve(docxPath ?: "docx") - ?: Path(docxPath ?: "docx/$folderId") - docxTargetPath.createDirectories() - convertToDocX(markdownPath, docxTargetPath) + + + val spaceClient: SpaceClient = SpaceClient( + ktorClientForSpace(CIO), + appInstance, + SpaceAuth.ClientCredentials() + ) + runBlocking { + println("Processing project \"${spaceClient.projects.getProject(ProjectIdentifier.Key(project)).name}\"") + spaceClient.downloadAndProcessDocumentsInProject( + markdownPath, + ProjectIdentifier.Key(project), + folderId?.let { FolderIdentifier.Id(it) } ?: FolderIdentifier.Root + ) + if (html) { + val htmlTargetPath = path?.let { Path(it) }?.resolve(htmlPath ?: "html") + ?: Path(htmlPath ?: "html/$folderId") + htmlTargetPath.createDirectories() + convertToHtml(markdownPath, htmlTargetPath) + } + if (docx) { + val docxTargetPath = path?.let { Path(it) }?.resolve(docxPath ?: "docx") + ?: Path(docxPath ?: "docx/$folderId") + docxTargetPath.createDirectories() + convertToDocX(markdownPath, docxTargetPath) + } } } + + companion object { + private val urlRegex = + """(?https?:\/\/[^\/]*)\/p\/(?[^\/]*)\/.*-(?.*)${'$'}""".toRegex() + } +} + +private class ExtractRepositoriesCommand : ExtractCommand("repos", "Extract repositories") { + + val path: String by option( + ArgType.String, + description = "Target directory." + ).default("./repositories") + + override fun execute() { + val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern") + + val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized") + + val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized") + + val repoPath: Path = Path(path) + + Files.createDirectories(repoPath) + + val appInstance = SpaceAppInstance( + clientId ?: System.getProperty("space.clientId"), + clientSecret ?: System.getProperty("space.clientSecret"), + spaceUrl + ) + + val spaceClient: SpaceClient = SpaceClient( + ktorClientForSpace(CIO), + appInstance, + SpaceAuth.ClientCredentials() + ) + + runBlocking { + val key = ProjectIdentifier.Key(project) + logger.info("Extracting repositories from project \"${spaceClient.projects.getProject(key).name}\"") + spaceClient.extractRepos( + repoPath, + key, + ) + } + } + + companion object { + private val urlRegex = + """(?https?:\/\/[^\/]*)\/p\/(?[^\/]*)\/?""".toRegex() + } + +} + +private class ExtractAllCommand : ExtractCommand("all", "Extract all data from a project") { + + val path: String by option( + ArgType.String, + description = "Target directory." + ).default(".") + + override fun execute() { + val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern") + + val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized") + + val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized") + + val rootPath = Path(path) / "projects" / project + + val documentsPath = rootPath / "documents" + Files.createDirectories(documentsPath) + + val repoPath: Path = rootPath / "repositories" + + Files.createDirectories(repoPath) + + val appInstance = SpaceAppInstance( + clientId ?: System.getProperty("space.clientId"), + clientSecret ?: System.getProperty("space.clientSecret"), + spaceUrl + ) + + val spaceClient: SpaceClient = SpaceClient( + ktorClientForSpace(CIO), + appInstance, + SpaceAuth.ClientCredentials() + ) + + runBlocking { + val key = ProjectIdentifier.Key(project) + logger.info("Extracting everything from project \"${spaceClient.projects.getProject(key).name}\"") + launch { + spaceClient.extractRepos( + repoPath, + key, + ) + } + launch { + spaceClient.downloadAndProcessDocumentsInProject( + documentsPath, + ProjectIdentifier.Key(project), + FolderIdentifier.Root + ) + } + } + } + + companion object { + private val urlRegex = + """(?https?:\/\/[^\/]*)\/p\/(?[^\/]*)\/?""".toRegex() + } + +} + + +fun main(args: Array) { + val parser = ArgParser("space-export") + + parser.subcommands(ExtractDocumentsCommand(), ExtractRepositoriesCommand(), ExtractAllCommand()) + + parser.parse(args) } \ No newline at end of file