Add repositories export
This commit is contained in:
parent
683f46435f
commit
c2a5ace4c4
@ -15,15 +15,16 @@ repositories {
|
|||||||
maven("https://maven.pkg.jetbrains.space/public/p/space/maven")
|
maven("https://maven.pkg.jetbrains.space/public/p/space/maven")
|
||||||
}
|
}
|
||||||
|
|
||||||
val ktorVersion = "2.3.1"
|
val ktorVersion = "2.3.3"
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation("org.jetbrains:space-sdk-jvm:163093-beta")
|
implementation("org.jetbrains:space-sdk-jvm:167818-beta")
|
||||||
implementation("io.ktor:ktor-client-cio-jvm:$ktorVersion")
|
implementation("io.ktor:ktor-client-cio-jvm:$ktorVersion")
|
||||||
implementation("io.ktor:ktor-server-core-jvm:$ktorVersion")
|
implementation("io.ktor:ktor-server-core-jvm:$ktorVersion")
|
||||||
implementation("io.ktor:ktor-server-cio-jvm:$ktorVersion")
|
implementation("io.ktor:ktor-server-cio-jvm:$ktorVersion")
|
||||||
implementation("org.jetbrains.kotlinx:kotlinx-cli:0.3.5")
|
implementation("org.jetbrains.kotlinx:kotlinx-cli:0.3.5")
|
||||||
implementation("ch.qos.logback:logback-classic:1.4.8")
|
implementation("ch.qos.logback:logback-classic:1.4.8")
|
||||||
|
implementation("org.eclipse.jgit:org.eclipse.jgit:6.6.0.202305301015-r")
|
||||||
testImplementation(kotlin("test"))
|
testImplementation(kotlin("test"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
|
|
||||||
rootProject.name = "space-document-extractor"
|
rootProject.name = "space-export"
|
||||||
|
|
||||||
|
@ -115,16 +115,23 @@ internal suspend fun SpaceClient.downloadDocument(
|
|||||||
directory: Path,
|
directory: Path,
|
||||||
document: Document,
|
document: Document,
|
||||||
) = coroutineScope {
|
) = coroutineScope {
|
||||||
when (val body = document.documentBody) {
|
when (val body = document.body) {
|
||||||
is FileDocumentBody -> {
|
is FileDocumentHttpBody -> {
|
||||||
launch(Dispatchers.IO) {
|
launch(Dispatchers.IO) {
|
||||||
extractFile(directory, document.id, document.title)
|
extractFile(directory, document.id, document.title)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
is TextDocument -> {
|
is TextDocumentHttpBody -> {
|
||||||
val markdownFilePath = directory.resolve(document.title + ".md")
|
val markdownFilePath = directory.resolve(document.title + ".md")
|
||||||
markdownFilePath.writeText(body.text, Charsets.UTF_8)
|
val content = body.docContent
|
||||||
|
if (content is MdTextDocumentContent) {
|
||||||
|
markdownFilePath.writeText(content.markdown, Charsets.UTF_8)
|
||||||
|
} else {
|
||||||
|
launch(Dispatchers.IO) {
|
||||||
|
extractFile(directory, document.id, document.title)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
else -> {
|
else -> {
|
||||||
@ -151,7 +158,8 @@ internal suspend fun SpaceClient.downloadDocumentFolder(
|
|||||||
val document = projects.documents.getDocument(projectId, it.id) {
|
val document = projects.documents.getDocument(projectId, it.id) {
|
||||||
id()
|
id()
|
||||||
title()
|
title()
|
||||||
documentBody()
|
// documentBody()
|
||||||
|
body()
|
||||||
bodyType()
|
bodyType()
|
||||||
}
|
}
|
||||||
downloadDocument(directory, document)
|
downloadDocument(directory, document)
|
49
src/main/kotlin/extractRepos.kt
Normal file
49
src/main/kotlin/extractRepos.kt
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
package center.sciprog.space.documentextractor
|
||||||
|
|
||||||
|
import space.jetbrains.api.runtime.SpaceClient
|
||||||
|
import space.jetbrains.api.runtime.resources.projects
|
||||||
|
import space.jetbrains.api.runtime.types.ProjectIdentifier
|
||||||
|
import java.nio.file.Path
|
||||||
|
import kotlin.io.path.createDirectories
|
||||||
|
import kotlin.io.path.div
|
||||||
|
|
||||||
|
/**
 * Clones the Git repository at [url] by running the system `git` executable.
 *
 * `git clone` is started with [parentDirectory] as its working directory and inherits this
 * process's standard streams, so Git's own output goes straight to the console. The call blocks
 * until the child process terminates.
 *
 * @param parentDirectory working directory for the `git clone` process.
 * @param url clone URL handed to `git clone`.
 * @throws IllegalStateException if `git clone` finishes with a non-zero exit status.
 */
private fun cloneRepo(
    parentDirectory: Path,
    url: String,
) {
    logger.info("Cloning $url to $parentDirectory")

    val exitCode = ProcessBuilder("git", "clone", url)
        .directory(parentDirectory.toAbsolutePath().toFile())
        .inheritIO()
        .start()
        .waitFor()

    // The exit status used to be discarded, which made a failed clone indistinguishable
    // from a successful one; surface it so callers can log or handle the failure.
    check(exitCode == 0) { "git clone of $url failed with exit code $exitCode" }

    // Disabled in-process alternative based on JGit, kept for reference:
    //    Git.cloneRepository()
    //        .setURI(url)
    //        .setDirectory(parentDirectory.resolve(name).toFile())
    //        .call()
}
|
||||||
|
|
||||||
|
/**
 * Clones every repository of the project [projectId] into [directory].
 *
 * The repository list is requested from the project; for each repository the SSH URL is
 * resolved and passed to [cloneRepo]. A failure while handling one repository is logged and
 * does not prevent the remaining repositories from being processed.
 *
 * @param directory directory passed to [cloneRepo] as the parent directory for the clones.
 * @param projectId identifier of the project whose repositories are cloned.
 */
suspend fun SpaceClient.extractRepos(
    directory: Path,
    projectId: ProjectIdentifier,
) {
    val repos = projects.getProject(
        project = projectId
    ) {
        repos()
    }.repos

    for (repo in repos) {
        try {
            val url = projects.repositories.url(
                project = projectId,
                repository = repo.name
            ).sshUrl ?: error("Could not resolve sshUrl for ${repo.name}")
            cloneRepo(parentDirectory = directory, url)
        } catch (ex: kotlin.coroutines.cancellation.CancellationException) {
            // Cancellation must propagate; swallowing it would keep a cancelled export running.
            throw ex
        } catch (ex: Exception) {
            // Best effort: report which repository failed and continue with the next one.
            logger.error("Failed to clone repository ${repo.name}", ex)
        }
    }
}
|
@ -1,11 +1,11 @@
|
|||||||
|
@file:OptIn(ExperimentalCli::class)
|
||||||
|
|
||||||
package center.sciprog.space.documentextractor
|
package center.sciprog.space.documentextractor
|
||||||
|
|
||||||
import io.ktor.client.engine.cio.CIO
|
import io.ktor.client.engine.cio.CIO
|
||||||
import kotlinx.cli.ArgParser
|
import kotlinx.cli.*
|
||||||
import kotlinx.cli.ArgType
|
import kotlinx.coroutines.launch
|
||||||
import kotlinx.cli.default
|
import kotlinx.coroutines.runBlocking
|
||||||
import kotlinx.cli.required
|
|
||||||
import kotlinx.coroutines.coroutineScope
|
|
||||||
import space.jetbrains.api.runtime.SpaceAppInstance
|
import space.jetbrains.api.runtime.SpaceAppInstance
|
||||||
import space.jetbrains.api.runtime.SpaceAuth
|
import space.jetbrains.api.runtime.SpaceAuth
|
||||||
import space.jetbrains.api.runtime.SpaceClient
|
import space.jetbrains.api.runtime.SpaceClient
|
||||||
@ -17,62 +17,67 @@ import java.nio.file.Files
|
|||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
import kotlin.io.path.Path
|
import kotlin.io.path.Path
|
||||||
import kotlin.io.path.createDirectories
|
import kotlin.io.path.createDirectories
|
||||||
|
import kotlin.io.path.div
|
||||||
|
|
||||||
internal val urlRegex =
|
private abstract class ExtractCommand(name: String, description: String) : Subcommand(name, description) {
|
||||||
"""(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)\/.*-(?<folderId>.*)${'$'}""".toRegex()
|
|
||||||
|
|
||||||
suspend fun main(args: Array<String>) {
|
val url by argument(
|
||||||
val parser = ArgParser("space-document-extractor")
|
|
||||||
|
|
||||||
val url by parser.option(
|
|
||||||
ArgType.String,
|
ArgType.String,
|
||||||
description = "Url of the folder like 'https://spc.jetbrains.space/p/mipt-npm/documents/folders?f=SPC-qn7al1VorKp' or 'https://spc.jetbrains.space/p/mipt-npm/documents/SPC/f/SPC-qn7al1VorKp?f=SPC-qn7al1VorKp'"
|
description = "Url of the folder like 'https://spc.jetbrains.space/p/mipt-npm/documents/folders?f=SPC-qn7al1VorKp' or 'https://spc.jetbrains.space/p/mipt-npm/documents/SPC/f/SPC-qn7al1VorKp?f=SPC-qn7al1VorKp'"
|
||||||
).required()
|
|
||||||
|
|
||||||
val path: String? by parser.option(
|
|
||||||
ArgType.String,
|
|
||||||
description = "Target directory. Default is './markdown/<id>'."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
val html by parser.option(
|
val clientId by option(
|
||||||
ArgType.Boolean,
|
|
||||||
description = "Convert Markdown to HTML via pandoc"
|
|
||||||
).default(false)
|
|
||||||
|
|
||||||
val htmlPath by parser.option(
|
|
||||||
ArgType.String,
|
|
||||||
description = "Path for html output. Default is './html/<id>"
|
|
||||||
)
|
|
||||||
|
|
||||||
val docx by parser.option(
|
|
||||||
ArgType.Boolean,
|
|
||||||
description = "Convert Markdown to DOCX via pandoc"
|
|
||||||
).default(false)
|
|
||||||
|
|
||||||
val docxPath by parser.option(
|
|
||||||
ArgType.String,
|
|
||||||
description = "Path for docx output. Default is './docx/<id>"
|
|
||||||
)
|
|
||||||
|
|
||||||
val clientId by parser.option(
|
|
||||||
ArgType.String,
|
ArgType.String,
|
||||||
description = "Space application client ID (if not defined, use environment value 'space.clientId')"
|
description = "Space application client ID (if not defined, use environment value 'space.clientId')"
|
||||||
)
|
)
|
||||||
|
|
||||||
val clientSecret by parser.option(
|
val clientSecret by option(
|
||||||
ArgType.String,
|
ArgType.String,
|
||||||
description = "Space application client secret (if not defined, use environment value 'space.clientSecret')"
|
description = "Space application client secret (if not defined, use environment value 'space.clientSecret')"
|
||||||
)
|
)
|
||||||
|
}
|
||||||
|
|
||||||
parser.parse(args)
|
private class ExtractDocumentsCommand : ExtractCommand("docs", "Extract documents") {
|
||||||
|
|
||||||
|
val path: String? by option(
|
||||||
|
ArgType.String,
|
||||||
|
description = "Target directory. Default is './documents/<id>'."
|
||||||
|
)
|
||||||
|
|
||||||
|
val html by option(
|
||||||
|
ArgType.Boolean,
|
||||||
|
description = "Convert Markdown to HTML via pandoc"
|
||||||
|
).default(false)
|
||||||
|
|
||||||
|
val htmlPath by option(
|
||||||
|
ArgType.String,
|
||||||
|
description = "Path for html output. Default is './documents/@html/<id>"
|
||||||
|
)
|
||||||
|
|
||||||
|
val docx by option(
|
||||||
|
ArgType.Boolean,
|
||||||
|
description = "Convert Markdown to DOCX via pandoc"
|
||||||
|
).default(false)
|
||||||
|
|
||||||
|
val docxPath by option(
|
||||||
|
ArgType.String,
|
||||||
|
description = "Path for docx output. Default is './documents/@docx/<id>"
|
||||||
|
)
|
||||||
|
|
||||||
|
val exportRepos by option(
|
||||||
|
ArgType.Boolean,
|
||||||
|
description = "Export all repositories in the project."
|
||||||
|
).default(false)
|
||||||
|
|
||||||
|
|
||||||
|
override fun execute() {
|
||||||
val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern")
|
val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space document url pattern")
|
||||||
|
|
||||||
val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")
|
val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")
|
||||||
|
|
||||||
val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")
|
val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")
|
||||||
|
|
||||||
val folderId = urlMatch.groups["folderId"]?.value ?: error("Folder or document token not recognized")
|
val folderId: String? = urlMatch.groups["folderId"]?.value
|
||||||
|
|
||||||
val markdownPath: Path = path?.let { Path(it) } ?: Path("markdown/$folderId")
|
val markdownPath: Path = path?.let { Path(it) } ?: Path("markdown/$folderId")
|
||||||
|
|
||||||
@ -90,13 +95,12 @@ suspend fun main(args: Array<String>) {
|
|||||||
appInstance,
|
appInstance,
|
||||||
SpaceAuth.ClientCredentials()
|
SpaceAuth.ClientCredentials()
|
||||||
)
|
)
|
||||||
|
runBlocking {
|
||||||
coroutineScope {
|
|
||||||
println("Processing project \"${spaceClient.projects.getProject(ProjectIdentifier.Key(project)).name}\"")
|
println("Processing project \"${spaceClient.projects.getProject(ProjectIdentifier.Key(project)).name}\"")
|
||||||
spaceClient.downloadAndProcessDocumentsInProject(
|
spaceClient.downloadAndProcessDocumentsInProject(
|
||||||
markdownPath,
|
markdownPath,
|
||||||
ProjectIdentifier.Key(project),
|
ProjectIdentifier.Key(project),
|
||||||
FolderIdentifier.Id(folderId)
|
folderId?.let { FolderIdentifier.Id(it) } ?: FolderIdentifier.Root
|
||||||
)
|
)
|
||||||
if (html) {
|
if (html) {
|
||||||
val htmlTargetPath = path?.let { Path(it) }?.resolve(htmlPath ?: "html")
|
val htmlTargetPath = path?.let { Path(it) }?.resolve(htmlPath ?: "html")
|
||||||
@ -112,3 +116,126 @@ suspend fun main(args: Array<String>) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
companion object {
|
||||||
|
private val urlRegex =
|
||||||
|
"""(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)\/.*-(?<folderId>.*)${'$'}""".toRegex()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * `repos` subcommand: clones all repositories of a Space project into a local directory.
 *
 * The positional URL may be the project URL itself (`https://<host>/p/<project>`) or any URL
 * below it; only the server address and the project key are taken from it.
 */
private class ExtractRepositoriesCommand : ExtractCommand("repos", "Extract repositories") {

    val path: String by option(
        ArgType.String,
        description = "Target directory."
    ).default("./repositories")

    /**
     * Parses the URL, creates the target directory, builds a Space client from the supplied
     * credentials and runs the repository export, blocking until it completes.
     *
     * @throws IllegalStateException if the URL cannot be parsed or a credential is missing.
     */
    override fun execute() {
        val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space project url pattern")

        val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")

        val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")

        val repoPath: Path = Path(path)

        Files.createDirectories(repoPath)

        // System.getProperty returns null for an unset property; fail with a clear message
        // instead of passing null on to SpaceAppInstance.
        val appInstance = SpaceAppInstance(
            clientId ?: System.getProperty("space.clientId")
                ?: error("Space client ID is not defined: pass the clientId option or set the 'space.clientId' system property"),
            clientSecret ?: System.getProperty("space.clientSecret")
                ?: error("Space client secret is not defined: pass the clientSecret option or set the 'space.clientSecret' system property"),
            spaceUrl
        )

        val spaceClient: SpaceClient = SpaceClient(
            ktorClientForSpace(CIO),
            appInstance,
            SpaceAuth.ClientCredentials()
        )

        runBlocking {
            val key = ProjectIdentifier.Key(project)
            logger.info("Extracting repositories from project \"${spaceClient.projects.getProject(key).name}\"")
            spaceClient.extractRepos(
                repoPath,
                key,
            )
        }
    }

    companion object {
        // Captures the server address and the project key. The optional trailing group lets
        // matchEntire accept URLs that continue below the project (documents, folders, ...).
        private val urlRegex =
            """(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)(?:\/.*)?""".toRegex()
    }

}
|
||||||
|
|
||||||
|
/**
 * `all` subcommand: exports both the documents and the repositories of a Space project.
 *
 * Output is written below `<path>/projects/<project>/`, in the `documents` and `repositories`
 * sub-directories. The positional URL may be the project URL itself
 * (`https://<host>/p/<project>`) or any URL below it; only the server address and the project
 * key are taken from it.
 */
private class ExtractAllCommand : ExtractCommand("all", "Extract all data from a project") {

    val path: String by option(
        ArgType.String,
        description = "Target directory."
    ).default(".")

    /**
     * Parses the URL, creates the output directories, builds a Space client from the supplied
     * credentials and runs the repository and document exports as two child coroutines,
     * blocking until both have finished.
     *
     * @throws IllegalStateException if the URL cannot be parsed or a credential is missing.
     */
    override fun execute() {
        val urlMatch = urlRegex.matchEntire(url) ?: error("Url $url does not match space project url pattern")

        val spaceUrl = urlMatch.groups["spaceUrl"]?.value ?: error("Space Url token not recognized")

        val project = urlMatch.groups["projectName"]?.value ?: error("Project name token not recognized")

        val rootPath = Path(path) / "projects" / project

        val documentsPath = rootPath / "documents"
        Files.createDirectories(documentsPath)

        val repoPath: Path = rootPath / "repositories"

        Files.createDirectories(repoPath)

        // System.getProperty returns null for an unset property; fail with a clear message
        // instead of passing null on to SpaceAppInstance.
        val appInstance = SpaceAppInstance(
            clientId ?: System.getProperty("space.clientId")
                ?: error("Space client ID is not defined: pass the clientId option or set the 'space.clientId' system property"),
            clientSecret ?: System.getProperty("space.clientSecret")
                ?: error("Space client secret is not defined: pass the clientSecret option or set the 'space.clientSecret' system property"),
            spaceUrl
        )

        val spaceClient: SpaceClient = SpaceClient(
            ktorClientForSpace(CIO),
            appInstance,
            SpaceAuth.ClientCredentials()
        )

        runBlocking {
            val key = ProjectIdentifier.Key(project)
            logger.info("Extracting everything from project \"${spaceClient.projects.getProject(key).name}\"")
            // Two children of the runBlocking scope; runBlocking returns once both complete.
            launch {
                spaceClient.extractRepos(
                    repoPath,
                    key,
                )
            }
            launch {
                spaceClient.downloadAndProcessDocumentsInProject(
                    documentsPath,
                    key,
                    FolderIdentifier.Root
                )
            }
        }
    }

    companion object {
        // Captures the server address and the project key. The optional trailing group lets
        // matchEntire accept URLs that continue below the project (documents, folders, ...).
        private val urlRegex =
            """(?<spaceUrl>https?:\/\/[^\/]*)\/p\/(?<projectName>[^\/]*)(?:\/.*)?""".toRegex()
    }

}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Command-line entry point.
 *
 * Builds the `space-export` argument parser, registers the document, repository and
 * "everything" subcommands, and parses [args].
 */
fun main(args: Array<String>) {
    ArgParser("space-export").run {
        subcommands(
            ExtractDocumentsCommand(),
            ExtractRepositoriesCommand(),
            ExtractAllCommand(),
        )
        parse(args)
    }
}
|
Loading…
Reference in New Issue
Block a user