Documentation
This commit is contained in:
parent
39cdff3726
commit
f500d4eb61
23
README.md
23
README.md
@ -10,7 +10,7 @@ This project uses Space SDK to organize those steps.
|
||||
|
||||
## Setting up Space Application
|
||||
|
||||
In order to access data in Space, one needs to [create a Space Application](https://www.jetbrains.com/help/space/applications.html) and add appropriate permissions. I am not sure which permissions cover access to images, but here are those that I allowed:
|
||||
In order to access data in Space, one needs to [create a Space Application](https://www.jetbrains.com/help/space/applications.html) and add appropriate permissions. I am not sure which permissions cover access to images, but here are those that I allowed:
|
||||
|
||||
* Provide external attachment unfurls
|
||||
* Provide external inline unfurls
|
||||
@ -18,12 +18,13 @@ In order to access data in Space, one needs to [create a Space Application](http
|
||||
* View book metadata
|
||||
* View content
|
||||
|
||||
For restricted projects, one needs to manually add the project and its permissions to the allowed list.
|
||||
|
||||
Then one needs to copy `clientId` and `clientSecret` for the application and use them as command line parameters.
|
||||
|
||||
## Downloading texts
|
||||
|
||||
Right now Space SDK does not have methods to access documents, so the only way is to copy the markdown and paste it directly to a file. I hope it will change in the future.
|
||||
|
||||
Text and binary documents are processed recursively, starting at the given `folderId`, or at the project root if `folderId` is not defined.
|
||||
## Download images
|
||||
|
||||
The images in Space documents are inserted in the following format: `![](/d/aaaabbbbcccc?f=0 "name.png")`. Our aim is to detect those links in files and download the corresponding images. Those links cannot be replaced directly, because access requires OAuth authentication. For that, we need to use the access token from the Space SDK.
|
||||
@ -34,10 +35,22 @@ After file is successfully downloaded, the reference in file must be replaced wi
|
||||
|
||||
## Command line interface
|
||||
|
||||
```commandline
|
||||
Usage: space-document-extractor options_list
|
||||
Options:
|
||||
--spaceUrl -> Url of the space instance like 'https://mipt-npm.jetbrains.space' (always required) { String }
|
||||
--project -> The key of the exported project (always required) { String }
|
||||
--path -> Target directory. Default is './output/project-key'. { String }
|
||||
--folderId -> FolderId for the folder to export. By default uses project root. { String }
|
||||
--clientId -> Space application client ID (if not defined, use environment value 'space.clientId') { String }
|
||||
--clientSecret -> Space application client secret (if not defined, use environment value 'space.clientSecret') { String }
|
||||
--help, -h -> Usage info
|
||||
```
|
||||
|
||||
Typical application usage:
|
||||
|
||||
```commandline
|
||||
.\space-document-extractor --spaceUrl https://mipt-npm.jetbrains.space --path D:\Work\report\ --clientId "your client ID" --clientSecret "your client secret"
|
||||
.\space-document-extractor --spaceUrl "your space URL" --project "your project key" --clientId "your client ID" --clientSecret "your client secret"
|
||||
```
|
||||
|
||||
It will search the directory (and subdirectories) and replace image links with downloaded image in `./images` directory.
|
||||
It will download all documents and postprocess markdown files, replacing image links with the downloaded images in the `images` directory (each subdirectory will have its own `images` directory).
|
@ -28,11 +28,11 @@ suspend fun main(args: Array<String>) {
|
||||
description = "The key of the exported project"
|
||||
).required()
|
||||
|
||||
val path: String? by parser.option(ArgType.String, description = "Target directory. Default is current directory")
|
||||
val path: String? by parser.option(ArgType.String, description = "Target directory. Default is './output/project-key'.")
|
||||
|
||||
val folderId: String? by parser.option(
|
||||
ArgType.String,
|
||||
description = "FolderId for the folder to export"
|
||||
description = "FolderId for the folder to export. By default uses project root."
|
||||
)
|
||||
|
||||
val clientId by parser.option(
|
||||
@ -51,7 +51,6 @@ suspend fun main(args: Array<String>) {
|
||||
|
||||
Files.createDirectories(target)
|
||||
|
||||
|
||||
val space: SpaceClient = SpaceClient(
|
||||
ktorClientForSpace(CIO),
|
||||
SpaceAppInstance(
|
||||
|
@ -11,6 +11,7 @@ import io.ktor.utils.io.jvm.javaio.copyTo
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.coroutineScope
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.withContext
|
||||
import org.slf4j.LoggerFactory
|
||||
import space.jetbrains.api.runtime.Batch
|
||||
import space.jetbrains.api.runtime.SpaceClient
|
||||
@ -23,6 +24,9 @@ import kotlin.streams.toList
|
||||
|
||||
private val logger by lazy { LoggerFactory.getLogger("space-extractor") }
|
||||
|
||||
/**
|
||||
* Extract single attachment image
|
||||
*/
|
||||
internal suspend fun SpaceClient.extractImage(
|
||||
parent: Path,
|
||||
imageId: String,
|
||||
@ -39,7 +43,10 @@ internal suspend fun SpaceClient.extractImage(
|
||||
file.writeBytes(response.readBytes())
|
||||
}
|
||||
|
||||
internal suspend fun SpaceClient.extractDocument(
|
||||
/**
|
||||
* Extract single file
|
||||
*/
|
||||
internal suspend fun SpaceClient.extractFile(
|
||||
parent: Path,
|
||||
documentId: String,
|
||||
documentFileName: String,
|
||||
@ -59,6 +66,9 @@ internal suspend fun SpaceClient.extractDocument(
|
||||
|
||||
private val regex = """!\[(?<alt>.*)]\(/d/(?<id>.*)\?f=0""".toRegex()
|
||||
|
||||
/**
|
||||
* Post-process a markdown document by downloading images and replacing links
|
||||
*/
|
||||
internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutineScope{
|
||||
val documentBody = path.readText()
|
||||
val logger = LoggerFactory.getLogger("space-document-extractor")
|
||||
@ -73,7 +83,6 @@ internal suspend fun SpaceClient.processMarkdownDocument(path: Path) = coroutin
|
||||
"![$alt](images/$id"
|
||||
}
|
||||
path.writeText(newText)
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@ -94,6 +103,9 @@ internal suspend fun SpaceClient.processMarkdownInDirectory(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Download single Space document
|
||||
*/
|
||||
internal suspend fun SpaceClient.downloadDocument(
|
||||
directory: Path,
|
||||
document: Document,
|
||||
@ -101,7 +113,7 @@ internal suspend fun SpaceClient.downloadDocument(
|
||||
when (val body = document.documentBody) {
|
||||
is FileDocumentBody -> {
|
||||
launch(Dispatchers.IO) {
|
||||
extractDocument(directory, document.id, document.title)
|
||||
extractFile(directory, document.id, document.title)
|
||||
}
|
||||
}
|
||||
is TextDocument -> {
|
||||
@ -115,6 +127,9 @@ internal suspend fun SpaceClient.downloadDocument(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Download all documents and subfolders in a folder
|
||||
*/
|
||||
internal suspend fun SpaceClient.downloadDocumentFolder(
|
||||
directory: Path,
|
||||
projectId: ProjectIdentifier,
|
||||
@ -142,11 +157,14 @@ internal suspend fun SpaceClient.downloadDocumentFolder(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Download all documents in a project or a folder with given [rootFolder] and postprocess files
|
||||
*/
|
||||
suspend fun SpaceClient.downloadAndProcessDocumentsInProject(
|
||||
directory: Path,
|
||||
projectId: ProjectIdentifier,
|
||||
rootFolder: FolderIdentifier = FolderIdentifier.Root,
|
||||
) {
|
||||
) = withContext(Dispatchers.IO){
|
||||
logger.info("Processing project ${projectId.compactId} to $directory")
|
||||
downloadDocumentFolder(directory, projectId, rootFolder)
|
||||
processMarkdownInDirectory(directory)
|
||||
|
Loading…
Reference in New Issue
Block a user