Add pandoc wrapper

This commit is contained in:
Alexander Nozik 2023-11-04 16:57:34 +03:00
parent 40664db80d
commit 1ac5768b14
11 changed files with 1829 additions and 1 deletions

View File

@ -41,5 +41,6 @@ include(
":snark-gradle-plugin",
":snark-core",
":snark-html",
":snark-ktor"
":snark-ktor",
":snark-pandoc"
)

66
snark-pandoc/README.md Normal file
View File

@ -0,0 +1,66 @@
## Examples
### Simple conversion
Convert from INPUT_FILE to OUTPUT_FILE:
```java
PandocWrapper wrapper = new PandocWrapper();
wrapper.use(p -> {
var command = new PandocCommandBuilder(List.of(INPUT_FILE), OUTPUT_FILE);
PandocWrapper.execute(command);
});
```
Equal to:
```
pandoc --output=OUTPUT_FILE INPUT_FILE
```
### Convert and set formats
Convert from INPUT_FILE to OUTPUT_FILE and set INPUT_FORMAT and OUTPUT_FORMAT:
```java
PandocWrapper wrapper = new PandocWrapper();
wrapper.use(p -> {
var command = new PandocCommandBuilder(List.of(INPUT_FILE), OUTPUT_FILE);
command.formatFrom(INPUT_FORMAT);
command.formatTo(OUTPUT_FORMAT);
PandocWrapper.execute(command);
});
```
Equal to:
```
pandoc --output=OUTPUT_FILE --from=INPUT_FORMAT --to=OUTPUT_FORMAT INPUT_FILE
```
### Converting with options
Convert from INPUT_FILE to a standalone OUTPUT_FILE and set variable KEY to VALUE:
```java
PandocWrapper wrapper = new PandocWrapper();
wrapper.use(p -> {
var command = new PandocCommandBuilder(List.of(INPUT_FILE), OUTPUT_FILE);
command.standalone();
command.setVariable(KEY, VALUE);
PandocWrapper.execute(command);
});
```
Equal to:
```
pandoc --output=OUTPUT_FILE --standalone --variable=KEY:VALUE INPUT_FILE
```
### Write output from pandoc to file
Write the list of supported input formats to OUTPUT_FILE:
```java
PandocWrapper wrapper = new PandocWrapper();
wrapper.use(p -> {
var command = new PandocCommandBuilder();
command.getInputFormats();
PandocWrapper.execute(command, OUTPUT_FILE);
});
```
OUTPUT_FILE will then contain a list of the supported input formats, one per line.
### Write errors from pandoc to file
Write everything from the error stream and the exit code to ERROR_FILE, and the regular output to OUTPUT_FILE:
```java
PandocWrapper wrapper = new PandocWrapper();
wrapper.use(p -> {
var command = new PandocCommandBuilder(List.of(INPUT_FILE), OUTPUT_FILE);
PandocWrapper.execute(command, OUTPUT_FILE, ERROR_FILE);
});
```

View File

@ -0,0 +1,20 @@
// Build script of the snark-pandoc module: a JVM-only module built with the kscience multiplatform plugin.
plugins {
id("space.kscience.gradle.mpp")
}
kscience {
// JSON serialization support; the module decodes the GitHub release response with kotlinx.serialization.
useSerialization {
json()
}
// Only the JVM target is configured here.
jvm()
jvmMain {
// Logging facade exposed as part of the module API.
api(spclibs.slf4j)
// commons-exec: the installer imports org.apache.commons.exec.OS for OS/architecture detection.
implementation("org.apache.commons:commons-exec:1.3")
// commons-compress: the installer imports its tar and gzip streams to unpack downloaded archives.
// NOTE(review): 1.2 is a very old release — confirm whether a newer version can be used.
implementation("org.apache.commons:commons-compress:1.2")
}
jvmTest{
// Concrete SLF4J backend used only when running tests.
implementation(spclibs.logback.classic)
}
}

View File

@ -0,0 +1,69 @@
package space.kscience.snark.pandoc
import org.slf4j.Logger
import org.slf4j.LoggerFactory
import java.io.BufferedReader
import java.io.IOException
import java.io.InputStreamReader
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.StandardOpenOption
import java.util.concurrent.TimeUnit
import kotlin.io.path.Path
/**
 * Entry point for running the `pandoc` command line tool.
 *
 * The executable is taken from the system `PATH` when available; otherwise the path passed to
 * [execute] is used, and if nothing exists there pandoc is installed via [PandocInstaller].
 */
public object Pandoc {
    private val logger: Logger = LoggerFactory.getLogger(Pandoc::class.java)

    /** Maximum time, in seconds, a pandoc process is given to finish before it is terminated. */
    private const val EXECUTION_TIMEOUT_SECONDS: Long = 1

    /**
     * Resolve the command used to start pandoc.
     *
     * @param pandocExecutablePath fallback location used when `pandoc` cannot be started from `PATH`
     * @return `"pandoc"` when the system executable can be started, otherwise an absolute path
     */
    private fun getOrInstallPandoc(pandocExecutablePath: Path): String = try {
        // The output of the probe is irrelevant; discard it so the child can never block on a full pipe.
        ProcessBuilder("pandoc", "--version")
            .redirectErrorStream(true)
            .redirectOutput(ProcessBuilder.Redirect.DISCARD)
            .start()
            .waitFor()
        "pandoc"
    } catch (ex: IOException) {
        // NOTE(review): PandocInstaller.installPandoc treats this path as the installation *directory* and
        // returns the executable inside it, while the existence check below returns the path itself.
        // After a first install the directory exists and would be used as the executable — confirm intent.
        if (Files.exists(pandocExecutablePath)) {
            pandocExecutablePath.toAbsolutePath().toString()
        } else {
            logger.info("Pandoc not found in the system. Installing it from GitHub")
            PandocInstaller.installPandoc(pandocExecutablePath).toAbsolutePath().toString()
        }
    }

    /**
     * Call pandoc with options described by [commandBuilder].
     *
     * The process is given [EXECUTION_TIMEOUT_SECONDS] to finish; if it does not, it is terminated
     * and the call is reported as failed.
     *
     * @param redirectOutput file that receives the standard output of pandoc, or `null` to leave it unredirected
     * @param redirectError file that receives the error output of pandoc, or `null` to leave it unredirected
     * @param pandocExecutablePath location used (or installed to) when pandoc is not available on `PATH`
     * @param commandBuilder configuration of the pandoc command line
     * @return `true` if pandoc finished with exit code 0, `false` otherwise
     */
    public fun execute(
        redirectOutput: Path? = null,
        redirectError: Path? = null,
        pandocExecutablePath: Path = Path("./pandoc").toAbsolutePath(),
        commandBuilder: PandocCommandBuilder.() -> Unit,
    ): Boolean {
        val path = getOrInstallPandoc(pandocExecutablePath)
        var process: Process? = null
        return try {
            val commandLine = PandocCommandBuilder().apply(commandBuilder).build(path)
            logger.info("Running pandoc: ${commandLine.joinToString(separator = " ")}")
            val pandoc = ProcessBuilder(commandLine).apply {
                if (redirectOutput != null) {
                    this.redirectOutput(redirectOutput.toFile())
                }
                if (redirectError != null) {
                    this.redirectError(redirectError.toFile())
                }
            }.start()
            process = pandoc

            // waitFor returns false on timeout; calling exitValue() on a live process would throw
            // IllegalThreadStateException and leave the process running.
            if (!pandoc.waitFor(EXECUTION_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
                pandoc.destroyForcibly()
                logger.error("Pandoc did not finish within $EXECUTION_TIMEOUT_SECONDS second(s) and was terminated")
                return false
            }

            val exitCode = pandoc.exitValue()
            if (exitCode == 0) {
                logger.info("Successfully execute")
                true
            } else {
                logger.error("Pandoc finished with exit code $exitCode")
                false
            }
        } catch (e: InterruptedException) {
            // Do not leave an orphaned process behind and keep the interruption visible to the caller.
            process?.destroyForcibly()
            Thread.currentThread().interrupt()
            logger.error("Got problems with executing: " + e.message, e)
            false
        } catch (e: Exception) {
            logger.error("Got problems with executing: " + e.message, e)
            false
        }
    }
}

View File

@ -0,0 +1,264 @@
package space.kscience.snark.pandoc
import kotlinx.serialization.json.Json
import org.apache.commons.compress.archivers.ArchiveEntry
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.exec.OS
import org.slf4j.Logger
import org.slf4j.LoggerFactory
import java.io.BufferedInputStream
import java.io.FileNotFoundException
import java.io.IOException
import java.net.*
import java.net.http.HttpClient
import java.net.http.HttpRequest
import java.net.http.HttpResponse
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.attribute.PosixFilePermission
import java.time.Duration
import java.util.*
import java.util.zip.ZipInputStream
import kotlin.io.path.Path
import kotlin.io.path.inputStream
/**
 * Downloads the latest pandoc release from GitHub and unpacks it into a target directory.
 *
 * Asset selection and the location of the executable inside the unpacked archive are driven by
 * the `/installer.properties` classpath resource.
 */
internal object PandocInstaller {
    private val log: Logger = LoggerFactory.getLogger(PandocInstaller::class.java)

    /** Connect and read timeout, in seconds, of a single download attempt. */
    private const val TIMEOUT_SECONDS = 2

    /** Number of download attempts before giving up. */
    private const val ATTEMPTS = 3

    /**
     * Supported platforms.
     *
     * @property assetSuffix fragment of the GitHub release asset name identifying the platform
     * @property propertySuffix suffix of the `path.to.pandoc.*` key in `installer.properties`
     */
    private enum class OSType(public val assetSuffix: String, public val propertySuffix: String) {
        WINDOWS("windows-x86_64.zip", "windows"),
        MAC_OS_AMD("x86_64-macOS.zip", "mac.os.amd"),
        MAC_OS_ARM("arm64-macOS.zip", "mac.os.arm"),
        LINUX_ARM("linux-arm64", "linux.arm"),
        LINUX_AMD("linux-amd64", "linux.amd")
    }

    private val properties = Properties().apply {
        val resource = checkNotNull(PandocInstaller.javaClass.getResourceAsStream("/installer.properties")) {
            "Resource /installer.properties is not found on the classpath"
        }
        // Close the resource stream once the properties are loaded.
        resource.use { load(it) }
    }

    /**
     * Install last released pandoc from github
     *
     * @param targetPath directory the release archive is unpacked into
     * @return path to executable pandoc
     * @throws IOException in case incorrect github url or path of installation directory
     * @throws IllegalStateException if the OS is not supported or the download/unpacking failed
     */
    public fun installPandoc(targetPath: Path): Path {
        log.info("Start install")
        val os = when {
            OS.isFamilyMac() -> if (OS.isArch("aarch64")) OSType.MAC_OS_ARM else OSType.MAC_OS_AMD
            OS.isFamilyUnix() -> if (OS.isArch("aarch64")) OSType.LINUX_ARM else OSType.LINUX_AMD
            OS.isFamilyWindows() -> OSType.WINDOWS
            else -> error("Got unexpected os, could not install pandoc")
        }
        return installPandoc(os, targetPath)
    }

    /**
     * Download the release asset for [os], unpack it into [targetPath] and make pandoc executable.
     *
     * @return path to the pandoc executable inside [targetPath]
     */
    private fun installPandoc(os: OSType, targetPath: Path): Path {
        val githubResponse = getGithubUrls()
        val asset = githubResponse.getAssetByOsSuffix(os.assetSuffix)
        val pandocUrl: URL = URI.create(asset.browserDownloadUrl).toURL()

        log.info("Start installing pandoc os: {}, url: {}, target directory: {}", os, pandocUrl, targetPath)

        val archivePath = downloadWithRetry(pandocUrl) ?: error("Could not save file from github")
        val unpacked: Path? = try {
            unPack(archivePath, targetPath, os)
        } finally {
            // The archive is a temporary file and is useless once unpacking was attempted.
            deleteQuietly(archivePath)
        }
        val installPath = unpacked ?: error("Could not unzip file")

        val propertyKey = "path.to.pandoc." + os.propertySuffix
        val relativeExecutablePath = checkNotNull(properties.getProperty(propertyKey)) {
            "Property $propertyKey is not defined in installer.properties"
        }
            .replace("{version}", githubResponse.tagName)
            // The configured values start with '/'. Path.resolve returns an absolute argument unchanged,
            // which would point outside of the installation directory, so make the value relative.
            .trimStart('/')

        val pandocExecutablePath = installPath.resolve(relativeExecutablePath)
        makeExecutable(pandocExecutablePath)
        return pandocExecutablePath
    }

    /**
     * Add execute permissions to [executable] while keeping its existing permissions.
     * Does nothing on file systems without POSIX permission support.
     */
    private fun makeExecutable(executable: Path) {
        if ("posix" !in executable.fileSystem.supportedFileAttributeViews()) return
        val permissions = Files.getPosixFilePermissions(executable) + setOf(
            PosixFilePermission.OWNER_EXECUTE,
            PosixFilePermission.GROUP_EXECUTE,
            PosixFilePermission.OTHERS_EXECUTE,
        )
        Files.setPosixFilePermissions(executable, permissions)
    }

    /** Delete [path] if it exists, logging instead of failing when the deletion is not possible. */
    private fun deleteQuietly(path: Path) {
        try {
            Files.deleteIfExists(path)
        } catch (e: IOException) {
            log.warn("Could not delete temporary file {}: {}", path, e.message)
        }
    }

    /**
     * Downloads from a (http/https) URL and saves to a file.
     *
     * @param target File to write. Parent directory will be created if necessary
     * @param url http/https url to connect
     * @param secsConnectTimeout Seconds to wait for connection establishment
     * @param secsReadTimeout Read timeout in seconds - transmission will abort if it freezes more than this
     * @return [target] if the file was saved successfully and `null` if:
     * connection interrupted, timeout (but something was read)
     * server error (500...)
     * could not connect: connection timeout java.net.SocketTimeoutException
     * could not connect: java.net.ConnectException
     * could not resolve host (bad host, or no internet - no dns)
     * @throws IOException Only if URL is malformed or if could not create the file
     * @throws FileNotFoundException if did not find file for save
     */
    @Throws(IOException::class)
    private fun downloadUrl(
        target: Path,
        url: URL,
        secsConnectTimeout: Int,
        secsReadTimeout: Int,
    ): Path? {
        // Make sure the parent directory exists; this can throw.
        target.parent?.let { Files.createDirectories(it) }

        val conn = url.openConnection() // can throw exception if bad url
        if (secsConnectTimeout > 0) {
            conn.connectTimeout = secsConnectTimeout * 1000
        }
        if (secsReadTimeout > 0) {
            conn.readTimeout = secsReadTimeout * 1000
        }

        var somethingRead = false
        try {
            BufferedInputStream(conn.getInputStream()).use { input ->
                Files.newOutputStream(target).use { output ->
                    val buffer = ByteArray(8192)
                    while (true) {
                        val count = input.read(buffer)
                        if (count <= 0) break
                        somethingRead = true
                        output.write(buffer, 0, count)
                    }
                }
            }
            return target
        } catch (e: IOException) {
            if (e is FileNotFoundException) {
                throw FileNotFoundException("Did not found file for install").apply { initCause(e) }
            }
            // 999 marks "no HTTP status available" (non-HTTP connection or the status could not be read).
            val httpCode = try {
                (conn as? HttpURLConnection)?.responseCode ?: 999
            } catch (ignored: Exception) {
                999
            }
            when {
                somethingRead && e is SocketTimeoutException ->
                    log.error("Read something, but connection interrupted: {}", e.message, e)

                httpCode in 400..599 -> log.error("Got server error, httpcode: {}", httpCode)
                e is SocketTimeoutException -> log.error("Connection timeout: {}", e.message, e)
                e is ConnectException -> log.error("Could not connect: {}", e.message, e)
                e is UnknownHostException -> log.error("Could not resolve host: {}", e.message, e)
                else -> throw e
            }
            return null
        }
    }

    /**
     * Download [url] into a temporary file, retrying up to [ATTEMPTS] times on recoverable failures.
     *
     * @return path of the downloaded file or `null` if every attempt failed
     */
    private fun downloadWithRetry(url: URL): Path? {
        val targetPath = Files.createTempFile("pandoc", ".tmp")
        log.info("Downloading pandoc to $targetPath")
        repeat(ATTEMPTS) { attempt ->
            // Only a successful download ends the loop; a null result triggers the next attempt.
            downloadUrl(targetPath, url, TIMEOUT_SECONDS, TIMEOUT_SECONDS)?.let { return it }
            log.warn("Download attempt {} of {} failed", attempt + 1, ATTEMPTS)
        }
        deleteQuietly(targetPath)
        return null
    }

    /**
     * Unpack the archive at [sourcePath] into [targetPath] using the format matching [os].
     *
     * @return [targetPath] on success or `null` if unpacking failed with an [IOException]
     */
    private fun unPack(sourcePath: Path, targetPath: Path, os: OSType): Path? {
        try {
            when (os) {
                OSType.LINUX_AMD, OSType.LINUX_ARM -> unTarGz(sourcePath, targetPath)
                else -> unZip(sourcePath, targetPath)
            }
        } catch (e: IOException) {
            log.error("Could not perform unpacking: {}", e.message, e)
            return null
        }
        return targetPath
    }

    /**
     * Write a single archive entry below [targetDir].
     *
     * @param input stream positioned at the entry content; it is not closed
     * @throws IOException if the entry name points outside of [targetDir] or writing fails
     */
    private fun extractEntry(input: java.io.InputStream, targetDir: Path, entryName: String, isDirectory: Boolean) {
        val root = targetDir.toAbsolutePath().normalize()
        val destination = root.resolve(entryName).normalize()
        // The archive comes from the network: refuse entries such as "../x" that escape the target directory.
        if (!destination.startsWith(root)) {
            throw IOException("Archive entry is outside of the target directory: $entryName")
        }
        if (isDirectory) {
            Files.createDirectories(destination)
        } else {
            destination.parent?.let { Files.createDirectories(it) }
            Files.copy(input, destination, java.nio.file.StandardCopyOption.REPLACE_EXISTING)
        }
    }

    /** Unpack a `.tar.gz` archive at [sourcePath] into [targetDir]. */
    private fun unTarGz(sourcePath: Path, targetDir: Path) {
        TarArchiveInputStream(
            GzipCompressorInputStream(
                BufferedInputStream(Files.newInputStream(sourcePath))
            )
        ).use { tarIn ->
            while (true) {
                // nextEntry is null at the end of the archive.
                val entry: ArchiveEntry = tarIn.nextEntry ?: break
                extractEntry(tarIn, targetDir, entry.name, entry.isDirectory)
            }
        }
    }

    /** Unpack a `.zip` archive at [sourcePath] into [targetDir]. */
    private fun unZip(sourcePath: Path, targetDir: Path) {
        ZipInputStream(sourcePath.inputStream()).use { zis ->
            while (true) {
                val entry = zis.nextEntry ?: break
                extractEntry(zis, targetDir, entry.name, entry.isDirectory)
                zis.closeEntry()
            }
        }
    }

    /**
     * Request the description of the latest pandoc release from the URL configured as `github.url`.
     *
     * @throws IllegalStateException if the URL is not configured or the server answers with a non-2xx status
     */
    private fun getGithubUrls(): ResponseDto {
        val githubUrl = checkNotNull(properties.getProperty("github.url")) {
            "Property github.url is not defined in installer.properties"
        }
        val request = HttpRequest
            .newBuilder()
            .uri(URI.create(githubUrl))
            .version(HttpClient.Version.HTTP_2)
            .timeout(Duration.ofMinutes(1))
            .header("Accept", "application/vnd.github+json")
            .GET()
            .build()

        val response = HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString())
        log.info("Got response from github, status: {}", response.statusCode())
        // An error body is not a release description; fail with the status instead of a decoding error.
        check(response.statusCode() in 200..299) {
            "Unexpected response status from github: ${response.statusCode()}"
        }

        return Json { ignoreUnknownKeys = true }.decodeFromString(ResponseDto.serializer(), response.body())
    }
}

View File

@ -0,0 +1,34 @@
package space.kscience.snark.pandoc
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
/**
 * Response from github/releases/latest
 *
 * @property assets downloadable files attached to the release
 * @property tagName value of the `tag_name` field of the response
 */
@Serializable
internal class ResponseDto(
    val assets: Array<AssetDto>,
    @SerialName("tag_name") val tagName: String,
) {
    /**
     * Find the first asset whose name contains [osSuffix].
     *
     * @param osSuffix fragment of the asset file name that identifies the target platform; must not be null
     * @return asset appropriate to os
     * @throws IllegalArgumentException if [osSuffix] is null or no asset name contains it
     */
    fun getAssetByOsSuffix(osSuffix: String?): AssetDto {
        val suffix = requireNotNull(osSuffix) { "osSuffix must not be null" }
        return assets.firstOrNull { suffix in it.name }
            ?: throw IllegalArgumentException("Unexpected osSuffix: no release asset name contains '$suffix'")
    }

    /**
     * Single downloadable file of a release.
     *
     * @property browserDownloadUrl value of the `browser_download_url` field
     * @property name file name of the asset
     */
    @Serializable
    public class AssetDto(
        @SerialName("browser_download_url") val browserDownloadUrl: String,
        val name: String
    )
}

View File

@ -0,0 +1,8 @@
path.to.pandoc.mac.os.arm=/pandoc-{version}-arm64/bin/pandoc
path.to.pandoc.mac.os.amd=/pandoc-{version}-x86_64/bin/pandoc
path.to.pandoc.windows=/pandoc-{version}/pandoc.exe
path.to.pandoc.linux.amd=/pandoc-{version}/bin/pandoc
path.to.pandoc.linux.arm=/pandoc-{version}/bin/pandoc
github.url=https://api.github.com/repos/jgm/pandoc/releases/latest

View File

@ -0,0 +1,111 @@
import org.junit.jupiter.api.Assertions.*
import org.junit.jupiter.api.Test
import space.kscience.snark.pandoc.Pandoc
import java.io.BufferedReader
import java.io.FileReader
import java.io.IOException
import java.nio.file.Files
import java.nio.file.Path
import java.util.stream.Collectors
import kotlin.io.path.Path
import kotlin.io.path.createDirectories
import kotlin.io.path.div
/**
 * Tests of [Pandoc.execute] that run the real pandoc executable against files in [TESTING_DIRECTORY].
 */
class PandocTest {

    @Test
    fun when_gotPandocAndCorrectArgs_doConverting() {
        try {
            val res = Pandoc.execute {
                addInputFile(CORRECT_MD)
                outputFile(TEX_PATH_TO)
            }
            assertTrue(res)
            assertTrue(Files.exists(TEX_PATH_TO))

            val fileString = readJoinedLines(TEX_PATH_TO)

            assertTrue(fileString.contains("Some simple text"))
            assertTrue(fileString.contains("\\subsection{Copy elision}"))
            assertTrue(fileString.contains("return"))
        } finally {
            // Clean up even when an assertion fails so that later runs start from a clean state.
            Files.deleteIfExists(TEX_PATH_TO)
        }
    }

    @Test
    fun when_gotPandocAndNotExistsFromFile_then_error() {
        val notExistsFile = Path.of("./src/test/testing_directory/non_exists_test.md")
        assertFalse(Files.exists(notExistsFile))
        val res = Pandoc.execute {
            addInputFile(notExistsFile)
            outputFile(TEX_PATH_TO)
        }
        assertFalse(res)
    }

    @Test
    fun when_gotPandocAndPassDirectory_then_error() {
        assertTrue(Files.isDirectory(TESTING_DIRECTORY))
        val res = Pandoc.execute {
            addInputFile(TESTING_DIRECTORY)
            outputFile(TEX_PATH_TO)
        }
        assertFalse(res)
    }

    @Test
    fun when_askVersionToFile_then_Ok() {
        val outputFile = TESTING_DIRECTORY / "output.txt"
        try {
            val res = Pandoc.execute(redirectOutput = outputFile) {
                getVersion()
            }

            val fileString = readJoinedLines(outputFile)
            assertTrue(fileString.contains("pandoc"))
            assertTrue(fileString.contains("This is free software"))
            assertTrue(res)
        } finally {
            Files.deleteIfExists(outputFile)
        }
    }

    @Test
    fun when_error_then_writeToErrorStream() {
        val outputFile = Files.createTempFile(TESTING_DIRECTORY, "output", ".txt")
        val errorFile = Files.createTempFile(TESTING_DIRECTORY, "error", ".txt")
        try {
            val res = Pandoc.execute(outputFile, errorFile) {
                addInputFile(Path.of("./simple.txt"))
                outputFile(TEX_PATH_TO)
                formatFrom("txt")
            }

            val fileString = readJoinedLines(errorFile)
            assertFalse(res)
            assertTrue(fileString.contains("21"))
        } finally {
            Files.deleteIfExists(outputFile)
            Files.deleteIfExists(errorFile)
        }
    }

//    @Test
//    fun when_installPandoc_thenFindIt() {
//        PandocInstaller.clearInstallingDirectory()
//        assertTrue(Pandoc.installPandoc())
//        assertTrue(Pandoc.isPandocInstalled())
//    }

    companion object {
        private val TESTING_DIRECTORY: Path = Path("./testing_directory").apply {
            createDirectories()
        }
        private val CORRECT_MD: Path = TESTING_DIRECTORY.resolve("first_test.md")
        private val TEX_PATH_TO: Path = TESTING_DIRECTORY.resolve("output1.tex")

        /** Read [path] and concatenate its lines without separators; the file is closed afterwards. */
        private fun readJoinedLines(path: Path): String =
            path.toFile().useLines { lines -> lines.joinToString(separator = "") }
    }
}

View File

@ -0,0 +1,15 @@
## Copy elision
### RVO/NRVO
Some simple text
```c++
A f() {
return {5};
}
A g() {
A a(5);
return a;
}
```

View File

@ -0,0 +1 @@
hello