Move tables-kt to DataForge repo

This commit is contained in:
Alexander Nozik 2025-03-16 17:54:42 +03:00
parent 849255f326
commit 5b1955dbf9
46 changed files with 2140 additions and 2 deletions
CHANGELOG.mdbuild.gradle.kts
gradle
settings.gradle.kts
tables-kt
README.mdbuild.gradle.kts
docs
src
tables-kt-csv
README.mdbuild.gradle.kts
src
commonMain/kotlin/space/kscience/tables/csv
jvmMain/kotlin/space/kscience/tables/csv
jvmTest/kotlin
tables-kt-dataframe
README.mdbuild.gradle.kts
src
main/kotlin/space/kscience/dataforge/dataframe
test/kotlin/space/kscience/dataforge/dataframe
tables-kt-exposed
README.mdbuild.gradle.kts
src
main/kotlin/space/kscience/dataforge/exposed
test/kotlin/space/kscience/dataforge/exposed
tables-kt-jupyter

@ -3,6 +3,7 @@
## Unreleased
### Added
- Move Tables-kt to DataForge repository and make it follow DataForge versioning
### Changed

@ -9,7 +9,7 @@ plugins {
allprojects {
group = "space.kscience"
version = "0.10.1"
version = "0.10.2-dev"
}
subprojects {

13
gradle/libs.versions.toml Normal file

@ -0,0 +1,13 @@
[versions]
dataframe = "0.15.0"
exposed = "0.60.0"
[libraries]
attributes = "space.kscience:attributes-kt:0.3.0"
kotlinx-dataframe = { module = "org.jetbrains.kotlinx:dataframe", version.ref = "dataframe" }
csv = "com.jsoizo:kotlin-csv:1.10.0"
exposed-core = { module = "org.jetbrains.exposed:exposed-core", version.ref = "exposed" }
exposed-jdbc = { module = "org.jetbrains.exposed:exposed-jdbc", version.ref = "exposed" }
[plugins]

@ -47,5 +47,9 @@ include(
":dataforge-context",
":dataforge-data",
":dataforge-workspace",
":dataforge-scripting"
":dataforge-scripting",
":tables-kt:tables-kt-exposed",
":tables-kt:tables-kt-dataframe",
":tables-kt:tables-kt-jupyter",
":tables-kt:tables-kt-csv"
)

52
tables-kt/README.md Normal file

@ -0,0 +1,52 @@
[![JetBrains Research](https://jb.gg/badges/research.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub)
# Tables.kt
Tables.kt is a lightweight Kotlin-Multiplatform library to work with tables of any origin. It is **not** intended as an alternative to the [DataFrame](https://github.com/Kotlin/dataframe) library. On the contrary, you can use it together with the [provided module](tables-kt-dataframe). The aim of this library is to provide an API for various tables and row lists.
Another aim is to provide integration between different types of tables. For example load database table with Exposed and convert it to a DataFrame.
The performance could vary depending on the type of the table. For example, row-based access to a column-based table could be slow and vice versa. Though in principle, the implementations could be tweaked to be very fast.
The library is intended as multiplatform. It supports JVM, JS-IR and Native targets.
## Installation
## Artifact:
The Maven coordinates of this project are `space.kscience:tables-kt:0.4.1`.
**Gradle Kotlin DSL:**
```kotlin
repositories {
maven("https://repo.kotlin.link")
mavenCentral()
}
dependencies {
implementation("space.kscience:tables-kt:0.4.1")
}
```
## Features
## Modules
### [tables-kt-csv](tables-kt-csv)
>
> **Maturity**: EXPERIMENTAL
### [tables-kt-dataframe](tables-kt-dataframe)
>
> **Maturity**: PROTOTYPE
### [tables-kt-exposed](tables-kt-exposed)
>
> **Maturity**: EXPERIMENTAL
### [tables-kt-jupyter](tables-kt-jupyter)
>
> **Maturity**: EXPERIMENTAL

@ -0,0 +1,26 @@
// Build script of the core tables-kt module: a multiplatform library published with `maven-publish`.
plugins {
id("space.kscience.gradle.mpp")
`maven-publish`
}
description = "A lightweight multiplatform library for tables"
// NOTE(review): this sets group and version for this project and all of its subprojects.
// Confirm that a module-local "0.4.1" is intended, since the root build script declares its own version.
allprojects {
group = "space.kscience"
version = "0.4.1"
}
kscience{
// Enabled targets
jvm()
js()
native()
wasm()
useContextReceivers()
dependencies {
// dataforge-io is exposed as an API dependency of this module
api(projects.dataforgeIo)
}
}
readme {
maturity = space.kscience.gradle.Maturity.EXPERIMENTAL
}

@ -0,0 +1,23 @@
[![JetBrains Research](https://jb.gg/badges/research.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub)
# Tables.kt
Tables.kt is a lightweight Kotlin-Multiplatform library to work with tables of any origin. It is **not** intended as an alternative to the [DataFrame](https://github.com/Kotlin/dataframe) library. On the contrary, you can use it together with the [provided module](tables-kt-dataframe). The aim of this library is to provide an API for various tables and row lists.
Another aim is to provide integration between different types of tables. For example load database table with Exposed and convert it to a DataFrame.
The performance could vary depending on the type of the table. For example, row-based access to a column-based table could be slow and vice versa. Though in principle, the implementations could be tweaked to be very fast.
The library is intended as multiplatform. It supports JVM, JS-IR and Native targets.
## Installation
${artifact}
## Features
${features}
## Modules
${modules}

@ -0,0 +1,73 @@
package space.kscience.tables
import space.kscience.dataforge.meta.*
import kotlin.properties.ReadOnlyProperty
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
 * An ordered list of column headers whose value type is bounded by [C].
 */
public typealias TableHeader<C> = List<ColumnHeader<C>>
/**
 * A list of column headers for columns holding [Value] cells.
 */
public typealias ValueTableHeader = List<ColumnHeader<Value>>
/**
 * Describes a table column: its [name], the [type] of its values and additional [meta].
 */
public interface ColumnHeader<out T> {
    public val name: String
    public val type: KType

    /**
     * Metadata attached to the column. Its common structure is described by [ColumnScheme].
     */
    public val meta: Meta

    public companion object {
        /**
         * Property delegate producing a typed column header named after the delegated property.
         * The header is built anew on each property read.
         *
         * @param builder configuration applied to the header's [ColumnScheme]
         */
        public inline fun <reified T> typed(
            crossinline builder: ColumnScheme.() -> Unit = {},
        ): ReadOnlyProperty<Any?, ColumnHeader<T>> = ReadOnlyProperty { _, property ->
            val headerName = property.name
            ColumnHeader<T>(headerName, builder)
        }

        /**
         * Property delegate producing a [Value] column header named after the delegated property.
         *
         * @param valueType the value type written to the header metadata
         * @param builder additional configuration applied to the header's [ValueColumnScheme]
         */
        public fun value(
            valueType: ValueType = ValueType.STRING,
            builder: ValueColumnScheme.() -> Unit = {},
        ): ReadOnlyProperty<Any?, ColumnHeader<Value>> = ReadOnlyProperty { _, property ->
            val headerName = property.name
            ColumnHeader(headerName, valueType, builder)
        }
    }
}
/**
 * Create a column header with the given [name] and the reified type [T].
 *
 * @param builder configuration of the [ColumnScheme] whose meta becomes the header meta
 */
public inline fun <reified T> ColumnHeader(
    name: String,
    builder: ColumnScheme.() -> Unit = {},
): ColumnHeader<T> {
    val columnMeta = ColumnScheme(builder).meta
    return SimpleColumnHeader<T>(name, typeOf<T>(), columnMeta)
}
/**
 * Create a [Value]-typed column header.
 *
 * [valueType] is written to the scheme first, then [builder] is applied, so the builder may override it.
 */
public fun ColumnHeader(
    name: String,
    valueType: ValueType,
    builder: ValueColumnScheme.() -> Unit = {},
): ColumnHeader<Value> {
    val scheme = ValueColumnScheme {
        this.valueType = valueType
        builder()
    }
    return SimpleColumnHeader<Value>(name, typeOf<Value>(), scheme.meta)
}
/**
 * A plain data holder implementing [ColumnHeader].
 *
 * @param name the column name
 * @param type the type of column values
 * @param meta the column metadata
 */
public data class SimpleColumnHeader<T>(
override val name: String,
override val type: KType,
override val meta: Meta,
) : ColumnHeader<T>
/**
 * The [ValueType] stored in the header meta under the name of [ValueColumnScheme.valueType],
 * or null if it could not be read from the meta.
 */
public val ColumnHeader<Value>.valueType: ValueType?
    get() {
        val key = ValueColumnScheme::valueType.name
        return meta[key].enum<ValueType>()
    }

@ -0,0 +1,19 @@
package space.kscience.tables
import space.kscience.dataforge.meta.*
/**
 * A [Scheme] describing column metadata.
 */
public open class ColumnScheme : Scheme() {
// Optional title of the column
public var title: String? by string()
public companion object : SchemeSpec<ColumnScheme>(::ColumnScheme)
}
/**
 * The header [ColumnHeader.meta] read as a [ColumnScheme].
 */
public val ColumnHeader<*>.properties: ColumnScheme
    get() {
        return ColumnScheme.read(meta)
    }
/**
 * A [ColumnScheme] for columns holding [Value] cells.
 */
public class ValueColumnScheme : ColumnScheme() {
// Type of the values in the column; the delegate is declared with ValueType.STRING as its default
public var valueType: ValueType by enum(ValueType.STRING)
public companion object : SchemeSpec<ValueColumnScheme>(::ValueColumnScheme)
}
/**
 * The header [ColumnHeader.meta] read as a [ValueColumnScheme].
 */
public val ColumnHeader<Value>.properties: ValueColumnScheme
    get() {
        return ValueColumnScheme.read(meta)
    }

@ -0,0 +1,47 @@
package space.kscience.tables
/**
 * A table based on column-wise data representation.
 *
 * All [columns] must have the same size; this is verified on construction.
 * A table without columns has no rows.
 *
 * @param T boundary type for all columns in the table
 */
public open class ColumnTable<out T>(final override val columns: Collection<Column<T>>) : Table<T> {
    // The size of the first column or zero for a table without columns (`first()` would throw in that case)
    private val rowsNum get() = columns.firstOrNull()?.size ?: 0

    init {
        require(columns.all { it.size == rowsNum }) { "All columns must be of the same size" }
    }

    /**
     * Virtual rows backed by this table. The list is rebuilt on each access.
     */
    override val rows: List<Row<T>>
        get() = (0 until rowsNum).map { VirtualRow(this, it) }

    /**
     * Get the value from the column named [column] at index [row],
     * or null if there is no such column or the column has no value at that index.
     */
    override fun getOrNull(row: Int, column: String): T? =
        columns.find { it.name == column }?.getOrNull(row)
}
/**
 * Create a [ColumnTable] from the given [columns].
 */
public fun <T> ColumnTable(vararg columns: Column<T>): ColumnTable<T> {
    val columnList = columns.toList()
    return ColumnTable(columnList)
}
/**
 * Build a [ColumnTable] with [size] rows by applying [builder] to a new [ColumnTableBuilder].
 */
public inline fun <T> ColumnTable(size: Int, builder: ColumnTableBuilder<T>.() -> Unit): ColumnTable<T> {
    val table = ColumnTableBuilder<T>(size)
    builder(table)
    return table
}
/**
 * The size of the first column of the table or zero if the table has no columns.
 */
public val <T> Table<T>.rowsSize: Int
    get() {
        val firstColumn = columns.firstOrNull() ?: return 0
        return firstColumn.size
    }
/**
 * Find a column by the name of [header] and return it typed as a column of [R].
 *
 * Fails if no column has that name or if the type of the found column differs from the header type.
 */
public operator fun <T, R : T> Collection<Column<T>>.get(header: ColumnHeader<R>): Column<R> {
    val found = firstOrNull { it.name == header.name }
        ?: error("Column with name ${header.name} not present")
    require(header.type == found.type) {
        "Column type mismatch. Expected ${header.type}, but found ${found.type}"
    }
    @Suppress("UNCHECKED_CAST")
    return found as Column<R>
}
/**
 * A row that holds no data of its own and reads values from [table] at the row [index].
 */
internal class VirtualRow<T>(val table: Table<T>, val index: Int) : Row<T> {
    override fun getOrNull(column: String): T? {
        return table.getOrNull(index, column)
    }
}
/**
 * Return this table if it is already a [ColumnTable]; otherwise create a [ColumnTable] from its columns,
 * converting every column that is not a [ListColumn] into one.
 * This method is used only for performance.
 *
 * The resulting table does not in general follow changes of the initial table.
 */
public fun <T> Table<T>.toColumnTable(): ColumnTable<T> {
    val existing = this as? ColumnTable<T>
    if (existing != null) return existing
    return ColumnTable(
        columns.map { column -> column as? ListColumn ?: ListColumn(column, column.listValues()) }
    )
}

@ -0,0 +1,111 @@
package space.kscience.tables
/**
 * A table with columns that could be reordered. Column content could not be changed after creation.
 *
 * @param rowsSize the number of rows that every added column must have
 */
public class ColumnTableBuilder<T>(
    public val rowsSize: Int,
    private val _columns: MutableList<Column<T>> = ArrayList(),
) : ColumnTable<T>(_columns) {

    override val rows: List<Row<T>>
        get() = (0 until rowsSize).map {
            VirtualRow(this, it)
        }

    override fun getOrNull(row: Int, column: String): T? = columns.getOrNull(column)?.getOrNull(row)

    /**
     * Add or insert at [index] a fixed column.
     *
     * Fails if the column size differs from [rowsSize] or a column with the same name is already present.
     */
    public fun addColumn(column: Column<T>, index: Int? = null) {
        require(column.size == this.rowsSize) { "Required column size $rowsSize, but found ${column.size}" }
        require(_columns.find { it.name == column.name } == null) { "Column with name ${column.name} already exists" }
        if (index == null) {
            _columns.add(column)
        } else {
            _columns.add(index, column)
        }
    }

    /**
     * Remove all columns with given [name]. Does nothing if there is no such column.
     */
    public fun removeColumn(name: String) {
        _columns.removeAll { it.name == name }
    }

    /**
     * Get or set values for given column. The size of column must be the same as table [rowsSize].
     *
     * On set, an existing column with the same name is replaced and the new column is added to the end.
     */
    public var <R : T> ColumnHeader<R>.values: Collection<R?>
        get() = columns[this].listValues()
        set(value) {
            val newColumn = ListColumn(this, value.toList())
            // Validate before removing the old column so that a size mismatch does not drop existing data
            require(newColumn.size == this@ColumnTableBuilder.rowsSize) {
                "Required column size ${this@ColumnTableBuilder.rowsSize}, but found ${newColumn.size}"
            }
            removeColumn(name)
            addColumn(newColumn)
        }
}
/**
 * Set or replace the column described by [header] with a column computed from table rows by [expression].
 *
 * The column is created without caching. Any existing column with the same name is removed first.
 *
 * @param index position of the new column; the column is appended when null
 */
public fun <T, R : T> ColumnTableBuilder<T>.transform(
    header: ColumnHeader<R>,
    index: Int? = null,
    expression: (Row<T>) -> R,
) {
    val computed = rowsToColumn(header, cache = false, mapper = expression)
    removeColumn(header.name)
    addColumn(computed, index)
}
/**
 * Set or replace a column with given [name] using [expression]. The header type is the reified [R].
 */
public inline fun <T, reified R : T> ColumnTableBuilder<T>.transform(
    name: String,
    index: Int? = null,
    noinline expression: (Row<T>) -> R,
) {
    val header = ColumnHeader<R>(name)
    transform(header, index, expression)
}
/**
 * Adds or replaces a column in the ColumnTableBuilder with the given header and data.
 *
 * The data size is checked before the existing column is removed, so a failed call leaves the table unchanged.
 *
 * @param header the header of the column to be added or replaced
 * @param data the data for the column; its size must be equal to the table rows size
 */
public fun <T, R : T> ColumnTableBuilder<T>.column(header: ColumnHeader<R>, data: Iterable<R>) {
    val column = ListColumn(header.name, data.toList(), header.type, header.meta)
    // Validate first: removing the old column and then failing in addColumn would lose the old data
    require(column.size == rowsSize) { "Required column size $rowsSize, but found ${column.size}" }
    removeColumn(header.name)
    addColumn(column)
}
/**
 * Create a column for [header] with values produced by [dataBuilder] for each row index and add it to the table.
 * An existing column with the same name is not replaced: adding fails in that case.
 *
 * @param header The header of the column to be added.
 * @param index The index at which the column should be inserted. If null, the column is added to the end of the table.
 * @param dataBuilder A function that takes a row index and returns the value for that row.
 * @return The newly added column.
 */
public fun <T, R : T> ColumnTableBuilder<T>.fill(
    header: ColumnHeader<R>,
    index: Int? = null,
    dataBuilder: (Int) -> R?,
): Column<R> {
    //TODO use specialized columns if possible
    val filled = ListColumn(header, rowsSize, dataBuilder)
    addColumn(filled, index)
    return filled
}
/**
 * Create a new [ColumnTableBuilder] holding a copy of the column list of this table.
 * The columns themselves are not copied.
 */
public fun <T> ColumnTable<T>.builder(): ColumnTableBuilder<T> {
    val size = rowsSize
    val columnsCopy = columns.toMutableList()
    return ColumnTableBuilder<T>(size, columnsCopy)
}
/**
 * Make a shallow copy of the [Table] columns, edit the copy with [block] and return it as a [ColumnTable].
 */
public fun <T> Table<T>.withColumns(block: ColumnTableBuilder<T>.() -> Unit): ColumnTable<T> {
    val tableBuilder = ColumnTableBuilder<T>(rowsSize, columns.toMutableList())
    block(tableBuilder)
    return tableBuilder
}

@ -0,0 +1,48 @@
@file:Suppress("FunctionName")
package space.kscience.tables
import space.kscience.dataforge.meta.Meta
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
 * A column backed by a [List] of values. Null elements represent missing data.
 */
public class ListColumn<T>(
    override val name: String,
    public val data: List<T?>,
    override val type: KType,
    override val meta: Meta,
) : Column<T> {
    override val size: Int
        get() = data.size

    /**
     * The element at [index] or null if the index is out of bounds.
     */
    override fun getOrNull(index: Int): T? = data.getOrNull(index)
}
/**
 * Create a [ListColumn] that takes name, type and meta from [header] and values from [data].
 */
public fun <T> ListColumn(header: ColumnHeader<T>, data: List<T?>): ListColumn<T> {
    return ListColumn(header.name, data, header.type, header.meta)
}
/**
 * Create a [ListColumn] of the reified type [T] with meta taken from the scheme [def].
 */
public inline fun <reified T> ListColumn(
    name: String,
    def: ColumnScheme,
    data: List<T?>,
): ListColumn<T> {
    val columnMeta = def.toMeta()
    return ListColumn(name, data, typeOf<T>(), columnMeta)
}
/**
 * Create a [ListColumn] described by [header] with [size] values produced by [dataBuilder] from the value index.
 */
public fun <T> ListColumn(
    header: ColumnHeader<T>,
    size: Int,
    dataBuilder: (Int) -> T?,
): ListColumn<T> {
    val values = List(size) { index -> dataBuilder(index) }
    return ListColumn(header.name, values, header.type, header.meta)
}
/**
 * Create a [ListColumn] of the reified type [T] with meta from [def] and [size] values produced by [dataBuilder].
 */
public inline fun <reified T> ListColumn(
    name: String,
    def: ColumnScheme,
    size: Int,
    dataBuilder: (Int) -> T?,
): ListColumn<T> {
    val values = List(size) { index -> dataBuilder(index) }
    return ListColumn(name, values, typeOf<T>(), def.toMeta())
}
/**
 * Eagerly apply [block] to each value of the column (including missing ones passed as null)
 * and return a new column with the same name and the reified type [R].
 *
 * @param meta meta of the resulting column; the meta of this column by default
 */
public inline fun <T, reified R : Any> Column<T>.map(meta: Meta = this.meta, noinline block: (T?) -> R): Column<R> {
    val mapped = List(size) { index ->
        val source = getOrNull(index)
        block(source)
    }
    return ListColumn(name, mapped, typeOf<R>(), meta)
}

@ -0,0 +1,22 @@
package space.kscience.tables
import space.kscience.dataforge.meta.Value
/**
 * A table with random-access mutable cells
 */
public interface MutableTable<T> : Table<T> {
/**
 * Set the [value] of the cell at given [row] and [column]. The value could be null.
 */
public operator fun set(row: Int, column: String, value: T?)
}
/**
 * Set the cell [value] using the name of the [column] header.
 */
public operator fun <T, R : T> MutableTable<T>.set(row: Int, column: ColumnHeader<R>, value: R?) {
    val columnName = column.name
    set(row, columnName, value)
}
/**
 * Write [values] to the [column], using the position of each value as its row index.
 */
public operator fun <T> MutableTable<T>.set(column: String, values: Iterable<T>) {
    for ((rowIndex, value) in values.withIndex()) {
        set(rowIndex, column, value)
    }
}
/**
 * Set a cell of a [Value] table, converting an arbitrary [value] with [Value.of].
 */
public operator fun MutableTable<Value>.set(row: Int, column: String, value: Any?) {
    val converted = Value.of(value)
    set(row, column, converted)
}

@ -0,0 +1,84 @@
package space.kscience.tables
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.MetaRepr
import space.kscience.dataforge.meta.Value
import space.kscience.dataforge.meta.getValue
import kotlin.jvm.JvmInline
import kotlin.reflect.KType
/**
 * A [Row] backed by a map from column names to values.
 */
@JvmInline
public value class MapRow<C>(public val values: Map<String, C?>) : Row<C> {
    override fun getOrNull(column: String): C? {
        return values[column]
    }
}
/**
 * Create a [MapRow] from pairs of [ColumnHeader] and value. Values are stored under the header names.
 */
@Suppress("FunctionName")
public fun <T> Row(vararg pairs: Pair<ColumnHeader<T>, T>): MapRow<T> {
    val valuesByName = pairs.associate { (header, value) -> header.name to value }
    return MapRow(valuesByName)
}
/**
 * A [Row] backed by [Meta]. Cell values are read from the meta by column name.
 */
@JvmInline
public value class MetaRow(public val meta: Meta) : Row<Value> {
    override fun getOrNull(column: String): Value? {
        return meta.getValue(column)
    }
}
/**
 * Convert this [MetaRepr] to [Meta] and wrap it into a [MetaRow].
 */
public fun MetaRepr.asRow(): MetaRow {
    val rowMeta = toMeta()
    return MetaRow(rowMeta)
}
/**
 * A column view over a [table]: values are read from the table cell by cell using the name of [header].
 *
 * The header must be one of the table headers.
 */
internal class RowTableColumn<T, R : T>(val table: Table<T>, val header: ColumnHeader<R>) : Column<R> {
    init {
        require(table.headers.contains(header)) { "Header $header does not belong to $table" }
    }

    override val name: String
        get() = header.name

    override val type: KType
        get() = header.type

    override val meta: Meta
        get() = header.meta

    override val size: Int
        get() = table.rows.size

    // The cast is unchecked: the value type is not verified against the header type
    @Suppress("UNCHECKED_CAST")
    override fun getOrNull(index: Int): R? = table.getOrNull(index, name) as R?
}
/**
 * A row-based table
 *
 * @param headers headers of the table columns
 * @param rows rows of the table
 */
public open class RowTable<C>(
    override val headers: List<ColumnHeader<C>>,
    override val rows: List<Row<C>>,
) : Table<C> {
    /**
     * Get the value of [column] in the given [row].
     * Returns null if the row index is out of bounds or the row has no value for the column.
     */
    override fun getOrNull(row: Int, column: String): C? = rows.getOrNull(row)?.getOrNull(column)

    /**
     * Column views created from [headers]. The list is rebuilt on each access.
     */
    override val columns: List<Column<C>> get() = headers.map { RowTableColumn(this, it) }
}
/**
 * Create a [RowTable] with the given initial [headers] and content produced by [block].
 */
public inline fun <T> RowTable(vararg headers: ColumnHeader<T>, block: RowTableBuilder<T>.() -> Unit): RowTable<T> {
    val tableBuilder = RowTableBuilder<T>(arrayListOf(), headers.toMutableList())
    block(tableBuilder)
    return tableBuilder
}
/**
 * Return this object if it is already a [Table]; otherwise read the whole row sequence into a new [RowTable].
 */
public fun <C> Rows<C>.collect(): Table<C> {
    val asTable = this as? Table<C>
    return asTable ?: RowTable(headers, rowSequence().toList())
}
/**
 * If this is a [RowTable], return this, otherwise create a new [Row]-based table from its headers and rows.
 * This method is used only for performance.
 *
 * The resulting table does not in general follow changes of the initial table.
 */
public fun <T> Table<T>.toRowTable(): RowTable<T> {
    val existing = this as? RowTable<T>
    return existing ?: RowTable(headers, rows)
}

@ -0,0 +1,107 @@
package space.kscience.tables
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.Value
import space.kscience.dataforge.meta.ValueType
import kotlin.properties.PropertyDelegateProvider
import kotlin.properties.ReadOnlyProperty
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
 * A [RowTable] with mutable lists of [rows] and [headers].
 */
public class RowTableBuilder<C>(
    override val rows: MutableList<Row<C>>,
    override val headers: MutableList<ColumnHeader<C>>,
) : RowTable<C>(headers, rows) {

    /**
     * Append [header] to the table headers.
     * The type parameter [T] does not take part in the parameter types.
     */
    public fun <T : C> addColumn(header: ColumnHeader<C>) {
        headers.add(header)
    }

    /**
     * Create a new column header for the table and register it at [index], or at the end if [index] is null.
     */
    @PublishedApi
    internal fun <T : C> newColumn(name: String, type: KType, meta: Meta, index: Int?): ColumnHeader<T> {
        val created = SimpleColumnHeader<T>(name, type, meta)
        when (index) {
            null -> headers.add(created)
            else -> headers.add(index, created)
        }
        return created
    }

    /**
     * Append [row] to the table and return it.
     */
    public fun addRow(row: Row<C>): Row<C> = row.also { rows.add(it) }

    /**
     * Create and register a header with the given [name] and the reified type [T].
     *
     * @param index position of the header; it is appended when null
     * @param columnMetaBuilder configuration of the [ColumnScheme] that provides the header meta
     */
    public inline fun <reified T : C> newColumn(
        name: String,
        index: Int? = null,
        noinline columnMetaBuilder: ColumnScheme.() -> Unit = {},
    ): ColumnHeader<T> {
        val columnMeta = ColumnScheme(columnMetaBuilder).toMeta()
        return newColumn(name, typeOf<T>(), columnMeta, index)
    }

    /**
     * A delegate provider that registers a header named after the delegated property
     * when the delegate is created and then returns the same header on each read.
     */
    public inline fun <reified T : C> column(
        index: Int? = null,
        noinline columnMetaBuilder: ColumnScheme.() -> Unit = {},
    ): PropertyDelegateProvider<Any?, ReadOnlyProperty<Any?, ColumnHeader<T>>> =
        PropertyDelegateProvider { _, property ->
            val header: ColumnHeader<T> = newColumn(property.name, index, columnMetaBuilder)
            ReadOnlyProperty { _, _ -> header }
        }

    /**
     * Append a [MapRow] backed by [map] and return it.
     */
    public fun row(map: Map<String, C?>): Row<C> {
        val created = MapRow(map)
        rows.add(created)
        return created
    }

    /**
     * Append a row created from pairs of header and value and return it.
     */
    public fun <T : C> row(vararg pairs: Pair<ColumnHeader<T>, T>): Row<C> {
        val created = Row(*pairs)
        return addRow(created)
    }
}
/**
 * Create and register a [Value] column header with the given [name].
 *
 * [columnMetaBuilder] is applied first and [valueType] is written afterwards, so the parameter takes precedence.
 *
 * @param index position of the header; it is appended when null
 */
public fun RowTableBuilder<in Value>.newColumn(
    name: String,
    valueType: ValueType,
    index: Int? = null,
    columnMetaBuilder: ValueColumnScheme.() -> Unit = {},
): ColumnHeader<Value> {
    val scheme = ValueColumnScheme(columnMetaBuilder)
    scheme.valueType = valueType
    return newColumn(name, typeOf<Value>(), scheme.toMeta(), index)
}
/**
 * A delegate provider that registers a [Value] column header named after the delegated property
 * when the delegate is created and then returns the same header on each read.
 */
public fun RowTableBuilder<in Value>.column(
    valueType: ValueType,
    index: Int? = null,
    columnMetaBuilder: ValueColumnScheme.() -> Unit = {},
): PropertyDelegateProvider<Any?, ReadOnlyProperty<Any?, ColumnHeader<Value>>> =
    PropertyDelegateProvider { _, property ->
        val header = newColumn(property.name, valueType, index, columnMetaBuilder)
        ReadOnlyProperty { _, _ -> header }
    }
/**
 * Append a row built from pairs of header and arbitrary object. Objects are converted with [Value.of].
 */
public fun RowTableBuilder<Value>.valueRow(vararg pairs: Pair<ColumnHeader<Value>, Any?>): Row<Value> {
    val valuesByName = pairs.associate { (header, raw) -> header.name to Value.of(raw) }
    return row(valuesByName)
}
/**
 * Append a row represented by [meta] and return it.
 */
public fun RowTableBuilder<Value>.row(meta: Meta): Row<Value> {
    return MetaRow(meta).also { rows.add(it) }
}
/**
 * Create a new [RowTableBuilder] with copies of the row and header lists of this table.
 * Rows and headers themselves are not copied.
 */
public fun <T> RowTable<T>.toMutableRowTable(): RowTableBuilder<T> {
    val rowsCopy = rows.toMutableList()
    val headersCopy = headers.toMutableList()
    return RowTableBuilder(rowsCopy, headersCopy)
}
/**
 * Make a shallow copy of the [Table] rows and headers, edit the copy with [block] and return it as a [RowTable].
 */
public fun <T> Table<T>.withRows(block: RowTableBuilder<T>.() -> Unit): RowTable<T> {
    val tableBuilder = RowTableBuilder(rows.toMutableList(), headers.toMutableList())
    block(tableBuilder)
    return tableBuilder
}

@ -0,0 +1,76 @@
package space.kscience.tables
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.meta.Value
import kotlin.jvm.JvmName
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
 * Build the identifier of a cell: the column name followed by the row index in square brackets.
 */
private fun cellId(row: Int, column: String): String = buildString {
    append(column)
    append('[')
    append(row)
    append(']')
}
/**
 * A single cell holding a [value] at the given [row] and [column].
 */
public data class SpreadSheetCell<T>(val row: Int, val column: String, val value: T) {
    /**
     * The cell identifier computed from [row] and [column].
     */
    val id: String
        get() = cellId(row = row, column = column)
}
/**
 * A mutable table that stores individual cells in [cellMap] addressed by row index and column name.
 *
 * [columns] and [rows] are dense views: they span row indices from zero up to the largest row index that
 * has a cell, and missing cells are represented by null (in columns) or by an absent key (in rows).
 * Therefore `rows[i]` and `column.getOrNull(i)` address the same row `i` as [getOrNull] and [set].
 * Cells with negative row indices are reachable only through [getOrNull] and [cells].
 *
 * @param cellValueType the type used for columns that have no explicit definition in [columnDefs]
 * @param cellMap cells by their identifiers
 * @param columnDefs explicitly defined column headers by column name
 */
public class SpreadSheetTable<T>(
    private val cellValueType: KType,
    public val cellMap: MutableMap<String, SpreadSheetCell<T>> = HashMap(),
    public val columnDefs: MutableMap<String, ColumnHeader<T>> = HashMap(),
) : MutableTable<T> {

    public val cells: Collection<SpreadSheetCell<T>> get() = cellMap.values

    // The largest row index among the stored cells plus one, or zero if there are no cells
    private val rowCount: Int get() = (cells.maxOfOrNull { it.row } ?: -1) + 1

    override fun getOrNull(row: Int, column: String): T? = cellMap[cellId(row, column)]?.value

    /**
     * Set the cell value. A null [value] removes the cell.
     */
    public override fun set(row: Int, column: String, value: T?) {
        val cellId = cellId(row, column)
        if (value == null) {
            cellMap.remove(cellId)
        } else {
            cellMap[cellId] = SpreadSheetCell(row, column, value)
        }
    }

    /**
     * Columns that have at least one cell. The collection is rebuilt on each access.
     */
    override val columns: Collection<Column<T>>
        get() {
            val size = rowCount
            return cells.groupBy { it.column }.map { (key, columnCells) ->
                val header = columnDefs[key] ?: SimpleColumnHeader(key, cellValueType, Meta.EMPTY)
                // Place each value at its own row index instead of relying on map iteration order
                val byRow = columnCells.associateBy { it.row }
                ListColumn(header, List(size) { row -> byRow[row]?.value })
            }
        }

    /**
     * Rows ordered by row index. The list is rebuilt on each access.
     */
    override val rows: List<Row<T>>
        get() {
            val byRow = cells.groupBy { it.row }
            return List(rowCount) { row ->
                MapRow(byRow[row].orEmpty().associate { cell -> cell.column to cell.value })
            }
        }

    /**
     * Define the header for the column with given [name] and the reified type [R].
     */
    public inline fun <reified R : T> defineColumn(name: String, schemeBuilder: ColumnScheme.() -> Unit) {
        columnDefs[name] = SimpleColumnHeader(name, typeOf<R>(), ColumnScheme(schemeBuilder).toMeta())
    }
}
/**
 * Create an empty [SpreadSheetTable] with the reified cell type [T] and fill it using [builder].
 */
public inline fun <reified T> SpreadSheetTable(builder: SpreadSheetTable<T>.() -> Unit): SpreadSheetTable<T> {
    val table = SpreadSheetTable<T>(typeOf<T>())
    builder(table)
    return table
}
/**
 * Register [column] as the header for its name and set the cell to [value] converted with [Value.of].
 */
@JvmName("setValue")
public operator fun SpreadSheetTable<Value>.set(row: Int, column: ColumnHeader<Value>, value: Any?) {
    val columnName = column.name
    columnDefs[columnName] = column
    set(row, columnName, Value.of(value))
}
/**
 * Register [column] as the header for its name and write [values] to it,
 * using the position of each value as its row index.
 */
public operator fun <T> SpreadSheetTable<T>.set(column: ColumnHeader<T>, values: List<T>) {
    columnDefs[column.name] = column
    for ((rowIndex, value) in values.withIndex()) {
        set(rowIndex, column, value)
    }
}
/**
 * Register [column] as the header for its name and write arbitrary [values] to it,
 * using the position of each value as its row index.
 */
@JvmName("setValues")
public operator fun SpreadSheetTable<Value>.set(column: ColumnHeader<Value>, values: List<Any?>) {
    columnDefs[column.name] = column
    for ((rowIndex, value) in values.withIndex()) {
        set(rowIndex, column, value)
    }
}

@ -0,0 +1,102 @@
package space.kscience.tables
import kotlinx.coroutines.flow.Flow
/**
 * Finite or infinite row set. Rows are produced by a lazy [Sequence] returned from [rowSequence].
 * Each row must contain at least all the fields mentioned in [headers].
 */
public interface Rows<out T> {
/**
 * An ordered list of headers that *must* be present.
 */
public val headers: TableHeader<T>
/**
 * A lazy sequence of rows.
 */
public fun rowSequence(): Sequence<Row<T>>
}
/**
 * A basic abstraction for a table. The abstraction does not specify how the data is stored.
 */
public interface Table<out T> : Rows<T> {
/**
 * Get the value at given [row] index and [column] name or null if it is missing.
 */
public fun getOrNull(row: Int, column: String): T?
/**
 * Columns of the table.
 */
public val columns: Collection<Column<T>>
// By default, the columns themselves serve as headers
override val headers: TableHeader<T> get() = columns.toList()
/**
 * Rows of the table.
 */
public val rows: List<Row<T>>
// By default, the row sequence iterates over [rows]
override fun rowSequence(): Sequence<Row<T>> = rows.asSequence()
public companion object
}
/**
 * Get the value at given [row] and [column] or fail if it is null.
 */
public operator fun <T> Table<T>.get(row: Int, column: String): T {
    val cell = getOrNull(row, column)
    return cell ?: error("Element with column $column and row $row not found in $this")
}
/**
 * Find the first column with given [name] or return null if there is no such column.
 */
public fun <T> Collection<Column<T>>.getOrNull(name: String): Column<T>? {
    return firstOrNull { column -> column.name == name }
}
/**
 * Find the first column with given [name].
 *
 * @throws NoSuchElementException if there is no column with that name
 */
public operator fun <T> Collection<Column<T>>.get(name: String): Column<T> =
    // Same exception type as `first { }`, but with a message naming the missing column
    firstOrNull { it.name == name } ?: throw NoSuchElementException("Column with name $name not present")
/**
 * Get the value at given [row] for the [column] header.
 * Returns null if the value is missing or is not an instance of [R].
 */
public inline operator fun <T, reified R : T> Table<T>.get(row: Int, column: ColumnHeader<R>): R? {
    //require(headers.contains(column)) { "Column $column is not in table headers" }
    val raw = getOrNull(row, column.name)
    return raw as? R
}
/**
 * A column of values that is also its own [ColumnHeader].
 */
public interface Column<out T> : ColumnHeader<T> {
/**
 * The number of elements in the column.
 */
public val size: Int
/**
 * Get the value at given [index] or null if it is missing.
 */
public fun getOrNull(index: Int): T?
}
/**
 * Get the value at given [index] or fail if it is null.
 */
public operator fun <T> Column<T>.get(index: Int): T {
    val element = getOrNull(index)
    return element ?: error("Element with index $index not found in $this")
}
/**
 * Check that both columns have equal index ranges and that [criterion] holds for each pair of values
 * with the same index. Values are compared with `==` by default.
 */
public inline fun <T> Column<T>.contentEquals(
    other: Column<T>,
    criterion: (l: T?, r: T?) -> Boolean = { l, r -> l == r },
): Boolean {
    if (this.indices != other.indices) return false
    for (index in indices) {
        if (!criterion(getOrNull(index), other.getOrNull(index))) return false
    }
    return true
}
/**
 * The range of valid indices of the column: from zero (inclusive) to [Column.size] (exclusive).
 */
public val Column<*>.indices: IntRange
    get() = 0.until(size)
/**
 * Lazily iterate over column values in index order. Missing values are produced as null.
 */
public operator fun <T> Column<T>.iterator(): Iterator<T?> = iterator {
    indices.forEach { index -> yield(getOrNull(index)) }
}
/**
 * A lazy sequence of column values in index order. Missing values are produced as null.
 */
public fun <T> Column<T>.sequence(): Sequence<T?> = sequence {
    indices.forEach { index -> yield(getOrNull(index)) }
}
/**
 * Column values as a list. For a [ListColumn] the backing list is returned without copying;
 * for other columns the values are collected into a new list.
 */
public fun <T> Column<T>.listValues(): List<T?> = when (this) {
    is ListColumn -> data
    else -> sequence().toList()
}
/**
 * A common abstraction for a row of data. Fields are accessed by column name.
 */
public interface Row<out T> {
/**
 * Get a value in [Row] by name or return null if value is missing.
 *
 * Note that null value for nullable [T] is indistinguishable from missing value.
 */
public fun getOrNull(column: String): T?
}
/**
 * Get a [Row] value by column name or fail if it is null.
 */
public operator fun <T : Any> Row<T>.get(column: String): T {
    val cell = getOrNull(column)
    return cell ?: error("Element with column name $column not found in $this")
}
/**
 * Get a value by the name of the [column] header and cast it to [R].
 * Fails if the value is missing or is not an instance of [R].
 */
public inline operator fun <T, reified R : T> Row<T>.get(column: ColumnHeader<R>): R {
    val typed = getOrNull(column.name) as? R
    return typed ?: error("Type conversion to ${R::class} failed for ${getOrNull(column.name)}")
}

@ -0,0 +1,79 @@
package space.kscience.tables
import space.kscience.dataforge.meta.Meta
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
 * A virtual column whose value at a given index is computed by applying [mapper] to the table row with that index.
 * Nothing is stored: the mapper is invoked on every read.
 */
public class TransformationColumn<T, R>(
    public val table: Table<T>,
    override val type: KType,
    override val name: String,
    override val meta: Meta,
    public val mapper: (Row<T>) -> R?,
) : Column<R> {
    override val size: Int
        get() = table.rows.size

    override fun getOrNull(index: Int): R? {
        val row = table.rows[index]
        return mapper(row)
    }
}
/**
 * A virtual column obtained via transformation of a [Row] with caching results on call (evaluation is lazy).
 *
 * A null result is cached as well, so [mapper] is invoked at most once per index.
 *
 * Calls are not thread safe
 */
public class CachedTransformationColumn<T, R>(
    public val table: Table<T>,
    override val type: KType,
    override val name: String,
    override val meta: Meta,
    public val mapper: (Row<T>) -> R?,
) : Column<R> {
    override val size: Int get() = table.rows.size

    private val values: HashMap<Int, R?> = HashMap()

    override fun getOrNull(index: Int): R? {
        // `getOrPut` treats a stored null as a missing entry and would recompute it on every call
        if (values.containsKey(index)) return values[index]
        val computed = mapper(table.rows[index])
        values[index] = computed
        return computed
    }
}
/**
 * Create a virtual column with given [name] and the reified type [R] that computes its values from table rows.
 *
 * @param cache if true, a [CachedTransformationColumn] is created, otherwise a [TransformationColumn]
 * @param mapper a function computing the column value from a row
 */
public inline fun <T, reified R> Table<T>.rowsToColumn(
    name: String,
    meta: Meta = Meta.EMPTY,
    cache: Boolean = false,
    noinline mapper: (Row<T>) -> R?,
): Column<R> = when {
    cache -> CachedTransformationColumn(this, typeOf<R>(), name, meta, mapper)
    else -> TransformationColumn(this, typeOf<R>(), name, meta, mapper)
}
/**
 * Create a virtual column described by [header] that computes its values from table rows.
 *
 * @param cache if true, a [CachedTransformationColumn] is created, otherwise a [TransformationColumn]
 * @param mapper a function computing the column value from a row
 */
public fun <T, R> Table<T>.rowsToColumn(
    header: ColumnHeader<R>,
    cache: Boolean = false,
    mapper: (Row<T>) -> R?,
): Column<R> = when {
    cache -> CachedTransformationColumn(this, header.type, header.name, header.meta, mapper)
    else -> TransformationColumn(this, header.type, header.name, header.meta, mapper)
}
/**
 * Eagerly compute a [DoubleColumn] with given [name] by applying [block] to each row of the table.
 */
public fun <T> Table<T>.rowsToDoubleColumn(
    name: String,
    meta: Meta = Meta.EMPTY,
    block: (Row<T>) -> Double,
): DoubleColumn {
    // Read `rows` once: it is a computed property in some tables and could rebuild the list on each access
    val rowList = rows
    val data = DoubleArray(rowList.size) { block(rowList[it]) }
    return DoubleColumn(name, data, meta)
}
/**
 * Eagerly compute an [IntColumn] with given [name] by applying [block] to each row of the table.
 */
public fun <T> Table<T>.rowsToIntColumn(
    name: String,
    meta: Meta = Meta.EMPTY,
    block: (Row<T>) -> Int,
): IntColumn {
    // Read `rows` once: it is a computed property in some tables and could rebuild the list on each access
    val rowList = rows
    val data = IntArray(rowList.size) { block(rowList[it]) }
    return IntColumn(name, data, meta)
}

@ -0,0 +1,52 @@
package space.kscience.tables.io
import kotlinx.io.readByteArray
import space.kscience.dataforge.io.Binary
import space.kscience.dataforge.meta.Value
import space.kscience.dataforge.meta.lazyParseValue
import space.kscience.tables.MapRow
import space.kscience.tables.Row
import space.kscience.tables.Rows
import space.kscience.tables.ValueTableHeader
/**
 * Read a line as a [Row]: the trimmed line is split by [delimiter] and each part is lazily parsed to a [Value].
 *
 * Fails if the number of parts differs from the number of headers.
 */
internal fun String.readRow(header: ValueTableHeader, delimiter: Regex): Row<Value> {
    val values = trim().split(delimiter).map { it.lazyParseValue() }
    if (values.size != header.size) {
        error("Can't read line \"${this}\". Expected ${header.size} values in a line, but found ${values.size}")
    }
    val names = header.map { it.name }
    val map = names.zip(values).toMap()
    return MapRow(map)
}
/**
 * [Rows] read from a text [binary] where each non-blank line is a row with values separated by [delimiter].
 */
internal class TextRows(
    override val headers: ValueTableHeader,
    private val binary: Binary,
    private val delimiter: Regex,
) : Rows<Value> {
    /**
     * The whole binary is read and decoded at once; lines are then parsed lazily.
     */
    override fun rowSequence(): Sequence<Row<Value>> = binary.read {
        val text = readByteArray().decodeToString()
        text.lineSequence()
            .mapNotNull { line -> line.trim().takeIf { it.isNotEmpty() } }
            .map { line -> line.readRow(headers, delimiter) }
//        flow {
//            do {
//                val line = readUTF8Line()
//                if (!line.isNullOrBlank()) {
//                    val row = readRow(headers, line)
//                    emit(row)
//                }
//            } while (!endOfInput)
//        }
    }
}

@ -0,0 +1,77 @@
package space.kscience.tables.io
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.asFlow
import kotlinx.coroutines.flow.toList
import kotlinx.io.readByteArray
import kotlinx.io.readLine
import space.kscience.dataforge.io.Binary
import space.kscience.dataforge.meta.Value
import space.kscience.tables.*
/**
 * Finite table backed by a text [Binary]. Rows are read on demand at the offsets listed in [index].
 *
 * @param index offsets of the rows in the binary
 * @param delimiter separator of the values in a line
 */
internal class TextTable(
    override val headers: ValueTableHeader,
    private val binary: Binary,
    val index: List<Int>,
    val delimiter: Regex = "\\s+".toRegex(),
) : Table<Value> {

    override val columns: Collection<Column<Value>>
        get() = headers.map { header -> RowTableColumn(this, header) }

    override val rows: List<Row<Value>>
        get() = index.map { offset -> readAt(offset) }

    override fun rowSequence(): Sequence<Row<Value>> {
        return TextRows(headers, binary, delimiter).rowSequence()
    }

    // Read and parse a single line starting at the given offset
    private fun readAt(offset: Int): Row<Value> = binary.read(offset) {
        val line = readLine() ?: error("Line not found")
        line.readRow(headers, delimiter)
    }

    override fun getOrNull(row: Int, column: String): Value? = readAt(index[row]).getOrNull(column)
}
/**
 * A flow of byte offsets at which non-blank lines start.
 *
 * Offsets are counted in bytes of the binary content including line separators and blank lines,
 * so each offset can be used to start reading the corresponding line.
 * NOTE(review): assumes that the offset accepted by `Binary.read` is a byte offset — confirm.
 */
private fun Binary.lineIndexFlow(): Flow<Int> = read {
    //TODO replace by line reader
    val bytes = readByteArray()
    val lineFeed = '\n'.code.toByte()
    val offsets = buildList {
        var lineStart = 0
        // The position equal to bytes.size closes the last line when it does not end with a line feed
        for (position in 0..bytes.size) {
            if (position == bytes.size || bytes[position] == lineFeed) {
                // In UTF-8 the line feed byte never occurs inside a multi-byte character,
                // so the segment between two line feeds is always a complete line
                if (position > lineStart && bytes.decodeToString(lineStart, position).isNotBlank()) {
                    add(lineStart)
                }
                lineStart = position + 1
            }
        }
    }
    offsets.asFlow()
}
/**
 * Collect row start offsets of this binary into a list.
 */
private suspend fun Binary.buildRowIndex(): List<Int> {
    val offsets = lineIndexFlow()
    return offsets.toList()
}
/**
 * Read given binary as a text [Value] table with whitespace-separated values.
 * The binary is read once to build an index of line offsets; the resulting table then reads
 * specific lines on demand using that index.
 */
public suspend fun Binary.readTextTable(header: ValueTableHeader): Table<Value> {
    val rowOffsets = buildRowIndex()
    return TextTable(header, this, rowOffsets)
}

@ -0,0 +1,47 @@
package space.kscience.tables.io
import space.kscience.dataforge.io.Binary
import space.kscience.dataforge.io.Envelope
import space.kscience.dataforge.meta.*
import space.kscience.dataforge.names.NameToken
import space.kscience.dataforge.names.asName
import space.kscience.tables.Rows
import space.kscience.tables.SimpleColumnHeader
import space.kscience.tables.Table
import kotlin.reflect.typeOf
/**
 * Convert given [Table] to a TSV-based envelope, encoding header in Meta
 *
 * Each header is written to the indexed meta node `column[<index>]` holding the column `name` and,
 * if it is not empty, the column `meta`. The column type is not written to the envelope meta.
 */
public fun Table<Value>.toTextEnvelope(): Envelope = Envelope {
meta {
// One indexed "column" node per header, in header order
headers.forEachIndexed { index, columnHeader ->
set(NameToken("column", index.toString()), Meta {
"name" put columnHeader.name
if (!columnHeader.meta.isEmpty()) {
"meta" put columnHeader.meta
}
})
}
}
type = "table.value"
// The data ID is derived from the hash code of the table
dataID = "valueTable[${this@toTextEnvelope.hashCode()}]"
data = Binary {
writeTextRows(this@toTextEnvelope)
}
}
/**
 * Read TSV rows from given envelope.
 *
 * Column headers are restored from the indexed `column` nodes of the envelope meta, ordered by their numeric index.
 * Missing envelope data is treated as an empty binary.
 *
 * @param delimiter the regular expression used to split a line into values.
 * @throws IllegalStateException if a column node does not define a `name`.
 */
public fun Envelope.readTextRows(delimiter: Regex = "\\s+".toRegex()): Rows<Value> {
    val header = meta.getIndexed("column".asName())
        .entries.sortedBy { it.key?.toInt() }
        .map { (index, item) ->
            // fail with a message that points to the broken column instead of a bare NullPointerException
            val name = item["name"].string ?: error("Column name is not defined for the column with index $index")
            SimpleColumnHeader<Value>(name, typeOf<Value>(), item["meta"] ?: Meta.EMPTY)
        }
    return TextRows(header, data ?: Binary.EMPTY, delimiter)
}

@ -0,0 +1,59 @@
package space.kscience.tables.io
import kotlinx.io.Sink
import kotlinx.io.writeString
import space.kscience.dataforge.meta.*
import space.kscience.tables.ColumnHeader
import space.kscience.tables.Rows
import space.kscience.tables.get
import space.kscience.tables.valueType
/**
 * Write a fixed width value to the output.
 *
 * String and list values are truncated to [width]; other representations are written as is.
 * The result is padded with spaces up to [width].
 *
 * @param width the width of the field. Must be at least 5 so that `false` and `@null` always fit.
 * @param left if true the value is aligned to the left (padding is appended), otherwise to the right.
 */
private fun Sink.writeValue(value: Value, width: Int, left: Boolean = true) {
    // 5 is a valid width: it is the default text width of BOOLEAN and NULL columns
    require(width >= 5) { "Width could not be less than 5" }
    val str: String = when (value.type) {
        ValueType.NUMBER -> value.numberOrNull.toString() //TODO apply decimal format
        ValueType.STRING, ValueType.LIST -> value.string.take(width)
        ValueType.BOOLEAN -> if (value.boolean) {
            "true"
        } else {
            "false"
        }

        ValueType.NULL -> "@null"
    }
    val padded = if (left) {
        str.padEnd(width)
    } else {
        str.padStart(width)
    }
    writeString(padded)
}
/**
 * The width of the text field used for this column.
 * Taken from the `columnWidth` meta value when it is present, otherwise chosen by the column value type.
 */
public val ColumnHeader<Value>.textWidth: Int
    get() {
        val explicitWidth = meta["columnWidth"].int
        if (explicitWidth != null) return explicitWidth
        return when (valueType) {
            ValueType.BOOLEAN, ValueType.NULL -> 5
            ValueType.NUMBER -> 8
            ValueType.STRING, null -> 16
            ValueType.LIST -> 32
        }
    }
/**
 * Write rows without a header line to the output.
 * Each value is written as a fixed width field followed by [delimiter] (tab by default, which gives TSV),
 * and each row is terminated by `\r\n`.
 */
public fun Sink.writeTextRows(rows: Rows<Value>, delimiter: String = "\t") {
    val columnWidths: List<Int> = rows.headers.map { header -> header.textWidth }
    for (row in rows.rowSequence()) {
        for ((position, header) in rows.headers.withIndex()) {
            writeValue(row[header], columnWidths[position])
            writeString(delimiter)
        }
        writeString("\r\n")
    }
}

@ -0,0 +1,94 @@
package space.kscience.tables
import space.kscience.dataforge.meta.Meta
import kotlin.reflect.KType
import kotlin.reflect.typeOf
/**
 * A [Column] of [Double] values stored in a primitive [DoubleArray].
 *
 * Equality and hash code are based on the name, the array content and the meta.
 */
public class DoubleColumn(
    override val name: String,
    public val data: DoubleArray,
    override val meta: Meta = Meta.EMPTY
) : Column<Double> {
    override val type: KType get() = typeOf<Double>()

    override val size: Int get() = data.size

    /**
     * Primitive accessor that reads the value directly from the backing array
     */
    public fun getDouble(index: Int): Double = data[index]

    override fun getOrNull(index: Int): Double = data[index]

    override fun equals(other: Any?): Boolean = when {
        this === other -> true
        other !is DoubleColumn -> false
        else -> name == other.name && data.contentEquals(other.data) && meta == other.meta
    }

    override fun hashCode(): Int =
        31 * (31 * name.hashCode() + data.contentHashCode()) + meta.hashCode()

    public companion object {
        /**
         * Create a column with meta produced by [metaBuilder].
         * NOTE(review): the type parameter [T] is not used by the body.
         */
        public inline operator fun <reified T : Any> invoke(
            name: String,
            data: DoubleArray,
            noinline metaBuilder: ColumnScheme.() -> Unit
        ): DoubleColumn {
            val columnMeta = ColumnScheme(metaBuilder).toMeta()
            return DoubleColumn(name, data, columnMeta)
        }
    }
}
/**
 * A [Column] of [Int] values stored in a primitive [IntArray].
 *
 * Equality and hash code are based on the name, the array content and the meta.
 */
public class IntColumn(
    override val name: String,
    public val data: IntArray,
    override val meta: Meta = Meta.EMPTY
) : Column<Int> {
    override val type: KType get() = typeOf<Int>()

    override val size: Int get() = data.size

    /**
     * Primitive accessor that reads the value directly from the backing array
     */
    public fun getInt(index: Int): Int = data[index]

    override fun getOrNull(index: Int): Int = data[index]

    override fun equals(other: Any?): Boolean = when {
        this === other -> true
        other !is IntColumn -> false
        else -> name == other.name && data.contentEquals(other.data) && meta == other.meta
    }

    override fun hashCode(): Int =
        31 * (31 * name.hashCode() + data.contentHashCode()) + meta.hashCode()

    public companion object {
        /**
         * Create a column with meta produced by [metaBuilder].
         * NOTE(review): the type parameter [T] is not used by the body.
         */
        public inline operator fun <reified T : Any> invoke(
            name: String,
            data: IntArray,
            noinline metaBuilder: ColumnScheme.() -> Unit
        ): IntColumn {
            val columnMeta = ColumnScheme(metaBuilder).toMeta()
            return IntColumn(name, data, columnMeta)
        }
    }
}

@ -0,0 +1,24 @@
package space.kscience.tables
import kotlin.test.Test
import kotlin.test.assertTrue
/**
 * Checks the [ColumnTable] builder: a generated column, a pre-filled column and a column derived from rows.
 */
class ColumnTableTest {
@Test
fun columnBuilder() {
val columnTable = ColumnTable<Double>(100) {
// the header names are taken from the names of the delegated properties
val a by ColumnHeader.typed<Double>()
val b by ColumnHeader.typed<Double>()
// fill column with a new value
fill(a) { it.toDouble() }
// set column with pre-filled values
column(b, List(100) { it.toDouble() })
// add a virtual column with values transformed from rows
transform("c") { it[a] - it[b] }
}
// both a and b are filled from the same index-based generator, so a - b must be zero in every row
assertTrue {
columnTable.columns["c"].listValues().all { it == 0.0 }
}
}
}

@ -0,0 +1,26 @@
package space.kscience.tables
import space.kscience.dataforge.meta.Value
import space.kscience.dataforge.meta.ValueType
import space.kscience.dataforge.meta.int
import kotlin.test.Test
import kotlin.test.assertEquals
/**
 * Checks that values written through the [SpreadSheetTable] builder can be read back.
 */
internal class SpreadSheetTest {
@Test
fun spreadsheetWriteRead() {
// the header names are taken from the names of the delegated properties
val a by ColumnHeader.value(ValueType.STRING)
val b by ColumnHeader.value(ValueType.NUMBER)
val c by ColumnHeader.value(ValueType.NUMBER)
val ss = SpreadSheetTable<Value> {
// whole-column assignments
set(a, listOf("1", "2", "3"))
set(b, listOf(1, 2, 3))
// three-argument form; read back below as ss[2, c]
set(2, c, 22)
}
assertEquals(22, ss[2, c]?.int)
// column b holds 1, 2, 3
assertEquals(6, ss.columns["b"].sequence().sumOf { it?.int ?: 0 })
}
}

@ -0,0 +1,37 @@
package space.kscience.tables
import space.kscience.dataforge.meta.Meta
import kotlin.properties.ReadOnlyProperty
import kotlin.reflect.KProperty
import kotlin.reflect.KType
import kotlin.reflect.full.isSubtypeOf
/**
 * Represent this column as a column of items of the given [type].
 * The column itself is returned when [type] is a subtype of the column type;
 * otherwise the column is wrapped and reports [type] as its own type.
 */
@Suppress("UNCHECKED_CAST")
public fun <T : Any> Column<*>.cast(type: KType): Column<T> = when {
    type.isSubtypeOf(this.type) -> this as Column<T>
    else -> CastColumn(this, type)
}
/**
 * A view of the [origin] column that reports the given [type] and casts values on read.
 * Name, meta and size are taken from [origin].
 */
private class CastColumn<T : Any>(private val origin: Column<*>, override val type: KType) : Column<T> {
    override val size: Int get() = origin.size
    override val meta: Meta get() = origin.meta
    override val name: String get() = origin.name

    @Suppress("UNCHECKED_CAST")
    override fun getOrNull(index: Int): T? = origin.getOrNull(index) as T?
}
/**
 * A read-only property delegate that resolves a column of the [table] by the name of the delegated property
 * and casts it to [type]. Fails with an error if the table has no column with that name.
 */
public class ColumnProperty<C : Any, T : C>(
    public val table: Table<C>,
    public val type: KType
) : ReadOnlyProperty<Any?, Column<T>> {
    override fun getValue(thisRef: Any?, property: KProperty<*>): Column<T> {
        val column = table.columns.getOrNull(property.name)
            ?: error("Column with name ${property.name} not found in the table")
        return column.cast(type)
    }
}
/**
 * Find the column with the same name as [header] and cast it to the header type, or return null if there is none.
 */
public operator fun <C : Any, T : C> Collection<Column<C>>.get(header: ColumnHeader<T>): Column<T>? =
    firstOrNull { candidate -> candidate.name == header.name }?.cast(header.type)

@ -0,0 +1,36 @@
package space.kscience.tables.io
import kotlinx.coroutines.runBlocking
import space.kscience.dataforge.io.toByteArray
import space.kscience.dataforge.meta.Value
import space.kscience.dataforge.meta.ValueType
import space.kscience.dataforge.meta.int
import space.kscience.dataforge.meta.string
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.tables.RowTable
import space.kscience.tables.column
import space.kscience.tables.valueRow
import kotlin.test.Test
import kotlin.test.assertEquals
/**
 * Round-trip test: a table is written to a text envelope and read back as text rows.
 */
@DFExperimental
class TextRowsTest {
// two-column fixture; the header names are taken from the names of the delegated properties
val table = RowTable<Value> {
val a by column(ValueType.NUMBER)
val b by column(ValueType.STRING)
valueRow(a to 1, b to "b1")
valueRow(a to 2, b to "b2")
}
@Test
fun testTableWriteRead() = runBlocking {
val envelope = table.toTextEnvelope()
// print the text representation for visual inspection
val string = envelope.data!!.toByteArray().decodeToString()
println(string)
// this local shadows the fixture above and holds the rows read back from the envelope
val table = envelope.readTextRows()
val rows = table.rowSequence().toList()
assertEquals(1, rows[0].getOrNull("a")?.int)
assertEquals("b2", rows[1].getOrNull("b")?.string)
}
}

@ -0,0 +1,21 @@
# Module tables-kt-csv
## Usage
## Artifact:
The Maven coordinates of this project are `space.kscience:tables-kt-csv:0.4.1`.
**Gradle Kotlin DSL:**
```kotlin
repositories {
maven("https://repo.kotlin.link")
mavenCentral()
}
dependencies {
implementation("space.kscience:tables-kt-csv:0.4.1")
}
```

@ -0,0 +1,17 @@
// Build script of the tables-kt-csv module
plugins {
id("space.kscience.gradle.mpp")
`maven-publish`
}
kscience {
// enabled targets
jvm()
js()
commonMain {
// both the core tables module and the CSV library are exposed as API dependencies
api(projects.tablesKt)
api(libs.csv)
}
}
readme {
maturity = space.kscience.gradle.Maturity.EXPERIMENTAL
}

@ -0,0 +1,37 @@
package space.kscience.tables.csv
import com.github.doyaaaaaken.kotlincsv.dsl.context.CsvReaderContext
import com.github.doyaaaaaken.kotlincsv.dsl.context.CsvWriterContext
import com.github.doyaaaaaken.kotlincsv.dsl.csvReader
import space.kscience.dataforge.meta.Meta
import space.kscience.tables.*
import kotlin.reflect.typeOf
/**
 * Build a table header with one [String]-typed column without meta for each key of this map.
 */
internal fun Map<String, String>.extractHeader(): TableHeader<String> =
    keys.map { columnName -> SimpleColumnHeader(columnName, typeOf<String>(), Meta.EMPTY) }
/**
 * Predefined reader and writer configurations for tab-separated values
 */
public object CsvFormats {
    /**
     * Reader configuration: tab delimiter, double quote as the quote character and backslash as the escape character
     */
    public val tsvReader: CsvReaderContext.() -> Unit = {
        delimiter = '\t'
        quoteChar = '"'
        escapeChar = '\\'
    }

    /**
     * Writer configuration: tab delimiter
     */
    public val tsvWriter: CsvWriterContext.() -> Unit = { delimiter = '\t' }
}
/**
 * Read a [Table] of strings from CSV text. The first record is used as a header.
 * Fails with an error if the text contains no data records.
 *
 * @param format the configuration of the CSV reader.
 */
public fun Table.Companion.readCsvString(
    string: String,
    format: CsvReaderContext.() -> Unit = {},
): Table<String> {
    val records = csvReader(format).readAllWithHeader(string)
    val firstRecord = records.firstOrNull() ?: error("Can't read empty table")
    return RowTable(
        headers = firstRecord.extractHeader(),
        records.map { record -> MapRow(record) }
    )
}

@ -0,0 +1,62 @@
package space.kscience.tables.csv
import com.github.doyaaaaaken.kotlincsv.dsl.context.CsvReaderContext
import com.github.doyaaaaaken.kotlincsv.dsl.context.CsvWriterContext
import com.github.doyaaaaaken.kotlincsv.dsl.csvReader
import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter
import space.kscience.tables.*
import java.nio.file.Path
import kotlin.io.path.inputStream
import kotlin.io.path.outputStream
/**
 * Read a [Table] of strings from the CSV file at [path]. The first record is used as a header.
 * Fails with an error if the file contains no data records.
 *
 * @param format the configuration of the CSV reader.
 */
public fun Table.Companion.readCsv(
    path: Path,
    format: CsvReaderContext.() -> Unit = {},
): Table<String> {
    val records = path.inputStream().use { stream ->
        csvReader(format).readAllWithHeader(stream)
    }
    val firstRecord = records.firstOrNull() ?: error("Can't read empty table")
    return RowTable(
        headers = firstRecord.extractHeader(),
        records.map { record -> MapRow(record) }
    )
}
/**
 * Read the CSV file at [path] as [Rows] of strings. The first record is used as a header.
 *
 * The header is resolved immediately from the first data record. The file is opened anew on each call of
 * [Rows.rowSequence] and is closed before that call returns, so the rows of a single call are loaded into
 * memory as a whole. The result can be traversed any number of times.
 *
 * @param format the configuration of the CSV reader.
 * @throws IllegalStateException if the file contains no data records.
 */
public fun Table.Companion.readCsvRows(
    path: Path,
    format: CsvReaderContext.() -> Unit = {},
): Rows<String> {
    // The lazy sequence provided by the reader must be consumed while the reader and the stream are open
    val firstRow: Map<String, String> = path.inputStream().use { inputStream ->
        csvReader(format).open(inputStream) {
            readAllWithHeaderAsSequence().firstOrNull()
        }
    } ?: error("Can't read empty table")
    val header: List<ColumnHeader<String>> = firstRow.extractHeader()

    return object : Rows<String> {
        override val headers: TableHeader<String> get() = header

        override fun rowSequence(): Sequence<Row<String>> = path.inputStream().use { inputStream ->
            csvReader(format).open(inputStream) {
                // materialize the rows before the reader is closed
                readAllWithHeaderAsSequence().map { MapRow(it) }.toList()
            }
        }.asSequence()
    }
}
/**
 * Write [table] to the file at [path] as CSV. The first record contains the column names.
 *
 * The header is written by the CSV writer as an ordinary record, so column names get the same quoting,
 * line terminator and charset as the values.
 *
 * @param format the configuration of the CSV writer.
 */
public fun Table.Companion.writeCsvFile(
    path: Path,
    table: Table<Any?>,
    format: CsvWriterContext.() -> Unit = {},
) {
    val headers = table.headers
    val records = buildList<List<Any?>> {
        add(headers.map { it.name })
        addAll(table.rows.map { row -> headers.map { row[it] } })
    }
    path.outputStream().use { outputStream ->
        csvWriter(format).writeAll(records, outputStream)
    }
}

@ -0,0 +1,26 @@
package space.kscience.tables.csv
import com.github.doyaaaaaken.kotlincsv.dsl.context.CsvReaderContext
import com.github.doyaaaaaken.kotlincsv.dsl.context.CsvWriterContext
import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter
import space.kscience.tables.Table
import space.kscience.tables.get
import java.net.URL
/**
 * Read a [Table] of strings from CSV text loaded from [url]. The first record is used as a header.
 *
 * @param format the configuration of the CSV reader.
 */
public fun Table.Companion.readCsv(
    url: URL,
    format: CsvReaderContext.() -> Unit = {},
): Table<String> {
    val text = url.readText()
    return readCsvString(text, format)
}
/**
 * Write [table] to a CSV string. The first record contains the column names.
 *
 * The header is written by the CSV writer as an ordinary record, so column names get the same quoting
 * and line terminator as the values.
 *
 * @param format the configuration of the CSV writer.
 */
public fun Table.Companion.writeCsvString(
    table: Table<Any?>,
    format: CsvWriterContext.() -> Unit = {},
): String {
    val headers = table.headers
    val records = buildList<List<Any?>> {
        add(headers.map { it.name })
        addAll(table.rows.map { row -> headers.map { row[it] } })
    }
    return csvWriter(format).writeAllAsString(records)
}

@ -0,0 +1,36 @@
package space.kscience.tables.csv
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
import space.kscience.dataforge.meta.Value
import space.kscience.tables.RowTable
import space.kscience.tables.Table
import space.kscience.tables.get
import space.kscience.tables.valueRow
/**
 * Round-trip tests: a table is written to a string and read back, with the default format and with the TSV format.
 */
internal class StringReadWrite {
// two-column fixture; the header names are taken from the names of the delegated properties
val table = RowTable<Value> {
val a by column<Value>()
val b by column<Value>()
valueRow(a to 1, b to "b1")
valueRow(a to 2, b to "b2")
}
@Test
fun writeRead() {
val string = Table.writeCsvString(table)
// print the text representation for visual inspection
println(string)
val reconstructed = Table.readCsvString(string)
// the table read back contains strings
assertEquals("b2", reconstructed[1, "b"])
}
@Test
fun writeReadTsv() {
// same round trip, with the tab-separated writer and reader configurations
val string = Table.writeCsvString(table, CsvFormats.tsvWriter)
println(string)
val reconstructed = Table.readCsvString(string, CsvFormats.tsvReader)
assertEquals("b2", reconstructed[1, "b"])
}
}

@ -0,0 +1,21 @@
# Module tables-kt-dataframe
## Usage
## Artifact:
The Maven coordinates of this project are `space.kscience:tables-kt-dataframe:0.4.1`.
**Gradle Kotlin DSL:**
```kotlin
repositories {
maven("https://repo.kotlin.link")
mavenCentral()
}
dependencies {
implementation("space.kscience:tables-kt-dataframe:0.4.1")
}
```

@ -0,0 +1,13 @@
// Build script of the tables-kt-dataframe module
plugins {
id("space.kscience.gradle.jvm")
`maven-publish`
}
dependencies {
// both the DataFrame library and the core tables module are exposed as API dependencies
api(libs.kotlinx.dataframe)
api(projects.tablesKt)
}
readme {
maturity = space.kscience.gradle.Maturity.PROTOTYPE
}

@ -0,0 +1,63 @@
package space.kscience.dataforge.dataframe
import org.jetbrains.kotlinx.dataframe.*
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.column
import org.jetbrains.kotlinx.dataframe.api.getColumn
import org.jetbrains.kotlinx.dataframe.api.rows
import space.kscience.dataforge.meta.Meta
import space.kscience.tables.Column
import space.kscience.tables.ColumnHeader
import space.kscience.tables.Row
import space.kscience.tables.Table
import kotlin.reflect.KType
/**
 * A [DataColumn] represented as a tables [Column]. Name, type and size are taken from the wrapped column;
 * meta is always empty.
 */
@JvmInline
internal value class DataColumnAsColumn<T>(val column: DataColumn<T>) : Column<T> {
    override val name: String get() = column.name
    override val type: KType get() = column.type
    override val size: Int get() = column.size
    override val meta: Meta get() = Meta.EMPTY

    override fun getOrNull(index: Int): T = column.get(index)
}
/**
 * Represent this [DataColumn] as a tables [Column].
 * A column that itself wraps a tables column is unwrapped instead of being wrapped once more.
 */
internal fun <T> DataColumn<T>.toTableColumn(): Column<T> {
    if (this is ColumnAsDataColumn) return this.column
    return DataColumnAsColumn(this)
}
/**
 * A [DataRow] represented as a tables [Row]
 */
@JvmInline
private value class DataRowAsRow<T>(val row: DataRow<T>) : Row<T> {
    @Suppress("UNCHECKED_CAST")
    override fun getOrNull(column: String): T? {
        val cell = row[column]
        return cell as? T
    }
}
/**
 * A [DataFrame] represented as a tables [Table]. Column and row views are created on each access.
 */
@JvmInline
internal value class DataFrameAsTable<T>(private val dataFrame: DataFrame<T>) : Table<T> {
    override val rows: List<Row<T>>
        get() = dataFrame.rows().map { dataRow -> DataRowAsRow(dataRow) }

    override val columns: Collection<Column<T>>
        get() = dataFrame.columns().map { dataColumn -> dataColumn.cast<T>().toTableColumn() }

    @Suppress("UNCHECKED_CAST")
    override fun getOrNull(row: Int, column: String): T? {
        val cell = dataFrame.getColumn(column)[row]
        return cell as? T
    }
}
/**
 * Wrap this [DataFrame] into a [Table] view
 */
public fun <T> DataFrame<T>.asTable(): Table<T> {
    return DataFrameAsTable(this)
}
/**
 * Get the column of this data frame that has the name of the given tables [header]
 */
public operator fun <R> DataFrame<*>.get(header: ColumnHeader<R>): DataColumn<R> =
    get(column<R>(header.name))
/**
 * Get the value of this data row in the column that has the name of the given tables [header]
 */
public operator fun <R> DataRow<*>.get(header: ColumnHeader<R>): R =
    get(column<R>(header.name))

@ -0,0 +1,73 @@
package space.kscience.dataforge.dataframe
import org.jetbrains.kotlinx.dataframe.AnyCol
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.count
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
import org.jetbrains.kotlinx.dataframe.indices
import space.kscience.tables.Table
import space.kscience.tables.get
import space.kscience.tables.indices
import kotlin.reflect.KType
import space.kscience.tables.Column as TableColumn
/**
 * A tables column represented as a DataFrame [ValueColumn].
 *
 * The data is never copied: sub-columns share the same [column] and differ only in [indexList].
 *
 * @param column the wrapped tables column.
 * @param indexList the indices of [column] that are visible through this view, in view order.
 * @param nameOverride the name reported by this view.
 */
internal class ColumnAsDataColumn<T>(
    val column: TableColumn<T>,
    val indexList: List<Int> = column.indices.toList(),
    val nameOverride: String = column.name,
) : ValueColumn<T> {

    override fun get(indices: Iterable<Int>): ValueColumn<T> {
        val newIndices = indices.map { indexList[it] }
        return ColumnAsDataColumn<T>(column, newIndices, nameOverride)
    }

    override fun get(range: IntRange): ValueColumn<T> {
        // select only the requested range of this view, not all of its indices
        val newIndices = range.map { indexList[it] }
        return ColumnAsDataColumn<T>(column, newIndices, nameOverride)
    }

    override fun rename(newName: String): ValueColumn<T> = ColumnAsDataColumn<T>(column, indexList, newName)

    override fun distinct(): ValueColumn<T> {
        val newIndices = indexList.distinctBy { column.getOrNull(it) }
        return ColumnAsDataColumn<T>(column, newIndices, nameOverride)
    }

    override fun contains(value: T): Boolean = indexList.any { column.getOrNull(it) == value }

    override fun countDistinct(): Int = distinct().count()

    override fun defaultValue(): T? = null

    override fun get(index: Int): T = column[indexList[index]]

    // A value column has no children, so only its own name can be resolved
    override fun get(columnName: String): AnyCol =
        if (columnName == nameOverride) this else error("Sub-columns are not allowed")

    override fun kind(): ColumnKind = ColumnKind.Value

    override fun size(): Int = indexList.size

    override fun toSet(): Set<T> = indexList.map { column[it] }.toSet()

    override fun type(): KType = column.type

    override fun values(): Iterable<T> = indexList.asSequence().map { column[it] }.asIterable()

    override fun name(): String = nameOverride
}
/**
 * Represent this tables column as a DataFrame column.
 * A column that itself wraps a DataFrame column is unwrapped instead of being wrapped once more.
 */
internal fun <T> TableColumn<T>.asDataColumn(): AnyCol {
    if (this is DataColumnAsColumn) return this.column
    return ColumnAsDataColumn(this)
}
//TODO convert typed value columns to primitive columns
/**
 * Create a [DataFrame] from the columns of this [Table]
 */
@Suppress("UNCHECKED_CAST")
public fun <T> Table<T>.toDataFrame(): DataFrame<T> {
    val dataColumns = columns.map { it.asDataColumn() }
    return dataFrameOf(dataColumns) as DataFrame<T>
}

@ -0,0 +1,49 @@
package space.kscience.dataforge.dataframe
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.column
import org.junit.jupiter.api.Test
import space.kscience.dataforge.misc.DFExperimental
import space.kscience.tables.*
import kotlin.math.pow
import kotlin.test.assertEquals
import kotlin.test.assertTrue
/**
 * Checks the conversion of a table to a data frame and back.
 */
@OptIn(DFExperimental::class)
internal class DataFrameTableTest {
@Test
fun convertTableToDataFrame() {
// the header names are taken from the names of the delegated properties
val x by ColumnHeader.typed<Double>()
val x2 by ColumnHeader.typed<Double>()
val y by ColumnHeader.typed<Double>()
val table = ColumnTable<Double?>(100) {
//filling column with double values equal to index
fill(x) { it.toDouble() }
//virtual column filled with x^2
transform(x2) { it[x].pow(2) }
//Fixed column filled with x^2 + 1
column(y, x2.values.map { it?.plus(1) })
}
val dataFrame = table.toDataFrame()
//println( dataFrame)
// add column z = x + y + 1 using the DataFrame API
val z by column<Double>()
val newFrame = dataFrame.add {
z.from { it[x] + it[y] + 1.0 }
}
//println(newFrame)
val newTable = newFrame.asTable()
// column x must survive the round trip unchanged
assertEquals(newTable.columns[x], table.columns[x])
// the same z computed from table rows must match the column added through the data frame
assertTrue {
table.rowsToColumn("z") { it[x] + it[y] + 1.0 }.contentEquals(newTable.columns["z"])
}
}
}

@ -0,0 +1,21 @@
# Module tables-kt-exposed
## Usage
## Artifact:
The Maven coordinates of this project are `space.kscience:tables-kt-exposed:0.4.1`.
**Gradle Kotlin DSL:**
```kotlin
repositories {
maven("https://repo.kotlin.link")
mavenCentral()
}
dependencies {
implementation("space.kscience:tables-kt-exposed:0.4.1")
}
```

@ -0,0 +1,18 @@
// Build script of the tables-kt-exposed module
plugins {
    id("space.kscience.gradle.jvm")
    `maven-publish`
}

dependencies {
    api(projects.tablesKt)
    // Exposed artifacts and their version are defined in the `libs` version catalog
    api(libs.exposed.core)
    testImplementation(libs.exposed.jdbc)
    testImplementation("com.h2database:h2:2.3.232")
    testImplementation(spclibs.logback.classic)
}

readme {
    maturity = space.kscience.gradle.Maturity.EXPERIMENTAL
}

@ -0,0 +1,162 @@
@file:Suppress("FunctionName")
package space.kscience.dataforge.exposed
import org.jetbrains.exposed.dao.id.IntIdTable
import org.jetbrains.exposed.sql.*
import org.jetbrains.exposed.sql.transactions.transaction
import space.kscience.dataforge.meta.Meta
import space.kscience.tables.Column
import space.kscience.tables.Row
import space.kscience.tables.RowTable
import space.kscience.tables.Table
import kotlin.reflect.KType
import kotlin.reflect.typeOf
import org.jetbrains.exposed.sql.Column as SqlColumn
/**
 * A [Column] backed by a column of an Exposed SQL table.
 *
 * @param T The type of table items.
 * @property db The Exposed database.
 * @property sqlTable The Exposed table, which must follow the properties defined for [ExposedTable.sqlTable].
 * @property sqlColumn The Exposed column.
 * @property type The type of [T].
 */
public class ExposedColumn<T : Any>(
    public val db: Database,
    public val sqlTable: IntIdTable,
    public val sqlColumn: SqlColumn<T>,
    public override val type: KType,
) : Column<T> {

    /**
     * The name of the SQL column.
     */
    public override val name: String get() = sqlColumn.name

    /**
     * Always [Meta.EMPTY] because it is impossible to store metadata correctly with SQL columns.
     */
    public override val meta: Meta get() = Meta.EMPTY

    /**
     * The number of rows in the table, queried in a new transaction on each access.
     */
    public override val size: Int
        get() = transaction(db) {
            val rowCount = sqlColumn.table.selectAll().count()
            rowCount.toInt()
        }

    /**
     * Acquires the value of this column in the row [index], i.e. in the record with id `index + 1`.
     */
    public override fun getOrNull(index: Int): T? = transaction(db) {
        val rowId = index + 1
        val matchingRow = sqlTable.selectAll().where { sqlTable.id eq rowId }.firstOrNull()
        matchingRow?.getOrNull(sqlColumn)
    }
}
/**
 * A [Row] backed by a result row of an Exposed query.
 *
 * @param T The type of table items.
 * @property db The Exposed database.
 * @property sqlTable The Exposed table, which must follow the properties defined for [ExposedTable.sqlTable].
 * @property sqlRow The Exposed row.
 */
@Suppress("UNCHECKED_CAST")
public class ExposedRow<T : Any>(
    public val db: Database,
    public val sqlTable: IntIdTable,
    public val sqlRow: ResultRow,
) : Row<T> {

    /**
     * Acquires the value of [column] in this row, or null if the table has no column with that name.
     */
    public override fun getOrNull(column: String): T? = transaction(db) {
        val matchingColumn = sqlTable.columns.firstOrNull { it.name == column } as SqlColumn<T>?
        matchingColumn?.let { sqlRow.getOrNull(it) }
    }
}
/**
 * Exposed based [Table] implementation.
 *
 * @property db The Exposed database.
 *
 * @property sqlTable The Exposed table. It must have the following properties:
 * 1. Integer `id` column must be present with auto-increment by sequence 1, 2, 3, ...
 * 1. All other columns must be of type [T].
 *
 * @property type The type of [T].
 */
@Suppress("UNCHECKED_CAST")
public class ExposedTable<T : Any>(
    public val db: Database,
    public val sqlTable: IntIdTable,
    public val type: KType
) : Table<T> {

    /**
     * The list of columns in this table. The `id` column is not included.
     */
    public override val columns: List<ExposedColumn<T>> =
        sqlTable.columns.filterNot { it.name == "id" }.map { ExposedColumn(db, sqlTable, it as SqlColumn<T>, type) }

    /**
     * The list of rows in this table, queried in a new transaction on each access.
     *
     * The rows are ordered by `id`, so the position of a row in the list agrees with the row index
     * used by [getOrNull] (the row with index `i` is the record with id `i + 1`).
     */
    public override val rows: List<ExposedRow<T>>
        get() = transaction(db) {
            // without an explicit ORDER BY the order of the selected records is not defined
            sqlTable.selectAll().orderBy(sqlTable.id).map { ExposedRow(db, sqlTable, it) }
        }

    /**
     * Acquires the value of [column] in the record with id `row + 1`,
     * or null if there is no such column or no such record.
     */
    public override fun getOrNull(row: Int, column: String): T? = transaction(db) {
        val sqlColumn: SqlColumn<T> = sqlTable.columns.find { it.name == column } as SqlColumn<T>?
            ?: return@transaction null
        sqlTable.selectAll().where { sqlTable.id eq row + 1 }.firstOrNull()?.getOrNull(sqlColumn)
    }
}
/**
 * Creates an [ExposedTable] over an existing Exposed table, using the reified [T] as the item type.
 *
 * @param T The type of table items.
 * @param db The Exposed database.
 * @param sqlTable The Exposed table, which must follow the properties defined for [ExposedTable.sqlTable].
 * @return A new [ExposedTable].
 */
public inline fun <reified T : Any> ExposedTable(
    db: Database,
    sqlTable: IntIdTable
): ExposedTable<T> {
    val itemType = typeOf<T>()
    return ExposedTable(db, sqlTable, itemType)
}
/**
 * Declares an SQL table with the given columns, creates whatever is missing in the database
 * and wraps the table into an [ExposedTable].
 *
 * @param T The type of table items.
 * @param db The Exposed database.
 * @param tableName The name of table.
 * @param columns The list of columns' names.
 * @param sqlColumnType The [IColumnType] for [T].
 * @return A new [ExposedTable].
 */
public inline fun <reified T : Any> ExposedTable(
    db: Database,
    tableName: String,
    columns: List<String>,
    sqlColumnType: IColumnType<T>,
): ExposedTable<T> {
    val sqlTable = object : IntIdTable(tableName) {
        init {
            for (columnName in columns) {
                registerColumn<T>(columnName, sqlColumnType)
            }
        }
    }

    transaction(db) { SchemaUtils.createMissingTablesAndColumns(sqlTable) }

    return ExposedTable(db, sqlTable, typeOf<T>())
}

@ -0,0 +1,41 @@
package space.kscience.dataforge.exposed
import org.jetbrains.exposed.sql.Column
import org.jetbrains.exposed.sql.Database
import org.jetbrains.exposed.sql.IntegerColumnType
import org.jetbrains.exposed.sql.insert
import org.jetbrains.exposed.sql.transactions.transaction
import kotlin.test.Test
import kotlin.test.assertEquals
/**
 * Checks reading of values, columns and rows from an [ExposedTable] stored in an in-memory H2 database.
 */
@Suppress("UNCHECKED_CAST")
internal class ExposedTableTest {
@Test
fun exposedTable() {
val db = Database.connect("jdbc:h2:mem:test;DB_CLOSE_DELAY=-1", driver = "org.h2.Driver")
// a table with three integer columns
val table = ExposedTable<Int>(
db,
"test",
listOf("a", "b", "c"),
IntegerColumnType(),
)
// insert a single record through the Exposed API; columns are looked up by name
transaction(db) {
table.sqlTable.insert {
it[table.sqlTable.columns.find { t -> t.name == "a" } as Column<Int>] = 42
it[table.sqlTable.columns.find { t -> t.name == "b" } as Column<Int>] = 3
it[table.sqlTable.columns.find { t -> t.name == "c" } as Column<Int>] = 7
}
}
// the inserted record is read as the row with index 0
assertEquals(42, table.getOrNull(0, "a"))
assertEquals(3, table.getOrNull(0, "b"))
assertEquals(7, table.getOrNull(0, "c"))
// three columns are expected for the three names passed above
assertEquals(3, table.columns.size)
table.columns.forEach { assertEquals(1, it.size) }
assertEquals(1, table.rows.size)
}
}

@ -0,0 +1,21 @@
# Module tables-kt-jupyter
## Usage
## Artifact:
The Maven coordinates of this project are `space.kscience:tables-kt-jupyter:0.4.1`.
**Gradle Kotlin DSL:**
```kotlin
repositories {
maven("https://repo.kotlin.link")
mavenCentral()
}
dependencies {
implementation("space.kscience:tables-kt-jupyter:0.4.1")
}
```

@ -0,0 +1,5 @@
public final class space/kscience/tables/TablesForJupyter : org/jetbrains/kotlinx/jupyter/api/libraries/JupyterIntegration {
public fun <init> ()V
public fun onLoaded (Lorg/jetbrains/kotlinx/jupyter/api/libraries/JupyterIntegration$Builder;)V
}

@ -0,0 +1,17 @@
// Build script of the tables-kt-jupyter module
plugins {
id("space.kscience.gradle.jvm")
`maven-publish`
}
dependencies {
api(projects.tablesKt)
api(spclibs.kotlinx.html)
}
kscience{
// the argument is the fully qualified name of the Jupyter integration class
jupyterLibrary("space.kscience.tables.TablesForJupyter")
}
readme {
maturity = space.kscience.gradle.Maturity.EXPERIMENTAL
}

@ -0,0 +1,96 @@
package space.kscience.tables
import kotlinx.html.*
import kotlinx.html.stream.createHTML
import org.jetbrains.kotlinx.jupyter.api.HTML
import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration
private const val MAX_ROWS = 20
/**
 * Jupyter integration for tables: registers the repository and default imports
 * and renders [Table] and [Column] values as HTML tables limited to the first [MAX_ROWS] entries.
 */
public class TablesForJupyter : JupyterIntegration() {

    /**
     * Append a header row with one `th` cell per column name
     */
    private fun TagConsumer<*>.appendHeaders(headers: TableHeader<*>) {
        tr {
            classes = classes + "tables-kt-header"
            headers.forEach { column ->
                th {
                    +column.name
                }
            }
        }
    }

    /**
     * Append a row with one `td` cell per header, containing the string representation of the row value
     */
    private fun TagConsumer<*>.appendRowValues(headers: TableHeader<*>, row: Row<*>) {
        tr {
            classes = classes + "tables-kt-row"
            headers.forEach { column ->
                td {
                    +row[column].toString()
                }
            }
        }
    }

    override fun Builder.onLoaded() {
        repositories("https://repo.kotlin.link")

        import(
            "space.kscience.tables.*",
            "space.kscience.dataforge.meta.*",
            "space.kscience.dataforge.values.*"
            //"space.kscience.tables.io.*",
        )

        //TODO replace by advanced widget

        render<Table<*>> { table ->
            // Rows and headers are read once: table implementations may compute them anew on each access
            val allRows = table.rows
            val tableHeaders = table.headers
            HTML(
                createHTML().table {
                    classes = classes + "tables-kt-table"
                    consumer.appendHeaders(tableHeaders)
                    allRows.take(MAX_ROWS).forEach { row ->
                        consumer.appendRowValues(tableHeaders, row)
                    }
                    if (allRows.size > MAX_ROWS) {
                        tr {
                            td {
                                +"... Displaying first $MAX_ROWS of ${allRows.size} rows ..."
                            }
                        }
                    }
                }
            )
        }

        render<Column<*>> { column ->
            // The size is read once: column implementations may compute it anew on each access
            val columnSize = column.size
            HTML(
                createHTML().table {
                    classes = classes + "tables-kt-table"
                    tr {
                        classes = classes + "tables-kt-header"
                        th {
                            +column.name
                        }
                    }
                    column.sequence().take(MAX_ROWS).forEach { value ->
                        tr {
                            classes = classes + "tables-kt-row"
                            td {
                                +value.toString()
                            }
                        }
                    }
                    if (columnSize > MAX_ROWS) {
                        tr {
                            td {
                                +"... Displaying first $MAX_ROWS of $columnSize values ..."
                            }
                        }
                    }
                }
            )
        }
    }
}