Merge remote-tracking branch 'github/dev' into dev

This commit is contained in:
Alexander Nozik 2025-04-23 09:13:46 +03:00
commit 2d37a5255f
12 changed files with 459 additions and 2 deletions
benchmarks
README.mdbuild.gradle.kts
src/jvmMain/kotlin/space/kscience/kmath/benchmarks
docs
kmath-stat/src
commonMain/kotlin/space/kscience/kmath/stat
commonTest/kotlin/space/kscience/kmath/stat
jvmTest/kotlin/space/kscience/kmath/stat

@ -84,6 +84,13 @@ C:\Users\altavir\scoop\apps\gradle\current\.gradle\jdks\eclipse_adoptium-17-amd6
|`kmathLupInversion`|4.0E+02 ± 52 ops/s|
|`kmathParallelLupInversion`|4.0E+02 ± 9.6 ops/s|
|`ojalgoInverse`|2.1E+03 ± 3.3E+02 ops/s|
### [MinStatisticBenchmark.kt](src/jvmMain/kotlin/space/kscience/kmath/benchmarks/MinStatisticBenchmark.kt)
| Benchmark | Score |
|:---------:|:-----:|
|`kotlinArrayMin`| 1875.7 ± 401.5 ops/s |
|`minBlocking`| 1357.9 ± 72.0 ops/s |
### [NDFieldBenchmark](src/jvmMain/kotlin/space/kscience/kmath/benchmarks/NDFieldBenchmark.kt)
| Benchmark | Score |

@ -103,6 +103,11 @@ benchmark {
include("BufferBenchmark")
}
// Registers a separate benchmark configuration named "minStatistic": it applies the shared
// commonConfiguration() settings and includes benchmarks matching "MinStatisticBenchmark".
configurations.register("minStatistic") {
commonConfiguration()
include("MinStatisticBenchmark")
}
configurations.register("nd") {
commonConfiguration()
include("NDFieldBenchmark")

@ -0,0 +1,44 @@
/*
* Copyright 2018-2025 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package space.kscience.kmath.benchmarks
import kotlinx.benchmark.Benchmark
import kotlinx.benchmark.Blackhole
import kotlinx.benchmark.Scope
import kotlinx.benchmark.State
import kotlinx.coroutines.runBlocking
import space.kscience.kmath.operations.Float64Field
import space.kscience.kmath.stat.min
import space.kscience.kmath.structures.*
/**
 * Compares the Kotlin standard library `DoubleArray.min()` with the KMath `min` statistic
 * evaluated in blocking mode.
 *
 * Each benchmark builds its own container of [size] ascending values, computes the minimum
 * [size] times while summing the results, and hands the sum to the [Blackhole].
 */
@State(Scope.Benchmark)
internal class MinStatisticBenchmark {

    /** Baseline: minimum of a plain [DoubleArray] via the standard library. */
    @Benchmark
    fun kotlinArrayMin(blackhole: Blackhole) {
        val values = DoubleArray(size) { index -> index.toDouble() }
        var total = 0.0
        repeat(size) {
            total += values.min()
        }
        blackhole.consume(total)
    }

    /** Minimum of a [Float64Buffer] via `Float64Field.min.evaluateBlocking`. */
    @Benchmark
    fun minBlocking(blackhole: Blackhole) {
        val values = Float64Buffer(size) { index -> index.toDouble() }
        var total = 0.0
        repeat(size) {
            total += Float64Field.min.evaluateBlocking(values)
        }
        blackhole.consume(total)
    }

    private companion object {
        // Used both as the number of elements and as the number of repetitions per invocation.
        private const val size = 1000
    }
}

@ -11,4 +11,6 @@
* [Expressions](expressions.md)
* [Statistics](statistics.md): statistical functions on data [Buffers](buffers.md)
* Commons math integration

34
docs/statistics.md Normal file

@ -0,0 +1,34 @@
# Statistics
Mathematically speaking, a statistic is a measurable numerical function of sample data.
In KMath, a statistic is a function that operates on a [Buffer](buffers.md) and is implemented as the `evaluate` method
of the `Statistic` interface.
There are two subinterfaces of the `Statistic` interface:
* `BlockingStatistic`: a statistic that is computed in a synchronous blocking mode
* `ComposableStatistic`: a statistic that can be computed separately on different blocks of data and then composed
## Common statistics and Implementation Status
| Category | Statistic | Description | Implementation Status |
|------------------|-------------------|-------------------------------------|--------------------------------|
| **Basic** | Min | Minimum value | ✅ `ComposableStatistic` |
| | Max | Maximum value | ✅ `ComposableStatistic` |
| | Mean | Arithmetic mean | ✅ `ComposableStatistic` |
| | Sum | Sum of all values | 🚧 Not yet implemented |
| | Product | Product of all values | 🚧 Not yet implemented |
| **Distribution** | Median | Median (50th percentile) | ✅ `BlockingStatistic` |
| | Quantile | Arbitrary percentile (e.g., Q1, Q3) | 🚧 Not yet implemented |
| | Variance | Unbiased sample variance | 🚧 *(Requires `SumOfSquares`)* |
| | StandardDeviation | Population standard deviation (σ) | 🚧 *(Depends on `Variance`)* |
| | Skewness | Measure of distribution asymmetry | 🚧 *(Requires `ThirdMoment`)* |
| | Kurtosis | Measure of distribution tailedness | 🚧 *(Requires `FourthMoment`)* |
| **Advanced** | GeometricMean | Nth root of product of values | 🚧 *(Requires `SumOfLogs`)* |
| | SumOfLogs | Sum of natural logarithms | 🚧 Not yet implemented |
| | SumOfSquares | Sum of squared values | 🚧 *(Blocks `Variance`)* |
| **Moments** | FirstMoment | Mean (same as `Mean`) | ✅ *(Alias for `Mean`)* |
| | SecondMoment | Variance (same as `Variance`) | 🚧 *(Alias for `Variance`)* |
| | ThirdMoment | Used in skewness calculation | 🚧 Not yet implemented |
| | FourthMoment | Used in kurtosis calculation | 🚧 Not yet implemented |
| **Risk Metrics** | SemiVariance | Downside variance | 🚧 *(Depends on `Variance`)* |

@ -0,0 +1,38 @@
/*
* Copyright 2018-2025 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package space.kscience.kmath.stat
import space.kscience.kmath.operations.*
import space.kscience.kmath.structures.*
/**
 * A statistic returning the largest element of a non-empty buffer, using the natural
 * [Comparable] ordering of [T].
 *
 * In composable mode the maximum of a block is its own intermediate value, and two
 * intermediates are merged by keeping the greater one.
 */
public class MaxStatistic<T : Comparable<T>> : BlockingStatistic<T, T>, ComposableStatistic<T, T, T> {

    /**
     * Scans [data] once and returns its greatest element.
     *
     * @throws IllegalArgumentException if [data] is empty.
     */
    override fun evaluateBlocking(data: Buffer<T>): T {
        val length = data.size
        require(length > 0) { "Data must not be empty" }
        var greatest = data[0]
        for (position in 1 until length) {
            val candidate = data[position]
            // Strict comparison: on ties the element found earlier is kept.
            if (candidate > greatest) greatest = candidate
        }
        return greatest
    }

    /** The intermediate value of a block is simply its maximum. */
    override suspend fun computeIntermediate(data: Buffer<T>): T = evaluateBlocking(data)

    /** Merges two block maxima; [second] is returned unless [first] is strictly greater. */
    override suspend fun composeIntermediate(first: T, second: T): T = when {
        first > second -> first
        else -> second
    }

    /** The intermediate value already is the final result. */
    override suspend fun toResult(intermediate: T): T = intermediate

    /** Explicitly delegates to the [ComposableStatistic] implementation of `evaluate`. */
    override suspend fun evaluate(data: Buffer<T>): T = super<ComposableStatistic>.evaluate(data)
}
// Accessors exposing the max statistic on the numeric algebras; every read constructs a new MaxStatistic.

/** [MaxStatistic] for [Float64] elements, reachable as `Float64Field.max`. */
public val Float64Field.max: MaxStatistic<Float64>
    get() = MaxStatistic<Float64>()

/** [MaxStatistic] for [Int32] elements, reachable as `Int32Ring.max`. */
public val Int32Ring.max: MaxStatistic<Int32>
    get() = MaxStatistic<Int32>()

/** [MaxStatistic] for [Int64] elements, reachable as `Int64Ring.max`. */
public val Int64Ring.max: MaxStatistic<Int64>
    get() = MaxStatistic<Int64>()

@ -0,0 +1,38 @@
/*
* Copyright 2018-2025 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package space.kscience.kmath.stat
import space.kscience.kmath.operations.*
import space.kscience.kmath.structures.*
/**
 * A statistic returning the smallest element of a non-empty buffer, using the natural
 * [Comparable] ordering of [T].
 *
 * In composable mode the minimum of a block is its own intermediate value, and two
 * intermediates are merged by keeping the lesser one.
 */
public class MinStatistic<T : Comparable<T>> : BlockingStatistic<T, T>, ComposableStatistic<T, T, T> {

    /**
     * Scans [data] once and returns its least element.
     *
     * @throws IllegalArgumentException if [data] is empty.
     */
    override fun evaluateBlocking(data: Buffer<T>): T {
        val length = data.size
        require(length > 0) { "Data must not be empty" }
        var least = data[0]
        for (position in 1 until length) {
            val candidate = data[position]
            // Strict comparison: on ties the element found earlier is kept.
            if (candidate < least) least = candidate
        }
        return least
    }

    /** The intermediate value of a block is simply its minimum. */
    override suspend fun computeIntermediate(data: Buffer<T>): T = evaluateBlocking(data)

    /** Merges two block minima; [second] is returned unless [first] is strictly smaller. */
    override suspend fun composeIntermediate(first: T, second: T): T = when {
        first < second -> first
        else -> second
    }

    /** The intermediate value already is the final result. */
    override suspend fun toResult(intermediate: T): T = intermediate

    /** Explicitly delegates to the [ComposableStatistic] implementation of `evaluate`. */
    override suspend fun evaluate(data: Buffer<T>): T = super<ComposableStatistic>.evaluate(data)
}
// Accessors exposing the min statistic on the numeric algebras; every read constructs a new MinStatistic.

/** [MinStatistic] for [Float64] elements, reachable as `Float64Field.min`. */
public val Float64Field.min: MinStatistic<Float64>
    get() = MinStatistic<Float64>()

/** [MinStatistic] for [Int32] elements, reachable as `Int32Ring.min`. */
public val Int32Ring.min: MinStatistic<Int32>
    get() = MinStatistic<Int32>()

/** [MinStatistic] for [Int64] elements, reachable as `Int64Ring.min`. */
public val Int64Ring.min: MinStatistic<Int64>
    get() = MinStatistic<Int64>()

@ -0,0 +1,97 @@
/*
* Copyright 2018-2025 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package space.kscience.kmath.stat
import space.kscience.kmath.operations.Float64Field
import space.kscience.kmath.structures.Float64Buffer
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertFailsWith
import kotlinx.coroutines.test.runTest
import space.kscience.kmath.operations.Int32Ring
import space.kscience.kmath.operations.Int64Ring
import space.kscience.kmath.structures.Int32Buffer
import space.kscience.kmath.structures.Int64Buffer
/**
 * Deterministic checks of the `max` statistic on small, hand-written buffers
 * for the Int32, Int64 and Float64 algebras.
 */
internal class MaxStatisticBasicTest {

    // ---- Int32 ----

    @Test
    fun singleBlockingInt32Max() = runTest {
        assertEquals(expected = 3, actual = Int32Ring.max(Int32Buffer(1, 2, 3)))
    }

    @Test
    fun int32MaxWithNegativeValues() = runTest {
        assertEquals(expected = -1, actual = Int32Ring.max(Int32Buffer(-1, -5, -3)))
    }

    @Test
    fun int32SingleElement() = runTest {
        assertEquals(expected = 42, actual = Int32Ring.max(Int32Buffer(42)))
    }

    // ---- Int64 ----

    @Test
    fun singleBlockingInt64Max() = runTest {
        assertEquals(expected = 3L, actual = Int64Ring.max(Int64Buffer(1L, 2L, 3L)))
    }

    @Test
    fun int64MaxWithLargeValues() = runTest {
        val input = Int64Buffer(Long.MAX_VALUE, Long.MIN_VALUE, 0L)
        assertEquals(expected = Long.MAX_VALUE, actual = Int64Ring.max(input))
    }

    // ---- Float64 ----

    @Test
    fun singleBlockingFloat64Max() = runTest {
        assertEquals(expected = 3.1, actual = Float64Field.max(Float64Buffer(1.0, 2.5, 3.1)))
    }

    @Test
    fun float64MaxWithSpecialValues() = runTest {
        val input = Float64Buffer(Double.POSITIVE_INFINITY, Double.MAX_VALUE, 0.0)
        assertEquals(expected = Double.POSITIVE_INFINITY, actual = Float64Field.max(input))
    }

    @Test
    fun float64MaxWithNaN() = runTest {
        // The expected maximum is NaN even though positive infinity is present.
        val input = Float64Buffer(Double.POSITIVE_INFINITY, Double.MAX_VALUE, Double.NaN)
        assertEquals(expected = Double.NaN, actual = Float64Field.max(input))
    }

    // ---- Edge cases ----

    @Test
    fun emptyBufferThrowsException() = runTest {
        assertFailsWith<IllegalArgumentException> {
            Float64Field.max(Float64Buffer())
        }
    }

    @Test
    fun allEqualValuesReturnsSame() = runTest {
        assertEquals(expected = 5, actual = Int32Ring.max(Int32Buffer(5, 5, 5)))
    }

    @Test
    fun extremeValueAtBufferStart() = runTest {
        val input = Float64Buffer(Double.MAX_VALUE, 1.0, 2.0)
        assertEquals(expected = Double.MAX_VALUE, actual = Float64Field.max(input))
    }

    @Test
    fun extremeValueAtBufferEnd() = runTest {
        val input = Int64Buffer(1L, 2L, Long.MAX_VALUE)
        assertEquals(expected = Long.MAX_VALUE, actual = Int64Ring.max(input))
    }
}

@ -0,0 +1,85 @@
/*
* Copyright 2018-2025 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package space.kscience.kmath.stat
import space.kscience.kmath.operations.Float64Field
import space.kscience.kmath.structures.Float64Buffer
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertFailsWith
import kotlinx.coroutines.test.runTest
import space.kscience.kmath.operations.Int32Ring
import space.kscience.kmath.operations.Int64Ring
import space.kscience.kmath.structures.Int32Buffer
import space.kscience.kmath.structures.Int64Buffer
/**
 * Deterministic checks of the `min` statistic on small, hand-written buffers
 * for the Int32, Int64 and Float64 algebras.
 *
 * Includes an Int64 extreme-values case and a minimum-at-the-last-index case so that
 * the final iteration of the scan is exercised.
 */
internal class MinStatisticBasicTest {

    // Int32 Tests

    @Test
    fun singleBlockingInt32Min() = runTest {
        val res = Int32Ring.min(Int32Buffer(1, 2, 3))
        assertEquals(1, res)
    }

    @Test
    fun int32MinWithNegativeValues() = runTest {
        val res = Int32Ring.min(Int32Buffer(-1, -5, -3))
        assertEquals(-5, res)
    }

    @Test
    fun int32SingleElement() = runTest {
        val res = Int32Ring.min(Int32Buffer(42))
        assertEquals(42, res)
    }

    // Int64 Tests

    @Test
    fun singleBlockingInt64Min() = runTest {
        val res = Int64Ring.min(Int64Buffer(1L, 2L, 3L))
        assertEquals(1L, res)
    }

    @Test
    fun int64MinWithLargeValues() = runTest {
        val res = Int64Ring.min(Int64Buffer(Long.MAX_VALUE, Long.MIN_VALUE, 0L))
        assertEquals(Long.MIN_VALUE, res)
    }

    // Float64 Tests

    @Test
    fun singleBlockingFloat64Min() = runTest {
        val res = Float64Field.min(Float64Buffer(1.0, 2.5, 3.1))
        assertEquals(1.0, res)
    }

    @Test
    fun float64MinWithNaN() = runTest {
        // A NaN in the input is expected not to win: the minimum is still negative infinity.
        val res = Float64Field.min(Float64Buffer(Double.NEGATIVE_INFINITY, Double.MIN_VALUE, Double.NaN))
        assertEquals(Double.NEGATIVE_INFINITY, res)
    }

    @Test
    fun float64MinWithSpecialValues() = runTest {
        val res = Float64Field.min(Float64Buffer(Double.NEGATIVE_INFINITY, Double.MIN_VALUE, 0.0))
        assertEquals(Double.NEGATIVE_INFINITY, res)
    }

    // Edge Cases

    @Test
    fun emptyBufferThrowsException() = runTest {
        assertFailsWith<IllegalArgumentException> {
            Float64Field.min(Float64Buffer())
        }
    }

    @Test
    fun allEqualValuesReturnsSame() = runTest {
        val res = Int32Ring.min(Int32Buffer(5, 5, 5))
        assertEquals(5, res)
    }

    @Test
    fun extremeValueAtBufferStart() = runTest {
        // Double.MIN_VALUE is the smallest positive double, so it is below 1.0 and 2.0.
        val res = Float64Field.min(Float64Buffer(Double.MIN_VALUE, 1.0, 2.0))
        assertEquals(Double.MIN_VALUE, res)
    }

    @Test
    fun extremeValueAtBufferEnd() = runTest {
        // The minimum sits at the last index, so the final loop iteration must be visited.
        val res = Int64Ring.min(Int64Buffer(2L, 1L, Long.MIN_VALUE))
        assertEquals(Long.MIN_VALUE, res)
    }
}

@ -0,0 +1,53 @@
/*
* Copyright 2018-2025 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package space.kscience.kmath.stat
import kotlinx.coroutines.flow.first
import kotlinx.coroutines.flow.last
import kotlinx.coroutines.flow.take
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.test.runTest
import space.kscience.kmath.operations.Float64Field
import space.kscience.kmath.random.RandomGenerator
import space.kscience.kmath.random.chain
import space.kscience.kmath.streaming.chunked
import kotlin.test.Test
import kotlin.test.assertEquals
/**
 * Checks the `max` statistic of [Float64Field] on chunks of generated random doubles,
 * covering the blocking, suspending and flow-composed evaluation paths.
 *
 * The expected value 1.0 with a tolerance presumes that `nextDouble()` yields values
 * from the unit interval — TODO confirm against [RandomGenerator].
 */
internal class MaxStatisticTest {
    // Create a random number generator.
    val generator = RandomGenerator.default(1)

    // Create a stateless chain from generator.
    val data = generator.chain { nextDouble() }

    // Convert a chain to Flow and break it into chunks of 1000 elements.
    val chunked = data.chunked(1000)

    @Test
    fun singleBlockingMax() = runTest {
        val first = chunked.first()
        // Exercise the synchronous entry point explicitly, as the test name promises.
        val res = Float64Field.max.evaluateBlocking(first)
        assertEquals(1.0, res, 1e-1)
    }

    @Test
    fun singleSuspendMax() = runTest {
        // Already inside a coroutine: call the suspending API directly instead of nesting runBlocking.
        val first = chunked.first()
        val res = Float64Field.max(first)
        assertEquals(1.0, res, 1e-1)
    }

    @Test
    fun parallelMax() = runTest {
        val maxValue = Float64Field.max
            .flow(chunked) // create a flow from evaluated results
            .take(100) // take 100 data chunks from the source and accumulate them
            .last() // get the maximum from 1e5 data samples

        assertEquals(1.0, maxValue, 1e-2)
    }
}

@ -1,5 +1,5 @@
/*
* Copyright 2018-2024 KMath contributors.
* Copyright 2018-2025 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
@ -17,7 +17,7 @@ import space.kscience.kmath.streaming.chunked
import kotlin.test.Test
import kotlin.test.assertEquals
internal class StatisticTest {
internal class MeanStatisticTest {
//create a random number generator.
val generator = RandomGenerator.default(1)

@ -0,0 +1,54 @@
/*
* Copyright 2018-2025 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package space.kscience.kmath.stat
import kotlinx.coroutines.flow.first
import kotlinx.coroutines.flow.last
import kotlinx.coroutines.flow.take
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.test.runTest
import space.kscience.kmath.operations.Float64Field
import space.kscience.kmath.random.RandomGenerator
import space.kscience.kmath.random.chain
import space.kscience.kmath.streaming.chunked
import kotlin.test.Test
import kotlin.test.assertEquals
/**
 * Checks the `min` statistic of [Float64Field] on chunks of generated random doubles,
 * covering the blocking, suspending and flow-composed evaluation paths.
 *
 * The expected value 0.0 with a tolerance presumes that `nextDouble()` yields values
 * from the unit interval — TODO confirm against [RandomGenerator].
 */
internal class MinStatisticTest {
    // Create a random number generator.
    val generator = RandomGenerator.default(1)

    // Create a stateless chain from generator.
    val data = generator.chain { nextDouble() }

    // Convert a chain to Flow and break it into chunks of 1000 elements.
    val chunked = data.chunked(1000)

    @Test
    fun singleBlockingMin() = runTest {
        val first = chunked.first()
        // Exercise the synchronous entry point explicitly, as the test name promises.
        val res = Float64Field.min.evaluateBlocking(first)
        assertEquals(0.0, res, 1e-1)
    }

    @Test
    fun singleSuspendMin() = runTest {
        // Already inside a coroutine: call the suspending API directly instead of nesting runBlocking.
        val first = chunked.first()
        val res = Float64Field.min(first)
        assertEquals(0.0, res, 1e-1)
    }

    @Test
    fun parallelMin() = runTest {
        val minValue = Float64Field.min
            .flow(chunked) // create a flow from evaluated results
            .take(100) // take 100 data chunks from the source and accumulate them
            .last() // get the minimum from 1e5 data samples

        assertEquals(0.0, minValue, 1e-2)
    }
}