forked from kscience/kmath
Fix Mean bug
This commit is contained in:
parent
abae29bbed
commit
fd8a61c852
@ -14,7 +14,6 @@ import space.kscience.kmath.ejml.EjmlLinearSpaceDDRM
|
||||
import space.kscience.kmath.linear.invoke
|
||||
import space.kscience.kmath.linear.linearSpace
|
||||
import space.kscience.kmath.operations.DoubleField
|
||||
import space.kscience.kmath.operations.algebra
|
||||
import space.kscience.kmath.structures.Buffer
|
||||
import kotlin.random.Random
|
||||
|
||||
@ -25,11 +24,11 @@ internal class DotBenchmark {
|
||||
const val dim = 1000
|
||||
|
||||
//creating invertible matrix
|
||||
val matrix1 = Double.algebra.linearSpace.buildMatrix(dim, dim) { i, j ->
|
||||
if (i <= j) random.nextDouble() else 0.0
|
||||
val matrix1 = DoubleField.linearSpace.buildMatrix(dim, dim) { _, _ ->
|
||||
random.nextDouble()
|
||||
}
|
||||
val matrix2 = Double.algebra.linearSpace.buildMatrix(dim, dim) { i, j ->
|
||||
if (i <= j) random.nextDouble() else 0.0
|
||||
val matrix2 = DoubleField.linearSpace.buildMatrix(dim, dim) { _, _ ->
|
||||
random.nextDouble()
|
||||
}
|
||||
|
||||
val cmMatrix1 = CMLinearSpace { matrix1.toCM() }
|
||||
@ -65,7 +64,7 @@ internal class DotBenchmark {
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
fun doubleDot(blackhole: Blackhole) = with(Double.algebra.linearSpace) {
|
||||
fun doubleDot(blackhole: Blackhole) = with(DoubleField.linearSpace) {
|
||||
blackhole.consume(matrix1 dot matrix2)
|
||||
}
|
||||
}
|
||||
|
@ -75,8 +75,9 @@ internal class ExpressionsInterpretersBenchmark {
|
||||
private val algebra = DoubleField
|
||||
private const val times = 1_000_000
|
||||
|
||||
private val functional = DoubleField.expressionInExtendedField {
|
||||
bindSymbol(x) * number(2.0) + number(2.0) / bindSymbol(x) - number(16.0) / sin(bindSymbol(x))
|
||||
private val functional = DoubleField.expression {
|
||||
val x = bindSymbol(Symbol.x)
|
||||
x * number(2.0) + 2.0 / x - 16.0 / sin(x)
|
||||
}
|
||||
|
||||
private val node = MstExtendedField {
|
||||
|
@ -192,3 +192,7 @@ public inline fun <T, A : Field<T>> A.expressionInField(
|
||||
public inline fun <T, A : ExtendedField<T>> A.expressionInExtendedField(
|
||||
block: FunctionalExpressionExtendedField<T, A>.() -> Expression<T>,
|
||||
): Expression<T> = FunctionalExpressionExtendedField(this).block()
|
||||
|
||||
public inline fun DoubleField.expression(
|
||||
block: FunctionalExpressionExtendedField<Double, DoubleField>.() -> Expression<Double>,
|
||||
): Expression<Double> = FunctionalExpressionExtendedField(this).block()
|
||||
|
@ -13,7 +13,6 @@ import space.kscience.kmath.operations.DoubleBufferOperations
|
||||
import space.kscience.kmath.operations.DoubleField
|
||||
import space.kscience.kmath.structures.Buffer
|
||||
import space.kscience.kmath.structures.DoubleBuffer
|
||||
import space.kscience.kmath.structures.indices
|
||||
|
||||
public object DoubleLinearSpace : LinearSpace<Double, DoubleField> {
|
||||
|
||||
@ -30,7 +29,6 @@ public object DoubleLinearSpace : LinearSpace<Double, DoubleField> {
|
||||
initializer: DoubleField.(i: Int, j: Int) -> Double
|
||||
): Matrix<Double> = ndRing(rows, columns).produce { (i, j) -> DoubleField.initializer(i, j) }.as2D()
|
||||
|
||||
|
||||
override fun buildVector(size: Int, initializer: DoubleField.(Int) -> Double): DoubleBuffer =
|
||||
DoubleBuffer(size) { DoubleField.initializer(it) }
|
||||
|
||||
@ -50,9 +48,9 @@ public object DoubleLinearSpace : LinearSpace<Double, DoubleField> {
|
||||
|
||||
// Create a continuous in-memory representation of this vector for better memory layout handling
|
||||
private fun Buffer<Double>.linearize() = if (this is DoubleBuffer) {
|
||||
this
|
||||
this.array
|
||||
} else {
|
||||
DoubleBuffer(size) { get(it) }
|
||||
DoubleArray(size) { get(it) }
|
||||
}
|
||||
|
||||
@OptIn(PerformancePitfall::class)
|
||||
|
@ -27,8 +27,13 @@ public class Mean<T>(
|
||||
|
||||
override suspend fun evaluate(data: Buffer<T>): T = super<ComposableStatistic>.evaluate(data)
|
||||
|
||||
override suspend fun computeIntermediate(data: Buffer<T>): Pair<T, Int> =
|
||||
evaluateBlocking(data) to data.size
|
||||
override suspend fun computeIntermediate(data: Buffer<T>): Pair<T, Int> = group {
|
||||
var res = zero
|
||||
for (i in data.indices) {
|
||||
res += data[i]
|
||||
}
|
||||
res to data.size
|
||||
}
|
||||
|
||||
override suspend fun composeIntermediate(first: Pair<T, Int>, second: Pair<T, Int>): Pair<T, Int> =
|
||||
group { first.first + second.first } to (first.second + second.second)
|
||||
@ -38,9 +43,11 @@ public class Mean<T>(
|
||||
}
|
||||
|
||||
public companion object {
|
||||
//TODO replace with optimized version which respects overflow
|
||||
@Deprecated("Use Double.mean instead")
|
||||
public val double: Mean<Double> = Mean(DoubleField) { sum, count -> sum / count }
|
||||
@Deprecated("Use Int.mean instead")
|
||||
public val int: Mean<Int> = Mean(IntRing) { sum, count -> sum / count }
|
||||
@Deprecated("Use Long.mean instead")
|
||||
public val long: Mean<Long> = Mean(LongRing) { sum, count -> sum / count }
|
||||
|
||||
public fun evaluate(buffer: Buffer<Double>): Double = double.evaluateBlocking(buffer)
|
||||
@ -48,3 +55,11 @@ public class Mean<T>(
|
||||
public fun evaluate(buffer: Buffer<Long>): Long = long.evaluateBlocking(buffer)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//TODO replace with optimized version which respects overflow
|
||||
public val Double.Companion.mean: Mean<Double> get() = Mean(DoubleField) { sum, count -> sum / count }
|
||||
public val Int.Companion.mean: Mean<Int> get() = Mean(IntRing) { sum, count -> sum / count }
|
||||
public val Long.Companion.mean: Mean<Long> get() = Mean(LongRing) { sum, count -> sum / count }
|
||||
|
||||
|
||||
|
@ -8,7 +8,6 @@ package space.kscience.kmath.stat
|
||||
import kotlinx.coroutines.CoroutineDispatcher
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.ExperimentalCoroutinesApi
|
||||
import kotlinx.coroutines.FlowPreview
|
||||
import kotlinx.coroutines.flow.Flow
|
||||
import kotlinx.coroutines.flow.map
|
||||
import kotlinx.coroutines.flow.runningReduce
|
||||
@ -18,16 +17,23 @@ import space.kscience.kmath.structures.Buffer
|
||||
/**
|
||||
* A function, that transforms a buffer of random quantities to some resulting value
|
||||
*/
|
||||
public interface Statistic<in T, out R> {
|
||||
public fun interface Statistic<in T, out R> {
|
||||
public suspend fun evaluate(data: Buffer<T>): R
|
||||
}
|
||||
|
||||
public interface BlockingStatistic<in T, out R> : Statistic<T, R> {
|
||||
public suspend operator fun <T, R> Statistic<T, R>.invoke(data: Buffer<T>): R = evaluate(data)
|
||||
|
||||
/**
|
||||
* A statistic that is computed in a synchronous blocking mode
|
||||
*/
|
||||
public fun interface BlockingStatistic<in T, out R> : Statistic<T, R> {
|
||||
public fun evaluateBlocking(data: Buffer<T>): R
|
||||
|
||||
override suspend fun evaluate(data: Buffer<T>): R = evaluateBlocking(data)
|
||||
}
|
||||
|
||||
public operator fun <T, R> BlockingStatistic<T, R>.invoke(data: Buffer<T>): R = evaluateBlocking(data)
|
||||
|
||||
/**
|
||||
* A statistic tha could be computed separately on different blocks of data and then composed
|
||||
*
|
||||
@ -48,8 +54,10 @@ public interface ComposableStatistic<in T, I, out R> : Statistic<T, R> {
|
||||
override suspend fun evaluate(data: Buffer<T>): R = toResult(computeIntermediate(data))
|
||||
}
|
||||
|
||||
@FlowPreview
|
||||
@ExperimentalCoroutinesApi
|
||||
/**
|
||||
* Flow intermediate state of the [ComposableStatistic]
|
||||
*/
|
||||
@OptIn(ExperimentalCoroutinesApi::class)
|
||||
private fun <T, I, R> ComposableStatistic<T, I, R>.flowIntermediate(
|
||||
flow: Flow<Buffer<T>>,
|
||||
dispatcher: CoroutineDispatcher = Dispatchers.Default,
|
||||
@ -64,7 +72,7 @@ private fun <T, I, R> ComposableStatistic<T, I, R>.flowIntermediate(
|
||||
*
|
||||
* The resulting flow contains values that include the whole previous statistics, not only the last chunk.
|
||||
*/
|
||||
@OptIn(FlowPreview::class, ExperimentalCoroutinesApi::class)
|
||||
@OptIn(ExperimentalCoroutinesApi::class)
|
||||
public fun <T, I, R> ComposableStatistic<T, I, R>.flow(
|
||||
flow: Flow<Buffer<T>>,
|
||||
dispatcher: CoroutineDispatcher = Dispatchers.Default,
|
||||
|
@ -5,11 +5,13 @@
|
||||
|
||||
package space.kscience.kmath.stat
|
||||
|
||||
import kotlinx.coroutines.flow.drop
|
||||
import kotlinx.coroutines.flow.first
|
||||
import kotlinx.coroutines.flow.last
|
||||
import kotlinx.coroutines.flow.take
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import space.kscience.kmath.streaming.chunked
|
||||
import kotlin.test.Test
|
||||
import kotlin.test.assertEquals
|
||||
|
||||
internal class StatisticTest {
|
||||
//create a random number generator.
|
||||
@ -22,12 +24,27 @@ internal class StatisticTest {
|
||||
val chunked = data.chunked(1000)
|
||||
|
||||
@Test
|
||||
fun testParallelMean() = runBlocking {
|
||||
val average = Mean.double
|
||||
.flow(chunked) //create a flow with results
|
||||
.drop(99) // Skip first 99 values and use one with total data
|
||||
.first() //get 1e5 data samples average
|
||||
fun singleBlockingMean() {
|
||||
val first = runBlocking { chunked.first()}
|
||||
val res = Double.mean(first)
|
||||
assertEquals(0.5,res, 1e-1)
|
||||
}
|
||||
|
||||
println(average)
|
||||
@Test
|
||||
fun singleSuspendMean() = runBlocking {
|
||||
val first = runBlocking { chunked.first()}
|
||||
val res = Double.mean(first)
|
||||
assertEquals(0.5,res, 1e-1)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun parallelMean() = runBlocking {
|
||||
val average = Double.mean
|
||||
.flow(chunked) //create a flow from evaluated results
|
||||
.take(100) // Take 100 data chunks from the source and accumulate them
|
||||
.last() //get 1e5 data samples average
|
||||
|
||||
assertEquals(0.5,average, 1e-3)
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user