forked from kscience/kmath
Histograms refactor
This commit is contained in:
parent
987997cc02
commit
ce18e85a0a
@ -1,8 +1,8 @@
|
||||
package scietifik.kmath.histogram
|
||||
|
||||
import kscience.kmath.histogram.RealHistogramSpace
|
||||
import kscience.kmath.histogram.fill
|
||||
import kscience.kmath.histogram.put
|
||||
import kscience.kmath.operations.invoke
|
||||
import kscience.kmath.real.RealVector
|
||||
import kscience.kmath.real.invoke
|
||||
import kotlin.random.Random
|
||||
@ -37,12 +37,44 @@ internal class MultivariateHistogramTest {
|
||||
|
||||
val n = 10000
|
||||
val histogram = hSpace.produce {
|
||||
fill {
|
||||
repeat(n) {
|
||||
yield(RealVector(nextDouble(), nextDouble(), nextDouble()))
|
||||
}
|
||||
put(nextDouble(), nextDouble(), nextDouble())
|
||||
}
|
||||
}
|
||||
assertEquals(n, histogram.bins.sumBy { it.value.toInt() })
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testHistogramAlgebra() {
|
||||
val hSpace = RealHistogramSpace.fromRanges(
|
||||
(-1.0..1.0),
|
||||
(-1.0..1.0),
|
||||
(-1.0..1.0)
|
||||
).invoke {
|
||||
val random = Random(1234)
|
||||
|
||||
fun nextDouble() = random.nextDouble(-1.0, 1.0)
|
||||
val n = 10000
|
||||
val histogram1 = produce {
|
||||
repeat(n) {
|
||||
put(nextDouble(), nextDouble(), nextDouble())
|
||||
}
|
||||
}
|
||||
val histogram2 = produce {
|
||||
repeat(n) {
|
||||
put(nextDouble(), nextDouble(), nextDouble())
|
||||
}
|
||||
}
|
||||
val res = histogram1 - histogram2
|
||||
assertTrue {
|
||||
strides.indices().all { index ->
|
||||
res.values[index] <= histogram1.values[index]
|
||||
}
|
||||
}
|
||||
assertTrue {
|
||||
res.bins.count() >= histogram1.bins.count()
|
||||
}
|
||||
assertEquals(0.0, res.bins.sumByDouble { it.value.toDouble() })
|
||||
}
|
||||
}
|
||||
}
|
@ -1,33 +0,0 @@
|
||||
package kscience.kmath.histogram
|
||||
|
||||
|
||||
/**
|
||||
* Univariate histogram with log(n) bin search speed
|
||||
*/
|
||||
//private abstract class AbstractUnivariateHistogram<B: UnivariateBin>{
|
||||
//
|
||||
// public abstract val bins: TreeMap<Double, B>
|
||||
//
|
||||
// public open operator fun get(value: Double): B? {
|
||||
// // check ceiling entry and return it if it is what needed
|
||||
// val ceil = bins.ceilingEntry(value)?.value
|
||||
// if (ceil != null && value in ceil) return ceil
|
||||
// //check floor entry
|
||||
// val floor = bins.floorEntry(value)?.value
|
||||
// if (floor != null && value in floor) return floor
|
||||
// //neither is valid, not found
|
||||
// return null
|
||||
// }
|
||||
|
||||
// public override operator fun get(point: Buffer<out Double>): B? = get(point[0])
|
||||
//
|
||||
// public override val dimension: Int get() = 1
|
||||
//
|
||||
// public override operator fun iterator(): Iterator<B> = bins.values.iterator()
|
||||
//
|
||||
// public companion object {
|
||||
|
||||
// }
|
||||
//}
|
||||
|
||||
|
@ -0,0 +1,156 @@
|
||||
package kscience.kmath.histogram
|
||||
|
||||
import kscience.kmath.domains.UnivariateDomain
|
||||
import kscience.kmath.misc.UnstableKMathAPI
|
||||
import kscience.kmath.operations.Space
|
||||
import kscience.kmath.structures.Buffer
|
||||
import java.util.*
|
||||
import kotlin.math.abs
|
||||
import kotlin.math.sqrt
|
||||
|
||||
internal fun <B: ClosedFloatingPointRange<Double>> TreeMap<Double, B>.getBin(value: Double): B? {
|
||||
// check ceiling entry and return it if it is what needed
|
||||
val ceil = ceilingEntry(value)?.value
|
||||
if (ceil != null && value in ceil) return ceil
|
||||
//check floor entry
|
||||
val floor = floorEntry(value)?.value
|
||||
if (floor != null && value in floor) return floor
|
||||
//neither is valid, not found
|
||||
return null
|
||||
}
|
||||
|
||||
@UnstableKMathAPI
|
||||
public class TreeHistogram(
|
||||
override val context: TreeHistogramSpace,
|
||||
private val binMap: TreeMap<Double, out UnivariateBin>,
|
||||
) : UnivariateHistogram {
|
||||
override fun get(value: Double): UnivariateBin? = binMap.getBin(value)
|
||||
override val dimension: Int get() = 1
|
||||
override val bins: Collection<UnivariateBin> get() = binMap.values
|
||||
}
|
||||
|
||||
private class UnivariateBinValue(
|
||||
override val domain: UnivariateDomain,
|
||||
override val value: Double,
|
||||
override val standardDeviation: Double,
|
||||
) : UnivariateBin, ClosedFloatingPointRange<Double> by domain.range
|
||||
|
||||
@UnstableKMathAPI
|
||||
public class TreeHistogramSpace(
|
||||
public val binFactory: (Double) -> UnivariateDomain,
|
||||
) : Space<UnivariateHistogram> {
|
||||
|
||||
private class BinCounter(val domain: UnivariateDomain, val counter: Counter<Double> = Counter.real()) :
|
||||
ClosedFloatingPointRange<Double> by domain.range
|
||||
|
||||
public fun produce(builder: UnivariateHistogramBuilder.() -> Unit): UnivariateHistogram {
|
||||
val bins: TreeMap<Double, BinCounter> = TreeMap()
|
||||
val hBuilder = object : UnivariateHistogramBuilder {
|
||||
|
||||
fun get(value: Double): BinCounter? = bins.getBin(value)
|
||||
|
||||
fun createBin(value: Double): BinCounter {
|
||||
val binDefinition = binFactory(value)
|
||||
val newBin = BinCounter(binDefinition)
|
||||
synchronized(this) { bins[binDefinition.center] = newBin }
|
||||
return newBin
|
||||
}
|
||||
|
||||
/**
|
||||
* Thread safe put operation
|
||||
*/
|
||||
override fun putValue(at: Double, value: Double) {
|
||||
(get(at) ?: createBin(at)).apply {
|
||||
counter.add(value)
|
||||
}
|
||||
}
|
||||
|
||||
override fun putValue(point: Buffer<Double>, value: Number) {
|
||||
put(point[0], value.toDouble())
|
||||
}
|
||||
}
|
||||
hBuilder.apply(builder)
|
||||
val resBins = TreeMap<Double, UnivariateBin>()
|
||||
bins.forEach { key, binCounter ->
|
||||
val count = binCounter.counter.value
|
||||
resBins[key] = UnivariateBinValue(binCounter.domain, count, sqrt(count))
|
||||
}
|
||||
return TreeHistogram(this, resBins)
|
||||
}
|
||||
|
||||
override fun add(
|
||||
a: UnivariateHistogram,
|
||||
b: UnivariateHistogram,
|
||||
): UnivariateHistogram {
|
||||
require(a.context == this) { "Histogram $a does not belong to this context" }
|
||||
require(b.context == this) { "Histogram $b does not belong to this context" }
|
||||
val bins = TreeMap<Double, UnivariateBin>().apply {
|
||||
(a.bins.map { it.domain } union b.bins.map { it.domain }).forEach { def ->
|
||||
val newBin = UnivariateBinValue(
|
||||
def,
|
||||
value = (a[def.center]?.value ?: 0.0) + (b[def.center]?.value ?: 0.0),
|
||||
standardDeviation = (a[def.center]?.standardDeviation
|
||||
?: 0.0) + (b[def.center]?.standardDeviation ?: 0.0)
|
||||
)
|
||||
}
|
||||
}
|
||||
return TreeHistogram(this, bins)
|
||||
}
|
||||
|
||||
override fun multiply(a: UnivariateHistogram, k: Number): UnivariateHistogram {
|
||||
val bins = TreeMap<Double, UnivariateBin>().apply {
|
||||
a.bins.forEach { bin ->
|
||||
put(bin.domain.center,
|
||||
UnivariateBinValue(
|
||||
bin.domain,
|
||||
value = bin.value * k.toDouble(),
|
||||
standardDeviation = abs(bin.standardDeviation * k.toDouble())
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return TreeHistogram(this, bins)
|
||||
}
|
||||
|
||||
override val zero: UnivariateHistogram = produce { }
|
||||
|
||||
public companion object {
|
||||
/**
|
||||
* Build and fill a [UnivariateHistogram]. Returns a read-only histogram.
|
||||
*/
|
||||
public fun uniform(
|
||||
binSize: Double,
|
||||
start: Double = 0.0,
|
||||
): TreeHistogramSpace = TreeHistogramSpace { value ->
|
||||
val center = start + binSize * Math.floor((value - start) / binSize + 0.5)
|
||||
UnivariateDomain((center - binSize / 2)..(center + binSize / 2))
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a histogram with custom cell borders
|
||||
*/
|
||||
public fun custom(borders: DoubleArray): TreeHistogramSpace {
|
||||
val sorted = borders.sortedArray()
|
||||
|
||||
return TreeHistogramSpace { value ->
|
||||
when {
|
||||
value < sorted.first() -> UnivariateDomain(
|
||||
Double.NEGATIVE_INFINITY..sorted.first()
|
||||
)
|
||||
|
||||
value > sorted.last() -> UnivariateDomain(
|
||||
sorted.last()..Double.POSITIVE_INFINITY
|
||||
)
|
||||
|
||||
else -> {
|
||||
val index = sorted.indices.first { value > sorted[it] }
|
||||
val left = sorted[index]
|
||||
val right = sorted[index + 1]
|
||||
UnivariateDomain(left..right)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,24 +1,17 @@
|
||||
package kscience.kmath.histogram
|
||||
|
||||
import kscience.kmath.linear.Point
|
||||
import kscience.kmath.domains.UnivariateDomain
|
||||
import kscience.kmath.misc.UnstableKMathAPI
|
||||
import kscience.kmath.operations.Space
|
||||
import kscience.kmath.operations.SpaceElement
|
||||
import kscience.kmath.structures.Buffer
|
||||
import kscience.kmath.structures.asBuffer
|
||||
import kscience.kmath.structures.asSequence
|
||||
|
||||
public data class UnivariateHistogramBinDefinition(
|
||||
val position: Double,
|
||||
val size: Double,
|
||||
) : Comparable<UnivariateHistogramBinDefinition> {
|
||||
override fun compareTo(other: UnivariateHistogramBinDefinition): Int = this.position.compareTo(other.position)
|
||||
}
|
||||
|
||||
public interface UnivariateBin : Bin<Double> {
|
||||
public val def: UnivariateHistogramBinDefinition
|
||||
public val UnivariateDomain.center: Double get() = (range.endInclusive - range.start) / 2
|
||||
|
||||
public val position: Double get() = def.position
|
||||
public val size: Double get() = def.size
|
||||
public interface UnivariateBin : Bin<Double>, ClosedFloatingPointRange<Double> {
|
||||
public val domain: UnivariateDomain
|
||||
|
||||
/**
|
||||
* The value of histogram including weighting
|
||||
@ -30,19 +23,15 @@ public interface UnivariateBin : Bin<Double> {
|
||||
*/
|
||||
public val standardDeviation: Double
|
||||
|
||||
public val center: Point<Double> get() = doubleArrayOf(position).asBuffer()
|
||||
|
||||
public override val dimension: Int get() = 1
|
||||
|
||||
public override fun contains(point: Buffer<Double>): Boolean = contains(point[0])
|
||||
public override fun contains(point: Buffer<Double>): Boolean = point.size == 1 && contains(point[0])
|
||||
|
||||
}
|
||||
|
||||
public operator fun UnivariateBin.contains(value: Double): Boolean =
|
||||
value in (position - size / 2)..(position + size / 2)
|
||||
|
||||
@OptIn(UnstableKMathAPI::class)
|
||||
@UnstableKMathAPI
|
||||
public interface UnivariateHistogram : Histogram<Double, UnivariateBin>,
|
||||
SpaceElement<UnivariateHistogram, UnivariateHistogramSpace> {
|
||||
SpaceElement<UnivariateHistogram, Space<UnivariateHistogram>> {
|
||||
public operator fun get(value: Double): UnivariateBin?
|
||||
public override operator fun get(point: Buffer<Double>): UnivariateBin? = get(point[0])
|
||||
|
||||
@ -54,7 +43,7 @@ public interface UnivariateHistogram : Histogram<Double, UnivariateBin>,
|
||||
binSize: Double,
|
||||
start: Double = 0.0,
|
||||
builder: UnivariateHistogramBuilder.() -> Unit,
|
||||
): UnivariateHistogram = UnivariateHistogramSpace.uniform(binSize, start).produce(builder)
|
||||
): UnivariateHistogram = TreeHistogramSpace.uniform(binSize, start).produce(builder)
|
||||
|
||||
/**
|
||||
* Build and fill a histogram with custom borders. Returns a read-only histogram.
|
||||
@ -62,33 +51,26 @@ public interface UnivariateHistogram : Histogram<Double, UnivariateBin>,
|
||||
public fun custom(
|
||||
borders: DoubleArray,
|
||||
builder: UnivariateHistogramBuilder.() -> Unit,
|
||||
): UnivariateHistogram = UnivariateHistogramSpace.custom(borders).produce(builder)
|
||||
): UnivariateHistogram = TreeHistogramSpace.custom(borders).produce(builder)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@UnstableKMathAPI
|
||||
public interface UnivariateHistogramBuilder : HistogramBuilder<Double> {
|
||||
|
||||
/**
|
||||
* Thread safe put operation
|
||||
*/
|
||||
public fun put(value: Double, weight: Double = 1.0)
|
||||
public fun putValue(at: Double, value: Double = 1.0)
|
||||
|
||||
override fun putValue(point: Buffer<Double>, value: Number)
|
||||
|
||||
/**
|
||||
* Put several items into a single bin
|
||||
*/
|
||||
public fun putMany(value: Double, count: Int, weight: Double = count.toDouble())
|
||||
|
||||
public fun build(): UnivariateHistogram
|
||||
}
|
||||
|
||||
@UnstableKMathAPI
|
||||
public fun UnivariateHistogramBuilder.fill(items: Iterable<Double>): Unit = items.forEach(::put)
|
||||
public fun UnivariateHistogramBuilder.fill(items: Iterable<Double>): Unit = items.forEach(this::putValue)
|
||||
|
||||
@UnstableKMathAPI
|
||||
public fun UnivariateHistogramBuilder.fill(array: DoubleArray): Unit = array.forEach(::put)
|
||||
public fun UnivariateHistogramBuilder.fill(array: DoubleArray): Unit = array.forEach(this::putValue)
|
||||
|
||||
@UnstableKMathAPI
|
||||
public fun UnivariateHistogramBuilder.fill(buffer: Buffer<Double>): Unit = buffer.asSequence().forEach(::put)
|
||||
public fun UnivariateHistogramBuilder.fill(buffer: Buffer<Double>): Unit = buffer.asSequence().forEach(this::putValue)
|
@ -1,185 +0,0 @@
|
||||
package kscience.kmath.histogram
|
||||
|
||||
import kscience.kmath.operations.Space
|
||||
import kscience.kmath.structures.Buffer
|
||||
import java.util.*
|
||||
import kotlin.math.abs
|
||||
import kotlin.math.sqrt
|
||||
|
||||
private fun <B : UnivariateBin> TreeMap<Double, B>.getBin(value: Double): B? {
|
||||
// check ceiling entry and return it if it is what needed
|
||||
val ceil = ceilingEntry(value)?.value
|
||||
if (ceil != null && value in ceil) return ceil
|
||||
//check floor entry
|
||||
val floor = floorEntry(value)?.value
|
||||
if (floor != null && value in floor) return floor
|
||||
//neither is valid, not found
|
||||
return null
|
||||
}
|
||||
|
||||
|
||||
private class UnivariateHistogramImpl(
|
||||
override val context: UnivariateHistogramSpace,
|
||||
val binMap: TreeMap<Double, out UnivariateBin>,
|
||||
) : UnivariateHistogram {
|
||||
override fun get(value: Double): UnivariateBin? = binMap.getBin(value)
|
||||
override val dimension: Int get() = 1
|
||||
override val bins: Collection<UnivariateBin> get() = binMap.values
|
||||
}
|
||||
|
||||
private class UnivariateBinCounter(
|
||||
override val def: UnivariateHistogramBinDefinition,
|
||||
) : UnivariateBin {
|
||||
val counter: LongCounter = LongCounter()
|
||||
val valueCounter: ObjectCounter<Double> = Counter.real()
|
||||
|
||||
/**
|
||||
* The precise number of events ignoring weighting
|
||||
*/
|
||||
val count: Long get() = counter.value
|
||||
|
||||
override val standardDeviation: Double get() = sqrt(count.toDouble()) / count * value
|
||||
|
||||
/**
|
||||
* The value of histogram including weighting
|
||||
*/
|
||||
override val value: Double get() = valueCounter.value
|
||||
|
||||
public fun increment(count: Long, value: Double) {
|
||||
counter.add(count)
|
||||
valueCounter.add(value)
|
||||
}
|
||||
}
|
||||
|
||||
private class UnivariateBinValue(
|
||||
override val def: UnivariateHistogramBinDefinition,
|
||||
override val value: Double,
|
||||
override val standardDeviation: Double,
|
||||
) : UnivariateBin
|
||||
|
||||
|
||||
public class UnivariateHistogramSpace(
|
||||
public val binFactory: (Double) -> UnivariateHistogramBinDefinition,
|
||||
) : Space<UnivariateHistogram> {
|
||||
|
||||
private inner class UnivariateHistogramBuilderImpl : UnivariateHistogramBuilder {
|
||||
|
||||
val bins: TreeMap<Double, UnivariateBinCounter> = TreeMap()
|
||||
fun get(value: Double): UnivariateBinCounter? = bins.getBin(value)
|
||||
|
||||
private fun createBin(value: Double): UnivariateBinCounter {
|
||||
val binDefinition = binFactory(value)
|
||||
val newBin = UnivariateBinCounter(binDefinition)
|
||||
synchronized(this) { bins[binDefinition.position] = newBin }
|
||||
return newBin
|
||||
}
|
||||
|
||||
/**
|
||||
* Thread safe put operation
|
||||
*/
|
||||
override fun put(value: Double, weight: Double) {
|
||||
(get(value) ?: createBin(value)).apply {
|
||||
increment(1, weight)
|
||||
}
|
||||
}
|
||||
|
||||
override fun putValue(point: Buffer<Double>, value: Number) {
|
||||
put(point[0], value.toDouble())
|
||||
}
|
||||
|
||||
/**
|
||||
* Put several items into a single bin
|
||||
*/
|
||||
override fun putMany(value: Double, count: Int, weight: Double) {
|
||||
(get(value) ?: createBin(value)).apply {
|
||||
increment(count.toLong(), weight)
|
||||
}
|
||||
}
|
||||
|
||||
override fun build(): UnivariateHistogram = UnivariateHistogramImpl(this@UnivariateHistogramSpace, bins)
|
||||
}
|
||||
|
||||
|
||||
public fun builder(): UnivariateHistogramBuilder = UnivariateHistogramBuilderImpl()
|
||||
|
||||
public fun produce(builder: UnivariateHistogramBuilder.() -> Unit): UnivariateHistogram =
|
||||
UnivariateHistogramBuilderImpl().apply(builder).build()
|
||||
|
||||
override fun add(
|
||||
a: UnivariateHistogram,
|
||||
b: UnivariateHistogram,
|
||||
): UnivariateHistogram {
|
||||
require(a.context == this) { "Histogram $a does not belong to this context" }
|
||||
require(b.context == this) { "Histogram $b does not belong to this context" }
|
||||
val bins = TreeMap<Double, UnivariateBin>().apply {
|
||||
(a.bins.map { it.def } union b.bins.map { it.def }).forEach { def ->
|
||||
val newBin = UnivariateBinValue(
|
||||
def,
|
||||
value = (a[def.position]?.value ?: 0.0) + (b[def.position]?.value ?: 0.0),
|
||||
standardDeviation = (a[def.position]?.standardDeviation
|
||||
?: 0.0) + (b[def.position]?.standardDeviation ?: 0.0)
|
||||
)
|
||||
}
|
||||
}
|
||||
return UnivariateHistogramImpl(this, bins)
|
||||
}
|
||||
|
||||
override fun multiply(a: UnivariateHistogram, k: Number): UnivariateHistogram {
|
||||
val bins = TreeMap<Double, UnivariateBin>().apply {
|
||||
a.bins.forEach { bin ->
|
||||
put(bin.position,
|
||||
UnivariateBinValue(
|
||||
bin.def,
|
||||
value = bin.value * k.toDouble(),
|
||||
standardDeviation = abs(bin.standardDeviation * k.toDouble())
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return UnivariateHistogramImpl(this, bins)
|
||||
}
|
||||
|
||||
override val zero: UnivariateHistogram = produce { }
|
||||
|
||||
public companion object {
|
||||
/**
|
||||
* Build and fill a [UnivariateHistogram]. Returns a read-only histogram.
|
||||
*/
|
||||
public fun uniform(
|
||||
binSize: Double,
|
||||
start: Double = 0.0
|
||||
): UnivariateHistogramSpace = UnivariateHistogramSpace { value ->
|
||||
val center = start + binSize * Math.floor((value - start) / binSize + 0.5)
|
||||
UnivariateHistogramBinDefinition(center, binSize)
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a histogram with custom cell borders
|
||||
*/
|
||||
public fun custom(borders: DoubleArray): UnivariateHistogramSpace {
|
||||
val sorted = borders.sortedArray()
|
||||
|
||||
return UnivariateHistogramSpace { value ->
|
||||
when {
|
||||
value < sorted.first() -> UnivariateHistogramBinDefinition(
|
||||
Double.NEGATIVE_INFINITY,
|
||||
Double.MAX_VALUE
|
||||
)
|
||||
|
||||
value > sorted.last() -> UnivariateHistogramBinDefinition(
|
||||
Double.POSITIVE_INFINITY,
|
||||
Double.MAX_VALUE
|
||||
)
|
||||
|
||||
else -> {
|
||||
val index = sorted.indices.first { value > sorted[it] }
|
||||
val left = sorted[index]
|
||||
val right = sorted[index + 1]
|
||||
UnivariateHistogramBinDefinition((left + right) / 2, (right - left))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user