Histograms refactor

This commit is contained in:
Alexander Nozik 2021-02-14 22:44:05 +03:00
parent 987997cc02
commit ce18e85a0a
6 changed files with 211 additions and 259 deletions

View File

@ -1,8 +1,8 @@
package scietifik.kmath.histogram package scietifik.kmath.histogram
import kscience.kmath.histogram.RealHistogramSpace import kscience.kmath.histogram.RealHistogramSpace
import kscience.kmath.histogram.fill
import kscience.kmath.histogram.put import kscience.kmath.histogram.put
import kscience.kmath.operations.invoke
import kscience.kmath.real.RealVector import kscience.kmath.real.RealVector
import kscience.kmath.real.invoke import kscience.kmath.real.invoke
import kotlin.random.Random import kotlin.random.Random
@ -37,12 +37,44 @@ internal class MultivariateHistogramTest {
val n = 10000 val n = 10000
val histogram = hSpace.produce { val histogram = hSpace.produce {
fill {
repeat(n) { repeat(n) {
yield(RealVector(nextDouble(), nextDouble(), nextDouble())) put(nextDouble(), nextDouble(), nextDouble())
}
} }
} }
assertEquals(n, histogram.bins.sumBy { it.value.toInt() }) assertEquals(n, histogram.bins.sumBy { it.value.toInt() })
} }
/**
 * Fills two histograms with the same number of random 3D points and checks their difference:
 * no bin of the difference exceeds the corresponding bin of the first histogram,
 * the difference has at least as many bins as the first histogram,
 * and the values of the difference sum up to zero.
 */
@Test
fun testHistogramAlgebra() {
    RealHistogramSpace.fromRanges(
        (-1.0..1.0),
        (-1.0..1.0),
        (-1.0..1.0)
    ).invoke {
        val generator = Random(1234)
        fun coordinate() = generator.nextDouble(-1.0, 1.0)
        val sampleCount = 10000

        // Builds one histogram out of `sampleCount` random points drawn from the shared generator.
        fun randomHistogram() = produce {
            repeat(sampleCount) {
                put(coordinate(), coordinate(), coordinate())
            }
        }

        val minuend = randomHistogram()
        val subtrahend = randomHistogram()
        val difference = minuend - subtrahend

        assertTrue {
            strides.indices().all { position ->
                difference.values[position] <= minuend.values[position]
            }
        }
        assertTrue {
            difference.bins.count() >= minuend.bins.count()
        }
        assertEquals(0.0, difference.bins.sumByDouble { bin -> bin.value.toDouble() })
    }
}
} }

View File

@ -1,33 +0,0 @@
package kscience.kmath.histogram
/**
* Univariate histogram with log(n) bin search speed
*/
//private abstract class AbstractUnivariateHistogram<B: UnivariateBin>{
//
// public abstract val bins: TreeMap<Double, B>
//
// public open operator fun get(value: Double): B? {
// // check ceiling entry and return it if it is what needed
// val ceil = bins.ceilingEntry(value)?.value
// if (ceil != null && value in ceil) return ceil
// //check floor entry
// val floor = bins.floorEntry(value)?.value
// if (floor != null && value in floor) return floor
// //neither is valid, not found
// return null
// }
// public override operator fun get(point: Buffer<out Double>): B? = get(point[0])
//
// public override val dimension: Int get() = 1
//
// public override operator fun iterator(): Iterator<B> = bins.values.iterator()
//
// public companion object {
// }
//}

View File

@ -0,0 +1,156 @@
package kscience.kmath.histogram
import kscience.kmath.domains.UnivariateDomain
import kscience.kmath.misc.UnstableKMathAPI
import kscience.kmath.operations.Space
import kscience.kmath.structures.Buffer
import java.util.*
import kotlin.math.abs
import kotlin.math.sqrt
/**
 * Finds the bin that contains [value] in a map of bins.
 *
 * Only two candidates are inspected: the entry with the least key greater than or equal to [value]
 * and, if that one does not contain [value], the entry with the greatest key less than or equal to it.
 *
 * @return the first candidate that contains [value], or `null` when neither does.
 */
internal fun <B : ClosedFloatingPointRange<Double>> TreeMap<Double, B>.getBin(value: Double): B? =
    ceilingEntry(value)?.value?.takeIf { candidate -> value in candidate }
        ?: floorEntry(value)?.value?.takeIf { candidate -> value in candidate }
/**
 * A [UnivariateHistogram] whose bins are kept in a [TreeMap].
 *
 * @property context the [TreeHistogramSpace] this histogram is attached to.
 */
@UnstableKMathAPI
public class TreeHistogram(
    override val context: TreeHistogramSpace,
    private val binMap: TreeMap<Double, out UnivariateBin>,
) : UnivariateHistogram {

    /** A univariate histogram always has exactly one dimension. */
    override val dimension: Int
        get() = 1

    /** The values of the backing map, exposed as a read-only collection. */
    override val bins: Collection<UnivariateBin>
        get() = binMap.values

    /** Returns the bin that contains [value], or `null` when no stored bin does. */
    override fun get(value: Double): UnivariateBin? {
        return binMap.getBin(value)
    }
}
/**
 * A [UnivariateBin] whose [value] and [standardDeviation] are fixed at construction time.
 *
 * The [ClosedFloatingPointRange] part of the bin contract is delegated to the range of [domain].
 */
private class UnivariateBinValue(
override val domain: UnivariateDomain,
override val value: Double,
override val standardDeviation: Double,
) : UnivariateBin, ClosedFloatingPointRange<Double> by domain.range
/**
 * A [Space] of univariate histograms whose bins are created on demand and stored in a [TreeMap]
 * keyed by the center of the bin domain.
 *
 * @property binFactory maps a value to the [UnivariateDomain] of the bin that must hold it.
 */
@UnstableKMathAPI
public class TreeHistogramSpace(
    public val binFactory: (Double) -> UnivariateDomain,
) : Space<UnivariateHistogram> {

    /** Mutable accumulator for a single bin, used only while a histogram is being filled. */
    private class BinCounter(val domain: UnivariateDomain, val counter: Counter<Double> = Counter.real()) :
        ClosedFloatingPointRange<Double> by domain.range

    /**
     * Runs [builder] against a fresh [UnivariateHistogramBuilder] and converts the accumulated counters
     * into a [TreeHistogram]. Each resulting bin stores the counter value and its square root as the
     * standard deviation.
     */
    public fun produce(builder: UnivariateHistogramBuilder.() -> Unit): UnivariateHistogram {
        val bins: TreeMap<Double, BinCounter> = TreeMap()
        val hBuilder = object : UnivariateHistogramBuilder {

            private fun get(value: Double): BinCounter? = bins.getBin(value)

            // Must be called while holding the lock on this builder.
            private fun createBin(value: Double): BinCounter {
                val binDefinition = binFactory(value)
                val newBin = BinCounter(binDefinition)
                bins[binDefinition.center] = newBin
                return newBin
            }

            /**
             * Adds [value] to the bin containing [at], creating the bin if it does not exist yet.
             *
             * Lookup and creation happen under one lock on this builder, so the map is never read while
             * it is being modified and two callers cannot create the same bin twice.
             * NOTE(review): the counter is updated outside the lock; this assumes the counter returned
             * by `Counter.real()` is itself safe for concurrent `add` calls - confirm.
             */
            override fun putValue(at: Double, value: Double) {
                val bin = synchronized(this) { get(at) ?: createBin(at) }
                bin.counter.add(value)
            }

            /** Uses the first coordinate of [point] as the position and [value] as the weight. */
            override fun putValue(point: Buffer<Double>, value: Number) {
                // Delegate to the scalar overload; the former `put(...)` call no longer refers to it.
                putValue(point[0], value.toDouble())
            }
        }
        hBuilder.apply(builder)

        val resBins = TreeMap<Double, UnivariateBin>()
        for ((key, binCounter) in bins) {
            val count = binCounter.counter.value
            resBins[key] = UnivariateBinValue(binCounter.domain, count, sqrt(count))
        }
        return TreeHistogram(this, resBins)
    }

    /**
     * Adds two histograms of this space bin by bin.
     *
     * The result has one bin for every distinct domain found in [a] or [b]; a histogram that has no bin
     * at the center of a domain contributes zero. Values are summed and so are standard deviations.
     *
     * @throws IllegalArgumentException if [a] or [b] does not belong to this space.
     */
    override fun add(
        a: UnivariateHistogram,
        b: UnivariateHistogram,
    ): UnivariateHistogram {
        require(a.context == this) { "Histogram $a does not belong to this context" }
        require(b.context == this) { "Histogram $b does not belong to this context" }
        val bins = TreeMap<Double, UnivariateBin>().apply {
            (a.bins.map { it.domain } union b.bins.map { it.domain }).forEach { def ->
                val center = def.center
                val binA = a[center]
                val binB = b[center]
                val newBin = UnivariateBinValue(
                    def,
                    value = (binA?.value ?: 0.0) + (binB?.value ?: 0.0),
                    standardDeviation = (binA?.standardDeviation ?: 0.0) + (binB?.standardDeviation ?: 0.0)
                )
                // The combined bin has to be stored, otherwise the sum is always an empty histogram.
                put(center, newBin)
            }
        }
        return TreeHistogram(this, bins)
    }

    /**
     * Scales every bin of [a] by [k]. The standard deviation is scaled by the same factor and kept
     * non-negative.
     */
    override fun multiply(a: UnivariateHistogram, k: Number): UnivariateHistogram {
        val factor = k.toDouble()
        val bins = TreeMap<Double, UnivariateBin>().apply {
            a.bins.forEach { bin ->
                put(
                    bin.domain.center,
                    UnivariateBinValue(
                        bin.domain,
                        value = bin.value * factor,
                        standardDeviation = abs(bin.standardDeviation * factor)
                    )
                )
            }
        }
        return TreeHistogram(this, bins)
    }

    /** The histogram produced by an empty builder block. */
    override val zero: UnivariateHistogram = produce { }

    public companion object {
        /**
         * Creates a space with bins of equal width [binSize].
         *
         * Bin centers are located at `start + k * binSize` for integer `k`; a value is assigned to the
         * bin with the nearest center.
         */
        public fun uniform(
            binSize: Double,
            start: Double = 0.0,
        ): TreeHistogramSpace = TreeHistogramSpace { value ->
            val center = start + binSize * Math.floor((value - start) / binSize + 0.5)
            UnivariateDomain((center - binSize / 2)..(center + binSize / 2))
        }

        /**
         * Creates a space with custom cell borders.
         *
         * [borders] are sorted before use. Values below the smallest border fall into a bin open towards
         * negative infinity, values above the largest border into a bin open towards positive infinity,
         * and any other value into the bin between the two adjacent borders around it.
         * An empty [borders] array makes the bin factory fail on its first use.
         */
        public fun custom(borders: DoubleArray): TreeHistogramSpace {
            val sorted = borders.sortedArray()
            return TreeHistogramSpace { value ->
                when {
                    value < sorted.first() -> UnivariateDomain(
                        Double.NEGATIVE_INFINITY..sorted.first()
                    )
                    value > sorted.last() -> UnivariateDomain(
                        sorted.last()..Double.POSITIVE_INFINITY
                    )
                    else -> {
                        // Index of the first border that is not below the value; it is the right edge
                        // of the bin, the border before it is the left edge.
                        val rightIndex = (1 until sorted.size).first { value <= sorted[it] }
                        val left = sorted[rightIndex - 1]
                        val right = sorted[rightIndex]
                        UnivariateDomain(left..right)
                    }
                }
            }
        }
    }
}

View File

@ -1,24 +1,17 @@
package kscience.kmath.histogram package kscience.kmath.histogram
import kscience.kmath.linear.Point import kscience.kmath.domains.UnivariateDomain
import kscience.kmath.misc.UnstableKMathAPI import kscience.kmath.misc.UnstableKMathAPI
import kscience.kmath.operations.Space
import kscience.kmath.operations.SpaceElement import kscience.kmath.operations.SpaceElement
import kscience.kmath.structures.Buffer import kscience.kmath.structures.Buffer
import kscience.kmath.structures.asBuffer
import kscience.kmath.structures.asSequence import kscience.kmath.structures.asSequence
/**
 * Definition of a univariate bin given by its [position] and [size].
 *
 * Definitions are ordered by [position] only; [size] does not take part in the comparison.
 */
public data class UnivariateHistogramBinDefinition(
    val position: Double,
    val size: Double,
) : Comparable<UnivariateHistogramBinDefinition> {

    /** Compares the positions of the two definitions. */
    override fun compareTo(other: UnivariateHistogramBinDefinition): Int {
        val otherPosition = other.position
        return position.compareTo(otherPosition)
    }
}
// Center of a domain: the midpoint of its range, (start + endInclusive) / 2.
public interface UnivariateBin : Bin<Double> { public val UnivariateDomain.center: Double get() = (range.endInclusive + range.start) / 2
public val def: UnivariateHistogramBinDefinition
public val position: Double get() = def.position public interface UnivariateBin : Bin<Double>, ClosedFloatingPointRange<Double> {
public val size: Double get() = def.size public val domain: UnivariateDomain
/** /**
* The value of histogram including weighting * The value of histogram including weighting
@ -30,19 +23,15 @@ public interface UnivariateBin : Bin<Double> {
*/ */
public val standardDeviation: Double public val standardDeviation: Double
public val center: Point<Double> get() = doubleArrayOf(position).asBuffer()
public override val dimension: Int get() = 1 public override val dimension: Int get() = 1
public override fun contains(point: Buffer<Double>): Boolean = contains(point[0]) public override fun contains(point: Buffer<Double>): Boolean = point.size == 1 && contains(point[0])
} }
public operator fun UnivariateBin.contains(value: Double): Boolean = @UnstableKMathAPI
value in (position - size / 2)..(position + size / 2)
@OptIn(UnstableKMathAPI::class)
public interface UnivariateHistogram : Histogram<Double, UnivariateBin>, public interface UnivariateHistogram : Histogram<Double, UnivariateBin>,
SpaceElement<UnivariateHistogram, UnivariateHistogramSpace> { SpaceElement<UnivariateHistogram, Space<UnivariateHistogram>> {
public operator fun get(value: Double): UnivariateBin? public operator fun get(value: Double): UnivariateBin?
public override operator fun get(point: Buffer<Double>): UnivariateBin? = get(point[0]) public override operator fun get(point: Buffer<Double>): UnivariateBin? = get(point[0])
@ -54,7 +43,7 @@ public interface UnivariateHistogram : Histogram<Double, UnivariateBin>,
binSize: Double, binSize: Double,
start: Double = 0.0, start: Double = 0.0,
builder: UnivariateHistogramBuilder.() -> Unit, builder: UnivariateHistogramBuilder.() -> Unit,
): UnivariateHistogram = UnivariateHistogramSpace.uniform(binSize, start).produce(builder) ): UnivariateHistogram = TreeHistogramSpace.uniform(binSize, start).produce(builder)
/** /**
* Build and fill a histogram with custom borders. Returns a read-only histogram. * Build and fill a histogram with custom borders. Returns a read-only histogram.
@ -62,33 +51,26 @@ public interface UnivariateHistogram : Histogram<Double, UnivariateBin>,
public fun custom( public fun custom(
borders: DoubleArray, borders: DoubleArray,
builder: UnivariateHistogramBuilder.() -> Unit, builder: UnivariateHistogramBuilder.() -> Unit,
): UnivariateHistogram = UnivariateHistogramSpace.custom(borders).produce(builder) ): UnivariateHistogram = TreeHistogramSpace.custom(borders).produce(builder)
} }
} }
@UnstableKMathAPI
public interface UnivariateHistogramBuilder : HistogramBuilder<Double> { public interface UnivariateHistogramBuilder : HistogramBuilder<Double> {
/** /**
* Thread safe put operation * Thread safe put operation
*/ */
public fun put(value: Double, weight: Double = 1.0) public fun putValue(at: Double, value: Double = 1.0)
override fun putValue(point: Buffer<Double>, value: Number) override fun putValue(point: Buffer<Double>, value: Number)
/**
* Put several items into a single bin
*/
public fun putMany(value: Double, count: Int, weight: Double = count.toDouble())
public fun build(): UnivariateHistogram
} }
@UnstableKMathAPI @UnstableKMathAPI
public fun UnivariateHistogramBuilder.fill(items: Iterable<Double>): Unit = items.forEach(::put) public fun UnivariateHistogramBuilder.fill(items: Iterable<Double>): Unit = items.forEach(this::putValue)
@UnstableKMathAPI @UnstableKMathAPI
public fun UnivariateHistogramBuilder.fill(array: DoubleArray): Unit = array.forEach(::put) public fun UnivariateHistogramBuilder.fill(array: DoubleArray): Unit = array.forEach(this::putValue)
@UnstableKMathAPI @UnstableKMathAPI
public fun UnivariateHistogramBuilder.fill(buffer: Buffer<Double>): Unit = buffer.asSequence().forEach(::put) public fun UnivariateHistogramBuilder.fill(buffer: Buffer<Double>): Unit = buffer.asSequence().forEach(this::putValue)

View File

@ -1,185 +0,0 @@
package kscience.kmath.histogram
import kscience.kmath.operations.Space
import kscience.kmath.structures.Buffer
import java.util.*
import kotlin.math.abs
import kotlin.math.sqrt
/**
 * Finds the bin that contains [value].
 *
 * The entry with the least key greater than or equal to [value] is tried first; if it does not
 * contain [value], the entry with the greatest key less than or equal to [value] is tried.
 *
 * @return the matching bin, or `null` when neither candidate contains [value].
 */
private fun <B : UnivariateBin> TreeMap<Double, B>.getBin(value: Double): B? =
    ceilingEntry(value)?.value?.takeIf { candidate -> value in candidate }
        ?: floorEntry(value)?.value?.takeIf { candidate -> value in candidate }
/**
 * A [UnivariateHistogram] backed by a [TreeMap] of bins.
 *
 * @property context the [UnivariateHistogramSpace] this histogram is attached to.
 * @property binMap the backing map of bins.
 */
private class UnivariateHistogramImpl(
    override val context: UnivariateHistogramSpace,
    val binMap: TreeMap<Double, out UnivariateBin>,
) : UnivariateHistogram {

    /** A univariate histogram always has exactly one dimension. */
    override val dimension: Int
        get() = 1

    /** The values of the backing map, exposed as a read-only collection. */
    override val bins: Collection<UnivariateBin>
        get() = binMap.values

    /** Returns the bin that contains [value], or `null` when no stored bin does. */
    override fun get(value: Double): UnivariateBin? {
        return binMap.getBin(value)
    }
}
/**
 * A mutable [UnivariateBin] that accumulates an event count and a weighted value.
 *
 * @property def the definition of this bin.
 */
private class UnivariateBinCounter(
    override val def: UnivariateHistogramBinDefinition,
) : UnivariateBin {
    /** Accumulates the number of events. */
    val counter: LongCounter = LongCounter()

    /** Accumulates the weighted value. */
    val valueCounter: ObjectCounter<Double> = Counter.real()

    /**
     * The precise number of events ignoring weighting
     */
    val count: Long
        get() = counter.value

    /**
     * The square root of the event count, divided by the event count and multiplied by [value].
     */
    override val standardDeviation: Double
        get() {
            val relativeError = sqrt(count.toDouble()) / count
            return relativeError * value
        }

    /**
     * The value of histogram including weighting
     */
    override val value: Double
        get() = valueCounter.value

    /** Adds [count] to the event counter and [value] to the value counter. */
    public fun increment(count: Long, value: Double) {
        counter.add(count)
        valueCounter.add(value)
    }
}
/**
 * A [UnivariateBin] whose [value] and [standardDeviation] are fixed at construction time.
 *
 * Instances are created by the `add` and `multiply` operations of [UnivariateHistogramSpace].
 */
private class UnivariateBinValue(
override val def: UnivariateHistogramBinDefinition,
override val value: Double,
override val standardDeviation: Double,
) : UnivariateBin
/**
 * A [Space] of univariate histograms whose bins are created on demand and stored in a [TreeMap]
 * keyed by the bin position.
 *
 * @property binFactory maps a value to the definition of the bin that must hold it.
 */
public class UnivariateHistogramSpace(
    public val binFactory: (Double) -> UnivariateHistogramBinDefinition,
) : Space<UnivariateHistogram> {

    private inner class UnivariateHistogramBuilderImpl : UnivariateHistogramBuilder {

        val bins: TreeMap<Double, UnivariateBinCounter> = TreeMap()

        fun get(value: Double): UnivariateBinCounter? = bins.getBin(value)

        // Must be called while holding the lock on this builder.
        private fun createBin(value: Double): UnivariateBinCounter {
            val binDefinition = binFactory(value)
            val newBin = UnivariateBinCounter(binDefinition)
            bins[binDefinition.position] = newBin
            return newBin
        }

        /**
         * Returns the bin containing [value], creating it when it does not exist yet.
         *
         * Lookup and creation happen under one lock on this builder, so the map is never read while it
         * is being modified and two callers cannot create the same bin twice.
         */
        private fun getOrCreateBin(value: Double): UnivariateBinCounter =
            synchronized(this) { get(value) ?: createBin(value) }

        /**
         * Registers a single event with the given [weight] in the bin containing [value].
         * NOTE(review): the counters are updated outside the lock; this assumes they are safe for
         * concurrent updates - confirm.
         */
        override fun put(value: Double, weight: Double) {
            getOrCreateBin(value).increment(1, weight)
        }

        /** Uses the first coordinate of [point] as the position and [value] as the weight. */
        override fun putValue(point: Buffer<Double>, value: Number) {
            put(point[0], value.toDouble())
        }

        /**
         * Put several items into a single bin
         */
        override fun putMany(value: Double, count: Int, weight: Double) {
            getOrCreateBin(value).increment(count.toLong(), weight)
        }

        /** Wraps the bins accumulated so far into a histogram of the enclosing space. */
        override fun build(): UnivariateHistogram = UnivariateHistogramImpl(this@UnivariateHistogramSpace, bins)
    }

    /** Creates an empty builder bound to this space. */
    public fun builder(): UnivariateHistogramBuilder = UnivariateHistogramBuilderImpl()

    /** Runs [builder] against a fresh builder and returns the histogram it builds. */
    public fun produce(builder: UnivariateHistogramBuilder.() -> Unit): UnivariateHistogram =
        UnivariateHistogramBuilderImpl().apply(builder).build()

    /**
     * Adds two histograms of this space bin by bin.
     *
     * The result has one bin for every distinct definition found in [a] or [b]; a histogram that has no
     * bin at the position of a definition contributes zero. Values are summed and so are standard
     * deviations.
     *
     * @throws IllegalArgumentException if [a] or [b] does not belong to this space.
     */
    override fun add(
        a: UnivariateHistogram,
        b: UnivariateHistogram,
    ): UnivariateHistogram {
        require(a.context == this) { "Histogram $a does not belong to this context" }
        require(b.context == this) { "Histogram $b does not belong to this context" }
        val bins = TreeMap<Double, UnivariateBin>().apply {
            (a.bins.map { it.def } union b.bins.map { it.def }).forEach { def ->
                val binA = a[def.position]
                val binB = b[def.position]
                val newBin = UnivariateBinValue(
                    def,
                    value = (binA?.value ?: 0.0) + (binB?.value ?: 0.0),
                    standardDeviation = (binA?.standardDeviation ?: 0.0) + (binB?.standardDeviation ?: 0.0)
                )
                // The combined bin has to be stored, otherwise the sum is always an empty histogram.
                put(def.position, newBin)
            }
        }
        return UnivariateHistogramImpl(this, bins)
    }

    /**
     * Scales every bin of [a] by [k]. The standard deviation is scaled by the same factor and kept
     * non-negative.
     */
    override fun multiply(a: UnivariateHistogram, k: Number): UnivariateHistogram {
        val factor = k.toDouble()
        val bins = TreeMap<Double, UnivariateBin>().apply {
            a.bins.forEach { bin ->
                put(
                    bin.position,
                    UnivariateBinValue(
                        bin.def,
                        value = bin.value * factor,
                        standardDeviation = abs(bin.standardDeviation * factor)
                    )
                )
            }
        }
        return UnivariateHistogramImpl(this, bins)
    }

    /** The histogram produced by an empty builder block. */
    override val zero: UnivariateHistogram = produce { }

    public companion object {
        /**
         * Creates a space with bins of equal width [binSize].
         *
         * Bin centers are located at `start + k * binSize` for integer `k`; a value is assigned to the
         * bin with the nearest center.
         */
        public fun uniform(
            binSize: Double,
            start: Double = 0.0
        ): UnivariateHistogramSpace = UnivariateHistogramSpace { value ->
            val center = start + binSize * Math.floor((value - start) / binSize + 0.5)
            UnivariateHistogramBinDefinition(center, binSize)
        }

        /**
         * Creates a space with custom cell borders.
         *
         * [borders] are sorted before use. Values between the smallest and the largest border are
         * assigned to the bin between the two adjacent borders around them; values outside get the
         * special definitions positioned at negative or positive infinity.
         * An empty [borders] array makes the bin factory fail on its first use.
         */
        public fun custom(borders: DoubleArray): UnivariateHistogramSpace {
            val sorted = borders.sortedArray()

            return UnivariateHistogramSpace { value ->
                when {
                    value < sorted.first() -> UnivariateHistogramBinDefinition(
                        Double.NEGATIVE_INFINITY,
                        Double.MAX_VALUE
                    )

                    value > sorted.last() -> UnivariateHistogramBinDefinition(
                        Double.POSITIVE_INFINITY,
                        Double.MAX_VALUE
                    )

                    else -> {
                        // Index of the first border that is not below the value; it is the right edge
                        // of the bin, the border before it is the left edge.
                        val rightIndex = (1 until sorted.size).first { value <= sorted[it] }
                        val left = sorted[rightIndex - 1]
                        val right = sorted[rightIndex]
                        UnivariateHistogramBinDefinition((left + right) / 2, (right - left))
                    }
                }
            }
        }
    }
}