Histograms refactor

This commit is contained in:
Alexander Nozik 2021-02-14 22:44:05 +03:00
parent 987997cc02
commit ce18e85a0a
6 changed files with 211 additions and 259 deletions

View File

@ -77,7 +77,7 @@ public class RealHistogramSpace(
return IndexedHistogram(this, values)
}
public companion object{
public companion object {
/**
* Use it like
* ```

View File

@ -1,8 +1,8 @@
package scietifik.kmath.histogram
import kscience.kmath.histogram.RealHistogramSpace
import kscience.kmath.histogram.fill
import kscience.kmath.histogram.put
import kscience.kmath.operations.invoke
import kscience.kmath.real.RealVector
import kscience.kmath.real.invoke
import kotlin.random.Random
@ -37,12 +37,44 @@ internal class MultivariateHistogramTest {
val n = 10000
val histogram = hSpace.produce {
fill {
repeat(n) {
yield(RealVector(nextDouble(), nextDouble(), nextDouble()))
}
repeat(n) {
put(nextDouble(), nextDouble(), nextDouble())
}
}
assertEquals(n, histogram.bins.sumBy { it.value.toInt() })
}
/**
 * Checks histogram subtraction inside a three-dimensional [RealHistogramSpace]:
 * two histograms are filled with the same number of random points and subtracted.
 * No cell of the difference may exceed the matching cell of the minuend, the difference
 * must have at least as many bins as the minuend, and its bin values must sum to zero.
 */
@Test
fun testHistogramAlgebra() {
    // The space is only needed as a receiver scope, so the result of the block is not bound to a name.
    RealHistogramSpace.fromRanges(
        (-1.0..1.0),
        (-1.0..1.0),
        (-1.0..1.0)
    ).invoke {
        // Fixed seed keeps the test reproducible.
        val random = Random(1234)

        fun nextDouble() = random.nextDouble(-1.0, 1.0)

        val n = 10000

        // Builds a histogram from n random points; consecutive calls keep consuming the same generator.
        fun randomHistogram() = produce {
            repeat(n) {
                put(nextDouble(), nextDouble(), nextDouble())
            }
        }

        val histogram1 = randomHistogram()
        val histogram2 = randomHistogram()
        val res = histogram1 - histogram2

        assertTrue {
            strides.indices().all { index ->
                res.values[index] <= histogram1.values[index]
            }
        }
        assertTrue {
            res.bins.count() >= histogram1.bins.count()
        }
        assertEquals(0.0, res.bins.sumByDouble { it.value.toDouble() })
    }
}
}

View File

@ -1,33 +0,0 @@
package kscience.kmath.histogram
/**
* Univariate histogram with log(n) bin search speed
*/
//private abstract class AbstractUnivariateHistogram<B: UnivariateBin>{
//
// public abstract val bins: TreeMap<Double, B>
//
// public open operator fun get(value: Double): B? {
// // check ceiling entry and return it if it is what needed
// val ceil = bins.ceilingEntry(value)?.value
// if (ceil != null && value in ceil) return ceil
// //check floor entry
// val floor = bins.floorEntry(value)?.value
// if (floor != null && value in floor) return floor
// //neither is valid, not found
// return null
// }
// public override operator fun get(point: Buffer<out Double>): B? = get(point[0])
//
// public override val dimension: Int get() = 1
//
// public override operator fun iterator(): Iterator<B> = bins.values.iterator()
//
// public companion object {
// }
//}

View File

@ -0,0 +1,156 @@
package kscience.kmath.histogram
import kscience.kmath.domains.UnivariateDomain
import kscience.kmath.misc.UnstableKMathAPI
import kscience.kmath.operations.Space
import kscience.kmath.structures.Buffer
import java.util.*
import kotlin.math.abs
import kotlin.math.sqrt
/**
 * Finds the bin of this map that contains [value].
 *
 * Only the two entries nearest to [value] by key are examined: first the entry with the least key
 * that is greater than or equal to [value], then the entry with the greatest key that is less than
 * or equal to it.
 *
 * @return the first of those two bins that contains [value], or `null` if neither does.
 */
internal fun <B : ClosedFloatingPointRange<Double>> TreeMap<Double, B>.getBin(value: Double): B? =
    ceilingEntry(value)?.value?.takeIf { value in it }
        ?: floorEntry(value)?.value?.takeIf { value in it }
/**
 * A [UnivariateHistogram] whose bins are held in a [TreeMap]; bin lookup is delegated to [getBin].
 *
 * @property context the space this histogram belongs to.
 */
@UnstableKMathAPI
public class TreeHistogram(
    override val context: TreeHistogramSpace,
    private val binMap: TreeMap<Double, out UnivariateBin>,
) : UnivariateHistogram {

    /** Always one: the histogram is univariate. */
    override val dimension: Int
        get() = 1

    /** Live view of the bins stored in the backing map, in key order. */
    override val bins: Collection<UnivariateBin>
        get() = binMap.values

    /** Returns the bin containing [value], or `null` when no stored bin covers it. */
    override fun get(value: Double): UnivariateBin? {
        return binMap.getBin(value)
    }
}
/**
 * Immutable [UnivariateBin] holding a fixed [value] and [standardDeviation] for [domain].
 * The [ClosedFloatingPointRange] behaviour is delegated to the range of [domain].
 */
private class UnivariateBinValue(
override val domain: UnivariateDomain,
override val value: Double,
override val standardDeviation: Double,
) : UnivariateBin, ClosedFloatingPointRange<Double> by domain.range
/**
 * A [Space] of univariate histograms whose bins are created on demand by [binFactory] and stored
 * in a [TreeMap] keyed by the center of the bin domain.
 *
 * @property binFactory returns the domain of the bin that should hold the given value.
 */
@UnstableKMathAPI
public class TreeHistogramSpace(
    public val binFactory: (Double) -> UnivariateDomain,
) : Space<UnivariateHistogram> {

    /** Mutable accumulator of a single bin; range checks are delegated to the range of [domain]. */
    private class BinCounter(val domain: UnivariateDomain, val counter: Counter<Double> = Counter.real()) :
        ClosedFloatingPointRange<Double> by domain.range

    /**
     * Runs [builder] against a fresh histogram builder and freezes the accumulated counters into
     * immutable bins. The standard deviation of every bin is the square root of its value.
     */
    public fun produce(builder: UnivariateHistogramBuilder.() -> Unit): UnivariateHistogram {
        val bins: TreeMap<Double, BinCounter> = TreeMap()
        val hBuilder = object : UnivariateHistogramBuilder {
            fun get(value: Double): BinCounter? = bins.getBin(value)

            fun createBin(value: Double): BinCounter {
                val binDefinition = binFactory(value)
                val newBin = BinCounter(binDefinition)
                synchronized(this) { bins[binDefinition.center] = newBin }
                return newBin
            }

            /**
             * Adds [value] to the bin containing [at], creating the bin first when it does not exist.
             *
             * NOTE(review): only the insertion of a new bin is synchronized; the lookup is not.
             * Confirm the intended guarantees before relying on concurrent filling.
             */
            override fun putValue(at: Double, value: Double) {
                (get(at) ?: createBin(at)).counter.add(value)
            }

            /**
             * Uses the first coordinate of [point] as the position and [value] as the weight.
             * Delegates to the scalar [putValue]: the `put` helpers treat every argument as a
             * coordinate, so calling them here would lose the weight.
             */
            override fun putValue(point: Buffer<Double>, value: Number) {
                putValue(point[0], value.toDouble())
            }
        }
        hBuilder.apply(builder)

        val resBins = TreeMap<Double, UnivariateBin>()
        for ((key, binCounter) in bins) {
            val count = binCounter.counter.value
            resBins[key] = UnivariateBinValue(binCounter.domain, count, sqrt(count))
        }
        return TreeHistogram(this, resBins)
    }

    /**
     * Sums two histograms of this space bin by bin over the union of their bin domains.
     * A bin missing from one operand contributes zero. Standard deviations are added linearly.
     *
     * @throws IllegalArgumentException if [a] or [b] belongs to another context.
     */
    override fun add(
        a: UnivariateHistogram,
        b: UnivariateHistogram,
    ): UnivariateHistogram {
        require(a.context == this) { "Histogram $a does not belong to this context" }
        require(b.context == this) { "Histogram $b does not belong to this context" }
        val bins = TreeMap<Double, UnivariateBin>().apply {
            (a.bins.map { it.domain } union b.bins.map { it.domain }).forEach { def ->
                val center = def.center
                val binA = a[center]
                val binB = b[center]
                // The combined bin has to be stored, otherwise the resulting histogram stays empty.
                put(
                    center,
                    UnivariateBinValue(
                        def,
                        value = (binA?.value ?: 0.0) + (binB?.value ?: 0.0),
                        standardDeviation = (binA?.standardDeviation ?: 0.0) + (binB?.standardDeviation ?: 0.0)
                    )
                )
            }
        }
        return TreeHistogram(this, bins)
    }

    /**
     * Scales the value of every bin of [a] by [k]; standard deviations are scaled by the absolute
     * value of the factor.
     */
    override fun multiply(a: UnivariateHistogram, k: Number): UnivariateHistogram {
        val factor = k.toDouble()
        val bins = TreeMap<Double, UnivariateBin>().apply {
            a.bins.forEach { bin ->
                put(
                    bin.domain.center,
                    UnivariateBinValue(
                        bin.domain,
                        value = bin.value * factor,
                        standardDeviation = abs(bin.standardDeviation * factor)
                    )
                )
            }
        }
        return TreeHistogram(this, bins)
    }

    /** The histogram without bins. */
    override val zero: UnivariateHistogram = produce { }

    public companion object {
        /**
         * Creates a space with bins of equal width [binSize]. Bin centers are located at
         * `start + k * binSize` for integer `k`, so [start] is the center of one of the bins.
         */
        public fun uniform(
            binSize: Double,
            start: Double = 0.0,
        ): TreeHistogramSpace = TreeHistogramSpace { value ->
            val center = start + binSize * Math.floor((value - start) / binSize + 0.5)
            UnivariateDomain((center - binSize / 2)..(center + binSize / 2))
        }

        /**
         * Creates a space with custom bin [borders]. The borders are sorted first. Values below
         * the smallest border or above the largest one fall into half-infinite bins; any other
         * value falls into the bin limited by the two neighbouring borders.
         */
        public fun custom(borders: DoubleArray): TreeHistogramSpace {
            val sorted = borders.sortedArray()

            return TreeHistogramSpace { value ->
                when {
                    value < sorted.first() -> UnivariateDomain(
                        Double.NEGATIVE_INFINITY..sorted.first()
                    )
                    value > sorted.last() -> UnivariateDomain(
                        sorted.last()..Double.POSITIVE_INFINITY
                    )
                    else -> {
                        // Index of the first border that is not below the value. For a regular
                        // number it exists because the value does not exceed the last border.
                        val upper = sorted.indexOfFirst { it >= value }
                        require(upper >= 0 && sorted.size > 1) {
                            "Can't find a finite bin for $value between borders ${sorted.contentToString()}"
                        }
                        // A value equal to the first border belongs to the first finite bin.
                        val right = upper.coerceAtLeast(1)
                        UnivariateDomain(sorted[right - 1]..sorted[right])
                    }
                }
            }
        }
    }
}

View File

@ -1,24 +1,17 @@
package kscience.kmath.histogram
import kscience.kmath.linear.Point
import kscience.kmath.domains.UnivariateDomain
import kscience.kmath.misc.UnstableKMathAPI
import kscience.kmath.operations.Space
import kscience.kmath.operations.SpaceElement
import kscience.kmath.structures.Buffer
import kscience.kmath.structures.asBuffer
import kscience.kmath.structures.asSequence
public data class UnivariateHistogramBinDefinition(
val position: Double,
val size: Double,
) : Comparable<UnivariateHistogramBinDefinition> {
override fun compareTo(other: UnivariateHistogramBinDefinition): Int = this.position.compareTo(other.position)
}
public interface UnivariateBin : Bin<Double> {
public val def: UnivariateHistogramBinDefinition
/**
 * The midpoint of the domain range, i.e. the half-sum of its borders.
 */
public val UnivariateDomain.center: Double get() = (range.endInclusive + range.start) / 2
public val position: Double get() = def.position
public val size: Double get() = def.size
public interface UnivariateBin : Bin<Double>, ClosedFloatingPointRange<Double> {
public val domain: UnivariateDomain
/**
* The value of histogram including weighting
@ -30,19 +23,15 @@ public interface UnivariateBin : Bin<Double> {
*/
public val standardDeviation: Double
public val center: Point<Double> get() = doubleArrayOf(position).asBuffer()
public override val dimension: Int get() = 1
public override fun contains(point: Buffer<Double>): Boolean = contains(point[0])
public override fun contains(point: Buffer<Double>): Boolean = point.size == 1 && contains(point[0])
}
public operator fun UnivariateBin.contains(value: Double): Boolean =
value in (position - size / 2)..(position + size / 2)
@OptIn(UnstableKMathAPI::class)
@UnstableKMathAPI
public interface UnivariateHistogram : Histogram<Double, UnivariateBin>,
SpaceElement<UnivariateHistogram, UnivariateHistogramSpace> {
SpaceElement<UnivariateHistogram, Space<UnivariateHistogram>> {
public operator fun get(value: Double): UnivariateBin?
public override operator fun get(point: Buffer<Double>): UnivariateBin? = get(point[0])
@ -54,7 +43,7 @@ public interface UnivariateHistogram : Histogram<Double, UnivariateBin>,
binSize: Double,
start: Double = 0.0,
builder: UnivariateHistogramBuilder.() -> Unit,
): UnivariateHistogram = UnivariateHistogramSpace.uniform(binSize, start).produce(builder)
): UnivariateHistogram = TreeHistogramSpace.uniform(binSize, start).produce(builder)
/**
* Build and fill a histogram with custom borders. Returns a read-only histogram.
@ -62,33 +51,26 @@ public interface UnivariateHistogram : Histogram<Double, UnivariateBin>,
public fun custom(
borders: DoubleArray,
builder: UnivariateHistogramBuilder.() -> Unit,
): UnivariateHistogram = UnivariateHistogramSpace.custom(borders).produce(builder)
): UnivariateHistogram = TreeHistogramSpace.custom(borders).produce(builder)
}
}
public interface UnivariateHistogramBuilder: HistogramBuilder<Double> {
@UnstableKMathAPI
public interface UnivariateHistogramBuilder : HistogramBuilder<Double> {
/**
* Thread safe put operation
*/
public fun put(value: Double, weight: Double = 1.0)
public fun putValue(at: Double, value: Double = 1.0)
override fun putValue(point: Buffer<Double>, value: Number)
/**
* Put several items into a single bin
*/
public fun putMany(value: Double, count: Int, weight: Double = count.toDouble())
public fun build(): UnivariateHistogram
}
@UnstableKMathAPI
public fun UnivariateHistogramBuilder.fill(items: Iterable<Double>): Unit = items.forEach(::put)
public fun UnivariateHistogramBuilder.fill(items: Iterable<Double>): Unit = items.forEach(this::putValue)
@UnstableKMathAPI
public fun UnivariateHistogramBuilder.fill(array: DoubleArray): Unit = array.forEach(::put)
public fun UnivariateHistogramBuilder.fill(array: DoubleArray): Unit = array.forEach(this::putValue)
@UnstableKMathAPI
public fun UnivariateHistogramBuilder.fill(buffer: Buffer<Double>): Unit = buffer.asSequence().forEach(::put)
public fun UnivariateHistogramBuilder.fill(buffer: Buffer<Double>): Unit = buffer.asSequence().forEach(this::putValue)

View File

@ -1,185 +0,0 @@
package kscience.kmath.histogram
import kscience.kmath.operations.Space
import kscience.kmath.structures.Buffer
import java.util.*
import kotlin.math.abs
import kotlin.math.sqrt
/**
 * Finds the bin of this map that contains [value].
 *
 * Only the two entries nearest to [value] by key are examined: first the entry with the least key
 * that is greater than or equal to [value], then the entry with the greatest key that is less than
 * or equal to it.
 *
 * @return the first of those two bins that contains [value], or `null` if neither does.
 */
private fun <B : UnivariateBin> TreeMap<Double, B>.getBin(value: Double): B? =
    ceilingEntry(value)?.value?.takeIf { value in it }
        ?: floorEntry(value)?.value?.takeIf { value in it }
/**
 * A [UnivariateHistogram] whose bins are held in a [TreeMap]; bin lookup is delegated to [getBin].
 *
 * @property context the space this histogram belongs to.
 * @property binMap the backing map of bins.
 */
private class UnivariateHistogramImpl(
    override val context: UnivariateHistogramSpace,
    val binMap: TreeMap<Double, out UnivariateBin>,
) : UnivariateHistogram {

    /** Always one: the histogram is univariate. */
    override val dimension: Int
        get() = 1

    /** Live view of the bins stored in the backing map, in key order. */
    override val bins: Collection<UnivariateBin>
        get() = binMap.values

    /** Returns the bin containing [value], or `null` when no stored bin covers it. */
    override fun get(value: Double): UnivariateBin? {
        return binMap.getBin(value)
    }
}
/**
 * Mutable [UnivariateBin] that tracks the number of recorded events and their accumulated value
 * in two separate counters.
 */
private class UnivariateBinCounter(
    override val def: UnivariateHistogramBinDefinition,
) : UnivariateBin {

    val counter: LongCounter = LongCounter()

    val valueCounter: ObjectCounter<Double> = Counter.real()

    /**
     * The number of recorded events, weights not taken into account
     */
    val count: Long
        get() = counter.value

    /**
     * The accumulated value of the bin, weights taken into account
     */
    override val value: Double
        get() = valueCounter.value

    /**
     * The relative error `sqrt(count) / count` applied to the accumulated [value]
     */
    override val standardDeviation: Double
        get() {
            val relativeError = sqrt(count.toDouble()) / count
            return relativeError * value
        }

    /** Adds [count] to the event counter and [value] to the value counter. */
    public fun increment(count: Long, value: Double) {
        counter.add(count)
        valueCounter.add(value)
    }
}
/**
 * Immutable [UnivariateBin] holding a fixed [value] and [standardDeviation] for the bin definition [def].
 */
private class UnivariateBinValue(
override val def: UnivariateHistogramBinDefinition,
override val value: Double,
override val standardDeviation: Double,
) : UnivariateBin
/**
 * A [Space] of univariate histograms whose bins are created on demand from the definitions
 * returned by [binFactory] and stored in a [TreeMap] keyed by bin position.
 *
 * @property binFactory returns the definition of the bin that should hold the given value.
 */
public class UnivariateHistogramSpace(
    public val binFactory: (Double) -> UnivariateHistogramBinDefinition,
) : Space<UnivariateHistogram> {

    private inner class UnivariateHistogramBuilderImpl : UnivariateHistogramBuilder {

        val bins: TreeMap<Double, UnivariateBinCounter> = TreeMap()

        fun get(value: Double): UnivariateBinCounter? = bins.getBin(value)

        private fun createBin(value: Double): UnivariateBinCounter {
            val binDefinition = binFactory(value)
            val newBin = UnivariateBinCounter(binDefinition)
            synchronized(this) { bins[binDefinition.position] = newBin }
            return newBin
        }

        /**
         * Records a single event of the given [weight] in the bin containing [value], creating the
         * bin first when it does not exist.
         *
         * NOTE(review): only the insertion of a new bin is synchronized; the lookup is not.
         * Confirm the intended guarantees before relying on concurrent filling.
         */
        override fun put(value: Double, weight: Double) {
            (get(value) ?: createBin(value)).increment(1, weight)
        }

        /** Uses the first coordinate of [point] as the position and [value] as the weight. */
        override fun putValue(point: Buffer<Double>, value: Number) {
            put(point[0], value.toDouble())
        }

        /**
         * Put several items into a single bin
         */
        override fun putMany(value: Double, count: Int, weight: Double) {
            (get(value) ?: createBin(value)).increment(count.toLong(), weight)
        }

        /** Wraps the accumulated bins into a histogram; the bin map is passed on without copying. */
        override fun build(): UnivariateHistogram = UnivariateHistogramImpl(this@UnivariateHistogramSpace, bins)
    }

    /** Creates an empty builder bound to this space. */
    public fun builder(): UnivariateHistogramBuilder = UnivariateHistogramBuilderImpl()

    /** Runs [builder] against a fresh histogram builder and returns the built histogram. */
    public fun produce(builder: UnivariateHistogramBuilder.() -> Unit): UnivariateHistogram =
        UnivariateHistogramBuilderImpl().apply(builder).build()

    /**
     * Sums two histograms of this space bin by bin over the union of their bin definitions.
     * A bin missing from one operand contributes zero. Standard deviations are added linearly.
     *
     * @throws IllegalArgumentException if [a] or [b] belongs to another context.
     */
    override fun add(
        a: UnivariateHistogram,
        b: UnivariateHistogram,
    ): UnivariateHistogram {
        require(a.context == this) { "Histogram $a does not belong to this context" }
        require(b.context == this) { "Histogram $b does not belong to this context" }
        val bins = TreeMap<Double, UnivariateBin>().apply {
            (a.bins.map { it.def } union b.bins.map { it.def }).forEach { def ->
                val binA = a[def.position]
                val binB = b[def.position]
                // The combined bin has to be stored, otherwise the resulting histogram stays empty.
                put(
                    def.position,
                    UnivariateBinValue(
                        def,
                        value = (binA?.value ?: 0.0) + (binB?.value ?: 0.0),
                        standardDeviation = (binA?.standardDeviation ?: 0.0) + (binB?.standardDeviation ?: 0.0)
                    )
                )
            }
        }
        return UnivariateHistogramImpl(this, bins)
    }

    /**
     * Scales the value of every bin of [a] by [k]; standard deviations are scaled by the absolute
     * value of the factor.
     */
    override fun multiply(a: UnivariateHistogram, k: Number): UnivariateHistogram {
        val factor = k.toDouble()
        val bins = TreeMap<Double, UnivariateBin>().apply {
            a.bins.forEach { bin ->
                put(
                    bin.position,
                    UnivariateBinValue(
                        bin.def,
                        value = bin.value * factor,
                        standardDeviation = abs(bin.standardDeviation * factor)
                    )
                )
            }
        }
        return UnivariateHistogramImpl(this, bins)
    }

    /** The histogram without bins. */
    override val zero: UnivariateHistogram = produce { }

    public companion object {
        /**
         * Creates a space with bins of equal width [binSize]. Bin centers are located at
         * `start + k * binSize` for integer `k`, so [start] is the center of one of the bins.
         */
        public fun uniform(
            binSize: Double,
            start: Double = 0.0
        ): UnivariateHistogramSpace = UnivariateHistogramSpace { value ->
            val center = start + binSize * Math.floor((value - start) / binSize + 0.5)
            UnivariateHistogramBinDefinition(center, binSize)
        }

        /**
         * Creates a space with custom bin [borders]. The borders are sorted first. A value between
         * the smallest and the largest border falls into the bin limited by its two neighbouring
         * borders; values outside get a definition positioned at the matching infinity.
         */
        public fun custom(borders: DoubleArray): UnivariateHistogramSpace {
            val sorted = borders.sortedArray()

            return UnivariateHistogramSpace { value ->
                when {
                    // NOTE(review): an infinite position with a finite size presumably yields a
                    // bin that contains no finite value - confirm how overflow bins should look.
                    value < sorted.first() -> UnivariateHistogramBinDefinition(
                        Double.NEGATIVE_INFINITY,
                        Double.MAX_VALUE
                    )
                    value > sorted.last() -> UnivariateHistogramBinDefinition(
                        Double.POSITIVE_INFINITY,
                        Double.MAX_VALUE
                    )
                    else -> {
                        // Index of the first border that is not below the value. For a regular
                        // number it exists because the value does not exceed the last border.
                        val upper = sorted.indexOfFirst { it >= value }
                        require(upper >= 0 && sorted.size > 1) {
                            "Can't find a finite bin for $value between borders ${sorted.contentToString()}"
                        }
                        // A value equal to the first border belongs to the first finite bin.
                        val rightIndex = upper.coerceAtLeast(1)
                        val left = sorted[rightIndex - 1]
                        val right = sorted[rightIndex]
                        UnivariateHistogramBinDefinition((left + right) / 2, (right - left))
                    }
                }
            }
        }
    }
}