From 5f6c133550e3ddda09b63bd1a455fdf270402894 Mon Sep 17 00:00:00 2001 From: Alexander Nozik Date: Sat, 30 Jan 2021 11:24:34 +0300 Subject: [PATCH] Histogram refactor --- CHANGELOG.md | 1 + .../kscience/kmath/histogram/RealHistogram.kt | 48 +++---- .../kmath/histogram/UnivariateHistogram.kt | 123 +++++++++++++----- 3 files changed, 117 insertions(+), 55 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e2aa8f51..68a1829d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ - Features moved to NDStructure and became transparent. - Capitalization of LUP in many names changed to Lup. - Refactored `NDStructure` algebra to be more simple, preferring under-the-hood conversion to explicit NDStructure types +- Refactor histograms. They are marked as prototype ### Deprecated diff --git a/kmath-histograms/src/commonMain/kotlin/kscience/kmath/histogram/RealHistogram.kt b/kmath-histograms/src/commonMain/kotlin/kscience/kmath/histogram/RealHistogram.kt index 085641106..11eb77735 100644 --- a/kmath-histograms/src/commonMain/kotlin/kscience/kmath/histogram/RealHistogram.kt +++ b/kmath-histograms/src/commonMain/kotlin/kscience/kmath/histogram/RealHistogram.kt @@ -8,10 +8,10 @@ import kscience.kmath.operations.invoke import kscience.kmath.structures.* import kotlin.math.floor -public data class BinDef>( +public data class BinDefinition>( public val space: SpaceOperations>, public val center: Point, - public val sizes: Point + public val sizes: Point, ) { public fun contains(vector: Point): Boolean { require(vector.size == center.size) { "Dimension mismatch for input vector. Expected ${center.size}, but found ${vector.size}" } @@ -22,14 +22,17 @@ public data class BinDef>( } -public class MultivariateBin>(public val def: BinDef, public override val value: Number) : Bin { +public class MultivariateBin>( + public val definition: BinDefinition, + public override val value: Number, +) : Bin { public override val dimension: Int - get() = def.center.size + get() = definition.center.size public override val center: Point - get() = def.center + get() = definition.center - public override operator fun contains(point: Point): Boolean = def.contains(point) + public override operator fun contains(point: Point): Boolean = definition.contains(point) } /** @@ -38,11 +41,11 @@ public class MultivariateBin>(public val def: BinDef, publi public class RealHistogram( private val lower: Buffer, private val upper: Buffer, - private val binNums: IntArray = IntArray(lower.size) { 20 } + private val binNums: IntArray = IntArray(lower.size) { 20 }, ) : MutableHistogram> { private val strides = DefaultStrides(IntArray(binNums.size) { binNums[it] + 2 }) - private val values: NDStructure = NDStructure.auto(strides) { LongCounter() } - private val weights: NDStructure = NDStructure.auto(strides) { DoubleCounter() } + private val counts: NDStructure = NDStructure.auto(strides) { LongCounter() } + private val values: NDStructure = NDStructure.auto(strides) { DoubleCounter() } public override val dimension: Int get() = lower.size private val binSize = RealBuffer(dimension) { (upper[it] - lower[it]) / binNums[it] } @@ -65,11 +68,11 @@ public class RealHistogram( private fun getIndex(point: Buffer): IntArray = IntArray(dimension) { getIndex(it, point[it]) } - private fun getValue(index: IntArray): Long = values[index].sum() + private fun getValue(index: IntArray): Long = counts[index].sum() public fun getValue(point: Buffer): Long = getValue(getIndex(point)) - private fun getDef(index: IntArray): BinDef { + private fun getBinDefinition(index: IntArray): BinDefinition { val center = index.mapIndexed { axis, i -> when (i) { 0 -> Double.NEGATIVE_INFINITY @@ -78,14 +81,14 @@ public class RealHistogram( } }.asBuffer() - return BinDef(RealBufferFieldOperations, center, binSize) + return BinDefinition(RealBufferFieldOperations, center, binSize) } - public fun getDef(point: Buffer): BinDef = getDef(getIndex(point)) + public fun getBinDefinition(point: Buffer): BinDefinition = getBinDefinition(getIndex(point)) public override operator fun get(point: Buffer): MultivariateBin? { val index = getIndex(point) - return MultivariateBin(getDef(index), getValue(index)) + return MultivariateBin(getBinDefinition(index), getValue(index)) } // fun put(point: Point){ @@ -95,23 +98,24 @@ public class RealHistogram( public override fun putWithWeight(point: Buffer, weight: Double) { val index = getIndex(point) - values[index].increment() - weights[index].add(weight) + counts[index].increment() + values[index].add(weight) } public override operator fun iterator(): Iterator> = - weights.elements().map { (index, value) -> MultivariateBin(getDef(index), value.sum()) } - .iterator() + values.elements().map { (index, value) -> + MultivariateBin(getBinDefinition(index), value.sum()) + }.iterator() /** - * Convert this histogram into NDStructure containing bin values but not bin descriptions + * NDStructure containing number of events in bins without weights */ - public fun values(): NDStructure = NDStructure.auto(values.shape) { values[it].sum() } + public fun counts(): NDStructure = NDStructure.auto(counts.shape) { counts[it].sum() } /** - * Sum of weights + * NDStructure containing values of bins including weights */ - public fun weights(): NDStructure = NDStructure.auto(weights.shape) { weights[it].sum() } + public fun values(): NDStructure = NDStructure.auto(values.shape) { values[it].sum() } public companion object { /** diff --git a/kmath-histograms/src/jvmMain/kotlin/kscience/kmath/histogram/UnivariateHistogram.kt b/kmath-histograms/src/jvmMain/kotlin/kscience/kmath/histogram/UnivariateHistogram.kt index d07c2ba01..049f61d5a 100644 --- a/kmath-histograms/src/jvmMain/kotlin/kscience/kmath/histogram/UnivariateHistogram.kt +++ b/kmath-histograms/src/jvmMain/kotlin/kscience/kmath/histogram/UnivariateHistogram.kt @@ -1,8 +1,10 @@ package kscience.kmath.histogram import kscience.kmath.linear.Point +import kscience.kmath.misc.UnstableKMathAPI import kscience.kmath.structures.Buffer import kscience.kmath.structures.asBuffer +import kscience.kmath.structures.asSequence import java.util.* import kotlin.math.floor @@ -11,29 +13,36 @@ import kotlin.math.floor public class UnivariateBin( public val position: Double, public val size: Double, - public val counter: LongCounter = LongCounter(), ) : Bin { - //TODO add weighting - public override val value: Number get() = counter.sum() + //internal mutation operations + internal val counter: LongCounter = LongCounter() + internal val weightCounter: DoubleCounter = DoubleCounter() + + /** + * The precise number of events ignoring weighting + */ + public val count: Long get() = counter.sum() + + /** + * The value of histogram including weighting + */ + public override val value: Double get() = weightCounter.sum() public override val center: Point get() = doubleArrayOf(position).asBuffer() public override val dimension: Int get() = 1 public operator fun contains(value: Double): Boolean = value in (position - size / 2)..(position + size / 2) public override fun contains(point: Buffer): Boolean = contains(point[0]) - internal operator fun inc(): UnivariateBin = this.also { counter.increment() } } /** * Univariate histogram with log(n) bin search speed */ -public class UnivariateHistogram private constructor( - private val factory: (Double) -> UnivariateBin, -) : MutableHistogram { +public abstract class UnivariateHistogram( + protected val bins: TreeMap = TreeMap(), +) : Histogram { - private val bins: TreeMap = TreeMap() - - private operator fun get(value: Double): UnivariateBin? { + public operator fun get(value: Double): UnivariateBin? { // check ceiling entry and return it if it is what needed val ceil = bins.ceilingEntry(value)?.value if (ceil != null && value in ceil) return ceil @@ -44,38 +53,38 @@ public class UnivariateHistogram private constructor( return null } - private fun createBin(value: Double): UnivariateBin = factory(value).also { - synchronized(this) { bins[it.position] = it } - } - public override operator fun get(point: Buffer): UnivariateBin? = get(point[0]) public override val dimension: Int get() = 1 public override operator fun iterator(): Iterator = bins.values.iterator() - /** - * Thread safe put operation - */ - public fun put(value: Double) { - (get(value) ?: createBin(value)).inc() - } - - override fun putWithWeight(point: Buffer, weight: Double) { - if (weight != 1.0) TODO("Implement weighting") - put(point[0]) - } - public companion object { - public fun uniform(binSize: Double, start: Double = 0.0): UnivariateHistogram = UnivariateHistogram { value -> - val center = start + binSize * floor((value - start) / binSize + 0.5) - UnivariateBin(center, binSize) - } + /** + * Build a histogram with a uniform binning with a start at [start] and a bin size of [binSize] + */ + public fun uniformBuilder(binSize: Double, start: Double = 0.0): UnivariateHistogramBuilder = + UnivariateHistogramBuilder { value -> + val center = start + binSize * floor((value - start) / binSize + 0.5) + UnivariateBin(center, binSize) + } - public fun custom(borders: DoubleArray): UnivariateHistogram { + /** + * Build and fill a [UnivariateHistogram]. Returns a read-only histogram. + */ + public fun uniform( + binSize: Double, + start: Double = 0.0, + builder: UnivariateHistogramBuilder.() -> Unit, + ): UnivariateHistogram = uniformBuilder(binSize, start).apply(builder) + + /** + * Create a histogram with custom cell borders + */ + public fun customBuilder(borders: DoubleArray): UnivariateHistogramBuilder { val sorted = borders.sortedArray() - return UnivariateHistogram { value -> + return UnivariateHistogramBuilder { value -> when { value < sorted.first() -> UnivariateBin( Double.NEGATIVE_INFINITY, @@ -96,7 +105,55 @@ public class UnivariateHistogram private constructor( } } } + + /** + * Build and fill a histogram with custom borders. Returns a read-only histogram. + */ + public fun custom( + borders: DoubleArray, + builder: UnivariateHistogramBuilder.() -> Unit, + ): UnivariateHistogram = customBuilder(borders).apply(builder) } } -public fun UnivariateHistogram.fill(sequence: Iterable): Unit = sequence.forEach(::put) +public class UnivariateHistogramBuilder( + private val factory: (Double) -> UnivariateBin, +) : UnivariateHistogram(), MutableHistogram { + + private fun createBin(value: Double): UnivariateBin = factory(value).also { + synchronized(this) { bins[it.position] = it } + } + + /** + * Thread safe put operation + */ + public fun put(value: Double, weight: Double = 1.0) { + (get(value) ?: createBin(value)).apply{ + counter.increment() + weightCounter.add(weight) + } + } + + override fun putWithWeight(point: Buffer, weight: Double) { + put(point[0], weight) + } + + /** + * Put several items into a single bin + */ + public fun putMany(value: Double, count: Int, weight: Double = count.toDouble()){ + (get(value) ?: createBin(value)).apply{ + counter.add(count.toLong()) + weightCounter.add(weight) + } + } +} + +@UnstableKMathAPI +public fun UnivariateHistogramBuilder.fill(items: Iterable): Unit = items.forEach(::put) + +@UnstableKMathAPI +public fun UnivariateHistogramBuilder.fill(array: DoubleArray): Unit = array.forEach(::put) + +@UnstableKMathAPI +public fun UnivariateHistogramBuilder.fill(buffer: Buffer): Unit = buffer.asSequence().forEach(::put) \ No newline at end of file