WIP histograms refactor

This commit is contained in:
Alexander Nozik 2021-02-11 11:18:16 +03:00
parent a384b323c3
commit 2fdba53911
10 changed files with 298 additions and 182 deletions

View File

@ -1,4 +1,10 @@
plugins { id("ru.mipt.npm.mpp") }
plugins {
id("ru.mipt.npm.mpp")
}
kscience {
useAtomic()
}
kotlin.sourceSets {
commonMain {
@ -6,8 +12,8 @@ kotlin.sourceSets {
api(project(":kmath-core"))
}
}
commonTest{
dependencies{
commonTest {
dependencies {
implementation(project(":kmath-for-real"))
}
}

View File

@ -1,20 +1,47 @@
package kscience.kmath.histogram
import kotlinx.atomicfu.atomic
import kotlinx.atomicfu.getAndUpdate
import kscience.kmath.operations.Space
/*
* Common representation for atomic counters
* TODO replace with atomics
*/
public expect class LongCounter() {
public fun decrement()
public fun increment()
public fun reset()
public fun sum(): Long
public fun add(l: Long)
public interface Counter<T : Any> {
public fun add(delta: T)
public val value: T
}
public expect class DoubleCounter() {
public fun reset()
public fun sum(): Double
public fun add(d: Double)
public class IntCounter : Counter<Int> {
private val innerValue = atomic(0)
override fun add(delta: Int) {
innerValue += delta
}
override val value: Int get() = innerValue.value
}
public class LongCounter : Counter<Long> {
private val innerValue = atomic(0L)
override fun add(delta: Long) {
innerValue += delta
}
override val value: Long get() = innerValue.value
}
public class ObjectCounter<T : Any>(public val space: Space<T>) : Counter<T> {
private val innerValue = atomic(space.zero)
override fun add(delta: T) {
innerValue.getAndUpdate { space.run { it + delta } }
}
override val value: T get() = innerValue.value
}

View File

@ -6,7 +6,7 @@ import kscience.kmath.structures.ArrayBuffer
import kscience.kmath.structures.RealBuffer
/**
* The bin in the histogram. The histogram is by definition always done in the real space
* The binned data element. Could be a histogram bin with a number of counts or an artificial construct
*/
public interface Bin<T : Any> : Domain<T> {
/**
@ -17,7 +17,7 @@ public interface Bin<T : Any> : Domain<T> {
public val center: Point<T>
}
public interface Histogram<T : Any, out B : Bin<T>> : Iterable<B> {
public interface Histogram<T : Any, out B : Bin<T>> {
/**
* Find existing bin, corresponding to given coordinates
*/
@ -27,9 +27,11 @@ public interface Histogram<T : Any, out B : Bin<T>> : Iterable<B> {
* Dimension of the histogram
*/
public val dimension: Int
public val bins: Collection<B>
}
public interface MutableHistogram<T : Any, out B : Bin<T>> : Histogram<T, B> {
public interface HistogramBuilder<T : Any, out B : Bin<T>> : Histogram<T, B> {
/**
* Increment appropriate bin
@ -39,16 +41,16 @@ public interface MutableHistogram<T : Any, out B : Bin<T>> : Histogram<T, B> {
public fun put(point: Point<out T>): Unit = putWithWeight(point, 1.0)
}
public fun <T : Any> MutableHistogram<T, *>.put(vararg point: T): Unit = put(ArrayBuffer(point))
public fun <T : Any> HistogramBuilder<T, *>.put(vararg point: T): Unit = put(ArrayBuffer(point))
public fun MutableHistogram<Double, *>.put(vararg point: Number): Unit =
public fun HistogramBuilder<Double, *>.put(vararg point: Number): Unit =
put(RealBuffer(point.map { it.toDouble() }.toDoubleArray()))
public fun MutableHistogram<Double, *>.put(vararg point: Double): Unit = put(RealBuffer(point))
public fun <T : Any> MutableHistogram<T, *>.fill(sequence: Iterable<Point<T>>): Unit = sequence.forEach { put(it) }
public fun HistogramBuilder<Double, *>.put(vararg point: Double): Unit = put(RealBuffer(point))
public fun <T : Any> HistogramBuilder<T, *>.fill(sequence: Iterable<Point<T>>): Unit = sequence.forEach { put(it) }
/**
* Pass a sequence builder into histogram
*/
public fun <T : Any> MutableHistogram<T, *>.fill(block: suspend SequenceScope<Point<T>>.() -> Unit): Unit =
public fun <T : Any> HistogramBuilder<T, *>.fill(block: suspend SequenceScope<Point<T>>.() -> Unit): Unit =
fill(sequence(block).asIterable())

View File

@ -43,7 +43,7 @@ public class RealHistogram(
private val lower: Buffer<Double>,
private val upper: Buffer<Double>,
private val binNums: IntArray = IntArray(lower.size) { 20 },
) : MutableHistogram<Double, MultivariateBin<Double>> {
) : HistogramBuilder<Double, MultivariateBin<Double>> {
private val strides = DefaultStrides(IntArray(binNums.size) { binNums[it] + 2 })
private val counts: NDStructure<LongCounter> = NDStructure.auto(strides) { LongCounter() }
private val values: NDStructure<DoubleCounter> = NDStructure.auto(strides) { DoubleCounter() }
@ -88,11 +88,12 @@ public class RealHistogram(
return MultivariateBinDefinition(RealBufferFieldOperations, center, binSize)
}
public fun getBinDefinition(point: Buffer<out Double>): MultivariateBinDefinition<Double> = getBinDefinition(getIndex(point))
public fun getBinDefinition(point: Buffer<out Double>): MultivariateBinDefinition<Double> =
getBinDefinition(getIndex(point))
public override operator fun get(point: Buffer<out Double>): MultivariateBin<Double>? {
val index = getIndex(point)
return MultivariateBin(getBinDefinition(index), getCount(index),getValue(index))
return MultivariateBin(getBinDefinition(index), getCount(index), getValue(index))
}
// fun put(point: Point<out Double>){
@ -106,10 +107,10 @@ public class RealHistogram(
values[index].add(weight)
}
public override operator fun iterator(): Iterator<MultivariateBin<Double>> =
strides.indices().map { index->
override val bins: Collection<MultivariateBin<Double>>
get() = strides.indices().map { index ->
MultivariateBin(getBinDefinition(index), counts[index].sum(), values[index].sum())
}.iterator()
}.toList()
/**
* NDStructure containing number of events in bins without weights

View File

@ -16,7 +16,7 @@ internal class MultivariateHistogramTest {
(-1.0..1.0)
)
histogram.put(0.55, 0.55)
val bin = histogram.find { it.value.toInt() > 0 } ?: fail()
val bin = histogram.bins.find { it.value.toInt() > 0 } ?: fail()
assertTrue { bin.contains(RealVector(0.55, 0.55)) }
assertTrue { bin.contains(RealVector(0.6, 0.5)) }
assertFalse { bin.contains(RealVector(-0.55, 0.55)) }
@ -40,6 +40,6 @@ internal class MultivariateHistogramTest {
yield(RealVector(nextDouble(), nextDouble(), nextDouble()))
}
}
assertEquals(n, histogram.sumBy { it.value.toInt() })
assertEquals(n, histogram.bins.sumBy { it.value.toInt() })
}
}

View File

@ -1,37 +0,0 @@
package kscience.kmath.histogram
public actual class LongCounter {
private var sum: Long = 0L
public actual fun decrement() {
sum--
}
public actual fun increment() {
sum++
}
public actual fun reset() {
sum = 0
}
public actual fun sum(): Long = sum
public actual fun add(l: Long) {
sum += l
}
}
public actual class DoubleCounter {
private var sum: Double = 0.0
public actual fun reset() {
sum = 0.0
}
public actual fun sum(): Double = sum
public actual fun add(d: Double) {
sum += d
}
}

View File

@ -0,0 +1,33 @@
package kscience.kmath.histogram
/**
* Univariate histogram with log(n) bin search speed
*/
//private abstract class AbstractUnivariateHistogram<B: UnivariateBin>{
//
// public abstract val bins: TreeMap<Double, B>
//
// public open operator fun get(value: Double): B? {
// // check ceiling entry and return it if it is what needed
// val ceil = bins.ceilingEntry(value)?.value
// if (ceil != null && value in ceil) return ceil
// //check floor entry
// val floor = bins.floorEntry(value)?.value
// if (floor != null && value in floor) return floor
// //neither is valid, not found
// return null
// }
// public override operator fun get(point: Buffer<out Double>): B? = get(point[0])
//
// public override val dimension: Int get() = 1
//
// public override operator fun iterator(): Iterator<B> = bins.values.iterator()
//
// public companion object {
// }
//}

View File

@ -1,7 +0,0 @@
package kscience.kmath.histogram
import java.util.concurrent.atomic.DoubleAdder
import java.util.concurrent.atomic.LongAdder
public actual typealias LongCounter = LongAdder
public actual typealias DoubleCounter = DoubleAdder

View File

@ -6,71 +6,47 @@ import kscience.kmath.operations.SpaceElement
import kscience.kmath.structures.Buffer
import kscience.kmath.structures.asBuffer
import kscience.kmath.structures.asSequence
import java.util.*
import kotlin.math.floor
//TODO move to common
public data class UnivariateHistogramBinDefinition(
val position: Double,
val size: Double,
) : Comparable<UnivariateHistogramBinDefinition> {
override fun compareTo(other: UnivariateHistogramBinDefinition): Int = this.position.compareTo(other.position)
}
public class UnivariateBin(
public val position: Double,
public val size: Double,
) : Bin<Double> {
//internal mutation operations
internal val counter: LongCounter = LongCounter()
internal val weightCounter: DoubleCounter = DoubleCounter()
public interface UnivariateBin : Bin<Double> {
public val def: UnivariateHistogramBinDefinition
/**
* The precise number of events ignoring weighting
*/
public val count: Long get() = counter.sum()
public val position: Double get() = def.position
public val size: Double get() = def.size
/**
* The value of histogram including weighting
*/
public override val value: Double get() = weightCounter.sum()
public override val value: Double
/**
* Standard deviation of the bin value. Zero if not applicable
*/
public val standardDeviation: Double
public override val center: Point<Double> get() = doubleArrayOf(position).asBuffer()
public override val dimension: Int get() = 1
public operator fun contains(value: Double): Boolean = value in (position - size / 2)..(position + size / 2)
public override fun contains(point: Buffer<Double>): Boolean = contains(point[0])
}
/**
* Univariate histogram with log(n) bin search speed
*/
public operator fun UnivariateBin.contains(value: Double): Boolean =
value in (position - size / 2)..(position + size / 2)
@OptIn(UnstableKMathAPI::class)
public abstract class UnivariateHistogram protected constructor(
protected val bins: TreeMap<Double, UnivariateBin> = TreeMap(),
) : Histogram<Double, UnivariateBin>, SpaceElement<UnivariateHistogram, UnivariateHistogramSpace> {
public operator fun get(value: Double): UnivariateBin? {
// check ceiling entry and return it if it is what needed
val ceil = bins.ceilingEntry(value)?.value
if (ceil != null && value in ceil) return ceil
//check floor entry
val floor = bins.floorEntry(value)?.value
if (floor != null && value in floor) return floor
//neither is valid, not found
return null
}
public interface UnivariateHistogram : Histogram<Double, UnivariateBin>,
SpaceElement<UnivariateHistogram, UnivariateHistogramSpace> {
public operator fun get(value: Double): UnivariateBin?
public override operator fun get(point: Buffer<out Double>): UnivariateBin? = get(point[0])
public override val dimension: Int get() = 1
public override operator fun iterator(): Iterator<UnivariateBin> = bins.values.iterator()
public companion object {
/**
* Build a histogram with a uniform binning with a start at [start] and a bin size of [binSize]
*/
public fun uniformBuilder(binSize: Double, start: Double = 0.0): UnivariateHistogramBuilder =
UnivariateHistogramSpace { value ->
val center = start + binSize * floor((value - start) / binSize + 0.5)
UnivariateBin(center, binSize)
}.builder()
/**
* Build and fill a [UnivariateHistogram]. Returns a read-only histogram.
*/
@ -78,35 +54,7 @@ public abstract class UnivariateHistogram protected constructor(
binSize: Double,
start: Double = 0.0,
builder: UnivariateHistogramBuilder.() -> Unit,
): UnivariateHistogram = uniformBuilder(binSize, start).apply(builder)
/**
* Create a histogram with custom cell borders
*/
public fun customBuilder(borders: DoubleArray): UnivariateHistogramBuilder {
val sorted = borders.sortedArray()
return UnivariateHistogramSpace { value ->
when {
value < sorted.first() -> UnivariateBin(
Double.NEGATIVE_INFINITY,
Double.MAX_VALUE
)
value > sorted.last() -> UnivariateBin(
Double.POSITIVE_INFINITY,
Double.MAX_VALUE
)
else -> {
val index = sorted.indices.first { value > sorted[it] }
val left = sorted[index]
val right = sorted[index + 1]
UnivariateBin((left + right) / 2, (right - left))
}
}
}.builder()
}
): UnivariateHistogram = UnivariateHistogramSpace.uniform(binSize, start).produce(builder)
/**
* Build and fill a histogram with custom borders. Returns a read-only histogram.
@ -114,41 +62,24 @@ public abstract class UnivariateHistogram protected constructor(
public fun custom(
borders: DoubleArray,
builder: UnivariateHistogramBuilder.() -> Unit,
): UnivariateHistogram = customBuilder(borders).apply(builder)
): UnivariateHistogram = UnivariateHistogramSpace.custom(borders).produce(builder)
}
}
public class UnivariateHistogramBuilder internal constructor(
override val context: UnivariateHistogramSpace,
) : UnivariateHistogram(), MutableHistogram<Double, UnivariateBin> {
private fun createBin(value: Double): UnivariateBin = context.binFactory(value).also {
synchronized(this) { bins[it.position] = it }
}
public interface UnivariateHistogramBuilder {
/**
* Thread safe put operation
*/
public fun put(value: Double, weight: Double = 1.0) {
(get(value) ?: createBin(value)).apply {
counter.increment()
weightCounter.add(weight)
}
}
override fun putWithWeight(point: Buffer<out Double>, weight: Double) {
put(point[0], weight)
}
public fun put(value: Double, weight: Double = 1.0)
public fun putWithWeight(point: Buffer<out Double>, weight: Double)
/**
* Put several items into a single bin
*/
public fun putMany(value: Double, count: Int, weight: Double = count.toDouble()) {
(get(value) ?: createBin(value)).apply {
counter.add(count.toLong())
weightCounter.add(weight)
}
}
public fun putMany(value: Double, count: Int, weight: Double = count.toDouble())
public fun build(): UnivariateHistogram
}
@UnstableKMathAPI

View File

@ -1,25 +1,185 @@
package kscience.kmath.histogram
import kscience.kmath.operations.Space
import kscience.kmath.structures.Buffer
import java.util.*
import kotlin.math.abs
import kotlin.math.sqrt
public class UnivariateHistogramSpace(public val binFactory: (Double) -> UnivariateBin) : Space<UnivariateHistogram> {
private fun <B : UnivariateBin> TreeMap<Double, B>.getBin(value: Double): B? {
// check ceiling entry and return it if it is what needed
val ceil = ceilingEntry(value)?.value
if (ceil != null && value in ceil) return ceil
//check floor entry
val floor = floorEntry(value)?.value
if (floor != null && value in floor) return floor
//neither is valid, not found
return null
}
public fun builder(): UnivariateHistogramBuilder = UnivariateHistogramBuilder(this)
public fun produce(builder: UnivariateHistogramBuilder.() -> Unit): UnivariateHistogram = builder().apply(builder)
private class UnivariateHistogramImpl(
override val context: UnivariateHistogramSpace,
val binMap: TreeMap<Double, out UnivariateBin>,
) : UnivariateHistogram {
override fun get(value: Double): UnivariateBin? = binMap.getBin(value)
override val dimension: Int get() = 1
override val bins: Collection<UnivariateBin> get() = binMap.values
}
private class UnivariateBinCounter(
override val def: UnivariateHistogramBinDefinition,
) : UnivariateBin {
val counter: LongCounter = LongCounter()
val valueCounter: DoubleCounter = DoubleCounter()
/**
* The precise number of events ignoring weighting
*/
val count: Long get() = counter.sum()
override val standardDeviation: Double get() = sqrt(count.toDouble()) / count * value
/**
* The value of histogram including weighting
*/
override val value: Double get() = valueCounter.sum()
public fun increment(count: Long, value: Double) {
counter.add(count)
valueCounter.add(value)
}
}
private class UnivariateBinValue(
override val def: UnivariateHistogramBinDefinition,
override val value: Double,
override val standardDeviation: Double,
) : UnivariateBin
public class UnivariateHistogramSpace(
public val binFactory: (Double) -> UnivariateHistogramBinDefinition,
) : Space<UnivariateHistogram> {
private inner class UnivariateHistogramBuilderImpl : UnivariateHistogramBuilder {
val bins: TreeMap<Double, UnivariateBinCounter> = TreeMap()
fun get(value: Double): UnivariateBinCounter? = bins.getBin(value)
private fun createBin(value: Double): UnivariateBinCounter {
val binDefinition = binFactory(value)
val newBin = UnivariateBinCounter(binDefinition)
synchronized(this) { bins[binDefinition.position] = newBin }
return newBin
}
/**
* Thread safe put operation
*/
override fun put(value: Double, weight: Double) {
(get(value) ?: createBin(value)).apply {
increment(1, weight)
}
}
override fun putWithWeight(point: Buffer<out Double>, weight: Double) {
put(point[0], weight)
}
/**
* Put several items into a single bin
*/
override fun putMany(value: Double, count: Int, weight: Double) {
(get(value) ?: createBin(value)).apply {
increment(count.toLong(), weight)
}
}
override fun build(): UnivariateHistogram = UnivariateHistogramImpl(this@UnivariateHistogramSpace, bins)
}
public fun builder(): UnivariateHistogramBuilder = UnivariateHistogramBuilderImpl()
public fun produce(builder: UnivariateHistogramBuilder.() -> Unit): UnivariateHistogram =
UnivariateHistogramBuilderImpl().apply(builder).build()
override fun add(
a: UnivariateHistogram,
b: UnivariateHistogram,
): UnivariateHistogram {
require(a.context == this){"Histogram $a does not belong to this context"}
require(b.context == this){"Histogram $b does not belong to this context"}
TODO()
require(a.context == this) { "Histogram $a does not belong to this context" }
require(b.context == this) { "Histogram $b does not belong to this context" }
val bins = TreeMap<Double, UnivariateBin>().apply {
(a.bins.map { it.def } union b.bins.map { it.def }).forEach { def ->
val newBin = UnivariateBinValue(
def,
value = (a[def.position]?.value ?: 0.0) + (b[def.position]?.value ?: 0.0),
standardDeviation = (a[def.position]?.standardDeviation
?: 0.0) + (b[def.position]?.standardDeviation ?: 0.0)
)
}
}
return UnivariateHistogramImpl(this, bins)
}
override fun multiply(a: UnivariateHistogram, k: Number): UnivariateHistogram {
TODO("Not yet implemented")
val bins = TreeMap<Double, UnivariateBin>().apply {
a.bins.forEach { bin ->
put(bin.position,
UnivariateBinValue(
bin.def,
value = bin.value * k.toDouble(),
standardDeviation = abs(bin.standardDeviation * k.toDouble())
)
)
}
}
return UnivariateHistogramImpl(this, bins)
}
override val zero: UnivariateHistogram = produce { }
override val zero: UnivariateHistogram = produce { }
public companion object {
/**
* Build and fill a [UnivariateHistogram]. Returns a read-only histogram.
*/
public fun uniform(
binSize: Double,
start: Double = 0.0
): UnivariateHistogramSpace = UnivariateHistogramSpace { value ->
val center = start + binSize * Math.floor((value - start) / binSize + 0.5)
UnivariateHistogramBinDefinition(center, binSize)
}
/**
* Create a histogram with custom cell borders
*/
public fun custom(borders: DoubleArray): UnivariateHistogramSpace {
val sorted = borders.sortedArray()
return UnivariateHistogramSpace { value ->
when {
value < sorted.first() -> UnivariateHistogramBinDefinition(
Double.NEGATIVE_INFINITY,
Double.MAX_VALUE
)
value > sorted.last() -> UnivariateHistogramBinDefinition(
Double.POSITIVE_INFINITY,
Double.MAX_VALUE
)
else -> {
val index = sorted.indices.first { value > sorted[it] }
val left = sorted[index]
val right = sorted[index + 1]
UnivariateHistogramBinDefinition((left + right) / 2, (right - left))
}
}
}
}
}
}