Refactor tree histogram

This commit is contained in:
Alexander Nozik 2022-04-10 15:29:46 +03:00
parent 6247e79884
commit 1295a407c3
No known key found for this signature in database
GPG Key ID: F7FCF2DD25C71357
9 changed files with 212 additions and 216 deletions

View File

@ -6,7 +6,7 @@ kotlin.code.style=official
kotlin.jupyter.add.scanner=false
kotlin.mpp.stability.nowarn=true
kotlin.native.ignoreDisabledTargets=true
kotlin.incremental.js.ir=true
#kotlin.incremental.js.ir=true
org.gradle.configureondemand=true
org.gradle.jvmargs=-XX:MaxMetaspaceSize=1G

View File

@ -105,6 +105,16 @@ public interface Buffer<out T> {
*/
public val Buffer<*>.indices: IntRange get() = 0 until size
/**
 * Returns the element stored at index 0 of this buffer.
 *
 * @throws IllegalArgumentException if the buffer has no elements.
 */
public fun <T> Buffer<T>.first(): T {
    // Fail with a readable message instead of whatever an out-of-range access would produce.
    require(size > 0) { "Can't get the first element of empty buffer" }
    return this[0]
}
/**
 * Returns the element stored at the highest index (`size - 1`) of this buffer.
 *
 * @throws IllegalArgumentException if the buffer has no elements.
 */
public fun <T> Buffer<T>.last(): T {
    // Fail with a readable message instead of whatever an out-of-range access would produce.
    require(size > 0) { "Can't get the last element of empty buffer" }
    val lastIndex = size - 1
    return this[lastIndex]
}
/**
* Immutable wrapper for [MutableBuffer].
*

View File

@ -43,6 +43,7 @@ public interface Histogram1DBuilder<in T : Any, V : Any> : HistogramBuilder<T, V
public fun putValue(at: T, value: V = defaultValue)
/**
 * Puts [value] at a one-dimensional [point] by delegating to the single-coordinate `putValue(at, value)`.
 *
 * @throws IllegalArgumentException if [point] does not contain exactly one coordinate.
 */
override fun putValue(point: Point<out T>, value: V) {
    require(point.size == 1) { "Only points with single value could be used in Histogram1D" }
    val coordinate = point[0]
    putValue(coordinate, value)
}
}

View File

@ -43,7 +43,7 @@ public class HistogramND<T : Comparable<T>, D : Domain<T>, V : Any>(
public interface HistogramGroupND<T : Comparable<T>, D : Domain<T>, V : Any> :
Group<HistogramND<T, D, V>>, ScaleOperations<HistogramND<T, D, V>> {
public val shape: Shape
public val valueAlgebra: FieldOpsND<V, *> //= NDAlgebra.space(valueSpace, Buffer.Companion::boxing, *shape),
public val valueAlgebraND: FieldOpsND<V, *> //= NDAlgebra.space(valueSpace, Buffer.Companion::boxing, *shape),
/**
* Resolve index of the bin including given [point]. Return null if point is outside histogram area
@ -63,12 +63,12 @@ public interface HistogramGroupND<T : Comparable<T>, D : Domain<T>, V : Any> :
require(left.group == this && right.group == this) {
"A histogram belonging to a different group cannot be operated."
}
return HistogramND(this, valueAlgebra { left.values + right.values })
return HistogramND(this, valueAlgebraND { left.values + right.values })
}
override fun scale(a: HistogramND<T, D, V>, value: Double): HistogramND<T, D, V> {
require(a.group == this) { "A histogram belonging to a different group cannot be operated." }
return HistogramND(this, valueAlgebra { a.values * value })
return HistogramND(this, valueAlgebraND { a.values * value })
}
override val zero: HistogramND<T, D, V> get() = produce { }

View File

@ -126,11 +126,11 @@ public class UniformHistogram1DGroup<V : Any, A>(
}
public fun <V : Any, A> Histogram.Companion.uniform1D(
algebra: A,
valueAlgebra: A,
binSize: Double,
startPoint: Double = 0.0,
): UniformHistogram1DGroup<V, A> where A : Ring<V>, A : ScaleOperations<V> =
UniformHistogram1DGroup(algebra, binSize, startPoint)
UniformHistogram1DGroup(valueAlgebra, binSize, startPoint)
@UnstableKMathAPI
public fun <V : Any> UniformHistogram1DGroup<V, *>.produce(

View File

@ -24,7 +24,7 @@ public typealias HyperSquareBin<V> = DomainBin<Double, HyperSquareDomain, V>
* @param bufferFactory is an optional parameter used to optimize buffer production.
*/
public class UniformHistogramGroupND<V : Any, A : Field<V>>(
override val valueAlgebra: FieldOpsND<V, A>,
override val valueAlgebraND: FieldOpsND<V, A>,
private val lower: Buffer<Double>,
private val upper: Buffer<Double>,
private val binNums: IntArray = IntArray(lower.size) { 20 },
@ -84,11 +84,11 @@ public class UniformHistogramGroupND<V : Any, A : Field<V>>(
override fun produce(builder: HistogramBuilder<Double, V>.() -> Unit): HistogramND<Double, HyperSquareDomain, V> {
val ndCounter = StructureND.buffered(shape) { Counter.of(valueAlgebra.elementAlgebra) }
val ndCounter = StructureND.buffered(shape) { Counter.of(valueAlgebraND.elementAlgebra) }
val hBuilder = object : HistogramBuilder<Double, V> {
override val defaultValue: V get() = valueAlgebra.elementAlgebra.one
override val defaultValue: V get() = valueAlgebraND.elementAlgebra.one
override fun putValue(point: Point<out Double>, value: V) = with(valueAlgebra.elementAlgebra) {
override fun putValue(point: Point<out Double>, value: V) = with(valueAlgebraND.elementAlgebra) {
val index = getIndexOrNull(point)
ndCounter[index].add(value)
}
@ -112,11 +112,11 @@ public class UniformHistogramGroupND<V : Any, A : Field<V>>(
*```
*/
public fun <V : Any, A : Field<V>> Histogram.Companion.uniformNDFromRanges(
valueAlgebra: FieldOpsND<V, A>,
valueAlgebraND: FieldOpsND<V, A>,
vararg ranges: ClosedFloatingPointRange<Double>,
bufferFactory: BufferFactory<V> = Buffer.Companion::boxing,
): UniformHistogramGroupND<V, A> = UniformHistogramGroupND(
valueAlgebra,
valueAlgebraND,
ranges.map(ClosedFloatingPointRange<Double>::start).asBuffer(),
ranges.map(ClosedFloatingPointRange<Double>::endInclusive).asBuffer(),
bufferFactory = bufferFactory
@ -138,11 +138,11 @@ public fun Histogram.Companion.uniformDoubleNDFromRanges(
*```
*/
public fun <V : Any, A : Field<V>> Histogram.Companion.uniformNDFromRanges(
valueAlgebra: FieldOpsND<V, A>,
valueAlgebraND: FieldOpsND<V, A>,
vararg ranges: Pair<ClosedFloatingPointRange<Double>, Int>,
bufferFactory: BufferFactory<V> = Buffer.Companion::boxing,
): UniformHistogramGroupND<V, A> = UniformHistogramGroupND(
valueAlgebra,
valueAlgebraND,
ListBuffer(
ranges
.map(Pair<ClosedFloatingPointRange<Double>, Int>::first)

View File

@ -0,0 +1,179 @@
/*
* Copyright 2018-2021 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
@file:OptIn(UnstableKMathAPI::class)
package space.kscience.kmath.histogram
import space.kscience.kmath.domains.DoubleDomain1D
import space.kscience.kmath.domains.center
import space.kscience.kmath.misc.UnstableKMathAPI
import space.kscience.kmath.misc.sorted
import space.kscience.kmath.operations.Group
import space.kscience.kmath.operations.Ring
import space.kscience.kmath.operations.ScaleOperations
import space.kscience.kmath.structures.Buffer
import space.kscience.kmath.structures.first
import space.kscience.kmath.structures.indices
import space.kscience.kmath.structures.last
import java.util.*
/**
 * Finds the bin that contains [value].
 *
 * Only two candidates are inspected: the entry with the smallest key that is greater than or equal to
 * [value] (checked first), and the entry with the largest key that is less than or equal to [value].
 *
 * @return the first of those two candidates whose range contains [value], or `null` if neither does.
 */
private fun <B : ClosedRange<Double>> TreeMap<Double, B>.getBin(value: Double): B? =
    ceilingEntry(value)?.value?.takeIf { value in it }
        ?: floorEntry(value)?.value?.takeIf { value in it }
//public data class ValueAndError(val value: Double, val error: Double)
//
//public typealias WeightedBin1D = Bin1D<Double, ValueAndError>
/**
 * A read-only univariate histogram whose bins are stored in a [TreeMap].
 *
 * @param V the type of the value held by each bin.
 * @param binMap the sorted map holding the bins; the producers in this file key it by the center of the bin domain.
 */
public class TreeHistogram<V : Any>(
    private val binMap: TreeMap<Double, Bin1D<Double, V>>,
) : Histogram1D<Double, V> {

    /** All bins of this histogram, in the iteration order of the backing map values. */
    override val bins: Collection<Bin1D<Double, V>>
        get() = binMap.values

    /** Returns the bin containing [value], or `null` when no stored bin contains it. */
    override fun get(value: Double): Bin1D<Double, V>? {
        return binMap.getBin(value)
    }
}
/**
 * A group of univariate histograms with variable bin borders, each backed by a tree map keyed by bin center.
 *
 * Bins are created on demand: when a value falls into no existing bin, [binFactory] is asked for the
 * domain of the bin that should hold that value.
 *
 * @param V the type of the bin values.
 * @param A the algebra of bin values; it is both a [Ring] and [ScaleOperations] over [V].
 * @property valueAlgebra the algebra used to accumulate, add and scale bin values.
 * @property binFactory produces the bin domain for a value that has no bin yet.
 */
public class TreeHistogramGroup<V : Any, A>(
    public val valueAlgebra: A,
    @PublishedApi internal val binFactory: (Double) -> DoubleDomain1D,
) : Group<TreeHistogram<V>>, ScaleOperations<TreeHistogram<V>> where A : Ring<V>, A : ScaleOperations<V> {

    /**
     * A bin under construction: its [domain] plus the [counter] accumulating its value.
     * It is a [ClosedRange] over the domain range so that it can be located by the bin lookup.
     */
    internal inner class DomainCounter(val domain: DoubleDomain1D, val counter: Counter<V> = Counter.of(valueAlgebra)) :
        ClosedRange<Double> by domain.range

    /**
     * Mutable builder that accumulates values into lazily created bins.
     */
    @PublishedApi
    internal inner class TreeHistogramBuilder : Histogram1DBuilder<Double, V> {
        override val defaultValue: V get() = valueAlgebra.one

        // Bins created so far, keyed by the center of their domain. Guarded by the builder monitor.
        private val bins: TreeMap<Double, DomainCounter> = TreeMap()

        /**
         * Creates and registers the bin for [value]. Must be called while holding the builder monitor.
         */
        private fun createBin(value: Double): DomainCounter {
            val binDefinition: DoubleDomain1D = binFactory(value)
            val newBin = DomainCounter(binDefinition)
            bins[binDefinition.center] = newBin
            return newBin
        }

        /**
         * Adds [value] to the bin containing [at], creating that bin first if it does not exist.
         *
         * The lookup and the creation happen under a single lock. Otherwise two threads could both miss
         * the bin and each register its own counter (losing counts), and the [TreeMap] could be read
         * while another thread is restructuring it.
         */
        override fun putValue(at: Double, value: V) {
            val bin = synchronized(this) { bins.getBin(at) ?: createBin(at) }
            // NOTE(review): accumulation stays outside the lock, as before; this assumes Counter.add is
            // itself safe for concurrent use - confirm against the Counter implementation.
            bin.counter.add(value)
        }

        /**
         * Produces an immutable [TreeHistogram] from the bins accumulated so far.
         */
        fun build(): TreeHistogram<V> {
            // Snapshot under the same lock that guards bin creation.
            val map = synchronized(this) {
                bins.mapValuesTo(TreeMap<Double, Bin1D<Double, V>>()) { (_, binCounter) ->
                    Bin1D(binCounter.domain, binCounter.counter.value)
                }
            }
            return TreeHistogram(map)
        }
    }

    /**
     * Builds a histogram by running [block] against a fresh builder.
     */
    public inline fun produce(block: Histogram1DBuilder<Double, V>.() -> Unit): TreeHistogram<V> =
        TreeHistogramBuilder().apply(block).build()

    /**
     * Adds two histograms bin by bin. The result has one bin per distinct domain found in either operand;
     * a bin missing from one operand contributes the algebra's zero.
     */
    override fun add(
        left: TreeHistogram<V>,
        right: TreeHistogram<V>,
    ): TreeHistogram<V> {
        val bins = TreeMap<Double, Bin1D<Double, V>>().apply {
            (left.bins.map { it.domain } union right.bins.map { it.domain }).forEach { def ->
                put(
                    def.center,
                    Bin1D(
                        def,
                        with(valueAlgebra) {
                            // Each operand is queried by the center of the domain being summed.
                            (left[def.center]?.binValue ?: zero) + (right[def.center]?.binValue ?: zero)
                        }
                    )
                )
            }
        }
        return TreeHistogram(bins)
    }

    /**
     * Returns a histogram with the same bin domains as [a] and every bin value scaled by [value].
     */
    override fun scale(a: TreeHistogram<V>, value: Double): TreeHistogram<V> {
        val bins = TreeMap<Double, Bin1D<Double, V>>().apply {
            a.bins.forEach { bin ->
                put(
                    bin.domain.center,
                    Bin1D(bin.domain, valueAlgebra.scale(bin.binValue, value))
                )
            }
        }
        return TreeHistogram(bins)
    }

    override fun TreeHistogram<V>.unaryMinus(): TreeHistogram<V> = this * (-1)

    /** The empty histogram: produced by a builder that received no values. */
    override val zero: TreeHistogram<V> = produce { }
}
///**
// * Build and fill a histogram with custom borders. Returns a read-only histogram.
// */
//public inline fun Histogram.custom(
// borders: DoubleArray,
// builder: Histogram1DBuilder<Double, Double>.() -> Unit,
//): TreeHistogram = custom(borders).fill(builder)
//
//
///**
// * Build and fill a [DoubleHistogram1D]. Returns a read-only histogram.
// */
//public fun uniform(
// binSize: Double,
// start: Double = 0.0,
//): TreeHistogramSpace = TreeHistogramSpace { value ->
// val center = start + binSize * floor((value - start) / binSize + 0.5)
// DoubleDomain1D((center - binSize / 2)..(center + binSize / 2))
//}
/**
 * Creates a [TreeHistogramGroup] whose bins are delimited by the given [borders].
 *
 * The borders are sorted first. A value is then assigned to:
 * - the bin from negative infinity to the smallest border, if it is less than or equal to that border;
 * - the bin from the largest border to positive infinity, if it is greater than that border;
 * - otherwise, the bin between the two neighbouring borders where the upper one is the first border
 *   that is greater than or equal to the value.
 *
 * @param valueAlgebra the algebra used for bin values.
 * @param borders the bin borders, in any order.
 */
public fun <V : Any, A> Histogram.Companion.custom1D(
    valueAlgebra: A,
    borders: Buffer<Double>,
): TreeHistogramGroup<V, A> where A : Ring<V>, A : ScaleOperations<V> {
    val sortedBorders = borders.sorted()

    // Resolves the domain of the bin that has to hold the given value.
    fun domainFor(value: Double): DoubleDomain1D {
        if (value <= sortedBorders.first()) {
            return DoubleDomain1D(Double.NEGATIVE_INFINITY..sortedBorders.first())
        }
        if (value > sortedBorders.last()) {
            return DoubleDomain1D(sortedBorders.last()..Double.POSITIVE_INFINITY)
        }
        // Here the value is strictly above the first border, so the upper index is at least 1.
        val upperIndex = sortedBorders.indices.first { value <= sortedBorders[it] }
        val lowerBorder = sortedBorders[upperIndex - 1]
        val upperBorder = sortedBorders[upperIndex]
        return DoubleDomain1D(lowerBorder..upperBorder)
    }

    return TreeHistogramGroup(valueAlgebra) { value -> domainFor(value) }
}

View File

@ -1,199 +0,0 @@
/*
* Copyright 2018-2021 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
@file:OptIn(UnstableKMathAPI::class)
package space.kscience.kmath.histogram
import space.kscience.kmath.domains.DoubleDomain1D
import space.kscience.kmath.domains.center
import space.kscience.kmath.misc.UnstableKMathAPI
import space.kscience.kmath.operations.Group
import space.kscience.kmath.operations.ScaleOperations
import space.kscience.kmath.structures.Buffer
import java.util.*
import kotlin.math.abs
import kotlin.math.floor
import kotlin.math.sqrt
/**
 * Finds the bin that contains [value].
 *
 * Only two candidates are inspected: the entry with the smallest key that is greater than or equal to
 * [value] (checked first), and the entry with the largest key that is less than or equal to [value].
 *
 * @return the first of those two candidates whose range contains [value], or `null` if neither does.
 */
private fun <B : ClosedRange<Double>> TreeMap<Double, B>.getBin(value: Double): B? =
    ceilingEntry(value)?.value?.takeIf { value in it }
        ?: floorEntry(value)?.value?.takeIf { value in it }
/**
 * A bin content made of a [value] and its associated [error].
 */
public data class ValueAndError(val value: Double, val error: Double)
/**
 * A one-dimensional bin over [Double] coordinates whose content is a [ValueAndError].
 */
public typealias WeightedBin1D = Bin1D<Double, ValueAndError>
/**
 * A read-only univariate histogram of [WeightedBin1D] bins stored in a [TreeMap].
 *
 * @param binMap the sorted map holding the bins; the producers in this file key it by the center of the bin domain.
 */
public class TreeHistogram(
    private val binMap: TreeMap<Double, WeightedBin1D>,
) : Histogram1D<Double, ValueAndError> {

    /** All bins of this histogram, in the iteration order of the backing map values. */
    override val bins: Collection<WeightedBin1D>
        get() = binMap.values

    /** Returns the bin containing [value], or `null` when no stored bin contains it. */
    override fun get(value: Double): WeightedBin1D? {
        return binMap.getBin(value)
    }
}
/**
 * Mutable builder that accumulates [Double] weights into lazily created bins.
 *
 * @property binFactory produces the bin domain for a value that has no bin yet.
 */
@PublishedApi
internal class TreeHistogramBuilder(val binFactory: (Double) -> DoubleDomain1D) : Histogram1DBuilder<Double, Double> {

    override val defaultValue: Double get() = 1.0

    /**
     * A bin under construction: its [domain] plus the [counter] accumulating its weight.
     * It is a [ClosedRange] over the domain range so that it can be located by the bin lookup.
     */
    internal class BinCounter(val domain: DoubleDomain1D, val counter: Counter<Double> = Counter.ofDouble()) :
        ClosedRange<Double> by domain.range

    // Bins created so far, keyed by the center of their domain. Guarded by the builder monitor.
    private val bins: TreeMap<Double, BinCounter> = TreeMap()

    /** Returns the existing bin containing [value], or `null` if there is none yet. */
    fun get(value: Double): BinCounter? = synchronized(this) { bins.getBin(value) }

    /** Creates the bin for [value] using [binFactory] and registers it under the center of its domain. */
    fun createBin(value: Double): BinCounter {
        val binDefinition: DoubleDomain1D = binFactory(value)
        val newBin = BinCounter(binDefinition)
        synchronized(this) {
            bins[binDefinition.center] = newBin
        }
        return newBin
    }

    /**
     * Adds [value] to the bin containing [at], creating that bin first if it does not exist.
     *
     * The lookup and the creation happen under a single lock (the monitor is reentrant, so the nested
     * locking in [get] and [createBin] is harmless). Otherwise two threads could both miss the bin and
     * each register its own counter (losing counts), and the [TreeMap] could be read while another
     * thread is restructuring it.
     */
    override fun putValue(at: Double, value: Double) {
        val bin = synchronized(this) { get(at) ?: createBin(at) }
        // NOTE(review): accumulation stays outside the lock, as before; this assumes Counter.add is
        // itself safe for concurrent use - confirm against the Counter implementation.
        bin.counter.add(value)
    }

    /**
     * Puts [value] at a one-dimensional [point].
     *
     * @throws IllegalArgumentException if [point] does not contain exactly one coordinate.
     */
    override fun putValue(point: Buffer<Double>, value: Double) {
        require(point.size == 1) { "Only points with single value could be used in univariate histogram" }
        putValue(point[0], value)
    }

    /**
     * Produces an immutable [TreeHistogram]; each bin gets its accumulated count as the value and the
     * square root of that count as the error.
     */
    fun build(): TreeHistogram {
        // Snapshot under the same lock that guards bin creation.
        val map = synchronized(this) {
            bins.mapValuesTo(TreeMap<Double, WeightedBin1D>()) { (_, binCounter) ->
                val count: Double = binCounter.counter.value
                WeightedBin1D(binCounter.domain, ValueAndError(count, sqrt(count)))
            }
        }
        return TreeHistogram(map)
    }
}
/**
 * A space for univariate histograms with variable bin borders based on a tree map.
 *
 * @property binFactory produces the bin domain for a value that has no bin yet.
 */
public class TreeHistogramSpace(
    @PublishedApi internal val binFactory: (Double) -> DoubleDomain1D,
) : Group<TreeHistogram>, ScaleOperations<TreeHistogram> {

    /**
     * Builds a histogram by running [block] against a fresh builder that uses [binFactory].
     */
    public inline fun fill(block: Histogram1DBuilder<Double, Double>.() -> Unit): TreeHistogram =
        TreeHistogramBuilder(binFactory).apply(block).build()

    /**
     * Adds two histograms bin by bin. The result has one bin per distinct domain found in either operand;
     * a bin missing from one operand contributes `0.0` for both value and error. Values and errors are
     * each summed directly.
     */
    override fun add(
        left: TreeHistogram,
        right: TreeHistogram,
    ): TreeHistogram {
//        require(a.context == this) { "Histogram $a does not belong to this context" }
//        require(b.context == this) { "Histogram $b does not belong to this context" }
        val bins = TreeMap<Double, WeightedBin1D>().apply {
            (left.bins.map { it.domain } union right.bins.map { it.domain }).forEach { def ->
                put(
                    def.center,
                    WeightedBin1D(
                        def,
                        ValueAndError(
                            (left[def.center]?.binValue?.value ?: 0.0) + (right[def.center]?.binValue?.value ?: 0.0),
                            (left[def.center]?.binValue?.error ?: 0.0) + (right[def.center]?.binValue?.error ?: 0.0)
                        )
                    )
                )
            }
        }
        return TreeHistogram(bins)
    }

    /**
     * Returns a histogram with the same bin domains as [a]; every value is multiplied by [value] and
     * every error by the same factor, taken as an absolute value.
     */
    override fun scale(a: TreeHistogram, value: Double): TreeHistogram {
        val bins = TreeMap<Double, WeightedBin1D>().apply {
            a.bins.forEach { bin ->
                put(
                    bin.domain.center,
                    WeightedBin1D(
                        bin.domain,
                        ValueAndError(
                            bin.binValue.value * value,
                            abs(bin.binValue.error * value)
                        )
                    )
                )
            }
        }
        return TreeHistogram(bins)
    }

    override fun TreeHistogram.unaryMinus(): TreeHistogram = this * (-1)

    /** The empty histogram, created on first access. */
    override val zero: TreeHistogram by lazy { fill { } }

    public companion object {
        /**
         * Build and fill a [TreeHistogram] with uniform bins. Returns a read-only histogram.
         */
        public inline fun uniform(
            binSize: Double,
            start: Double = 0.0,
            builder: Histogram1DBuilder<Double, Double>.() -> Unit,
        ): TreeHistogram = uniform(binSize, start).fill(builder)

        /**
         * Build and fill a histogram with custom borders. Returns a read-only histogram.
         */
        public inline fun custom(
            borders: DoubleArray,
            builder: Histogram1DBuilder<Double, Double>.() -> Unit,
        ): TreeHistogram = custom(borders).fill(builder)

        /**
         * Create a histogram space with uniform bins of width [binSize]; bin centers are located at
         * [start] plus an integer number of bin sizes.
         */
        public fun uniform(
            binSize: Double,
            start: Double = 0.0,
        ): TreeHistogramSpace = TreeHistogramSpace { value ->
            val center = start + binSize * floor((value - start) / binSize + 0.5)
            DoubleDomain1D((center - binSize / 2)..(center + binSize / 2))
        }

        /**
         * Create a histogram space with custom cell borders.
         *
         * The borders are sorted first. A value is then assigned to:
         * - the bin from negative infinity to the smallest border, if it is less than or equal to that border;
         * - the bin from the largest border to positive infinity, if it is greater than that border;
         * - otherwise, the bin between the two neighbouring borders where the upper one is the first
         *   border that is greater than or equal to the value.
         */
        public fun custom(borders: DoubleArray): TreeHistogramSpace {
            val sorted = borders.sortedArray()

            return TreeHistogramSpace { value ->
                when {
                    // A value equal to the first border belongs here; it used to fall through to the
                    // search below, which then found no match and threw.
                    value <= sorted.first() -> DoubleDomain1D(
                        Double.NEGATIVE_INFINITY..sorted.first()
                    )

                    value > sorted.last() -> DoubleDomain1D(
                        sorted.last()..Double.POSITIVE_INFINITY
                    )

                    else -> {
                        // The upper border is the first one not below the value. The previous predicate
                        // (value > border) matched index 0 for every in-range value, so all of them
                        // ended up in the first bin.
                        val index = sorted.indices.first { value <= sorted[it] }
                        // The value is strictly above the first border here, hence index >= 1.
                        val left = sorted[index - 1]
                        val right = sorted[index]
                        DoubleDomain1D(left..right)
                    }
                }
            }
        }
    }
}

View File

@ -6,19 +6,24 @@
package space.kscience.kmath.histogram
import org.junit.jupiter.api.Test
import space.kscience.kmath.operations.DoubleField
import space.kscience.kmath.real.step
import kotlin.random.Random
import kotlin.test.assertEquals
import kotlin.test.assertTrue
class TreeHistogramTest {
@Test
fun normalFill() {
val histogram = TreeHistogramSpace.uniform(0.1) {
val random = Random(123)
val histogram = Histogram.custom1D(DoubleField, 0.0..1.0 step 0.1).produce {
repeat(100_000) {
putValue(Random.nextDouble())
putValue(random.nextDouble())
}
}
assertTrue { histogram.bins.count() > 10 }
assertTrue { histogram.bins.count() > 8}
assertEquals(100_000, histogram.bins.sumOf { it.binValue }.toInt())
}
}