[WIP] Another histogram refactor

This commit is contained in:
Alexander Nozik 2022-03-22 22:17:20 +03:00
parent 39640498fc
commit 29369cd6d7
No known key found for this signature in database
GPG Key ID: F7FCF2DD25C71357
8 changed files with 159 additions and 133 deletions

View File

@ -9,16 +9,21 @@ import space.kscience.kmath.linear.Point
import space.kscience.kmath.misc.UnstableKMathAPI
@UnstableKMathAPI
public class UnivariateDomain(public val range: ClosedFloatingPointRange<Double>) : DoubleDomain {
public abstract class Domain1D<T : Comparable<T>>(public val range: ClosedRange<T>) : Domain<T> {
override val dimension: Int get() = 1
public operator fun contains(d: Double): Boolean = range.contains(d)
public operator fun contains(value: T): Boolean = range.contains(value)
override operator fun contains(point: Point<Double>): Boolean {
/**
 * Checks whether the one-dimensional [point] lies inside [range].
 *
 * @param point a point that must have exactly one coordinate.
 * @return `true` if the single coordinate of [point] is contained in [range].
 * @throws IllegalArgumentException if [point] does not have exactly one coordinate.
 */
override operator fun contains(point: Point<T>): Boolean {
    // The guard used to demand an empty point, which rejected every valid one-coordinate
    // point and let only empty points through to `point[0]`.
    require(point.size == 1) { "Expected a point with exactly one coordinate, but got ${point.size}" }
    return contains(point[0])
}
}
@UnstableKMathAPI
public class DoubleDomain1D(
@Suppress("CanBeParameter") public val doubleRange: ClosedFloatingPointRange<Double>,
) : Domain1D<Double>(doubleRange), DoubleDomain {
override fun getLowerBound(num: Int): Double {
require(num == 0)
return range.start
@ -31,3 +36,7 @@ public class UnivariateDomain(public val range: ClosedFloatingPointRange<Double>
override fun volume(): Double = range.endInclusive - range.start
}
/**
 * The midpoint of the domain: half the sum of the upper and lower bounds of its range.
 */
@UnstableKMathAPI
public val DoubleDomain1D.center: Double
    get() {
        val upper = range.endInclusive
        val lower = range.start
        return (upper + lower) / 2
    }

View File

@ -6,6 +6,7 @@
package space.kscience.kmath.histogram
import space.kscience.kmath.domains.HyperSquareDomain
import space.kscience.kmath.linear.Point
import space.kscience.kmath.misc.UnstableKMathAPI
import space.kscience.kmath.nd.*
import space.kscience.kmath.operations.DoubleField
@ -31,7 +32,7 @@ public class DoubleHistogramSpace(
public val dimension: Int get() = lower.size
override val shape: IntArray = IntArray(binNums.size) { binNums[it] + 2 }
override val histogramValueSpace: DoubleFieldND = DoubleField.ndAlgebra(*shape)
override val histogramValueAlgebra: DoubleFieldND = DoubleField.ndAlgebra(*shape)
private val binSize = DoubleBuffer(dimension) { (upper[it] - lower[it]) / binNums[it] }
@ -70,21 +71,20 @@ public class DoubleHistogramSpace(
}
@OptIn(UnstableKMathAPI::class)
public val Bin<Double>.domain: HyperSquareDomain
get() = (this as? DomainBin<Double>)?.domain as? HyperSquareDomain
?: error("Im a teapot. This is not my bin")
@OptIn(UnstableKMathAPI::class)
override fun produceBin(index: IntArray, value: Double): DomainBin<Double> {
/**
 * Wraps [value] into a [DomainBin] whose domain is the one [getDomain] returns for [index].
 */
override fun produceBin(index: IntArray, value: Double): DomainBin<Double, Double> =
    DomainBin(getDomain(index), value)
override fun produce(builder: HistogramBuilder<Double>.() -> Unit): IndexedHistogram<Double, Double> {
override fun produce(builder: HistogramBuilder<Double, Double>.() -> Unit): IndexedHistogram<Double, Double> {
val ndCounter = StructureND.auto(shape) { Counter.double() }
val hBuilder = HistogramBuilder<Double> { point, value ->
val index = getIndex(point)
ndCounter[index].add(value.toDouble())
val hBuilder = object : HistogramBuilder<Double, Double> {
override val defaultValue: Double get() = 1.0
override fun putValue(point: Point<out Double>, value: Double) {
val index = getIndex(point)
ndCounter[index].add(value)
}
}
hBuilder.apply(builder)
val values: BufferND<Double> = ndCounter.mapToBuffer { it.value }

View File

@ -13,14 +13,14 @@ import space.kscience.kmath.structures.asBuffer
/**
* The binned data element. Could be a histogram bin with a number of counts or an artificial construct.
*/
public interface Bin<in T : Any> : Domain<T> {
public interface Bin<in T : Any, out V> : Domain<T> {
/**
* The value of this bin.
*/
public val value: Number
public val value: V
}
public interface Histogram<in T : Any, out B : Bin<T>> {
public interface Histogram<in T : Any, out V, out B : Bin<T, V>> {
/**
* Find existing bin, corresponding to given coordinates
*/
@ -32,29 +32,38 @@ public interface Histogram<in T : Any, out B : Bin<T>> {
public val dimension: Int
public val bins: Iterable<B>
public companion object {
//A discoverability root
}
}
public fun interface HistogramBuilder<in T : Any> {
public interface HistogramBuilder<in T : Any, V : Any> {
/**
* Increment appropriate bin
* The default value increment for a bin
*/
public fun putValue(point: Point<out T>, value: Number)
public val defaultValue: V
/**
* Increment appropriate bin with given value
*/
public fun putValue(point: Point<out T>, value: V = defaultValue)
}
public fun <T : Any, B : Bin<T>> HistogramBuilder<T>.put(point: Point<out T>): Unit = putValue(point, 1.0)
/**
 * Puts [point] without an explicit value, so that `putValue` applies its default value argument.
 */
public fun <T : Any> HistogramBuilder<T, *>.put(point: Point<out T>): Unit = putValue(point)
public fun <T : Any> HistogramBuilder<T>.put(vararg point: T): Unit = put(point.asBuffer())
/**
 * Puts a point whose coordinates are passed as a vararg; the coordinates are wrapped with `asBuffer`
 * and forwarded to the point-based `put`.
 */
public fun <T : Any> HistogramBuilder<T, *>.put(vararg point: T): Unit = put(point.asBuffer())
public fun HistogramBuilder<Double>.put(vararg point: Number): Unit =
/**
 * Puts a point given by arbitrary [Number] coordinates, converting every coordinate with [Number.toDouble].
 */
public fun HistogramBuilder<Double, *>.put(vararg point: Number): Unit {
    // Fill the primitive array directly instead of going through an intermediate list.
    val coordinates = DoubleArray(point.size) { index -> point[index].toDouble() }
    put(DoubleBuffer(coordinates))
}
public fun HistogramBuilder<Double>.put(vararg point: Double): Unit = put(DoubleBuffer(point))
public fun <T : Any> HistogramBuilder<T>.fill(sequence: Iterable<Point<T>>): Unit = sequence.forEach { put(it) }
/**
 * Puts a point given by primitive [Double] coordinates, wrapping them in a [DoubleBuffer].
 */
public fun HistogramBuilder<Double, *>.put(vararg point: Double): Unit = put(DoubleBuffer(point))
/**
 * Puts every point of [sequence] into the builder, one `put` call per point.
 */
public fun <T : Any> HistogramBuilder<T, *>.fill(sequence: Iterable<Point<T>>) {
    for (point in sequence) put(point)
}
/**
* Pass a sequence builder into histogram
*/
public fun <T : Any> HistogramBuilder<T>.fill(block: suspend SequenceScope<Point<T>>.() -> Unit): Unit =
public fun <T : Any> HistogramBuilder<T, *>.fill(block: suspend SequenceScope<Point<T>>.() -> Unit) {
    // Build the lazy sequence first, then hand it to the Iterable-based overload.
    val points = sequence(block)
    fill(points.asIterable())
}

View File

@ -0,0 +1,56 @@
/*
* Copyright 2018-2021 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package space.kscience.kmath.histogram
import space.kscience.kmath.domains.Domain1D
import space.kscience.kmath.misc.UnstableKMathAPI
import space.kscience.kmath.operations.asSequence
import space.kscience.kmath.structures.Buffer
/**
 * A univariate bin based on a range.
 *
 * The [ClosedRange] members of the bin are delegated to the range of [domain].
 *
 * @param T the type of the axis coordinate.
 * @param V the type of the bin value.
 * @property domain the one-dimensional domain covered by this bin.
 * @property value The value of histogram including weighting
 */
@UnstableKMathAPI
public class Bin1D<T : Comparable<T>, out V>(
    public val domain: Domain1D<T>,
    override val value: V,
) : Bin<T, V>, ClosedRange<T> by domain.range {

    override val dimension: Int get() = 1

    /**
     * A [point] belongs to the bin only if it has exactly one coordinate and that coordinate
     * is contained in the delegated range.
     */
    override fun contains(point: Buffer<T>): Boolean = point.size == 1 && contains(point[0])
}
/**
 * A histogram over a single axis whose bins are [Bin1D].
 *
 * @param T the type of the axis coordinate.
 * @param V the type of the bin value.
 */
@OptIn(UnstableKMathAPI::class)
public interface Histogram1D<T : Comparable<T>, V> : Histogram<T, V, Bin1D<T, V>> {

    /**
     * Always `1`.
     */
    override val dimension: Int
        get() = 1

    /**
     * Finds the bin for the scalar [value]; the nullable result allows for no bin being found.
     */
    public operator fun get(value: T): Bin1D<T, V>?

    /**
     * Point-based lookup: forwards the first coordinate of [point] to the scalar [get].
     */
    override operator fun get(point: Buffer<T>): Bin1D<T, V>? {
        val coordinate = point[0]
        return get(coordinate)
    }
}
/**
 * A [HistogramBuilder] that additionally accepts a bare coordinate instead of a point.
 *
 * @param T the type of the axis coordinate.
 * @param V the type of the value put into a bin.
 */
@UnstableKMathAPI
public interface Histogram1DBuilder<in T : Any, V : Any> : HistogramBuilder<T, V> {
/**
 * Thread safe put operation.
 *
 * @param at the coordinate to put.
 * @param value the value to put at [at]; `defaultValue` of the builder when omitted.
 */
public fun putValue(at: T, value: V = defaultValue)
}
/**
 * Puts every element of [items] into the builder as a separate coordinate, without an explicit value.
 */
@UnstableKMathAPI
public fun Histogram1DBuilder<Double, *>.fill(items: Iterable<Double>) {
    for (item in items) putValue(item)
}
/**
 * Puts every element of [array] into the builder as a separate coordinate, without an explicit value.
 */
@UnstableKMathAPI
public fun Histogram1DBuilder<Double, *>.fill(array: DoubleArray) {
    for (item in array) putValue(item)
}
/**
 * Puts every element of [buffer] into the builder as a separate coordinate, without an explicit value.
 */
@UnstableKMathAPI
public fun Histogram1DBuilder<Double, *>.fill(buffer: Buffer<Double>) {
    buffer.asSequence().forEach { item -> putValue(item) }
}

View File

@ -18,24 +18,28 @@ import space.kscience.kmath.operations.invoke
/**
* A simple histogram bin based on domain
*/
public data class DomainBin<in T : Comparable<T>>(
public data class DomainBin<in T : Comparable<T>, out V>(
public val domain: Domain<T>,
override val value: Number,
) : Bin<T>, Domain<T> by domain
override val value: V,
) : Bin<T, V>, Domain<T> by domain
/**
* @param T the type of the argument space
* @param V the type of bin value
*/
public class IndexedHistogram<T : Comparable<T>, V : Any>(
public val histogramSpace: IndexedHistogramSpace<T, V>,
public val values: StructureND<V>,
) : Histogram<T, Bin<T>> {
) : Histogram<T, V, DomainBin<T, V>> {
override fun get(point: Point<T>): Bin<T>? {
override fun get(point: Point<T>): DomainBin<T, V>? {
val index = histogramSpace.getIndex(point) ?: return null
return histogramSpace.produceBin(index, values[index])
}
override val dimension: Int get() = histogramSpace.shape.size
override val bins: Iterable<Bin<T>>
override val bins: Iterable<DomainBin<T, V>>
get() = DefaultStrides(histogramSpace.shape).asSequence().map {
histogramSpace.produceBin(it, values[it])
}.asIterable()
@ -46,9 +50,8 @@ public class IndexedHistogram<T : Comparable<T>, V : Any>(
*/
public interface IndexedHistogramSpace<T : Comparable<T>, V : Any>
: Group<IndexedHistogram<T, V>>, ScaleOperations<IndexedHistogram<T, V>> {
//public val valueSpace: Space<V>
public val shape: Shape
public val histogramValueSpace: FieldND<V, *> //= NDAlgebra.space(valueSpace, Buffer.Companion::boxing, *shape),
public val histogramValueAlgebra: FieldND<V, *> //= NDAlgebra.space(valueSpace, Buffer.Companion::boxing, *shape),
/**
* Resolve index of the bin including given [point]
@ -60,19 +63,19 @@ public interface IndexedHistogramSpace<T : Comparable<T>, V : Any>
*/
public fun getDomain(index: IntArray): Domain<T>?
public fun produceBin(index: IntArray, value: V): Bin<T>
public fun produceBin(index: IntArray, value: V): DomainBin<T, V>
public fun produce(builder: HistogramBuilder<T>.() -> Unit): IndexedHistogram<T, V>
public fun produce(builder: HistogramBuilder<T, V>.() -> Unit): IndexedHistogram<T, V>
override fun add(left: IndexedHistogram<T, V>, right: IndexedHistogram<T, V>): IndexedHistogram<T, V> {
require(left.histogramSpace == this) { "Can't operate on a histogram produced by external space" }
require(right.histogramSpace == this) { "Can't operate on a histogram produced by external space" }
return IndexedHistogram(this, histogramValueSpace { left.values + right.values })
return IndexedHistogram(this, histogramValueAlgebra { left.values + right.values })
}
override fun scale(a: IndexedHistogram<T, V>, value: Double): IndexedHistogram<T, V> {
require(a.histogramSpace == this) { "Can't operate on a histogram produced by external space" }
return IndexedHistogram(this, histogramValueSpace { a.values * value })
return IndexedHistogram(this, histogramValueAlgebra { a.values * value })
}
override val zero: IndexedHistogram<T, V> get() = produce { }

View File

@ -0,0 +1,9 @@
/*
* Copyright 2018-2021 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package space.kscience.kmath.histogram
//class UniformDoubleHistogram1D: DoubleHistogram1D {
//}

View File

@ -1,59 +0,0 @@
/*
* Copyright 2018-2021 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package space.kscience.kmath.histogram
import space.kscience.kmath.domains.UnivariateDomain
import space.kscience.kmath.misc.UnstableKMathAPI
import space.kscience.kmath.operations.asSequence
import space.kscience.kmath.structures.Buffer
/**
 * The midpoint of the domain: half the sum of the upper and lower bounds of its range.
 */
@UnstableKMathAPI
public val UnivariateDomain.center: Double
    get() {
        val upper = range.endInclusive
        val lower = range.start
        return (upper + lower) / 2
    }
/**
 * A univariate bin based on a range.
 *
 * The [ClosedFloatingPointRange] members of the bin are delegated to the range of [domain].
 *
 * @property domain the domain covered by this bin.
 * @property value The value of histogram including weighting
 * @property standardDeviation Standard deviation of the bin value. Zero or negative if not applicable
 */
@UnstableKMathAPI
public class UnivariateBin(
    public val domain: UnivariateDomain,
    override val value: Double,
    public val standardDeviation: Double,
) : Bin<Double>, ClosedFloatingPointRange<Double> by domain.range {

    override val dimension: Int
        get() = 1

    /**
     * A [point] belongs to the bin only if it has exactly one coordinate and that coordinate
     * is contained in the delegated range.
     */
    override fun contains(point: Buffer<Double>): Boolean {
        if (point.size != 1) return false
        return contains(point[0])
    }
}
/**
 * A histogram over a single `Double` axis whose bins are [UnivariateBin].
 */
@OptIn(UnstableKMathAPI::class)
public interface UnivariateHistogram : Histogram<Double, UnivariateBin> {

    /**
     * Finds the bin for the scalar [value]; the nullable result allows for no bin being found.
     */
    public operator fun get(value: Double): UnivariateBin?

    /**
     * Point-based lookup: forwards the first coordinate of [point] to the scalar [get].
     */
    override operator fun get(point: Buffer<Double>): UnivariateBin? {
        val coordinate = point[0]
        return get(coordinate)
    }
}
/**
 * A [HistogramBuilder] over `Double` coordinates that also accepts a bare coordinate instead of a point.
 */
@UnstableKMathAPI
public interface UnivariateHistogramBuilder : HistogramBuilder<Double> {
/**
 * Thread safe put operation.
 *
 * @param at the coordinate to put.
 * @param value the value to put at [at]; `1.0` when omitted.
 */
public fun putValue(at: Double, value: Double = 1.0)
/**
 * Point-based overload redeclared from [HistogramBuilder].
 * NOTE(review): implementations presumably expect [point] to hold a single coordinate; confirm.
 */
override fun putValue(point: Buffer<Double>, value: Number)
}
/**
 * Puts every element of [items] into the builder as a separate coordinate, without an explicit value.
 */
@UnstableKMathAPI
public fun UnivariateHistogramBuilder.fill(items: Iterable<Double>) {
    for (item in items) putValue(item)
}
/**
 * Puts every element of [array] into the builder as a separate coordinate, without an explicit value.
 */
@UnstableKMathAPI
public fun UnivariateHistogramBuilder.fill(array: DoubleArray) {
    for (item in array) putValue(item)
}
/**
 * Puts every element of [buffer] into the builder as a separate coordinate, without an explicit value.
 */
@UnstableKMathAPI
public fun UnivariateHistogramBuilder.fill(buffer: Buffer<Double>) {
    buffer.asSequence().forEach { item -> putValue(item) }
}

View File

@ -5,7 +5,7 @@
package space.kscience.kmath.histogram
import space.kscience.kmath.domains.UnivariateDomain
import space.kscience.kmath.domains.DoubleDomain1D
import space.kscience.kmath.misc.UnstableKMathAPI
import space.kscience.kmath.operations.Group
import space.kscience.kmath.operations.ScaleOperations
@ -15,7 +15,7 @@ import kotlin.math.abs
import kotlin.math.floor
import kotlin.math.sqrt
private fun <B : ClosedFloatingPointRange<Double>> TreeMap<Double, B>.getBin(value: Double): B? {
private fun <B : ClosedRange<Double>> TreeMap<Double, B>.getBin(value: Double): B? {
// check ceiling entry and return it if it is what needed
val ceil = ceilingEntry(value)?.value
if (ceil != null && value in ceil) return ceil
@ -28,19 +28,18 @@ private fun <B : ClosedFloatingPointRange<Double>> TreeMap<Double, B>.getBin(val
@UnstableKMathAPI
public class TreeHistogram(
private val binMap: TreeMap<Double, out UnivariateBin>,
) : UnivariateHistogram {
override fun get(value: Double): UnivariateBin? = binMap.getBin(value)
override val dimension: Int get() = 1
override val bins: Collection<UnivariateBin> get() = binMap.values
private val binMap: TreeMap<Double, out Bin1D<Double, Double>>,
) : Histogram1D<Double, Double> {
override fun get(value: Double): Bin1D<Double, Double>? = binMap.getBin(value)
override val bins: Collection<Bin1D<Double, Double>> get() = binMap.values
}
@OptIn(UnstableKMathAPI::class)
@PublishedApi
internal class TreeHistogramBuilder(val binFactory: (Double) -> UnivariateDomain) : UnivariateHistogramBuilder {
internal class TreeHistogramBuilder(val binFactory: (Double) -> DoubleDomain1D) : Histogram1DBuilder<Double, Double> {
internal class BinCounter(val domain: UnivariateDomain, val counter: Counter<Double> = Counter.double()) :
ClosedFloatingPointRange<Double> by domain.range
internal class BinCounter(val domain: DoubleDomain1D, val counter: Counter<Double> = Counter.double()) :
ClosedRange<Double> by domain.range
private val bins: TreeMap<Double, BinCounter> = TreeMap()
@ -64,15 +63,15 @@ internal class TreeHistogramBuilder(val binFactory: (Double) -> UnivariateDomain
}
}
override fun putValue(point: Buffer<Double>, value: Number) {
/**
 * Point-based overload: unwraps the single coordinate of [point] and forwards it to the scalar `putValue`.
 *
 * @param point a point that must contain exactly one coordinate.
 * @param value the value to put at that coordinate.
 * @throws IllegalArgumentException if [point] does not contain exactly one coordinate.
 */
override fun putValue(point: Buffer<Double>, value: Double) {
    require(point.size == 1) { "Only points with single value could be used in univariate histogram" }
    // `value` is already a Double, so the conversion left over from the Number-based signature is dropped.
    putValue(point[0], value)
}
fun build(): TreeHistogram {
val map = bins.mapValuesTo(TreeMap<Double, UnivariateBin>()) { (_, binCounter) ->
val map = bins.mapValuesTo(TreeMap<Double, Bin1D<Double,Double>>()) { (_, binCounter) ->
val count = binCounter.counter.value
UnivariateBin(binCounter.domain, count, sqrt(count))
Bin1D(binCounter.domain, count, sqrt(count))
}
return TreeHistogram(map)
}
@ -83,23 +82,23 @@ internal class TreeHistogramBuilder(val binFactory: (Double) -> UnivariateDomain
*/
@UnstableKMathAPI
public class TreeHistogramSpace(
@PublishedApi internal val binFactory: (Double) -> UnivariateDomain,
) : Group<UnivariateHistogram>, ScaleOperations<UnivariateHistogram> {
@PublishedApi internal val binFactory: (Double) -> DoubleDomain1D,
) : Group<Histogram1D<Double,Double>>, ScaleOperations<Histogram1D<Double,Double>> {
public inline fun fill(block: UnivariateHistogramBuilder.() -> Unit): UnivariateHistogram =
public inline fun fill(block: Histogram1DBuilder<Double,Double>.() -> Unit): Histogram1D<Double,Double> =
TreeHistogramBuilder(binFactory).apply(block).build()
override fun add(
left: UnivariateHistogram,
right: UnivariateHistogram,
): UnivariateHistogram {
left: Histogram1D<Double,Double>,
right: Histogram1D<Double,Double>,
): Histogram1D<Double,Double> {
// require(a.context == this) { "Histogram $a does not belong to this context" }
// require(b.context == this) { "Histogram $b does not belong to this context" }
val bins = TreeMap<Double, UnivariateBin>().apply {
val bins = TreeMap<Double, Bin1D<Double,Double>>().apply {
(left.bins.map { it.domain } union right.bins.map { it.domain }).forEach { def ->
put(
def.center,
UnivariateBin(
Bin1D(
def,
value = (left[def.center]?.value ?: 0.0) + (right[def.center]?.value ?: 0.0),
standardDeviation = (left[def.center]?.standardDeviation
@ -111,12 +110,12 @@ public class TreeHistogramSpace(
return TreeHistogram(bins)
}
override fun scale(a: UnivariateHistogram, value: Double): UnivariateHistogram {
val bins = TreeMap<Double, UnivariateBin>().apply {
override fun scale(a: Histogram1D<Double,Double>, value: Double): Histogram1D<Double,Double> {
val bins = TreeMap<Double, Bin1D<Double,Double>>().apply {
a.bins.forEach { bin ->
put(
bin.domain.center,
UnivariateBin(
Bin1D(
bin.domain,
value = bin.value * value,
standardDeviation = abs(bin.standardDeviation * value)
@ -128,38 +127,38 @@ public class TreeHistogramSpace(
return TreeHistogram(bins)
}
override fun UnivariateHistogram.unaryMinus(): UnivariateHistogram = this * (-1)
override fun Histogram1D<Double,Double>.unaryMinus(): Histogram1D<Double,Double> = this * (-1)
override val zero: UnivariateHistogram by lazy { fill { } }
override val zero: Histogram1D<Double,Double> by lazy { fill { } }
public companion object {
/**
* Build and fill a [UnivariateHistogram]. Returns a read-only histogram.
* Build and fill a [Histogram1D] with uniform bins. Returns a read-only histogram.
*/
public inline fun uniform(
binSize: Double,
start: Double = 0.0,
builder: UnivariateHistogramBuilder.() -> Unit,
): UnivariateHistogram = uniform(binSize, start).fill(builder)
builder: Histogram1DBuilder<Double,Double>.() -> Unit,
): Histogram1D<Double,Double> = uniform(binSize, start).fill(builder)
/**
* Build and fill a histogram with custom borders. Returns a read-only histogram.
*/
public inline fun custom(
borders: DoubleArray,
builder: UnivariateHistogramBuilder.() -> Unit,
): UnivariateHistogram = custom(borders).fill(builder)
builder: Histogram1DBuilder<Double,Double>.() -> Unit,
): Histogram1D<Double,Double> = custom(borders).fill(builder)
/**
* Build and fill a [UnivariateHistogram]. Returns a read-only histogram.
* Build a [TreeHistogramSpace] with uniform bins of size [binSize]; fill it to obtain a read-only [Histogram1D].
*/
public fun uniform(
binSize: Double,
start: Double = 0.0,
): TreeHistogramSpace = TreeHistogramSpace { value ->
val center = start + binSize * floor((value - start) / binSize + 0.5)
UnivariateDomain((center - binSize / 2)..(center + binSize / 2))
DoubleDomain1D((center - binSize / 2)..(center + binSize / 2))
}
/**
@ -170,11 +169,11 @@ public class TreeHistogramSpace(
return TreeHistogramSpace { value ->
when {
value < sorted.first() -> UnivariateDomain(
value < sorted.first() -> DoubleDomain1D(
Double.NEGATIVE_INFINITY..sorted.first()
)
value > sorted.last() -> UnivariateDomain(
value > sorted.last() -> DoubleDomain1D(
sorted.last()..Double.POSITIVE_INFINITY
)
@ -182,7 +181,7 @@ public class TreeHistogramSpace(
val index = sorted.indices.first { value > sorted[it] }
val left = sorted[index]
val right = sorted[index + 1]
UnivariateDomain(left..right)
DoubleDomain1D(left..right)
}
}
}