Refactor multivariate histograms

This commit is contained in:
Alexander Nozik 2022-04-10 13:41:41 +03:00
parent 27a252b637
commit 6247e79884
No known key found for this signature in database
GPG Key ID: F7FCF2DD25C71357
12 changed files with 275 additions and 239 deletions

View File

@ -49,6 +49,7 @@
- Default Buffer and ND algebras are now Ops and lack neutral elements (0, 1) as well as algebra-level shapes.
- Tensor algebra takes read-only structures as input and inherits AlgebraND
- `UnivariateDistribution` renamed to `Distribution1D`
- Rework of histograms.
### Deprecated
- Specialized `DoubleBufferAlgebra`

View File

@ -6,6 +6,8 @@ kotlin.code.style=official
kotlin.jupyter.add.scanner=false
kotlin.mpp.stability.nowarn=true
kotlin.native.ignoreDisabledTargets=true
kotlin.incremental.js.ir=true
org.gradle.configureondemand=true
org.gradle.jvmargs=-XX:MaxMetaspaceSize=1G
org.gradle.parallel=true

View File

@ -194,7 +194,7 @@ public interface RingOpsND<T, out A : RingOps<T>> : RingOps<StructureND<T>>, Gro
override fun multiply(left: StructureND<T>, right: StructureND<T>): StructureND<T> =
zip(left, right) { aValue, bValue -> multiply(aValue, bValue) }
//TODO move to extensions after KEEP-176
//TODO move to extensions with context receivers
/**
* Multiplies an ND structure by an element of it.

View File

@ -32,18 +32,23 @@ public open class BufferND<out T>(
/**
* Transform structure to a new structure using provided [BufferFactory] and optimizing if argument is [BufferND]
*/
public inline fun <T, reified R : Any> StructureND<T>.mapToBuffer(
factory: BufferFactory<R> = Buffer.Companion::auto,
public inline fun <T, R : Any> StructureND<T>.mapToBuffer(
factory: BufferFactory<R>,
crossinline transform: (T) -> R,
): BufferND<R> {
return if (this is BufferND<T>)
BufferND(this.indices, factory.invoke(indices.linearSize) { transform(buffer[it]) })
else {
val strides = DefaultStrides(shape)
BufferND(strides, factory.invoke(strides.linearSize) { transform(get(strides.index(it))) })
}
): BufferND<R> = if (this is BufferND<T>)
BufferND(this.indices, factory.invoke(indices.linearSize) { transform(buffer[it]) })
else {
val strides = DefaultStrides(shape)
BufferND(strides, factory.invoke(strides.linearSize) { transform(get(strides.index(it))) })
}
/**
* Transform structure to a new structure using inferred [BufferFactory]
*/
public inline fun <T, reified R : Any> StructureND<T>.mapToBuffer(
crossinline transform: (T) -> R,
): BufferND<R> = mapToBuffer(Buffer.Companion::auto, transform)
/**
* Represents [MutableStructureND] over [MutableBuffer].
*

View File

@ -18,7 +18,8 @@ public interface Counter<T : Any> {
public val value: T
public companion object {
public fun double(): ObjectCounter<Double> = ObjectCounter(DoubleField)
public fun ofDouble(): ObjectCounter<Double> = ObjectCounter(DoubleField)
public fun <T: Any> of(group: Group<T>): ObjectCounter<T> = ObjectCounter(group)
}
}

View File

@ -1,140 +0,0 @@
/*
* Copyright 2018-2021 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package space.kscience.kmath.histogram
import space.kscience.kmath.domains.HyperSquareDomain
import space.kscience.kmath.linear.Point
import space.kscience.kmath.misc.UnstableKMathAPI
import space.kscience.kmath.nd.*
import space.kscience.kmath.operations.DoubleField
import space.kscience.kmath.structures.*
import kotlin.math.floor
/**
* Multivariate histogram space for hyper-square real-field bins.
*/
public class DoubleHistogramGroup(
private val lower: Buffer<Double>,
private val upper: Buffer<Double>,
private val binNums: IntArray = IntArray(lower.size) { 20 },
) : IndexedHistogramGroup<Double, Double> {
init {
// argument checks
require(lower.size == upper.size) { "Dimension mismatch in histogram lower and upper limits." }
require(lower.size == binNums.size) { "Dimension mismatch in bin count." }
require(!lower.indices.any { upper[it] - lower[it] < 0 }) { "Range for one of axis is not strictly positive" }
}
public val dimension: Int get() = lower.size
override val shape: IntArray = IntArray(binNums.size) { binNums[it] + 2 }
override val histogramValueAlgebra: DoubleFieldND = DoubleField.ndAlgebra(*shape)
private val binSize = DoubleBuffer(dimension) { (upper[it] - lower[it]) / binNums[it] }
/**
* Get internal [StructureND] bin index for given axis
*/
private fun getIndex(axis: Int, value: Double): Int = when {
value >= upper[axis] -> binNums[axis] + 1 // overflow
value < lower[axis] -> 0 // underflow
else -> floor((value - lower[axis]) / binSize[axis]).toInt()
}
override fun getIndexOrNull(point: Buffer<Double>): IntArray = IntArray(dimension) {
getIndex(it, point[it])
}
@OptIn(UnstableKMathAPI::class)
override fun getDomain(index: IntArray): HyperSquareDomain {
val lowerBoundary = index.mapIndexed { axis, i ->
when (i) {
0 -> Double.NEGATIVE_INFINITY
shape[axis] - 1 -> upper[axis]
else -> lower[axis] + (i.toDouble()) * binSize[axis]
}
}.asBuffer()
val upperBoundary = index.mapIndexed { axis, i ->
when (i) {
0 -> lower[axis]
shape[axis] - 1 -> Double.POSITIVE_INFINITY
else -> lower[axis] + (i.toDouble() + 1) * binSize[axis]
}
}.asBuffer()
return HyperSquareDomain(lowerBoundary, upperBoundary)
}
@OptIn(UnstableKMathAPI::class)
override fun produceBin(index: IntArray, value: Double): DomainBin<Double, Double> {
val domain = getDomain(index)
return DomainBin(domain, value)
}
override fun produce(builder: HistogramBuilder<Double, Double>.() -> Unit): IndexedHistogram<Double, Double> {
val ndCounter = StructureND.auto(shape) { Counter.double() }
val hBuilder = object : HistogramBuilder<Double, Double> {
override val defaultValue: Double get() = 1.0
override fun putValue(point: Point<out Double>, value: Double) {
val index = getIndexOrNull(point)
ndCounter[index].add(value)
}
}
hBuilder.apply(builder)
val values: BufferND<Double> = ndCounter.mapToBuffer { it.value }
return IndexedHistogram(this, values)
}
override fun IndexedHistogram<Double, Double>.unaryMinus(): IndexedHistogram<Double, Double> = this * (-1)
public companion object {
/**
* Use it like
* ```
*FastHistogram.fromRanges(
* (-1.0..1.0),
* (-1.0..1.0)
*)
*```
*/
public fun fromRanges(
vararg ranges: ClosedFloatingPointRange<Double>,
): DoubleHistogramGroup = DoubleHistogramGroup(
ranges.map(ClosedFloatingPointRange<Double>::start).asBuffer(),
ranges.map(ClosedFloatingPointRange<Double>::endInclusive).asBuffer()
)
/**
* Use it like
* ```
*FastHistogram.fromRanges(
* (-1.0..1.0) to 50,
* (-1.0..1.0) to 32
*)
*```
*/
public fun fromRanges(
vararg ranges: Pair<ClosedFloatingPointRange<Double>, Int>,
): DoubleHistogramGroup = DoubleHistogramGroup(
ListBuffer(
ranges
.map(Pair<ClosedFloatingPointRange<Double>, Int>::first)
.map(ClosedFloatingPointRange<Double>::start)
),
ListBuffer(
ranges
.map(Pair<ClosedFloatingPointRange<Double>, Int>::first)
.map(ClosedFloatingPointRange<Double>::endInclusive)
),
ranges.map(Pair<ClosedFloatingPointRange<Double>, Int>::second).toIntArray()
)
}
}

View File

@ -20,6 +20,15 @@ public interface Bin<in T : Any, out V> : Domain<T> {
public val binValue: V
}
/**
* A simple histogram bin based on domain
*/
public data class DomainBin<in T : Comparable<T>, D : Domain<T>, out V>(
public val domain: D,
override val binValue: V,
) : Bin<T, V>, Domain<T> by domain
public interface Histogram<in T : Any, out V, out B : Bin<T, V>> {
/**
* Find existing bin, corresponding to given coordinates

View File

@ -0,0 +1,76 @@
/*
* Copyright 2018-2021 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package space.kscience.kmath.histogram
import space.kscience.kmath.domains.Domain
import space.kscience.kmath.linear.Point
import space.kscience.kmath.nd.DefaultStrides
import space.kscience.kmath.nd.FieldOpsND
import space.kscience.kmath.nd.Shape
import space.kscience.kmath.nd.StructureND
import space.kscience.kmath.operations.Group
import space.kscience.kmath.operations.ScaleOperations
import space.kscience.kmath.operations.invoke
/**
* @param T the type of the argument space
* @param V the type of bin value
*/
public class HistogramND<T : Comparable<T>, D : Domain<T>, V : Any>(
public val group: HistogramGroupND<T, D, V>,
internal val values: StructureND<V>,
) : Histogram<T, V, DomainBin<T, D, V>> {
override fun get(point: Point<T>): DomainBin<T, D, V>? {
val index = group.getIndexOrNull(point) ?: return null
return group.produceBin(index, values[index])
}
override val dimension: Int get() = group.shape.size
override val bins: Iterable<DomainBin<T, D, V>>
get() = DefaultStrides(group.shape).asSequence().map {
group.produceBin(it, values[it])
}.asIterable()
}
/**
* A space for producing histograms with values in a NDStructure
*/
public interface HistogramGroupND<T : Comparable<T>, D : Domain<T>, V : Any> :
Group<HistogramND<T, D, V>>, ScaleOperations<HistogramND<T, D, V>> {
public val shape: Shape
public val valueAlgebra: FieldOpsND<V, *> //= NDAlgebra.space(valueSpace, Buffer.Companion::boxing, *shape),
/**
* Resolve index of the bin including given [point]. Return null if point is outside histogram area
*/
public fun getIndexOrNull(point: Point<T>): IntArray?
/**
* Get a bin domain represented by given index
*/
public fun getDomain(index: IntArray): Domain<T>?
public fun produceBin(index: IntArray, value: V): DomainBin<T, D, V>
public fun produce(builder: HistogramBuilder<T, V>.() -> Unit): HistogramND<T, D, V>
override fun add(left: HistogramND<T, D, V>, right: HistogramND<T, D, V>): HistogramND<T, D, V> {
require(left.group == this && right.group == this) {
"A histogram belonging to a different group cannot be operated."
}
return HistogramND(this, valueAlgebra { left.values + right.values })
}
override fun scale(a: HistogramND<T, D, V>, value: Double): HistogramND<T, D, V> {
require(a.group == this) { "A histogram belonging to a different group cannot be operated." }
return HistogramND(this, valueAlgebra { a.values * value })
}
override val zero: HistogramND<T, D, V> get() = produce { }
}

View File

@ -1,84 +0,0 @@
/*
* Copyright 2018-2021 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package space.kscience.kmath.histogram
import space.kscience.kmath.domains.Domain
import space.kscience.kmath.linear.Point
import space.kscience.kmath.nd.DefaultStrides
import space.kscience.kmath.nd.FieldND
import space.kscience.kmath.nd.Shape
import space.kscience.kmath.nd.StructureND
import space.kscience.kmath.operations.Group
import space.kscience.kmath.operations.ScaleOperations
import space.kscience.kmath.operations.invoke
/**
* A simple histogram bin based on domain
*/
public data class DomainBin<in T : Comparable<T>, out V>(
public val domain: Domain<T>,
override val binValue: V,
) : Bin<T, V>, Domain<T> by domain
/**
* @param T the type of the argument space
* @param V the type of bin value
*/
public class IndexedHistogram<T : Comparable<T>, V : Any>(
public val histogramGroup: IndexedHistogramGroup<T, V>,
public val values: StructureND<V>,
) : Histogram<T, V, DomainBin<T, V>> {
override fun get(point: Point<T>): DomainBin<T, V>? {
val index = histogramGroup.getIndexOrNull(point) ?: return null
return histogramGroup.produceBin(index, values[index])
}
override val dimension: Int get() = histogramGroup.shape.size
override val bins: Iterable<DomainBin<T, V>>
get() = DefaultStrides(histogramGroup.shape).asSequence().map {
histogramGroup.produceBin(it, values[it])
}.asIterable()
}
/**
* A space for producing histograms with values in a NDStructure
*/
public interface IndexedHistogramGroup<T : Comparable<T>, V : Any> : Group<IndexedHistogram<T, V>>,
ScaleOperations<IndexedHistogram<T, V>> {
public val shape: Shape
public val histogramValueAlgebra: FieldND<V, *> //= NDAlgebra.space(valueSpace, Buffer.Companion::boxing, *shape),
/**
* Resolve index of the bin including given [point]. Return null if point is outside histogram area
*/
public fun getIndexOrNull(point: Point<T>): IntArray?
/**
* Get a bin domain represented by given index
*/
public fun getDomain(index: IntArray): Domain<T>?
public fun produceBin(index: IntArray, value: V): DomainBin<T, V>
public fun produce(builder: HistogramBuilder<T, V>.() -> Unit): IndexedHistogram<T, V>
override fun add(left: IndexedHistogram<T, V>, right: IndexedHistogram<T, V>): IndexedHistogram<T, V> {
require(left.histogramGroup == this && right.histogramGroup == this) {
"A histogram belonging to a different group cannot be operated."
}
return IndexedHistogram(this, histogramValueAlgebra { left.values + right.values })
}
override fun scale(a: IndexedHistogram<T, V>, value: Double): IndexedHistogram<T, V> {
require(a.histogramGroup == this) { "A histogram belonging to a different group cannot be operated." }
return IndexedHistogram(this, histogramValueAlgebra { a.values * value })
}
override val zero: IndexedHistogram<T, V> get() = produce { }
}

View File

@ -0,0 +1,163 @@
/*
* Copyright 2018-2021 KMath contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
@file:OptIn(UnstableKMathAPI::class)
package space.kscience.kmath.histogram
import space.kscience.kmath.domains.HyperSquareDomain
import space.kscience.kmath.linear.Point
import space.kscience.kmath.misc.UnstableKMathAPI
import space.kscience.kmath.nd.*
import space.kscience.kmath.operations.DoubleField
import space.kscience.kmath.operations.Field
import space.kscience.kmath.operations.invoke
import space.kscience.kmath.structures.*
import kotlin.math.floor
public typealias HyperSquareBin<V> = DomainBin<Double, HyperSquareDomain, V>
/**
* Multivariate histogram space for hyper-square real-field bins.
* @param bufferFactory is an optional parameter used to optimize buffer production.
*/
public class UniformHistogramGroupND<V : Any, A : Field<V>>(
override val valueAlgebra: FieldOpsND<V, A>,
private val lower: Buffer<Double>,
private val upper: Buffer<Double>,
private val binNums: IntArray = IntArray(lower.size) { 20 },
private val bufferFactory: BufferFactory<V> = Buffer.Companion::boxing,
) : HistogramGroupND<Double, HyperSquareDomain, V> {
init {
// argument checks
require(lower.size == upper.size) { "Dimension mismatch in histogram lower and upper limits." }
require(lower.size == binNums.size) { "Dimension mismatch in bin count." }
require(!lower.indices.any { upper[it] - lower[it] < 0 }) { "Range for one of axis is not strictly positive" }
}
public val dimension: Int get() = lower.size
override val shape: IntArray = IntArray(binNums.size) { binNums[it] + 2 }
private val binSize = DoubleBuffer(dimension) { (upper[it] - lower[it]) / binNums[it] }
/**
* Get internal [StructureND] bin index for given axis
*/
private fun getIndex(axis: Int, value: Double): Int = when {
value >= upper[axis] -> binNums[axis] + 1 // overflow
value < lower[axis] -> 0 // underflow
else -> floor((value - lower[axis]) / binSize[axis]).toInt()
}
override fun getIndexOrNull(point: Buffer<Double>): IntArray = IntArray(dimension) {
getIndex(it, point[it])
}
override fun getDomain(index: IntArray): HyperSquareDomain {
val lowerBoundary = index.mapIndexed { axis, i ->
when (i) {
0 -> Double.NEGATIVE_INFINITY
shape[axis] - 1 -> upper[axis]
else -> lower[axis] + (i.toDouble()) * binSize[axis]
}
}.asBuffer()
val upperBoundary = index.mapIndexed { axis, i ->
when (i) {
0 -> lower[axis]
shape[axis] - 1 -> Double.POSITIVE_INFINITY
else -> lower[axis] + (i.toDouble() + 1) * binSize[axis]
}
}.asBuffer()
return HyperSquareDomain(lowerBoundary, upperBoundary)
}
override fun produceBin(index: IntArray, value: V): HyperSquareBin<V> {
val domain = getDomain(index)
return DomainBin(domain, value)
}
override fun produce(builder: HistogramBuilder<Double, V>.() -> Unit): HistogramND<Double, HyperSquareDomain, V> {
val ndCounter = StructureND.buffered(shape) { Counter.of(valueAlgebra.elementAlgebra) }
val hBuilder = object : HistogramBuilder<Double, V> {
override val defaultValue: V get() = valueAlgebra.elementAlgebra.one
override fun putValue(point: Point<out Double>, value: V) = with(valueAlgebra.elementAlgebra) {
val index = getIndexOrNull(point)
ndCounter[index].add(value)
}
}
hBuilder.apply(builder)
val values: BufferND<V> = ndCounter.mapToBuffer(bufferFactory) { it.value }
return HistogramND(this, values)
}
override fun HistogramND<Double, HyperSquareDomain, V>.unaryMinus(): HistogramND<Double, HyperSquareDomain, V> =
this * (-1)
}
/**
* Use it like
* ```
*FastHistogram.fromRanges(
* (-1.0..1.0),
* (-1.0..1.0)
*)
*```
*/
public fun <V : Any, A : Field<V>> Histogram.Companion.uniformNDFromRanges(
valueAlgebra: FieldOpsND<V, A>,
vararg ranges: ClosedFloatingPointRange<Double>,
bufferFactory: BufferFactory<V> = Buffer.Companion::boxing,
): UniformHistogramGroupND<V, A> = UniformHistogramGroupND(
valueAlgebra,
ranges.map(ClosedFloatingPointRange<Double>::start).asBuffer(),
ranges.map(ClosedFloatingPointRange<Double>::endInclusive).asBuffer(),
bufferFactory = bufferFactory
)
public fun Histogram.Companion.uniformDoubleNDFromRanges(
vararg ranges: ClosedFloatingPointRange<Double>,
): UniformHistogramGroupND<Double, DoubleField> =
uniformNDFromRanges(DoubleFieldOpsND, *ranges, bufferFactory = ::DoubleBuffer)
/**
* Use it like
* ```
*FastHistogram.fromRanges(
* (-1.0..1.0) to 50,
* (-1.0..1.0) to 32
*)
*```
*/
public fun <V : Any, A : Field<V>> Histogram.Companion.uniformNDFromRanges(
valueAlgebra: FieldOpsND<V, A>,
vararg ranges: Pair<ClosedFloatingPointRange<Double>, Int>,
bufferFactory: BufferFactory<V> = Buffer.Companion::boxing,
): UniformHistogramGroupND<V, A> = UniformHistogramGroupND(
valueAlgebra,
ListBuffer(
ranges
.map(Pair<ClosedFloatingPointRange<Double>, Int>::first)
.map(ClosedFloatingPointRange<Double>::start)
),
ListBuffer(
ranges
.map(Pair<ClosedFloatingPointRange<Double>, Int>::first)
.map(ClosedFloatingPointRange<Double>::endInclusive)
),
ranges.map(Pair<ClosedFloatingPointRange<Double>, Int>::second).toIntArray(),
bufferFactory = bufferFactory
)
public fun Histogram.Companion.uniformDoubleNDFromRanges(
vararg ranges: Pair<ClosedFloatingPointRange<Double>, Int>,
): UniformHistogramGroupND<Double, DoubleField> =
uniformNDFromRanges(DoubleFieldOpsND, *ranges, bufferFactory = ::DoubleBuffer)

View File

@ -3,8 +3,11 @@
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
@file:OptIn(UnstableKMathAPI::class)
package space.kscience.kmath.histogram
import space.kscience.kmath.misc.UnstableKMathAPI
import space.kscience.kmath.nd.DefaultStrides
import space.kscience.kmath.operations.invoke
import space.kscience.kmath.real.DoubleVector
@ -14,7 +17,7 @@ import kotlin.test.*
internal class MultivariateHistogramTest {
@Test
fun testSinglePutHistogram() {
val hSpace = DoubleHistogramGroup.fromRanges(
val hSpace = Histogram.uniformDoubleNDFromRanges(
(-1.0..1.0),
(-1.0..1.0)
)
@ -29,7 +32,7 @@ internal class MultivariateHistogramTest {
@Test
fun testSequentialPut() {
val hSpace = DoubleHistogramGroup.fromRanges(
val hSpace = Histogram.uniformDoubleNDFromRanges(
(-1.0..1.0),
(-1.0..1.0),
(-1.0..1.0)
@ -49,7 +52,7 @@ internal class MultivariateHistogramTest {
@Test
fun testHistogramAlgebra() {
DoubleHistogramGroup.fromRanges(
Histogram.uniformDoubleNDFromRanges(
(-1.0..1.0),
(-1.0..1.0),
(-1.0..1.0)

View File

@ -45,7 +45,7 @@ internal class TreeHistogramBuilder(val binFactory: (Double) -> DoubleDomain1D)
override val defaultValue: Double get() = 1.0
internal class BinCounter(val domain: DoubleDomain1D, val counter: Counter<Double> = Counter.double()) :
internal class BinCounter(val domain: DoubleDomain1D, val counter: Counter<Double> = Counter.ofDouble()) :
ClosedRange<Double> by domain.range
private val bins: TreeMap<Double, BinCounter> = TreeMap()