Histograms for jvm are in working order.
This commit is contained in:
parent
c7575caeee
commit
c8329eef8a
@ -0,0 +1,20 @@
|
|||||||
|
package scientifik.kmath.histogram
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Common representation for atomic counters
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
expect class LongCounter(){
|
||||||
|
fun decrement()
|
||||||
|
fun increment()
|
||||||
|
fun reset()
|
||||||
|
fun sum(): Long
|
||||||
|
fun add(l:Long)
|
||||||
|
}
|
||||||
|
|
||||||
|
expect class DoubleCounter(){
|
||||||
|
fun reset()
|
||||||
|
fun sum(): Double
|
||||||
|
fun add(d: Double)
|
||||||
|
}
|
@ -1,12 +1,22 @@
|
|||||||
package scientifik.kmath.histogram
|
package scientifik.kmath.histogram
|
||||||
|
|
||||||
import scientifik.kmath.linear.RealVector
|
import scientifik.kmath.linear.RealVector
|
||||||
|
import scientifik.kmath.linear.toVector
|
||||||
import scientifik.kmath.operations.Space
|
import scientifik.kmath.operations.Space
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple geometric domain
|
||||||
|
* TODO move to geometry module
|
||||||
|
*/
|
||||||
|
interface Domain {
|
||||||
|
operator fun contains(vector: RealVector): Boolean
|
||||||
|
val dimension: Int
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The bin in the histogram. The histogram is by definition always done in the real space
|
* The bin in the histogram. The histogram is by definition always done in the real space
|
||||||
*/
|
*/
|
||||||
interface Bin {
|
interface Bin : Domain {
|
||||||
/**
|
/**
|
||||||
* The value of this bin
|
* The value of this bin
|
||||||
*/
|
*/
|
||||||
@ -14,26 +24,36 @@ interface Bin {
|
|||||||
val center: RealVector
|
val center: RealVector
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a new bin with zero count corresponding to given point
|
|
||||||
*/
|
|
||||||
interface BinFactory<out B : Bin> {
|
|
||||||
fun createBin(point: RealVector): B
|
|
||||||
}
|
|
||||||
|
|
||||||
interface Histogram<out B : Bin> : Iterable<B> {
|
interface Histogram<out B : Bin> : Iterable<B> {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find existing bin, corresponding to given coordinates
|
* Find existing bin, corresponding to given coordinates
|
||||||
*/
|
*/
|
||||||
fun findBin(point: RealVector): B?
|
operator fun get(point: RealVector): B?
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Dimension of the histogram
|
* Dimension of the histogram
|
||||||
*/
|
*/
|
||||||
val dimension: Int
|
val dimension: Int
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increment appropriate bin
|
||||||
|
*/
|
||||||
|
fun put(point: RealVector)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fun Histogram<*>.put(vararg point: Double) = put(point.toVector())
|
||||||
|
|
||||||
|
fun Histogram<*>.fill(sequence: Iterable<RealVector>) = sequence.forEach { put(it) }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pass a sequence builder into histogram
|
||||||
|
*/
|
||||||
|
fun Histogram<*>.fill(buider: suspend SequenceScope<RealVector>.() -> Unit) = fill(sequence(buider).asIterable())
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A space to perform arithmetic operations on histograms
|
||||||
|
*/
|
||||||
interface HistogramSpace<B : Bin, H : Histogram<B>> : Space<H> {
|
interface HistogramSpace<B : Bin, H : Histogram<B>> : Space<H> {
|
||||||
/**
|
/**
|
||||||
* Rules for performing operations on bins
|
* Rules for performing operations on bins
|
||||||
|
@ -162,9 +162,8 @@ abstract class VectorSpace<T : Any>(val size: Int, val field: Field<T>) : Space<
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
interface Vector<T : Any> : SpaceElement<Vector<T>, VectorSpace<T>> {
|
interface Vector<T : Any> : SpaceElement<Vector<T>, VectorSpace<T>>, Iterable<T> {
|
||||||
val size: Int
|
val size: Int get() = context.size
|
||||||
get() = context.size
|
|
||||||
|
|
||||||
operator fun get(i: Int): T
|
operator fun get(i: Int): T
|
||||||
|
|
||||||
@ -181,6 +180,8 @@ interface Vector<T : Any> : SpaceElement<Vector<T>, VectorSpace<T>> {
|
|||||||
fun ofReal(size: Int, initializer: (Int) -> Double) =
|
fun ofReal(size: Int, initializer: (Int) -> Double) =
|
||||||
ArrayVector(ArrayVectorSpace(size, DoubleField, realNDFieldFactory), initializer)
|
ArrayVector(ArrayVectorSpace(size, DoubleField, realNDFieldFactory), initializer)
|
||||||
|
|
||||||
|
fun ofReal(vararg point: Double) = point.toVector()
|
||||||
|
|
||||||
fun equals(v1: Vector<*>, v2: Vector<*>): Boolean {
|
fun equals(v1: Vector<*>, v2: Vector<*>): Boolean {
|
||||||
if (v1 === v2) return true
|
if (v1 === v2) return true
|
||||||
if (v1.context != v2.context) return false
|
if (v1.context != v2.context) return false
|
||||||
@ -265,6 +266,10 @@ class ArrayVector<T : Any> internal constructor(override val context: ArrayVecto
|
|||||||
}
|
}
|
||||||
|
|
||||||
override val self: ArrayVector<T> get() = this
|
override val self: ArrayVector<T> get() = this
|
||||||
|
|
||||||
|
override fun iterator(): Iterator<T> = (0 until size).map { array[it] }.iterator()
|
||||||
|
|
||||||
|
override fun toString(): String = this.joinToString(prefix = "[",postfix = "]", separator = ", "){it.toString()}
|
||||||
}
|
}
|
||||||
|
|
||||||
typealias RealVector = Vector<Double>
|
typealias RealVector = Vector<Double>
|
||||||
@ -284,6 +289,7 @@ interface LinearSolver<T : Any> {
|
|||||||
fun <T : Any> Array<T>.toVector(field: Field<T>) = Vector.of(size, field) { this[it] }
|
fun <T : Any> Array<T>.toVector(field: Field<T>) = Vector.of(size, field) { this[it] }
|
||||||
|
|
||||||
fun DoubleArray.toVector() = Vector.ofReal(this.size) { this[it] }
|
fun DoubleArray.toVector() = Vector.ofReal(this.size) { this[it] }
|
||||||
|
fun List<Double>.toVector() = Vector.ofReal(this.size) { this[it] }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert matrix to vector if it is possible
|
* Convert matrix to vector if it is possible
|
||||||
|
@ -105,9 +105,12 @@ abstract class NDField<T>(val shape: IntArray, val field: Field<T>) : Field<NDAr
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* NDStructure coupled to the context. Emulates Python ndarray
|
* Immutable [NDStructure] coupled to the context. Emulates Python ndarray
|
||||||
*/
|
*/
|
||||||
data class NDArray<T>(override val context: NDField<T>, private val structure: NDStructure<T>) : FieldElement<NDArray<T>, NDField<T>>, NDStructure<T> by structure {
|
data class NDArray<T>(override val context: NDField<T>, private val structure: NDStructure<T>) : FieldElement<NDArray<T>, NDField<T>>, NDStructure<T> by structure {
|
||||||
|
|
||||||
|
//TODO ensure structure is immutable
|
||||||
|
|
||||||
override val self: NDArray<T>
|
override val self: NDArray<T>
|
||||||
get() = this
|
get() = this
|
||||||
|
|
||||||
|
@ -76,7 +76,7 @@ class DefaultStrides(override val shape: IntArray) : Strides {
|
|||||||
override fun offset(index: IntArray): Int {
|
override fun offset(index: IntArray): Int {
|
||||||
return index.mapIndexed { i, value ->
|
return index.mapIndexed { i, value ->
|
||||||
if (value < 0 || value >= shape[i]) {
|
if (value < 0 || value >= shape[i]) {
|
||||||
throw RuntimeException("Index out of shape bounds: ($i,$value)")
|
throw RuntimeException("Index $value out of shape bounds: (0,${shape[i]})")
|
||||||
}
|
}
|
||||||
value * strides[i]
|
value * strides[i]
|
||||||
}.sum()
|
}.sum()
|
||||||
|
@ -0,0 +1,16 @@
|
|||||||
|
package scientifik.kmath.histogram
|
||||||
|
|
||||||
|
actual class LongCounter{
|
||||||
|
private var sum: Long = 0
|
||||||
|
actual fun decrement() {sum--}
|
||||||
|
actual fun increment() {sum++}
|
||||||
|
actual fun reset() {sum = 0}
|
||||||
|
actual fun sum(): Long = sum
|
||||||
|
actual fun add(l: Long) {sum+=l}
|
||||||
|
}
|
||||||
|
actual class DoubleCounter{
|
||||||
|
private var sum: Double = 0.0
|
||||||
|
actual fun reset() {sum = 0.0}
|
||||||
|
actual fun sum(): Double = sum
|
||||||
|
actual fun add(d: Double) {sum+=d}
|
||||||
|
}
|
@ -0,0 +1,7 @@
|
|||||||
|
package scientifik.kmath.histogram
|
||||||
|
|
||||||
|
import java.util.concurrent.atomic.DoubleAdder
|
||||||
|
import java.util.concurrent.atomic.LongAdder
|
||||||
|
|
||||||
|
actual typealias LongCounter = LongAdder
|
||||||
|
actual typealias DoubleCounter = DoubleAdder
|
@ -0,0 +1,117 @@
|
|||||||
|
package scientifik.kmath.histogram
|
||||||
|
|
||||||
|
import scientifik.kmath.linear.RealVector
|
||||||
|
import scientifik.kmath.linear.toVector
|
||||||
|
import scientifik.kmath.structures.NDStructure
|
||||||
|
import scientifik.kmath.structures.ndStructure
|
||||||
|
import kotlin.math.floor
|
||||||
|
|
||||||
|
class MultivariateBin(override val center: RealVector, val sizes: RealVector, val counter: LongCounter = LongCounter()) : Bin {
|
||||||
|
init {
|
||||||
|
if (center.size != sizes.size) error("Dimension mismatch in bin creation. Expected ${center.size}, but found ${sizes.size}")
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun contains(vector: RealVector): Boolean {
|
||||||
|
assert(vector.size == center.size)
|
||||||
|
return vector.asSequence().mapIndexed { i, value -> value in (center[i] - sizes[i] / 2)..(center[i] + sizes[i] / 2) }.all { it }
|
||||||
|
}
|
||||||
|
|
||||||
|
override val value: Number get() = counter.sum()
|
||||||
|
internal operator fun inc() = this.also { counter.increment() }
|
||||||
|
|
||||||
|
override val dimension: Int get() = center.size
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Uniform multivariate histogram with fixed borders. Based on NDStructure implementation with complexity of m for bin search, where m is the number of dimensions
|
||||||
|
*/
|
||||||
|
class FastHistogram(
|
||||||
|
private val lower: RealVector,
|
||||||
|
private val upper: RealVector,
|
||||||
|
private val binNums: IntArray = IntArray(lower.size) { 100 }
|
||||||
|
) : Histogram<MultivariateBin> {
|
||||||
|
|
||||||
|
init {
|
||||||
|
// argument checks
|
||||||
|
if (lower.size != upper.size) error("Dimension mismatch in histogram lower and upper limits.")
|
||||||
|
if (lower.size != binNums.size) error("Dimension mismatch in bin count.")
|
||||||
|
if ((upper - lower).any { it <= 0 }) error("Range for one of axis is not strictly positive")
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
override val dimension: Int get() = lower.size
|
||||||
|
|
||||||
|
//TODO optimize binSize performance if needed
|
||||||
|
private val binSize = (upper - lower).mapIndexed { index, value -> value / binNums[index] }.toVector()
|
||||||
|
|
||||||
|
private val bins: NDStructure<MultivariateBin> by lazy {
|
||||||
|
val actualSizes = IntArray(binNums.size) { binNums[it] + 2 }
|
||||||
|
ndStructure(actualSizes) { indexArray ->
|
||||||
|
val center = indexArray.mapIndexed { axis, index ->
|
||||||
|
when (index) {
|
||||||
|
0 -> Double.NEGATIVE_INFINITY
|
||||||
|
actualSizes[axis] -> Double.POSITIVE_INFINITY
|
||||||
|
else -> lower[axis] + (index - 1) * binSize[axis]
|
||||||
|
}
|
||||||
|
}.toVector()
|
||||||
|
MultivariateBin(center, binSize)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get internal [NDStructure] bin index for given axis
|
||||||
|
*/
|
||||||
|
private fun getIndex(axis: Int, value: Double): Int {
|
||||||
|
return when {
|
||||||
|
value >= upper[axis] -> binNums[axis] + 1 // overflow
|
||||||
|
value < lower[axis] -> 0 // underflow
|
||||||
|
else -> floor((value - lower[axis]) / binSize[axis]).toInt() + 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
override fun get(point: RealVector): MultivariateBin? {
|
||||||
|
val index = IntArray(dimension) { getIndex(it, point[it]) }
|
||||||
|
return bins[index]
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun put(point: RealVector) {
|
||||||
|
this[point]?.inc() ?: error("Could not find appropriate bin (should not be possible)")
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun iterator(): Iterator<MultivariateBin> = bins.asSequence().map { it.second }.iterator()
|
||||||
|
|
||||||
|
companion object {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use it like
|
||||||
|
* ```
|
||||||
|
*FastHistogram.fromRanges(
|
||||||
|
* (-1.0..1.0),
|
||||||
|
* (-1.0..1.0)
|
||||||
|
*)
|
||||||
|
*```
|
||||||
|
*/
|
||||||
|
fun fromRanges(vararg ranges: ClosedFloatingPointRange<Double>): FastHistogram {
|
||||||
|
return FastHistogram(ranges.map { it.start }.toVector(), ranges.map { it.endInclusive }.toVector())
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use it like
|
||||||
|
* ```
|
||||||
|
*FastHistogram.fromRanges(
|
||||||
|
* (-1.0..1.0) to 50,
|
||||||
|
* (-1.0..1.0) to 32
|
||||||
|
*)
|
||||||
|
*```
|
||||||
|
*/
|
||||||
|
fun fromRanges(vararg ranges: Pair<ClosedFloatingPointRange<Double>,Int>): FastHistogram {
|
||||||
|
return FastHistogram(
|
||||||
|
ranges.map { it.first.start }.toVector(),
|
||||||
|
ranges.map { it.first.endInclusive }.toVector(),
|
||||||
|
ranges.map { it.second }.toIntArray()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,88 @@
|
|||||||
|
package scientifik.kmath.histogram
|
||||||
|
|
||||||
|
import scientifik.kmath.linear.RealVector
|
||||||
|
import scientifik.kmath.linear.toVector
|
||||||
|
import java.util.*
|
||||||
|
import kotlin.math.floor
|
||||||
|
|
||||||
|
//TODO move to common
|
||||||
|
|
||||||
|
class UnivariateBin(val position: Double, val size: Double, val counter: LongCounter = LongCounter()) : Bin {
|
||||||
|
//TODO add weighting
|
||||||
|
override val value: Number get() = counter.sum()
|
||||||
|
|
||||||
|
override val center: RealVector get() = doubleArrayOf(position).toVector()
|
||||||
|
|
||||||
|
operator fun contains(value: Double): Boolean = value in (position - size / 2)..(position + size / 2)
|
||||||
|
|
||||||
|
override fun contains(vector: RealVector): Boolean = contains(vector[0])
|
||||||
|
|
||||||
|
internal operator fun inc() = this.also { counter.increment()}
|
||||||
|
|
||||||
|
override val dimension: Int get() = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Univariate histogram with log(n) bin search speed
|
||||||
|
*/
|
||||||
|
class UnivariateHistogram private constructor(private val factory: (Double) -> UnivariateBin) : Histogram<UnivariateBin> {
|
||||||
|
|
||||||
|
private val bins: TreeMap<Double, UnivariateBin> = TreeMap()
|
||||||
|
|
||||||
|
private operator fun get(value: Double): UnivariateBin? {
|
||||||
|
// check ceiling entry and return it if it is what needed
|
||||||
|
val ceil = bins.ceilingEntry(value)?.value
|
||||||
|
if (ceil != null && value in ceil) return ceil
|
||||||
|
//check floor entry
|
||||||
|
val floor = bins.floorEntry(value)?.value
|
||||||
|
if (floor != null && value in floor) return floor
|
||||||
|
//neither is valid, not found
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun createBin(value: Double): UnivariateBin = factory(value).also {
|
||||||
|
synchronized(this) { bins.put(it.position, it) }
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun get(point: RealVector): UnivariateBin? = get(point[0])
|
||||||
|
|
||||||
|
override val dimension: Int get() = 1
|
||||||
|
|
||||||
|
override fun iterator(): Iterator<UnivariateBin> = bins.values.iterator()
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Thread safe put operation
|
||||||
|
*/
|
||||||
|
fun put(value: Double) {
|
||||||
|
(get(value) ?: createBin(value)).inc()
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun put(point: RealVector) = put(point[0])
|
||||||
|
|
||||||
|
companion object {
|
||||||
|
fun uniform(binSize: Double, start: Double = 0.0): UnivariateHistogram {
|
||||||
|
return UnivariateHistogram { value ->
|
||||||
|
val center = start + binSize * floor((value - start) / binSize + 0.5)
|
||||||
|
UnivariateBin(center, binSize)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fun custom(borders: DoubleArray): UnivariateHistogram {
|
||||||
|
val sorted = borders.sortedArray()
|
||||||
|
return UnivariateHistogram { value ->
|
||||||
|
if (value < sorted.first()) {
|
||||||
|
UnivariateBin(Double.NEGATIVE_INFINITY, Double.MAX_VALUE)
|
||||||
|
} else if (value > sorted.last()) {
|
||||||
|
UnivariateBin(Double.POSITIVE_INFINITY, Double.MAX_VALUE)
|
||||||
|
} else {
|
||||||
|
val index = (0 until sorted.size).first { value > sorted[it] }
|
||||||
|
val left = sorted[index]
|
||||||
|
val right = sorted[index + 1]
|
||||||
|
UnivariateBin((left + right) / 2, (right - left))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fun UnivariateHistogram.fill(sequence: Iterable<Double>) = sequence.forEach { put(it) }
|
@ -0,0 +1,43 @@
|
|||||||
|
package scientifik.kmath.histogram
|
||||||
|
|
||||||
|
import org.junit.Test
|
||||||
|
import scientifik.kmath.linear.Vector
|
||||||
|
import kotlin.random.Random
|
||||||
|
import kotlin.test.assertEquals
|
||||||
|
import kotlin.test.assertFalse
|
||||||
|
import kotlin.test.assertTrue
|
||||||
|
|
||||||
|
class MultivariateHistogramTest {
|
||||||
|
@Test
|
||||||
|
fun testSinglePutHistogram() {
|
||||||
|
val histogram = FastHistogram.fromRanges(
|
||||||
|
(-1.0..1.0),
|
||||||
|
(-1.0..1.0)
|
||||||
|
)
|
||||||
|
histogram.put(0.6, 0.6)
|
||||||
|
val bin = histogram.find { it.value.toInt() > 0 }!!
|
||||||
|
assertTrue { bin.contains(Vector.ofReal(0.6, 0.6)) }
|
||||||
|
assertFalse { bin.contains(Vector.ofReal(-0.6, 0.6)) }
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun testSequentialPut(){
|
||||||
|
val histogram = FastHistogram.fromRanges(
|
||||||
|
(-1.0..1.0),
|
||||||
|
(-1.0..1.0),
|
||||||
|
(-1.0..1.0)
|
||||||
|
)
|
||||||
|
val random = Random(1234)
|
||||||
|
|
||||||
|
fun nextDouble() = random.nextDouble(-1.0,1.0)
|
||||||
|
|
||||||
|
val n = 10000
|
||||||
|
|
||||||
|
histogram.fill {
|
||||||
|
repeat(n){
|
||||||
|
yield(Vector.ofReal(nextDouble(),nextDouble(),nextDouble()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertEquals(n, histogram.sumBy { it.value.toInt() })
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user