forked from kscience/kmath
Histograms for jvm are in working order.
This commit is contained in:
parent
c7575caeee
commit
c8329eef8a
@ -0,0 +1,20 @@
|
||||
package scientifik.kmath.histogram
|
||||
|
||||
/*
|
||||
* Common representation for atomic counters
|
||||
*/
|
||||
|
||||
|
||||
/**
 * Counter accumulating a [Long] total. Every platform provides its own `actual` implementation.
 */
expect class LongCounter() {
    /** Adds one to the total. */
    fun increment()

    /** Subtracts one from the total. */
    fun decrement()

    /** Adds [l] to the total. */
    fun add(l: Long)

    /** Returns the current total. */
    fun sum(): Long

    /** Sets the total back to zero. */
    fun reset()
}
|
||||
|
||||
/**
 * Counter accumulating a [Double] total. Every platform provides its own `actual` implementation.
 */
expect class DoubleCounter() {
    /** Adds [d] to the total. */
    fun add(d: Double)

    /** Returns the current total. */
    fun sum(): Double

    /** Sets the total back to zero. */
    fun reset()
}
|
@ -1,12 +1,22 @@
|
||||
package scientifik.kmath.histogram
|
||||
|
||||
import scientifik.kmath.linear.RealVector
|
||||
import scientifik.kmath.linear.toVector
|
||||
import scientifik.kmath.operations.Space
|
||||
|
||||
/**
|
||||
* A simple geometric domain
|
||||
* TODO move to geometry module
|
||||
*/
|
||||
interface Domain {
    /**
     * Number of coordinates a vector is expected to have to be tested against this domain.
     */
    val dimension: Int

    /**
     * Checks whether [vector] belongs to the domain; enables the `vector in domain` syntax.
     */
    operator fun contains(vector: RealVector): Boolean
}
|
||||
|
||||
/**
|
||||
* The bin in the histogram. The histogram is by definition always done in the real space
|
||||
*/
|
||||
interface Bin {
|
||||
interface Bin : Domain {
|
||||
/**
|
||||
* The value of this bin
|
||||
*/
|
||||
@ -14,26 +24,36 @@ interface Bin {
|
||||
val center: RealVector
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new bin with zero count corresponding to given point
|
||||
*/
|
||||
interface BinFactory<out B : Bin> {
    /**
     * Produces the bin of type [B] that corresponds to [point].
     */
    fun createBin(point: RealVector): B
}
|
||||
|
||||
interface Histogram<out B : Bin> : Iterable<B> {
|
||||
|
||||
/**
|
||||
* Find existing bin, corresponding to given coordinates
|
||||
*/
|
||||
fun findBin(point: RealVector): B?
|
||||
operator fun get(point: RealVector): B?
|
||||
|
||||
/**
|
||||
* Dimension of the histogram
|
||||
*/
|
||||
val dimension: Int
|
||||
|
||||
/**
|
||||
* Increment appropriate bin
|
||||
*/
|
||||
fun put(point: RealVector)
|
||||
}
|
||||
|
||||
/**
 * Convenience overload: wraps the given coordinates into a vector and puts it into the histogram.
 */
fun Histogram<*>.put(vararg point: Double) {
    put(point.toVector())
}
|
||||
|
||||
/**
 * Puts every point of [sequence] into the histogram, in iteration order.
 */
fun Histogram<*>.fill(sequence: Iterable<RealVector>) {
    for (point in sequence) put(point)
}
|
||||
|
||||
/**
|
||||
* Pass a sequence builder into histogram
|
||||
*/
|
||||
fun Histogram<*>.fill(buider: suspend SequenceScope<RealVector>.() -> Unit) {
    // Build a lazy sequence from the builder, then feed it through the Iterable-based overload.
    val points = sequence(buider)
    fill(points.asIterable())
}
|
||||
|
||||
/**
|
||||
* A space to perform arithmetic operations on histograms
|
||||
*/
|
||||
interface HistogramSpace<B : Bin, H : Histogram<B>> : Space<H> {
|
||||
/**
|
||||
* Rules for performing operations on bins
|
||||
|
@ -162,9 +162,8 @@ abstract class VectorSpace<T : Any>(val size: Int, val field: Field<T>) : Space<
|
||||
}
|
||||
|
||||
|
||||
interface Vector<T : Any> : SpaceElement<Vector<T>, VectorSpace<T>> {
|
||||
val size: Int
|
||||
get() = context.size
|
||||
interface Vector<T : Any> : SpaceElement<Vector<T>, VectorSpace<T>>, Iterable<T> {
|
||||
val size: Int get() = context.size
|
||||
|
||||
operator fun get(i: Int): T
|
||||
|
||||
@ -181,6 +180,8 @@ interface Vector<T : Any> : SpaceElement<Vector<T>, VectorSpace<T>> {
|
||||
/**
 * Creates a real-valued vector of [size] elements; [initializer] supplies the value for each index.
 */
fun ofReal(size: Int, initializer: (Int) -> Double) = ArrayVector(
    ArrayVectorSpace(size, DoubleField, realNDFieldFactory),
    initializer
)
|
||||
|
||||
/**
 * Creates a real-valued vector from the listed coordinates by delegating to `DoubleArray.toVector()`.
 */
fun ofReal(vararg point: Double) = point.toVector()
|
||||
|
||||
fun equals(v1: Vector<*>, v2: Vector<*>): Boolean {
|
||||
if (v1 === v2) return true
|
||||
if (v1.context != v2.context) return false
|
||||
@ -265,6 +266,10 @@ class ArrayVector<T : Any> internal constructor(override val context: ArrayVecto
|
||||
}
|
||||
|
||||
override val self: ArrayVector<T> get() = this
|
||||
|
||||
/**
 * Iterates over the elements in index order. The elements are copied into a list when the iterator is requested.
 */
override fun iterator(): Iterator<T> = List(size) { index -> array[index] }.iterator()
|
||||
|
||||
/**
 * Renders the vector as `[e0, e1, ...]` using each element's own `toString()`.
 */
override fun toString(): String =
    joinToString(separator = ", ", prefix = "[", postfix = "]") { element -> element.toString() }
|
||||
}
|
||||
|
||||
typealias RealVector = Vector<Double>
|
||||
@ -284,6 +289,7 @@ interface LinearSolver<T : Any> {
|
||||
/**
 * Copies this array into a vector over [field], preserving element order.
 */
fun <T : Any> Array<T>.toVector(field: Field<T>) = Vector.of(size, field) { index -> this[index] }
|
||||
|
||||
/**
 * Copies this array into a real-valued vector, preserving element order.
 */
fun DoubleArray.toVector() = Vector.ofReal(size) { index -> this[index] }
|
||||
/**
 * Copies this list into a real-valued vector, preserving element order.
 */
fun List<Double>.toVector() = Vector.ofReal(size) { index -> this[index] }
|
||||
|
||||
/**
|
||||
* Convert matrix to vector if it is possible
|
||||
|
@ -105,9 +105,12 @@ abstract class NDField<T>(val shape: IntArray, val field: Field<T>) : Field<NDAr
|
||||
}
|
||||
|
||||
/**
|
||||
* NDStructure coupled to the context. Emulates Python ndarray
|
||||
* Immutable [NDStructure] coupled to the context. Emulates Python ndarray
|
||||
*/
|
||||
data class NDArray<T>(override val context: NDField<T>, private val structure: NDStructure<T>) : FieldElement<NDArray<T>, NDField<T>>, NDStructure<T> by structure {
|
||||
|
||||
//TODO ensure structure is immutable
|
||||
|
||||
override val self: NDArray<T>
|
||||
get() = this
|
||||
|
||||
|
@ -76,7 +76,7 @@ class DefaultStrides(override val shape: IntArray) : Strides {
|
||||
override fun offset(index: IntArray): Int {
|
||||
return index.mapIndexed { i, value ->
|
||||
if (value < 0 || value >= shape[i]) {
|
||||
throw RuntimeException("Index out of shape bounds: ($i,$value)")
|
||||
throw RuntimeException("Index $value out of shape bounds: (0,${shape[i]})")
|
||||
}
|
||||
value * strides[i]
|
||||
}.sum()
|
||||
|
@ -0,0 +1,16 @@
|
||||
package scientifik.kmath.histogram
|
||||
|
||||
/**
 * [Long] counter backed by a plain property; no synchronization is performed.
 */
actual class LongCounter {
    private var total: Long = 0

    /** Adds one to the total. */
    actual fun increment() {
        total += 1
    }

    /** Subtracts one from the total. */
    actual fun decrement() {
        total -= 1
    }

    /** Adds [l] to the total. */
    actual fun add(l: Long) {
        total += l
    }

    /** Returns the current total. */
    actual fun sum(): Long = total

    /** Sets the total back to zero. */
    actual fun reset() {
        total = 0
    }
}
|
||||
/**
 * [Double] counter backed by a plain property; no synchronization is performed.
 */
actual class DoubleCounter {
    private var total: Double = 0.0

    /** Adds [d] to the total. */
    actual fun add(d: Double) {
        total += d
    }

    /** Returns the current total. */
    actual fun sum(): Double = total

    /** Sets the total back to zero. */
    actual fun reset() {
        total = 0.0
    }
}
|
@ -0,0 +1,7 @@
|
||||
package scientifik.kmath.histogram
|
||||
|
||||
import java.util.concurrent.atomic.DoubleAdder
|
||||
import java.util.concurrent.atomic.LongAdder
|
||||
|
||||
actual typealias LongCounter = LongAdder
|
||||
actual typealias DoubleCounter = DoubleAdder
|
@ -0,0 +1,117 @@
|
||||
package scientifik.kmath.histogram
|
||||
|
||||
import scientifik.kmath.linear.RealVector
|
||||
import scientifik.kmath.linear.toVector
|
||||
import scientifik.kmath.structures.NDStructure
|
||||
import scientifik.kmath.structures.ndStructure
|
||||
import kotlin.math.floor
|
||||
|
||||
/**
 * Rectangular bin described by its [center] and full edge lengths [sizes] along each axis.
 * The number of hits is kept in [counter].
 */
class MultivariateBin(
    override val center: RealVector,
    val sizes: RealVector,
    val counter: LongCounter = LongCounter()
) : Bin {

    init {
        check(center.size == sizes.size) {
            "Dimension mismatch in bin creation. Expected ${center.size}, but found ${sizes.size}"
        }
    }

    /** Number of axes, taken from the size of [center]. */
    override val dimension: Int get() = center.size

    /** Current hit count of the bin. */
    override val value: Number get() = counter.sum()

    /**
     * A vector is inside the bin when every coordinate lies within half of the bin size
     * from the center along its axis (both borders included).
     */
    override fun contains(vector: RealVector): Boolean {
        assert(vector.size == center.size)
        return vector.withIndex().all { (axis, coordinate) ->
            val lowerEdge = center[axis] - sizes[axis] / 2
            val upperEdge = center[axis] + sizes[axis] / 2
            coordinate in lowerEdge..upperEdge
        }
    }

    /** Registers one more hit and returns this bin. */
    internal operator fun inc(): MultivariateBin {
        counter.increment()
        return this
    }
}
|
||||
|
||||
/**
|
||||
* Uniform multivariate histogram with fixed borders. Based on NDStructure implementation with complexity of m for bin search, where m is the number of dimensions
|
||||
*/
|
||||
/**
 * Uniform multivariate histogram with fixed borders.
 *
 * Along every axis the range `lower[axis]..upper[axis]` is divided into `binNums[axis]` equal bins.
 * One extra bin is added on each side: index 0 collects values below the lower limit (underflow)
 * and index `binNums[axis] + 1` collects values at or above the upper limit (overflow).
 *
 * @param lower lower limits of the histogram, one per axis
 * @param upper upper limits of the histogram, one per axis; must be strictly greater than [lower]
 * @param binNums number of regular bins per axis, 100 by default
 */
class FastHistogram(
    private val lower: RealVector,
    private val upper: RealVector,
    private val binNums: IntArray = IntArray(lower.size) { 100 }
) : Histogram<MultivariateBin> {

    init {
        // argument checks
        if (lower.size != upper.size) error("Dimension mismatch in histogram lower and upper limits.")
        if (lower.size != binNums.size) error("Dimension mismatch in bin count.")
        if ((upper - lower).any { it <= 0 }) error("Range for one of axis is not strictly positive")
    }

    override val dimension: Int get() = lower.size

    //TODO optimize binSize performance if needed
    // Width of a regular bin along each axis.
    private val binSize = (upper - lower).mapIndexed { index, value -> value / binNums[index] }.toVector()

    private val bins: NDStructure<MultivariateBin> by lazy {
        // two additional bins per axis: underflow (first) and overflow (last)
        val actualSizes = IntArray(binNums.size) { binNums[it] + 2 }
        ndStructure(actualSizes) { indexArray ->
            val center = indexArray.mapIndexed { axis, index ->
                when (index) {
                    0 -> Double.NEGATIVE_INFINITY
                    // the last valid index is size - 1; it is the overflow bin
                    actualSizes[axis] - 1 -> Double.POSITIVE_INFINITY
                    // regular bin number `index` covers [lower + (index - 1) * size, lower + index * size),
                    // so its center is half a bin to the right of its lower edge
                    else -> lower[axis] + (index - 0.5) * binSize[axis]
                }
            }.toVector()
            MultivariateBin(center, binSize)
        }
    }

    /**
     * Get internal [NDStructure] bin index for given axis
     */
    private fun getIndex(axis: Int, value: Double): Int {
        return when {
            value >= upper[axis] -> binNums[axis] + 1 // overflow
            value < lower[axis] -> 0 // underflow
            else -> floor((value - lower[axis]) / binSize[axis]).toInt() + 1
        }
    }

    /**
     * Returns the bin that receives [point]. Every coordinate maps to a regular, underflow
     * or overflow index, so the lookup is done directly by index.
     */
    override fun get(point: RealVector): MultivariateBin? {
        val index = IntArray(dimension) { getIndex(it, point[it]) }
        return bins[index]
    }

    /**
     * Increments the counter of the bin that receives [point].
     */
    override fun put(point: RealVector) {
        this[point]?.inc() ?: error("Could not find appropriate bin (should not be possible)")
    }

    /**
     * Iterates over all bins, including the underflow and overflow ones.
     */
    override fun iterator(): Iterator<MultivariateBin> = bins.asSequence().map { it.second }.iterator()

    companion object {

        /**
         * Creates a histogram with the default number of bins per axis.
         *
         * Use it like
         * ```
         *FastHistogram.fromRanges(
         *  (-1.0..1.0),
         *  (-1.0..1.0)
         *)
         *```
         */
        fun fromRanges(vararg ranges: ClosedFloatingPointRange<Double>): FastHistogram {
            return FastHistogram(ranges.map { it.start }.toVector(), ranges.map { it.endInclusive }.toVector())
        }

        /**
         * Creates a histogram with an explicit number of bins for every axis.
         *
         * Use it like
         * ```
         *FastHistogram.fromRanges(
         *  (-1.0..1.0) to 50,
         *  (-1.0..1.0) to 32
         *)
         *```
         */
        fun fromRanges(vararg ranges: Pair<ClosedFloatingPointRange<Double>, Int>): FastHistogram {
            return FastHistogram(
                ranges.map { it.first.start }.toVector(),
                ranges.map { it.first.endInclusive }.toVector(),
                ranges.map { it.second }.toIntArray()
            )
        }
    }
}
|
@ -0,0 +1,88 @@
|
||||
package scientifik.kmath.histogram
|
||||
|
||||
import scientifik.kmath.linear.RealVector
|
||||
import scientifik.kmath.linear.toVector
|
||||
import java.util.*
|
||||
import kotlin.math.floor
|
||||
|
||||
//TODO move to common
|
||||
|
||||
/**
 * One-dimensional bin centered at [position] and spanning [size] in total.
 * The number of hits is kept in [counter].
 */
class UnivariateBin(val position: Double, val size: Double, val counter: LongCounter = LongCounter()) : Bin {
    //TODO add weighting

    /** A univariate bin always has a single axis. */
    override val dimension: Int get() = 1

    /** Current hit count of the bin. */
    override val value: Number get() = counter.sum()

    /** The bin position wrapped into a one-element vector. */
    override val center: RealVector get() = doubleArrayOf(position).toVector()

    /**
     * A value is inside the bin when it is no further than half of [size] from [position]
     * (both borders included).
     */
    operator fun contains(value: Double): Boolean {
        val halfSize = size / 2
        return value >= position - halfSize && value <= position + halfSize
    }

    /** Only the first coordinate of [vector] is inspected. */
    override fun contains(vector: RealVector): Boolean = vector[0] in this

    /** Registers one more hit and returns this bin. */
    internal operator fun inc(): UnivariateBin {
        counter.increment()
        return this
    }
}
|
||||
|
||||
/**
|
||||
* Univariate histogram with log(n) bin search speed
|
||||
*/
|
||||
/**
 * Univariate histogram whose bins are created on demand by [factory] and stored in a [TreeMap]
 * keyed by bin position, which allows looking up the neighbouring bins of a value.
 */
class UnivariateHistogram private constructor(private val factory: (Double) -> UnivariateBin) : Histogram<UnivariateBin> {

    private val bins: TreeMap<Double, UnivariateBin> = TreeMap()

    /**
     * Finds an already existing bin containing [value]. Callers must hold the lock on this histogram,
     * because [TreeMap] does not tolerate reads concurrent with structural modification.
     */
    private operator fun get(value: Double): UnivariateBin? {
        // check the nearest bin positioned at or above the value
        val ceil = bins.ceilingEntry(value)?.value
        if (ceil != null && value in ceil) return ceil
        // check the nearest bin positioned at or below the value
        val floor = bins.floorEntry(value)?.value
        if (floor != null && value in floor) return floor
        // neither is valid, not found
        return null
    }

    /**
     * Asks [factory] for the bin matching [value] and registers it under its position.
     */
    private fun createBin(value: Double): UnivariateBin = factory(value).also {
        synchronized(this) { bins[it.position] = it }
    }

    override fun get(point: RealVector): UnivariateBin? = synchronized(this) { get(point[0]) }

    override val dimension: Int get() = 1

    // NOTE(review): iteration is not guarded by the lock; iterating while other threads put values
    // may fail with ConcurrentModificationException.
    override fun iterator(): Iterator<UnivariateBin> = bins.values.iterator()

    /**
     * Thread safe put operation.
     *
     * Lookup and creation of the bin happen under a single lock, so concurrent puts into a not yet
     * existing bin cannot create two bins that overwrite each other. The increment itself relies on
     * the bin counter.
     */
    fun put(value: Double) {
        val bin = synchronized(this) { get(value) ?: createBin(value) }
        bin.inc()
    }

    override fun put(point: RealVector) = put(point[0])

    companion object {
        /**
         * Histogram with equal bins of width [binSize]; bin centers are located at
         * `start + k * binSize` for integer `k`.
         */
        fun uniform(binSize: Double, start: Double = 0.0): UnivariateHistogram {
            return UnivariateHistogram { value ->
                val center = start + binSize * floor((value - start) / binSize + 0.5)
                UnivariateBin(center, binSize)
            }
        }

        /**
         * Histogram with bins between consecutive values of [borders] (sorted internally).
         * A value equal to an inner border is assigned to the bin on its left.
         */
        fun custom(borders: DoubleArray): UnivariateHistogram {
            val sorted = borders.sortedArray()
            return UnivariateHistogram { value ->
                when {
                    // NOTE(review): a bin positioned at infinity contains only that infinity itself
                    // (see UnivariateBin.contains), so every out-of-range put creates a fresh bin that
                    // replaces the previous one and the under/overflow count never exceeds 1.
                    value < sorted.first() -> UnivariateBin(Double.NEGATIVE_INFINITY, Double.MAX_VALUE)
                    value > sorted.last() -> UnivariateBin(Double.POSITIVE_INFINITY, Double.MAX_VALUE)
                    else -> {
                        // left border index of the first interval whose right border is not below the value
                        val index = (0 until sorted.size - 1).first { value <= sorted[it + 1] }
                        val left = sorted[index]
                        val right = sorted[index + 1]
                        UnivariateBin((left + right) / 2, (right - left))
                    }
                }
            }
        }
    }
}
|
||||
|
||||
/**
 * Puts every value of [sequence] into the histogram, in iteration order.
 */
fun UnivariateHistogram.fill(sequence: Iterable<Double>) {
    for (value in sequence) put(value)
}
|
@ -0,0 +1,43 @@
|
||||
package scientifik.kmath.histogram
|
||||
|
||||
import org.junit.Test
|
||||
import scientifik.kmath.linear.Vector
|
||||
import kotlin.random.Random
|
||||
import kotlin.test.assertEquals
|
||||
import kotlin.test.assertFalse
|
||||
import kotlin.test.assertTrue
|
||||
|
||||
class MultivariateHistogramTest {
    /**
     * A single point must land in exactly one bin, and that bin must report the point as contained.
     */
    @Test
    fun testSinglePutHistogram() {
        val histogram = FastHistogram.fromRanges(
            (-1.0..1.0),
            (-1.0..1.0)
        )
        // With the default 100 bins per axis the bin width is 0.02. The probe point is kept away
        // from bin borders so the assertions do not depend on floating point rounding.
        histogram.put(0.605, 0.605)
        val bin = histogram.find { it.value.toInt() > 0 } ?: error("No filled bin found after a single put")
        assertTrue { bin.contains(Vector.ofReal(0.605, 0.605)) }
        assertFalse { bin.contains(Vector.ofReal(-0.605, 0.605)) }
    }

    /**
     * The total count over all bins must equal the number of points put into the histogram.
     */
    @Test
    fun testSequentialPut() {
        val histogram = FastHistogram.fromRanges(
            (-1.0..1.0),
            (-1.0..1.0),
            (-1.0..1.0)
        )
        // fixed seed keeps the test reproducible
        val random = Random(1234)

        fun nextDouble() = random.nextDouble(-1.0, 1.0)

        val n = 10000

        histogram.fill {
            repeat(n) {
                yield(Vector.ofReal(nextDouble(), nextDouble(), nextDouble()))
            }
        }
        assertEquals(n, histogram.sumBy { it.value.toInt() })
    }
}
|
Loading…
Reference in New Issue
Block a user