Histograms for jvm are in working order.

This commit is contained in:
Alexander Nozik 2018-10-28 16:47:54 +03:00
parent c7575caeee
commit c8329eef8a
10 changed files with 334 additions and 14 deletions

View File

@ -0,0 +1,20 @@
package scientifik.kmath.histogram
/*
* Common representation for atomic counters
*/
expect class LongCounter(){
fun decrement()
fun increment()
fun reset()
fun sum(): Long
fun add(l:Long)
}
expect class DoubleCounter(){
fun reset()
fun sum(): Double
fun add(d: Double)
}

View File

@ -1,12 +1,22 @@
package scientifik.kmath.histogram package scientifik.kmath.histogram
import scientifik.kmath.linear.RealVector import scientifik.kmath.linear.RealVector
import scientifik.kmath.linear.toVector
import scientifik.kmath.operations.Space import scientifik.kmath.operations.Space
/**
* A simple geometric domain
* TODO move to geometry module
*/
interface Domain {
operator fun contains(vector: RealVector): Boolean
val dimension: Int
}
/** /**
* The bin in the histogram. The histogram is by definition always done in the real space * The bin in the histogram. The histogram is by definition always done in the real space
*/ */
interface Bin { interface Bin : Domain {
/** /**
* The value of this bin * The value of this bin
*/ */
@ -14,26 +24,36 @@ interface Bin {
val center: RealVector val center: RealVector
} }
/**
* Creates a new bin with zero count corresponding to given point
*/
interface BinFactory<out B : Bin> {
fun createBin(point: RealVector): B
}
interface Histogram<out B : Bin> : Iterable<B> { interface Histogram<out B : Bin> : Iterable<B> {
/** /**
* Find existing bin, corresponding to given coordinates * Find existing bin, corresponding to given coordinates
*/ */
fun findBin(point: RealVector): B? operator fun get(point: RealVector): B?
/** /**
* Dimension of the histogram * Dimension of the histogram
*/ */
val dimension: Int val dimension: Int
/**
* Increment appropriate bin
*/
fun put(point: RealVector)
} }
fun Histogram<*>.put(vararg point: Double) = put(point.toVector())
fun Histogram<*>.fill(sequence: Iterable<RealVector>) = sequence.forEach { put(it) }
/**
* Pass a sequence builder into histogram
*/
fun Histogram<*>.fill(buider: suspend SequenceScope<RealVector>.() -> Unit) = fill(sequence(buider).asIterable())
/**
* A space to perform arithmetic operations on histograms
*/
interface HistogramSpace<B : Bin, H : Histogram<B>> : Space<H> { interface HistogramSpace<B : Bin, H : Histogram<B>> : Space<H> {
/** /**
* Rules for performing operations on bins * Rules for performing operations on bins

View File

@ -162,9 +162,8 @@ abstract class VectorSpace<T : Any>(val size: Int, val field: Field<T>) : Space<
} }
interface Vector<T : Any> : SpaceElement<Vector<T>, VectorSpace<T>> { interface Vector<T : Any> : SpaceElement<Vector<T>, VectorSpace<T>>, Iterable<T> {
val size: Int val size: Int get() = context.size
get() = context.size
operator fun get(i: Int): T operator fun get(i: Int): T
@ -181,6 +180,8 @@ interface Vector<T : Any> : SpaceElement<Vector<T>, VectorSpace<T>> {
fun ofReal(size: Int, initializer: (Int) -> Double) = fun ofReal(size: Int, initializer: (Int) -> Double) =
ArrayVector(ArrayVectorSpace(size, DoubleField, realNDFieldFactory), initializer) ArrayVector(ArrayVectorSpace(size, DoubleField, realNDFieldFactory), initializer)
fun ofReal(vararg point: Double) = point.toVector()
fun equals(v1: Vector<*>, v2: Vector<*>): Boolean { fun equals(v1: Vector<*>, v2: Vector<*>): Boolean {
if (v1 === v2) return true if (v1 === v2) return true
if (v1.context != v2.context) return false if (v1.context != v2.context) return false
@ -265,6 +266,10 @@ class ArrayVector<T : Any> internal constructor(override val context: ArrayVecto
} }
override val self: ArrayVector<T> get() = this override val self: ArrayVector<T> get() = this
override fun iterator(): Iterator<T> = (0 until size).map { array[it] }.iterator()
override fun toString(): String = this.joinToString(prefix = "[",postfix = "]", separator = ", "){it.toString()}
} }
typealias RealVector = Vector<Double> typealias RealVector = Vector<Double>
@ -284,6 +289,7 @@ interface LinearSolver<T : Any> {
fun <T : Any> Array<T>.toVector(field: Field<T>) = Vector.of(size, field) { this[it] } fun <T : Any> Array<T>.toVector(field: Field<T>) = Vector.of(size, field) { this[it] }
fun DoubleArray.toVector() = Vector.ofReal(this.size) { this[it] } fun DoubleArray.toVector() = Vector.ofReal(this.size) { this[it] }
fun List<Double>.toVector() = Vector.ofReal(this.size) { this[it] }
/** /**
* Convert matrix to vector if it is possible * Convert matrix to vector if it is possible

View File

@ -105,9 +105,12 @@ abstract class NDField<T>(val shape: IntArray, val field: Field<T>) : Field<NDAr
} }
/** /**
* NDStructure coupled to the context. Emulates Python ndarray * Immutable [NDStructure] coupled to the context. Emulates Python ndarray
*/ */
data class NDArray<T>(override val context: NDField<T>, private val structure: NDStructure<T>) : FieldElement<NDArray<T>, NDField<T>>, NDStructure<T> by structure { data class NDArray<T>(override val context: NDField<T>, private val structure: NDStructure<T>) : FieldElement<NDArray<T>, NDField<T>>, NDStructure<T> by structure {
//TODO ensure structure is immutable
override val self: NDArray<T> override val self: NDArray<T>
get() = this get() = this

View File

@ -76,7 +76,7 @@ class DefaultStrides(override val shape: IntArray) : Strides {
override fun offset(index: IntArray): Int { override fun offset(index: IntArray): Int {
return index.mapIndexed { i, value -> return index.mapIndexed { i, value ->
if (value < 0 || value >= shape[i]) { if (value < 0 || value >= shape[i]) {
throw RuntimeException("Index out of shape bounds: ($i,$value)") throw RuntimeException("Index $value out of shape bounds: (0,${shape[i]})")
} }
value * strides[i] value * strides[i]
}.sum() }.sum()

View File

@ -0,0 +1,16 @@
package scientifik.kmath.histogram
actual class LongCounter{
private var sum: Long = 0
actual fun decrement() {sum--}
actual fun increment() {sum++}
actual fun reset() {sum = 0}
actual fun sum(): Long = sum
actual fun add(l: Long) {sum+=l}
}
actual class DoubleCounter{
private var sum: Double = 0.0
actual fun reset() {sum = 0.0}
actual fun sum(): Double = sum
actual fun add(d: Double) {sum+=d}
}

View File

@ -0,0 +1,7 @@
package scientifik.kmath.histogram
import java.util.concurrent.atomic.DoubleAdder
import java.util.concurrent.atomic.LongAdder
actual typealias LongCounter = LongAdder
actual typealias DoubleCounter = DoubleAdder

View File

@ -0,0 +1,117 @@
package scientifik.kmath.histogram
import scientifik.kmath.linear.RealVector
import scientifik.kmath.linear.toVector
import scientifik.kmath.structures.NDStructure
import scientifik.kmath.structures.ndStructure
import kotlin.math.floor
class MultivariateBin(override val center: RealVector, val sizes: RealVector, val counter: LongCounter = LongCounter()) : Bin {
init {
if (center.size != sizes.size) error("Dimension mismatch in bin creation. Expected ${center.size}, but found ${sizes.size}")
}
override fun contains(vector: RealVector): Boolean {
assert(vector.size == center.size)
return vector.asSequence().mapIndexed { i, value -> value in (center[i] - sizes[i] / 2)..(center[i] + sizes[i] / 2) }.all { it }
}
override val value: Number get() = counter.sum()
internal operator fun inc() = this.also { counter.increment() }
override val dimension: Int get() = center.size
}
/**
* Uniform multivariate histogram with fixed borders. Based on NDStructure implementation with complexity of m for bin search, where m is the number of dimensions
*/
class FastHistogram(
private val lower: RealVector,
private val upper: RealVector,
private val binNums: IntArray = IntArray(lower.size) { 100 }
) : Histogram<MultivariateBin> {
init {
// argument checks
if (lower.size != upper.size) error("Dimension mismatch in histogram lower and upper limits.")
if (lower.size != binNums.size) error("Dimension mismatch in bin count.")
if ((upper - lower).any { it <= 0 }) error("Range for one of axis is not strictly positive")
}
override val dimension: Int get() = lower.size
//TODO optimize binSize performance if needed
private val binSize = (upper - lower).mapIndexed { index, value -> value / binNums[index] }.toVector()
private val bins: NDStructure<MultivariateBin> by lazy {
val actualSizes = IntArray(binNums.size) { binNums[it] + 2 }
ndStructure(actualSizes) { indexArray ->
val center = indexArray.mapIndexed { axis, index ->
when (index) {
0 -> Double.NEGATIVE_INFINITY
actualSizes[axis] -> Double.POSITIVE_INFINITY
else -> lower[axis] + (index - 1) * binSize[axis]
}
}.toVector()
MultivariateBin(center, binSize)
}
}
/**
* Get internal [NDStructure] bin index for given axis
*/
private fun getIndex(axis: Int, value: Double): Int {
return when {
value >= upper[axis] -> binNums[axis] + 1 // overflow
value < lower[axis] -> 0 // underflow
else -> floor((value - lower[axis]) / binSize[axis]).toInt() + 1
}
}
override fun get(point: RealVector): MultivariateBin? {
val index = IntArray(dimension) { getIndex(it, point[it]) }
return bins[index]
}
override fun put(point: RealVector) {
this[point]?.inc() ?: error("Could not find appropriate bin (should not be possible)")
}
override fun iterator(): Iterator<MultivariateBin> = bins.asSequence().map { it.second }.iterator()
companion object {
/**
* Use it like
* ```
*FastHistogram.fromRanges(
* (-1.0..1.0),
* (-1.0..1.0)
*)
*```
*/
fun fromRanges(vararg ranges: ClosedFloatingPointRange<Double>): FastHistogram {
return FastHistogram(ranges.map { it.start }.toVector(), ranges.map { it.endInclusive }.toVector())
}
/**
* Use it like
* ```
*FastHistogram.fromRanges(
* (-1.0..1.0) to 50,
* (-1.0..1.0) to 32
*)
*```
*/
fun fromRanges(vararg ranges: Pair<ClosedFloatingPointRange<Double>,Int>): FastHistogram {
return FastHistogram(
ranges.map { it.first.start }.toVector(),
ranges.map { it.first.endInclusive }.toVector(),
ranges.map { it.second }.toIntArray()
)
}
}
}

View File

@ -0,0 +1,88 @@
package scientifik.kmath.histogram
import scientifik.kmath.linear.RealVector
import scientifik.kmath.linear.toVector
import java.util.*
import kotlin.math.floor
//TODO move to common
class UnivariateBin(val position: Double, val size: Double, val counter: LongCounter = LongCounter()) : Bin {
//TODO add weighting
override val value: Number get() = counter.sum()
override val center: RealVector get() = doubleArrayOf(position).toVector()
operator fun contains(value: Double): Boolean = value in (position - size / 2)..(position + size / 2)
override fun contains(vector: RealVector): Boolean = contains(vector[0])
internal operator fun inc() = this.also { counter.increment()}
override val dimension: Int get() = 1
}
/**
* Univariate histogram with log(n) bin search speed
*/
class UnivariateHistogram private constructor(private val factory: (Double) -> UnivariateBin) : Histogram<UnivariateBin> {
private val bins: TreeMap<Double, UnivariateBin> = TreeMap()
private operator fun get(value: Double): UnivariateBin? {
// check ceiling entry and return it if it is what needed
val ceil = bins.ceilingEntry(value)?.value
if (ceil != null && value in ceil) return ceil
//check floor entry
val floor = bins.floorEntry(value)?.value
if (floor != null && value in floor) return floor
//neither is valid, not found
return null
}
private fun createBin(value: Double): UnivariateBin = factory(value).also {
synchronized(this) { bins.put(it.position, it) }
}
override fun get(point: RealVector): UnivariateBin? = get(point[0])
override val dimension: Int get() = 1
override fun iterator(): Iterator<UnivariateBin> = bins.values.iterator()
/**
* Thread safe put operation
*/
fun put(value: Double) {
(get(value) ?: createBin(value)).inc()
}
override fun put(point: RealVector) = put(point[0])
companion object {
fun uniform(binSize: Double, start: Double = 0.0): UnivariateHistogram {
return UnivariateHistogram { value ->
val center = start + binSize * floor((value - start) / binSize + 0.5)
UnivariateBin(center, binSize)
}
}
fun custom(borders: DoubleArray): UnivariateHistogram {
val sorted = borders.sortedArray()
return UnivariateHistogram { value ->
if (value < sorted.first()) {
UnivariateBin(Double.NEGATIVE_INFINITY, Double.MAX_VALUE)
} else if (value > sorted.last()) {
UnivariateBin(Double.POSITIVE_INFINITY, Double.MAX_VALUE)
} else {
val index = (0 until sorted.size).first { value > sorted[it] }
val left = sorted[index]
val right = sorted[index + 1]
UnivariateBin((left + right) / 2, (right - left))
}
}
}
}
}
fun UnivariateHistogram.fill(sequence: Iterable<Double>) = sequence.forEach { put(it) }

View File

@ -0,0 +1,43 @@
package scientifik.kmath.histogram
import org.junit.Test
import scientifik.kmath.linear.Vector
import kotlin.random.Random
import kotlin.test.assertEquals
import kotlin.test.assertFalse
import kotlin.test.assertTrue
class MultivariateHistogramTest {
@Test
fun testSinglePutHistogram() {
val histogram = FastHistogram.fromRanges(
(-1.0..1.0),
(-1.0..1.0)
)
histogram.put(0.6, 0.6)
val bin = histogram.find { it.value.toInt() > 0 }!!
assertTrue { bin.contains(Vector.ofReal(0.6, 0.6)) }
assertFalse { bin.contains(Vector.ofReal(-0.6, 0.6)) }
}
@Test
fun testSequentialPut(){
val histogram = FastHistogram.fromRanges(
(-1.0..1.0),
(-1.0..1.0),
(-1.0..1.0)
)
val random = Random(1234)
fun nextDouble() = random.nextDouble(-1.0,1.0)
val n = 10000
histogram.fill {
repeat(n){
yield(Vector.ofReal(nextDouble(),nextDouble(),nextDouble()))
}
}
assertEquals(n, histogram.sumBy { it.value.toInt() })
}
}