From c8329eef8a8d21c3fbd0dfac17bf4a16414613f6 Mon Sep 17 00:00:00 2001 From: Alexander Nozik Date: Sun, 28 Oct 2018 16:47:54 +0300 Subject: [PATCH] Histograms for jvm are in working order. --- .../scientifik/kmath/histogram/Counters.kt | 20 +++ .../scientifik/kmath/histogram/Histogram.kt | 38 ++++-- .../scientifik/kmath/linear/LinearAlgrebra.kt | 12 +- .../scientifik/kmath/structures/NDField.kt | 5 +- .../kmath/structures/NDStructure.kt | 2 +- .../scientifik/kmath/histogram/Counters.kt | 16 +++ .../scientifik/kmath/histogram/Counters.kt | 7 ++ .../kmath/histogram/FastHistogram.kt | 117 ++++++++++++++++++ .../kmath/histogram/UnivariateHistogram.kt | 88 +++++++++++++ .../histogram/MultivariateHistogramTest.kt | 43 +++++++ 10 files changed, 334 insertions(+), 14 deletions(-) create mode 100644 kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/Counters.kt create mode 100644 kmath-core/src/jsMain/kotlin/scientifik/kmath/histogram/Counters.kt create mode 100644 kmath-core/src/jvmMain/kotlin/scientifik/kmath/histogram/Counters.kt create mode 100644 kmath-core/src/jvmMain/kotlin/scientifik/kmath/histogram/FastHistogram.kt create mode 100644 kmath-core/src/jvmMain/kotlin/scientifik/kmath/histogram/UnivariateHistogram.kt create mode 100644 kmath-core/src/jvmTest/kotlin/scientifik/kmath/histogram/MultivariateHistogramTest.kt diff --git a/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/Counters.kt b/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/Counters.kt new file mode 100644 index 000000000..f6cc1f822 --- /dev/null +++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/Counters.kt @@ -0,0 +1,20 @@ +package scientifik.kmath.histogram + +/* + * Common representation for atomic counters + */ + + +expect class LongCounter(){ + fun decrement() + fun increment() + fun reset() + fun sum(): Long + fun add(l:Long) +} + +expect class DoubleCounter(){ + fun reset() + fun sum(): Double + fun add(d: Double) +} \ No newline at end of file diff --git a/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/Histogram.kt b/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/Histogram.kt index 0a781e613..79b06ee10 100644 --- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/Histogram.kt +++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/Histogram.kt @@ -1,12 +1,22 @@ package scientifik.kmath.histogram import scientifik.kmath.linear.RealVector +import scientifik.kmath.linear.toVector import scientifik.kmath.operations.Space +/** + * A simple geometric domain + * TODO move to geometry module + */ +interface Domain { + operator fun contains(vector: RealVector): Boolean + val dimension: Int +} + /** * The bin in the histogram. The histogram is by definition always done in the real space */ -interface Bin { +interface Bin : Domain { /** * The value of this bin */ @@ -14,26 +24,36 @@ interface Bin { val center: RealVector } -/** - * Creates a new bin with zero count corresponding to given point - */ -interface BinFactory { - fun createBin(point: RealVector): B -} - interface Histogram : Iterable { /** * Find existing bin, corresponding to given coordinates */ - fun findBin(point: RealVector): B? + operator fun get(point: RealVector): B? /** * Dimension of the histogram */ val dimension: Int + + /** + * Increment appropriate bin + */ + fun put(point: RealVector) } +fun Histogram<*>.put(vararg point: Double) = put(point.toVector()) + +fun Histogram<*>.fill(sequence: Iterable) = sequence.forEach { put(it) } + +/** + * Pass a sequence builder into histogram + */ +fun Histogram<*>.fill(buider: suspend SequenceScope.() -> Unit) = fill(sequence(buider).asIterable()) + +/** + * A space to perform arithmetic operations on histograms + */ interface HistogramSpace> : Space { /** * Rules for performing operations on bins diff --git a/kmath-core/src/commonMain/kotlin/scientifik/kmath/linear/LinearAlgrebra.kt b/kmath-core/src/commonMain/kotlin/scientifik/kmath/linear/LinearAlgrebra.kt index 333292997..269a098e3 100644 --- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/linear/LinearAlgrebra.kt +++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/linear/LinearAlgrebra.kt @@ -162,9 +162,8 @@ abstract class VectorSpace(val size: Int, val field: Field) : Space< } -interface Vector : SpaceElement, VectorSpace> { - val size: Int - get() = context.size +interface Vector : SpaceElement, VectorSpace>, Iterable { + val size: Int get() = context.size operator fun get(i: Int): T @@ -181,6 +180,8 @@ interface Vector : SpaceElement, VectorSpace> { fun ofReal(size: Int, initializer: (Int) -> Double) = ArrayVector(ArrayVectorSpace(size, DoubleField, realNDFieldFactory), initializer) + fun ofReal(vararg point: Double) = point.toVector() + fun equals(v1: Vector<*>, v2: Vector<*>): Boolean { if (v1 === v2) return true if (v1.context != v2.context) return false @@ -265,6 +266,10 @@ class ArrayVector internal constructor(override val context: ArrayVecto } override val self: ArrayVector get() = this + + override fun iterator(): Iterator = (0 until size).map { array[it] }.iterator() + + override fun toString(): String = this.joinToString(prefix = "[",postfix = "]", separator = ", "){it.toString()} } typealias RealVector = Vector @@ -284,6 +289,7 @@ interface LinearSolver { fun Array.toVector(field: Field) = Vector.of(size, field) { this[it] } fun DoubleArray.toVector() = Vector.ofReal(this.size) { this[it] } +fun List.toVector() = Vector.ofReal(this.size) { this[it] } /** * Convert matrix to vector if it is possible diff --git a/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/NDField.kt b/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/NDField.kt index 7e0032597..9461bee85 100644 --- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/NDField.kt +++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/NDField.kt @@ -105,9 +105,12 @@ abstract class NDField(val shape: IntArray, val field: Field) : Field(override val context: NDField, private val structure: NDStructure) : FieldElement, NDField>, NDStructure by structure { + + //TODO ensure structure is immutable + override val self: NDArray get() = this diff --git a/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/NDStructure.kt b/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/NDStructure.kt index ba3a91487..a6880362d 100644 --- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/NDStructure.kt +++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/NDStructure.kt @@ -76,7 +76,7 @@ class DefaultStrides(override val shape: IntArray) : Strides { override fun offset(index: IntArray): Int { return index.mapIndexed { i, value -> if (value < 0 || value >= shape[i]) { - throw RuntimeException("Index out of shape bounds: ($i,$value)") + throw RuntimeException("Index $value out of shape bounds: (0,${shape[i]})") } value * strides[i] }.sum() diff --git a/kmath-core/src/jsMain/kotlin/scientifik/kmath/histogram/Counters.kt b/kmath-core/src/jsMain/kotlin/scientifik/kmath/histogram/Counters.kt new file mode 100644 index 000000000..3c2915587 --- /dev/null +++ b/kmath-core/src/jsMain/kotlin/scientifik/kmath/histogram/Counters.kt @@ -0,0 +1,16 @@ +package scientifik.kmath.histogram + +actual class LongCounter{ + private var sum: Long = 0 + actual fun decrement() {sum--} + actual fun increment() {sum++} + actual fun reset() {sum = 0} + actual fun sum(): Long = sum + actual fun add(l: Long) {sum+=l} +} +actual class DoubleCounter{ + private var sum: Double = 0.0 + actual fun reset() {sum = 0.0} + actual fun sum(): Double = sum + actual fun add(d: Double) {sum+=d} +} \ No newline at end of file diff --git a/kmath-core/src/jvmMain/kotlin/scientifik/kmath/histogram/Counters.kt b/kmath-core/src/jvmMain/kotlin/scientifik/kmath/histogram/Counters.kt new file mode 100644 index 000000000..bb3667f7d --- /dev/null +++ b/kmath-core/src/jvmMain/kotlin/scientifik/kmath/histogram/Counters.kt @@ -0,0 +1,7 @@ +package scientifik.kmath.histogram + +import java.util.concurrent.atomic.DoubleAdder +import java.util.concurrent.atomic.LongAdder + +actual typealias LongCounter = LongAdder +actual typealias DoubleCounter = DoubleAdder \ No newline at end of file diff --git a/kmath-core/src/jvmMain/kotlin/scientifik/kmath/histogram/FastHistogram.kt b/kmath-core/src/jvmMain/kotlin/scientifik/kmath/histogram/FastHistogram.kt new file mode 100644 index 000000000..f2f5c04bb --- /dev/null +++ b/kmath-core/src/jvmMain/kotlin/scientifik/kmath/histogram/FastHistogram.kt @@ -0,0 +1,117 @@ +package scientifik.kmath.histogram + +import scientifik.kmath.linear.RealVector +import scientifik.kmath.linear.toVector +import scientifik.kmath.structures.NDStructure +import scientifik.kmath.structures.ndStructure +import kotlin.math.floor + +class MultivariateBin(override val center: RealVector, val sizes: RealVector, val counter: LongCounter = LongCounter()) : Bin { + init { + if (center.size != sizes.size) error("Dimension mismatch in bin creation. Expected ${center.size}, but found ${sizes.size}") + } + + override fun contains(vector: RealVector): Boolean { + assert(vector.size == center.size) + return vector.asSequence().mapIndexed { i, value -> value in (center[i] - sizes[i] / 2)..(center[i] + sizes[i] / 2) }.all { it } + } + + override val value: Number get() = counter.sum() + internal operator fun inc() = this.also { counter.increment() } + + override val dimension: Int get() = center.size +} + +/** + * Uniform multivariate histogram with fixed borders. Based on NDStructure implementation with complexity of m for bin search, where m is the number of dimensions + */ +class FastHistogram( + private val lower: RealVector, + private val upper: RealVector, + private val binNums: IntArray = IntArray(lower.size) { 100 } +) : Histogram { + + init { + // argument checks + if (lower.size != upper.size) error("Dimension mismatch in histogram lower and upper limits.") + if (lower.size != binNums.size) error("Dimension mismatch in bin count.") + if ((upper - lower).any { it <= 0 }) error("Range for one of axis is not strictly positive") + } + + + override val dimension: Int get() = lower.size + + //TODO optimize binSize performance if needed + private val binSize = (upper - lower).mapIndexed { index, value -> value / binNums[index] }.toVector() + + private val bins: NDStructure by lazy { + val actualSizes = IntArray(binNums.size) { binNums[it] + 2 } + ndStructure(actualSizes) { indexArray -> + val center = indexArray.mapIndexed { axis, index -> + when (index) { + 0 -> Double.NEGATIVE_INFINITY + actualSizes[axis] -> Double.POSITIVE_INFINITY + else -> lower[axis] + (index - 1) * binSize[axis] + } + }.toVector() + MultivariateBin(center, binSize) + } + } + + /** + * Get internal [NDStructure] bin index for given axis + */ + private fun getIndex(axis: Int, value: Double): Int { + return when { + value >= upper[axis] -> binNums[axis] + 1 // overflow + value < lower[axis] -> 0 // underflow + else -> floor((value - lower[axis]) / binSize[axis]).toInt() + 1 + } + } + + + override fun get(point: RealVector): MultivariateBin? { + val index = IntArray(dimension) { getIndex(it, point[it]) } + return bins[index] + } + + override fun put(point: RealVector) { + this[point]?.inc() ?: error("Could not find appropriate bin (should not be possible)") + } + + override fun iterator(): Iterator = bins.asSequence().map { it.second }.iterator() + + companion object { + + /** + * Use it like + * ``` + *FastHistogram.fromRanges( + * (-1.0..1.0), + * (-1.0..1.0) + *) + *``` + */ + fun fromRanges(vararg ranges: ClosedFloatingPointRange): FastHistogram { + return FastHistogram(ranges.map { it.start }.toVector(), ranges.map { it.endInclusive }.toVector()) + } + + /** + * Use it like + * ``` + *FastHistogram.fromRanges( + * (-1.0..1.0) to 50, + * (-1.0..1.0) to 32 + *) + *``` + */ + fun fromRanges(vararg ranges: Pair,Int>): FastHistogram { + return FastHistogram( + ranges.map { it.first.start }.toVector(), + ranges.map { it.first.endInclusive }.toVector(), + ranges.map { it.second }.toIntArray() + ) + } + } + +} \ No newline at end of file diff --git a/kmath-core/src/jvmMain/kotlin/scientifik/kmath/histogram/UnivariateHistogram.kt b/kmath-core/src/jvmMain/kotlin/scientifik/kmath/histogram/UnivariateHistogram.kt new file mode 100644 index 000000000..993b1e850 --- /dev/null +++ b/kmath-core/src/jvmMain/kotlin/scientifik/kmath/histogram/UnivariateHistogram.kt @@ -0,0 +1,88 @@ +package scientifik.kmath.histogram + +import scientifik.kmath.linear.RealVector +import scientifik.kmath.linear.toVector +import java.util.* +import kotlin.math.floor + +//TODO move to common + +class UnivariateBin(val position: Double, val size: Double, val counter: LongCounter = LongCounter()) : Bin { + //TODO add weighting + override val value: Number get() = counter.sum() + + override val center: RealVector get() = doubleArrayOf(position).toVector() + + operator fun contains(value: Double): Boolean = value in (position - size / 2)..(position + size / 2) + + override fun contains(vector: RealVector): Boolean = contains(vector[0]) + + internal operator fun inc() = this.also { counter.increment()} + + override val dimension: Int get() = 1 +} + +/** + * Univariate histogram with log(n) bin search speed + */ +class UnivariateHistogram private constructor(private val factory: (Double) -> UnivariateBin) : Histogram { + + private val bins: TreeMap = TreeMap() + + private operator fun get(value: Double): UnivariateBin? { + // check ceiling entry and return it if it is what needed + val ceil = bins.ceilingEntry(value)?.value + if (ceil != null && value in ceil) return ceil + //check floor entry + val floor = bins.floorEntry(value)?.value + if (floor != null && value in floor) return floor + //neither is valid, not found + return null + } + + private fun createBin(value: Double): UnivariateBin = factory(value).also { + synchronized(this) { bins.put(it.position, it) } + } + + override fun get(point: RealVector): UnivariateBin? = get(point[0]) + + override val dimension: Int get() = 1 + + override fun iterator(): Iterator = bins.values.iterator() + + /** + * Thread safe put operation + */ + fun put(value: Double) { + (get(value) ?: createBin(value)).inc() + } + + override fun put(point: RealVector) = put(point[0]) + + companion object { + fun uniform(binSize: Double, start: Double = 0.0): UnivariateHistogram { + return UnivariateHistogram { value -> + val center = start + binSize * floor((value - start) / binSize + 0.5) + UnivariateBin(center, binSize) + } + } + + fun custom(borders: DoubleArray): UnivariateHistogram { + val sorted = borders.sortedArray() + return UnivariateHistogram { value -> + if (value < sorted.first()) { + UnivariateBin(Double.NEGATIVE_INFINITY, Double.MAX_VALUE) + } else if (value > sorted.last()) { + UnivariateBin(Double.POSITIVE_INFINITY, Double.MAX_VALUE) + } else { + val index = (0 until sorted.size).first { value > sorted[it] } + val left = sorted[index] + val right = sorted[index + 1] + UnivariateBin((left + right) / 2, (right - left)) + } + } + } + } +} + +fun UnivariateHistogram.fill(sequence: Iterable) = sequence.forEach { put(it) } diff --git a/kmath-core/src/jvmTest/kotlin/scientifik/kmath/histogram/MultivariateHistogramTest.kt b/kmath-core/src/jvmTest/kotlin/scientifik/kmath/histogram/MultivariateHistogramTest.kt new file mode 100644 index 000000000..e80a7340e --- /dev/null +++ b/kmath-core/src/jvmTest/kotlin/scientifik/kmath/histogram/MultivariateHistogramTest.kt @@ -0,0 +1,43 @@ +package scientifik.kmath.histogram + +import org.junit.Test +import scientifik.kmath.linear.Vector +import kotlin.random.Random +import kotlin.test.assertEquals +import kotlin.test.assertFalse +import kotlin.test.assertTrue + +class MultivariateHistogramTest { + @Test + fun testSinglePutHistogram() { + val histogram = FastHistogram.fromRanges( + (-1.0..1.0), + (-1.0..1.0) + ) + histogram.put(0.6, 0.6) + val bin = histogram.find { it.value.toInt() > 0 }!! + assertTrue { bin.contains(Vector.ofReal(0.6, 0.6)) } + assertFalse { bin.contains(Vector.ofReal(-0.6, 0.6)) } + } + + @Test + fun testSequentialPut(){ + val histogram = FastHistogram.fromRanges( + (-1.0..1.0), + (-1.0..1.0), + (-1.0..1.0) + ) + val random = Random(1234) + + fun nextDouble() = random.nextDouble(-1.0,1.0) + + val n = 10000 + + histogram.fill { + repeat(n){ + yield(Vector.ofReal(nextDouble(),nextDouble(),nextDouble())) + } + } + assertEquals(n, histogram.sumBy { it.value.toInt() }) + } +} \ No newline at end of file