From d799668083af8cbd32be58bde37b8b78d6eb8d55 Mon Sep 17 00:00:00 2001 From: Alexander Nozik Date: Sat, 24 Nov 2018 20:55:35 +0300 Subject: [PATCH] Faster implementation for FastHistogram --- .../kmath/histogram/FastHistogram.kt | 107 ++++++++++-------- .../scientifik/kmath/histogram/Histogram.kt | 2 + .../kmath/histogram/PhantomHistogram.kt | 7 +- .../kotlin/scientifik/kmath/linear/Vector.kt | 14 ++- .../scientifik/kmath/structures/Buffers.kt | 4 +- 5 files changed, 77 insertions(+), 57 deletions(-) diff --git a/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/FastHistogram.kt b/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/FastHistogram.kt index f9388f668..37f46b1d7 100644 --- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/FastHistogram.kt +++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/FastHistogram.kt @@ -1,44 +1,48 @@ package scientifik.kmath.histogram import scientifik.kmath.linear.toVector -import scientifik.kmath.structures.Buffer -import scientifik.kmath.structures.ListBuffer -import scientifik.kmath.structures.NDStructure -import scientifik.kmath.structures.ndStructure +import scientifik.kmath.structures.* import kotlin.math.floor -typealias RealPoint = Point - private operator fun RealPoint.minus(other: RealPoint) = ListBuffer((0 until size).map { get(it) - other[it] }) private inline fun Buffer.mapIndexed(crossinline mapper: (Int, Double) -> T): Sequence = (0 until size).asSequence().map { mapper(it, get(it)) } -class MultivariateBin(override val center: RealPoint, val sizes: RealPoint, var counter: Long = 0) : Bin { - init { - if (center.size != sizes.size) error("Dimension mismatch in bin creation. Expected ${center.size}, but found ${sizes.size}") - } - - override fun contains(vector: Buffer): Boolean { - if (vector.size != center.size) error("Dimension mismatch for input vector. Expected ${center.size}, but found ${vector.size}") - return vector.mapIndexed { i, value -> value in (center[i] - sizes[i] / 2)..(center[i] + sizes[i] / 2) }.all { it } - } - - override val value get() = counter - internal operator fun inc() = this.also { counter++ } - - override val dimension: Int get() = center.size -} +//class MultivariateBin(override val center: RealPoint, val sizes: RealPoint, var counter: Long = 0) : Bin { +// init { +// if (center.size != sizes.size) error("Dimension mismatch in bin creation. Expected ${center.size}, but found ${sizes.size}") +// } +// +// override fun contains(vector: Buffer): Boolean { +// if (vector.size != center.size) error("Dimension mismatch for input vector. Expected ${center.size}, but found ${vector.size}") +// return vector.mapIndexed { i, value -> value in (center[i] - sizes[i] / 2)..(center[i] + sizes[i] / 2) }.all { it } +// } +// +// override val value get() = counter +// internal operator fun inc() = this.also { counter++ } +// +// override val dimension: Int get() = center.size +//} /** * Uniform multivariate histogram with fixed borders. Based on NDStructure implementation with complexity of m for bin search, where m is the number of dimensions. - * The histogram is optimized for speed, but have large size in memory */ class FastHistogram( private val lower: RealPoint, private val upper: RealPoint, private val binNums: IntArray = IntArray(lower.size) { 20 } -) : MutableHistogram { +) : MutableHistogram> { + + + private val strides = DefaultStrides(IntArray(binNums.size) { binNums[it] + 2 }) + + private val values: NDStructure = ndStructure(strides) { LongCounter() } + + //private val weight: NDStructure = ndStructure(strides){null} + + //TODO optimize binSize performance if needed + private val binSize: RealPoint = ListBuffer((upper - lower).mapIndexed { index, value -> value / binNums[index] }.toList()) init { // argument checks @@ -50,24 +54,6 @@ class FastHistogram( override val dimension: Int get() = lower.size - //TODO optimize binSize performance if needed - private val binSize: RealPoint = ListBuffer((upper - lower).mapIndexed { index, value -> value / binNums[index] }.toList()) - - private val bins: NDStructure by lazy { - val actualSizes = IntArray(binNums.size) { binNums[it] + 2 } - ndStructure(actualSizes) { indexArray -> - val center = ListBuffer( - indexArray.mapIndexed { axis, index -> - when (index) { - 0 -> Double.NEGATIVE_INFINITY - actualSizes[axis] - 1 -> Double.POSITIVE_INFINITY - else -> lower[axis] + (index.toDouble() - 0.5) * binSize[axis] - } - } - ) - MultivariateBin(center, binSize) - } - } /** * Get internal [NDStructure] bin index for given axis @@ -80,24 +66,51 @@ class FastHistogram( } } + private fun getIndex(point: Buffer): IntArray = IntArray(dimension) { getIndex(it, point[it]) } - override fun get(point: Buffer): MultivariateBin? { - val index = IntArray(dimension) { getIndex(it, point[it]) } - return bins[index] + private fun getValue(index: IntArray): Long { + return values[index].sum() + } + + fun getValue(point: Buffer): Long { + return getValue(getIndex(point)) + } + + private fun getTemplate(index: IntArray): BinTemplate { + val center = index.mapIndexed { axis, i -> + when (i) { + 0 -> Double.NEGATIVE_INFINITY + strides.shape[axis] - 1 -> Double.POSITIVE_INFINITY + else -> lower[axis] + (i.toDouble() - 0.5) * binSize[axis] + } + }.toVector() + return BinTemplate(center, binSize) + } + + fun getTemplate(point: Buffer): BinTemplate { + return getTemplate(getIndex(point)) + } + + override fun get(point: Buffer): PhantomBin? { + val index = getIndex(point) + return PhantomBin(getTemplate(index), getValue(index)) } override fun put(point: Buffer, weight: Double) { if (weight != 1.0) TODO("Implement weighting") - this[point]?.inc() ?: error("Could not find appropriate bin (should not be possible)") + val index = getIndex(point) + values[index].increment() } - override fun iterator(): Iterator = bins.asSequence().map { it.second }.iterator() + override fun iterator(): Iterator> = values.asSequence().map { (index, value) -> + PhantomBin(getTemplate(index), value.sum()) + }.iterator() /** * Convert this histogram into NDStructure containing bin values but not bin descriptions */ fun asND(): NDStructure { - return ndStructure(this.bins.shape) { bins[it].value } + return ndStructure(this.values.shape) { values[it].sum() } } // /** diff --git a/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/Histogram.kt b/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/Histogram.kt index 8e429087a..86110f140 100644 --- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/Histogram.kt +++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/Histogram.kt @@ -6,6 +6,8 @@ import scientifik.kmath.structures.DoubleBuffer typealias Point = Buffer +typealias RealPoint = Buffer + /** * A simple geometric domain * TODO move to geometry module diff --git a/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/PhantomHistogram.kt b/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/PhantomHistogram.kt index 25a1bf33a..ffffb0d7d 100644 --- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/PhantomHistogram.kt +++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/histogram/PhantomHistogram.kt @@ -3,12 +3,13 @@ package scientifik.kmath.histogram import scientifik.kmath.linear.Vector import scientifik.kmath.operations.Space import scientifik.kmath.structures.NDStructure +import scientifik.kmath.structures.asSequence -data class BinTemplate>(val center: Vector, val sizes: Vector) { +data class BinTemplate>(val center: Vector, val sizes: Point) { fun contains(vector: Point): Boolean { if (vector.size != center.size) error("Dimension mismatch for input vector. Expected ${center.size}, but found ${vector.size}") - val upper = center + sizes / 2.0 - val lower = center - sizes / 2.0 + val upper = center.context.run { center + sizes / 2.0} + val lower = center.context.run {center - sizes / 2.0} return vector.asSequence().mapIndexed { i, value -> value in lower[i]..upper[i] }.all { it } diff --git a/kmath-core/src/commonMain/kotlin/scientifik/kmath/linear/Vector.kt b/kmath-core/src/commonMain/kotlin/scientifik/kmath/linear/Vector.kt index 38bc54ab6..755d58d9b 100644 --- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/linear/Vector.kt +++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/linear/Vector.kt @@ -39,18 +39,24 @@ interface Vector> : SpaceElement, VectorSpace> of(size: Int, field: F, initializer: (Int) -> T) = + fun > of(size: Int, field: F, initializer: (Int) -> T) = ArrayVector(ArrayVectorSpace(size, field), initializer) + private val realSpaceCache = HashMap>() + + private fun getRealSpace(size: Int): ArrayVectorSpace { + return realSpaceCache.getOrPut(size){ArrayVectorSpace(size, DoubleField, realNDFieldFactory)} + } + /** * Create vector of [Double] */ fun ofReal(size: Int, initializer: (Int) -> Double) = - ArrayVector(ArrayVectorSpace(size, DoubleField, realNDFieldFactory), initializer) + ArrayVector(getRealSpace(size), initializer) fun ofReal(vararg point: Double) = point.toVector() - fun equals(v1: Vector<*,*>, v2: Vector<*,*>): Boolean { + fun equals(v1: Vector<*, *>, v2: Vector<*, *>): Boolean { if (v1 === v2) return true if (v1.context != v2.context) return false for (i in 0 until v2.size) { @@ -74,8 +80,6 @@ class ArrayVectorSpace>( } - - class ArrayVector> internal constructor(override val context: VectorSpace, val element: NDElement) : Vector { constructor(context: ArrayVectorSpace, initializer: (Int) -> T) : this(context, context.ndField.produce { list -> initializer(list[0]) }) diff --git a/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/Buffers.kt b/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/Buffers.kt index 5b80fa135..a4bb76060 100644 --- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/Buffers.kt +++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/Buffers.kt @@ -12,14 +12,14 @@ interface Buffer { operator fun iterator(): Iterator - fun asSequence(): Sequence = iterator().asSequence() - /** * A shallow copy of the buffer */ fun copy(): Buffer } +fun Buffer.asSequence(): Sequence = iterator().asSequence() + interface MutableBuffer : Buffer { operator fun set(index: Int, value: T)