diff --git a/examples/build.gradle.kts b/examples/build.gradle.kts index 5dd40b609..a48b4d0d9 100644 --- a/examples/build.gradle.kts +++ b/examples/build.gradle.kts @@ -106,6 +106,7 @@ kotlin.sourceSets.all { with(languageSettings) { useExperimentalAnnotation("kotlin.contracts.ExperimentalContracts") useExperimentalAnnotation("kotlin.ExperimentalUnsignedTypes") + useExperimentalAnnotation("space.kscience.kmath.misc.UnstableKMathAPI") } } diff --git a/examples/src/main/kotlin/space/kscience/kmath/commons/fit/fitWithAutoDiff.kt b/examples/src/main/kotlin/space/kscience/kmath/commons/fit/fitWithAutoDiff.kt index ca4d9c181..04c55b34c 100644 --- a/examples/src/main/kotlin/space/kscience/kmath/commons/fit/fitWithAutoDiff.kt +++ b/examples/src/main/kotlin/space/kscience/kmath/commons/fit/fitWithAutoDiff.kt @@ -13,9 +13,8 @@ import space.kscience.kmath.optimization.OptimizationResult import space.kscience.kmath.real.DoubleVector import space.kscience.kmath.real.map import space.kscience.kmath.real.step -import space.kscience.kmath.stat.Distribution import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.normal +import space.kscience.kmath.stat.distributions.NormalDistribution import space.kscience.kmath.structures.asIterable import space.kscience.kmath.structures.toList import kotlin.math.pow @@ -37,10 +36,9 @@ operator fun TraceValues.invoke(vector: DoubleVector) { /** * Least squares fie with auto-differentiation. Uses `kmath-commons` and `kmath-for-real` modules. 
*/ -fun main() { - +suspend fun main() { //A generator for a normally distributed values - val generator = Distribution.normal() + val generator = NormalDistribution(2.0, 7.0) //A chain/flow of random values with the given seed val chain = generator.sample(RandomGenerator.default(112667)) @@ -53,7 +51,7 @@ fun main() { //Perform an operation on each x value (much more effective, than numpy) val y = x.map { val value = it.pow(2) + it + 1 - value + chain.nextDouble() * sqrt(value) + value + chain.next() * sqrt(value) } // this will also work, but less effective: // val y = x.pow(2)+ x + 1 + chain.nextDouble() @@ -103,4 +101,4 @@ fun main() { } page.makeFile() -} \ No newline at end of file +} diff --git a/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionBenchmark.kt b/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionBenchmark.kt index 1761ed1b5..bfd138502 100644 --- a/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionBenchmark.kt +++ b/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionBenchmark.kt @@ -1,23 +1,24 @@ -package kscience.kmath.commons.prob +package space.kscience.kmath.stat import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.async import kotlinx.coroutines.runBlocking -import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSampler +import space.kscience.kmath.stat.samplers.GaussianSampler import org.apache.commons.rng.simple.RandomSource -import space.kscience.kmath.stat.* import java.time.Duration import java.time.Instant +import org.apache.commons.rng.sampling.distribution.GaussianSampler as CMGaussianSampler +import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSampler as CMZigguratNormalizedGaussianSampler -private fun runChain(): Duration { +private suspend fun runKMathChained(): Duration { val generator = RandomGenerator.fromSource(RandomSource.MT, 123L) - val normal = Distribution.normal(NormalSamplerMethod.Ziggurat) - val chain = 
normal.sample(generator) + val normal = GaussianSampler.of(7.0, 2.0) + val chain = normal.sample(generator).blocking() val startTime = Instant.now() var sum = 0.0 repeat(10000001) { counter -> - sum += chain.nextDouble() + sum += chain.next() if (counter % 100000 == 0) { val duration = Duration.between(startTime, Instant.now()) @@ -29,9 +30,15 @@ private fun runChain(): Duration { return Duration.between(startTime, Instant.now()) } -private fun runDirect(): Duration { - val provider = RandomSource.create(RandomSource.MT, 123L) - val sampler = ZigguratNormalizedGaussianSampler(provider) +private fun runApacheDirect(): Duration { + val rng = RandomSource.create(RandomSource.MT, 123L) + + val sampler = CMGaussianSampler.of( + CMZigguratNormalizedGaussianSampler.of(rng), + 7.0, + 2.0 + ) + val startTime = Instant.now() var sum = 0.0 @@ -51,11 +58,9 @@ private fun runDirect(): Duration { /** * Comparing chain sampling performance with direct sampling performance */ -fun main() { - runBlocking(Dispatchers.Default) { - val chainJob = async { runChain() } - val directJob = async { runDirect() } - println("Chain: ${chainJob.await()}") - println("Direct: ${directJob.await()}") - } +fun main(): Unit = runBlocking(Dispatchers.Default) { + val chainJob = async { runKMathChained() } + val directJob = async { runApacheDirect() } + println("KMath Chained: ${chainJob.await()}") + println("Apache Direct: ${directJob.await()}") } diff --git a/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionDemo.kt b/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionDemo.kt index 47b8d8111..aac7d51d4 100644 --- a/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionDemo.kt +++ b/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionDemo.kt @@ -3,14 +3,15 @@ package space.kscience.kmath.stat import kotlinx.coroutines.runBlocking import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.collectWithState +import 
space.kscience.kmath.stat.distributions.NormalDistribution /** - * The state of distribution averager + * The state of distribution averager. */ private data class AveragingChainState(var num: Int = 0, var value: Double = 0.0) /** - * Averaging + * Averaging. */ private fun Chain.mean(): Chain = collectWithState(AveragingChainState(), { it.copy() }) { chain -> val next = chain.next() @@ -21,7 +22,7 @@ private fun Chain.mean(): Chain = collectWithState(AveragingChai fun main() { - val normal = Distribution.normal() + val normal = NormalDistribution(0.0, 2.0) val chain = normal.sample(RandomGenerator.default).mean() runBlocking { @@ -32,4 +33,4 @@ fun main() { } } } -} \ No newline at end of file +} diff --git a/examples/src/main/kotlin/space/kscience/kmath/structures/StructureReadBenchmark.kt b/examples/src/main/kotlin/space/kscience/kmath/structures/StructureReadBenchmark.kt index 1c8a923c7..8b0a2ed0e 100644 --- a/examples/src/main/kotlin/space/kscience/kmath/structures/StructureReadBenchmark.kt +++ b/examples/src/main/kotlin/space/kscience/kmath/structures/StructureReadBenchmark.kt @@ -34,4 +34,4 @@ fun main() { strides.indices().forEach { res = array[strides.offset(it)] } } println("Array reading finished in $time3 millis") -} \ No newline at end of file +} diff --git a/kmath-commons/src/test/kotlin/space/kscience/kmath/commons/optimization/OptimizeTest.kt b/kmath-commons/src/test/kotlin/space/kscience/kmath/commons/optimization/OptimizeTest.kt index 5f5d3e1e4..36f2639f4 100644 --- a/kmath-commons/src/test/kotlin/space/kscience/kmath/commons/optimization/OptimizeTest.kt +++ b/kmath-commons/src/test/kotlin/space/kscience/kmath/commons/optimization/OptimizeTest.kt @@ -1,13 +1,13 @@ package space.kscience.kmath.commons.optimization -import org.junit.jupiter.api.Test +import kotlinx.coroutines.runBlocking import space.kscience.kmath.commons.expressions.DerivativeStructureExpression import space.kscience.kmath.misc.symbol import 
space.kscience.kmath.optimization.FunctionOptimization -import space.kscience.kmath.stat.Distribution import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.normal +import space.kscience.kmath.stat.distributions.NormalDistribution import kotlin.math.pow +import kotlin.test.Test internal class OptimizeTest { val x by symbol @@ -34,23 +34,24 @@ internal class OptimizeTest { simplexSteps(x to 2.0, y to 0.5) //this sets simplex optimizer } + println(result.point) println(result.value) } @Test - fun testCmFit() { + fun testCmFit() = runBlocking { val a by symbol val b by symbol val c by symbol val sigma = 1.0 - val generator = Distribution.normal(0.0, sigma) + val generator = NormalDistribution(0.0, sigma) val chain = generator.sample(RandomGenerator.default(112667)) val x = (1..100).map(Int::toDouble) val y = x.map { - it.pow(2) + it + 1 + chain.nextDouble() + it.pow(2) + it + 1 + chain.next() } val yErr = List(x.size) { sigma } @@ -64,5 +65,4 @@ internal class OptimizeTest { println(result) println("Chi2/dof = ${result.value / (x.size - 3)}") } - -} \ No newline at end of file +} diff --git a/kmath-core/src/commonMain/kotlin/space/kscience/kmath/operations/BigInt.kt b/kmath-core/src/commonMain/kotlin/space/kscience/kmath/operations/BigInt.kt index b5e27575b..18fbf0fdd 100644 --- a/kmath-core/src/commonMain/kotlin/space/kscience/kmath/operations/BigInt.kt +++ b/kmath-core/src/commonMain/kotlin/space/kscience/kmath/operations/BigInt.kt @@ -241,18 +241,18 @@ public class BigInt internal constructor( ) private fun compareMagnitudes(mag1: Magnitude, mag2: Magnitude): Int { - when { - mag1.size > mag2.size -> return 1 - mag1.size < mag2.size -> return -1 + return when { + mag1.size > mag2.size -> 1 + mag1.size < mag2.size -> -1 + else -> { - for (i in mag1.size - 1 downTo 0) { - if (mag1[i] > mag2[i]) { - return 1 - } else if (mag1[i] < mag2[i]) { - return -1 - } + for (i in mag1.size - 1 downTo 0) return when { + mag1[i] > mag2[i] -> 1 + mag1[i] 
< mag2[i] -> -1 + else -> continue } - return 0 + + 0 } } } @@ -302,10 +302,11 @@ public class BigInt internal constructor( var carry = 0uL for (i in mag.indices) { - val cur: ULong = carry + mag[i].toULong() * x.toULong() + val cur = carry + mag[i].toULong() * x.toULong() result[i] = (cur and BASE).toUInt() carry = cur shr BASE_SIZE } + result[resultLength - 1] = (carry and BASE).toUInt() return stripLeadingZeros(result) diff --git a/kmath-core/src/commonMain/kotlin/space/kscience/kmath/structures/Buffer.kt b/kmath-core/src/commonMain/kotlin/space/kscience/kmath/structures/Buffer.kt index 7ce098ed1..168a92c37 100644 --- a/kmath-core/src/commonMain/kotlin/space/kscience/kmath/structures/Buffer.kt +++ b/kmath-core/src/commonMain/kotlin/space/kscience/kmath/structures/Buffer.kt @@ -40,7 +40,6 @@ public interface Buffer { public operator fun iterator(): Iterator public companion object { - /** * Check the element-by-element match of content of two buffers. */ @@ -110,7 +109,6 @@ public interface MutableBuffer : Buffer { public fun copy(): MutableBuffer public companion object { - /** * Creates a [DoubleBuffer] with the specified [size], where each element is calculated by calling the specified * [initializer] function. 
diff --git a/kmath-core/src/commonTest/kotlin/space/kscience/kmath/structures/NumberNDFieldTest.kt b/kmath-core/src/commonTest/kotlin/space/kscience/kmath/structures/NumberNDFieldTest.kt index fb67f0308..376415a56 100644 --- a/kmath-core/src/commonTest/kotlin/space/kscience/kmath/structures/NumberNDFieldTest.kt +++ b/kmath-core/src/commonTest/kotlin/space/kscience/kmath/structures/NumberNDFieldTest.kt @@ -38,12 +38,11 @@ class NumberNDFieldTest { (i * 10 + j).toDouble() } - for (i in 0..2) { + for (i in 0..2) for (j in 0..2) { val expected = (i * 10 + j).toDouble() assertEquals(expected, array[i, j], "Error at index [$i, $j]") } - } } @Test diff --git a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingDoubleChain.kt b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingDoubleChain.kt index ba6adf35b..d024147b4 100644 --- a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingDoubleChain.kt +++ b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingDoubleChain.kt @@ -1,12 +1,13 @@ package space.kscience.kmath.chains /** - * Performance optimized chain for real values + * Chunked, specialized chain for real values. */ -public abstract class BlockingDoubleChain : Chain { - public abstract fun nextDouble(): Double +public interface BlockingDoubleChain : Chain { + public override suspend fun next(): Double - override suspend fun next(): Double = nextDouble() - - public open fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { nextDouble() } + /** + * Returns an [DoubleArray] chunk of [size] values of [next]. 
+ */ + public suspend fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { next() } } diff --git a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingIntChain.kt b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingIntChain.kt index 11da7e503..fb2e453ad 100644 --- a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingIntChain.kt +++ b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingIntChain.kt @@ -3,10 +3,7 @@ package space.kscience.kmath.chains /** * Performance optimized chain for integer values */ -public abstract class BlockingIntChain : Chain { - public abstract fun nextInt(): Int - - override suspend fun next(): Int = nextInt() - - public fun nextBlock(size: Int): IntArray = IntArray(size) { nextInt() } +public interface BlockingIntChain : Chain { + public override suspend fun next(): Int + public suspend fun nextBlock(size: Int): IntArray = IntArray(size) { next() } } diff --git a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/Chain.kt b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/Chain.kt index 26d078fcb..a961f2e09 100644 --- a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/Chain.kt +++ b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/Chain.kt @@ -63,12 +63,10 @@ public class MarkovChain(private val seed: suspend () -> R, private public fun value(): R? = value - public override suspend fun next(): R { - mutex.withLock { - val newValue = gen(value ?: seed()) - value = newValue - return newValue - } + public override suspend fun next(): R = mutex.withLock { + val newValue = gen(value ?: seed()) + value = newValue + newValue } public override fun fork(): Chain = MarkovChain(seed = { value ?: seed() }, gen = gen) @@ -90,12 +88,10 @@ public class StatefulChain( public fun value(): R? 
= value - public override suspend fun next(): R { - mutex.withLock { - val newValue = state.gen(value ?: state.seed()) - value = newValue - return newValue - } + public override suspend fun next(): R = mutex.withLock { + val newValue = state.gen(value ?: state.seed()) + value = newValue + newValue } public override fun fork(): Chain = StatefulChain(forkState(state), seed, forkState, gen) diff --git a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/streaming/BufferFlow.kt b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/streaming/BufferFlow.kt index c271f8889..dc1dd4757 100644 --- a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/streaming/BufferFlow.kt +++ b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/streaming/BufferFlow.kt @@ -28,7 +28,7 @@ public fun Flow.chunked(bufferSize: Int, bufferFactory: BufferFactory) var counter = 0 this@chunked.collect { element -> - list.add(element) + list += element counter++ if (counter == bufferSize) { diff --git a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/streaming/RingBuffer.kt b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/streaming/RingBuffer.kt index f81ad2f0d..e844b765d 100644 --- a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/streaming/RingBuffer.kt +++ b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/streaming/RingBuffer.kt @@ -48,11 +48,9 @@ public class RingBuffer( /** * A safe snapshot operation */ - public suspend fun snapshot(): Buffer { - mutex.withLock { - val copy = buffer.copy() - return VirtualBuffer(size) { i -> copy[startIndex.forward(i)] as T } - } + public suspend fun snapshot(): Buffer = mutex.withLock { + val copy = buffer.copy() + VirtualBuffer(size) { i -> copy[startIndex.forward(i)] as T } } public suspend fun push(element: T) { diff --git a/kmath-dimensions/src/commonTest/kotlin/kscience/dimensions/DMatrixContextTest.kt 
b/kmath-dimensions/src/commonTest/kotlin/kscience/dimensions/DMatrixContextTest.kt index e2a9628ac..58ed82723 100644 --- a/kmath-dimensions/src/commonTest/kotlin/kscience/dimensions/DMatrixContextTest.kt +++ b/kmath-dimensions/src/commonTest/kotlin/kscience/dimensions/DMatrixContextTest.kt @@ -1,4 +1,4 @@ -package kscience.dimensions +package space.kscience.dimensions import space.kscience.kmath.dimensions.D2 import space.kscience.kmath.dimensions.D3 diff --git a/kmath-functions/src/commonMain/kotlin/space/kscience/kmath/functions/Piecewise.kt b/kmath-functions/src/commonMain/kotlin/space/kscience/kmath/functions/Piecewise.kt index 3510973be..2477af618 100644 --- a/kmath-functions/src/commonMain/kotlin/space/kscience/kmath/functions/Piecewise.kt +++ b/kmath-functions/src/commonMain/kotlin/space/kscience/kmath/functions/Piecewise.kt @@ -38,8 +38,8 @@ public class OrderedPiecewisePolynomial>(delimiter: T) : */ public fun putRight(right: T, piece: Polynomial) { require(right > delimiters.last()) { "New delimiter should be to the right of old one" } - delimiters.add(right) - pieces.add(piece) + delimiters += right + pieces += piece } /** diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Distribution.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Distribution.kt index 25f6a87b5..095182160 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Distribution.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Distribution.kt @@ -1,17 +1,29 @@ package space.kscience.kmath.stat +import kotlinx.coroutines.flow.first import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.collect import space.kscience.kmath.structures.Buffer import space.kscience.kmath.structures.BufferFactory -import space.kscience.kmath.structures.DoubleBuffer +import space.kscience.kmath.structures.IntBuffer +import space.kscience.kmath.structures.MutableBuffer +import kotlin.jvm.JvmName -public interface Sampler { 
+/** + * Sampler that generates chains of values of type [T]. + */ +public fun interface Sampler { + /** + * Generates a chain of samples. + * + * @param generator the randomness provider. + * @return the new chain. + */ public fun sample(generator: RandomGenerator): Chain } /** - * A distribution of typed objects + * A distribution of typed objects. */ public interface Distribution : Sampler { /** @@ -20,11 +32,7 @@ public interface Distribution : Sampler { */ public fun probability(arg: T): Double - /** - * Create a chain of samples from this distribution. - * The chain is not guaranteed to be stateless, but different sample chains should be independent. - */ - override fun sample(generator: RandomGenerator): Chain + public override fun sample(generator: RandomGenerator): Chain /** * An empty companion. Distribution factories should be written as its extensions @@ -63,16 +71,27 @@ public fun Sampler.sampleBuffer( //clear list from previous run tmp.clear() //Fill list - repeat(size) { - tmp.add(chain.next()) - } + repeat(size) { tmp += chain.next() } //return new buffer with elements from tmp bufferFactory(size) { tmp[it] } } } /** - * Generate a bunch of samples from real distributions + * Samples one value from this [Sampler]. */ +public suspend fun Sampler.next(generator: RandomGenerator): T = sample(generator).first() + +/** + * Generates [size] real samples and chunks them into some buffers. + */ +@JvmName("sampleRealBuffer") public fun Sampler.sampleBuffer(generator: RandomGenerator, size: Int): Chain> = - sampleBuffer(generator, size, ::DoubleBuffer) + sampleBuffer(generator, size, MutableBuffer.Companion::double) + +/** + * Generates [size] integer samples and chunks them into some buffers. 
+ */ +@JvmName("sampleIntBuffer") +public fun Sampler.sampleBuffer(generator: RandomGenerator, size: Int): Chain> = + sampleBuffer(generator, size, ::IntBuffer) diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/FactorizedDistribution.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/FactorizedDistribution.kt index ff7a13652..3dd506b67 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/FactorizedDistribution.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/FactorizedDistribution.kt @@ -14,7 +14,7 @@ public interface NamedDistribution : Distribution> public class FactorizedDistribution(public val distributions: Collection>) : NamedDistribution { override fun probability(arg: Map): Double = - distributions.fold(1.0) { acc, distr -> acc * distr.probability(arg) } + distributions.fold(1.0) { acc, dist -> acc * dist.probability(arg) } override fun sample(generator: RandomGenerator): Chain> { val chains = distributions.map { it.sample(generator) } @@ -38,6 +38,6 @@ public class DistributionBuilder { private val distributions = ArrayList>() public infix fun String.to(distribution: Distribution) { - distributions.add(NamedDistributionWrapper(this, distribution)) + distributions += NamedDistributionWrapper(this, distribution) } } diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomChain.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomChain.kt index 978094ffd..2f117a035 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomChain.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomChain.kt @@ -1,17 +1,22 @@ package space.kscience.kmath.stat +import space.kscience.kmath.chains.BlockingDoubleChain +import space.kscience.kmath.chains.BlockingIntChain import space.kscience.kmath.chains.Chain /** * A possibly stateful chain producing random values. 
+ * + * @property generator the underlying [RandomGenerator] instance. */ public class RandomChain( public val generator: RandomGenerator, - private val gen: suspend RandomGenerator.() -> R, + private val gen: suspend RandomGenerator.() -> R ) : Chain { override suspend fun next(): R = generator.gen() - override fun fork(): Chain = RandomChain(generator.fork(), gen) } -public fun RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain = RandomChain(this, gen) \ No newline at end of file +public fun RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain = RandomChain(this, gen) +public fun Chain.blocking(): BlockingDoubleChain = object : Chain by this, BlockingDoubleChain {} +public fun Chain.blocking(): BlockingIntChain = object : Chain by this, BlockingIntChain {} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomGenerator.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomGenerator.kt index 1a4f3b75d..bad2334e9 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomGenerator.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomGenerator.kt @@ -82,6 +82,8 @@ public interface RandomGenerator { /** * Implements [RandomGenerator] by delegating all operations to [Random]. + * + * @property random the underlying [Random] object. 
*/ public class DefaultGenerator(public val random: Random = Random) : RandomGenerator { public override fun nextBoolean(): Boolean = random.nextBoolean() diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/SamplerAlgebra.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/SamplerAlgebra.kt index c5ec99dae..25ec7eca6 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/SamplerAlgebra.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/SamplerAlgebra.kt @@ -8,16 +8,28 @@ import space.kscience.kmath.operations.Group import space.kscience.kmath.operations.ScaleOperations import space.kscience.kmath.operations.invoke -public class BasicSampler(public val chainBuilder: (RandomGenerator) -> Chain) : Sampler { - public override fun sample(generator: RandomGenerator): Chain = chainBuilder(generator) -} - +/** + * Implements [Sampler] by sampling only certain [value]. + * + * @property value the value to sample. + */ public class ConstantSampler(public val value: T) : Sampler { public override fun sample(generator: RandomGenerator): Chain = ConstantChain(value) } /** - * A space for samplers. Allows to perform simple operations on distributions + * Implements [Sampler] by delegating sampling to value of [chainBuilder]. + * + * @property chainBuilder the provider of [Chain]. + */ +public class BasicSampler(public val chainBuilder: (RandomGenerator) -> Chain) : Sampler { + public override fun sample(generator: RandomGenerator): Chain = chainBuilder(generator) +} + +/** + * A space of samplers. Allows to perform simple operations on distributions. + * + * @property algebra the space to provide addition and scalar multiplication for [T]. 
*/ public class SamplerSpace(public val algebra: S) : Group>, ScaleOperations> where S : Group, S : ScaleOperations { @@ -29,8 +41,10 @@ public class SamplerSpace(public val algebra: S) : Group> } public override fun scale(a: Sampler, value: Double): Sampler = BasicSampler { generator -> - a.sample(generator).map { algebra { it * value } } + a.sample(generator).map { a -> + algebra { a * value } + } } - override fun Sampler.unaryMinus(): Sampler = scale(this, -1.0) + public override fun Sampler.unaryMinus(): Sampler = scale(this, -1.0) } diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/distributions/NormalDistribution.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/distributions/NormalDistribution.kt new file mode 100644 index 000000000..6515cbaa7 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/distributions/NormalDistribution.kt @@ -0,0 +1,41 @@ +package space.kscience.kmath.stat.distributions + +import space.kscience.kmath.chains.Chain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.UnivariateDistribution +import space.kscience.kmath.stat.internal.InternalErf +import space.kscience.kmath.stat.samplers.GaussianSampler +import space.kscience.kmath.stat.samplers.NormalizedGaussianSampler +import space.kscience.kmath.stat.samplers.ZigguratNormalizedGaussianSampler +import kotlin.math.* + +/** + * Implements [UnivariateDistribution] for the normal (gaussian) distribution. 
+ */ +public inline class NormalDistribution(public val sampler: GaussianSampler) : UnivariateDistribution { + public constructor( + mean: Double, + standardDeviation: Double, + normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of(), + ) : this(GaussianSampler.of(mean, standardDeviation, normalized)) + + public override fun probability(arg: Double): Double { + val x1 = (arg - sampler.mean) / sampler.standardDeviation + return exp(-0.5 * x1 * x1 - (ln(sampler.standardDeviation) + 0.5 * ln(2 * PI))) + } + + public override fun sample(generator: RandomGenerator): Chain = sampler.sample(generator) + + public override fun cumulative(arg: Double): Double { + val dev = arg - sampler.mean + + return when { + abs(dev) > 40 * sampler.standardDeviation -> if (dev < 0) 0.0 else 1.0 + else -> 0.5 * InternalErf.erfc(-dev / (sampler.standardDeviation * SQRT2)) + } + } + + private companion object { + private val SQRT2 = sqrt(2.0) + } +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalErf.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalErf.kt new file mode 100644 index 000000000..4e1623867 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalErf.kt @@ -0,0 +1,15 @@ +package space.kscience.kmath.stat.internal + +import kotlin.math.abs + +/** + * Based on Commons Math implementation. + * See [https://commons.apache.org/proper/commons-math/javadocs/api-3.3/org/apache/commons/math3/special/Erf.html]. 
+ */ +internal object InternalErf { + fun erfc(x: Double): Double { + if (abs(x) > 40) return if (x > 0) 0.0 else 2.0 + val ret = InternalGamma.regularizedGammaQ(0.5, x * x, 10000) + return if (x < 0) 2 - ret else ret + } +} \ No newline at end of file diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalGamma.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalGamma.kt new file mode 100644 index 000000000..4f5adbe97 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalGamma.kt @@ -0,0 +1,238 @@ +package space.kscience.kmath.stat.internal + +import kotlin.math.* + +private abstract class ContinuedFraction protected constructor() { + protected abstract fun getA(n: Int, x: Double): Double + protected abstract fun getB(n: Int, x: Double): Double + + fun evaluate(x: Double, maxIterations: Int): Double { + val small = 1e-50 + var hPrev = getA(0, x) + if (hPrev == 0.0 || abs(0.0 - hPrev) <= small) hPrev = small + var n = 1 + var dPrev = 0.0 + var cPrev = hPrev + var hN = hPrev + + while (n < maxIterations) { + val a = getA(n, x) + val b = getB(n, x) + var dN = a + b * dPrev + if (dN == 0.0 || abs(0.0 - dN) <= small) dN = small + var cN = a + b / cPrev + if (cN == 0.0 || abs(0.0 - cN) <= small) cN = small + dN = 1 / dN + val deltaN = cN * dN + hN = hPrev * deltaN + check(!hN.isInfinite()) { "hN is infinite" } + check(!hN.isNaN()) { "hN is NaN" } + if (abs(deltaN - 1.0) < 10e-9) break + dPrev = dN + cPrev = cN + hPrev = hN + n++ + } + + check(n < maxIterations) { "n is more than maxIterations" } + return hN + } +} + +internal object InternalGamma { + const val LANCZOS_G = 607.0 / 128.0 + + private val LANCZOS = doubleArrayOf( + 0.99999999999999709182, + 57.156235665862923517, + -59.597960355475491248, + 14.136097974741747174, + -0.49191381609762019978, + .33994649984811888699e-4, + .46523628927048575665e-4, + -.98374475304879564677e-4, + .15808870322491248884e-3, + 
-.21026444172410488319e-3, + .21743961811521264320e-3, + -.16431810653676389022e-3, + .84418223983852743293e-4, + -.26190838401581408670e-4, + .36899182659531622704e-5 + ) + + private val HALF_LOG_2_PI = 0.5 * ln(2.0 * PI) + private const val INV_GAMMA1P_M1_A0 = .611609510448141581788E-08 + private const val INV_GAMMA1P_M1_A1 = .624730830116465516210E-08 + private const val INV_GAMMA1P_M1_B1 = .203610414066806987300E+00 + private const val INV_GAMMA1P_M1_B2 = .266205348428949217746E-01 + private const val INV_GAMMA1P_M1_B3 = .493944979382446875238E-03 + private const val INV_GAMMA1P_M1_B4 = -.851419432440314906588E-05 + private const val INV_GAMMA1P_M1_B5 = -.643045481779353022248E-05 + private const val INV_GAMMA1P_M1_B6 = .992641840672773722196E-06 + private const val INV_GAMMA1P_M1_B7 = -.607761895722825260739E-07 + private const val INV_GAMMA1P_M1_B8 = .195755836614639731882E-09 + private const val INV_GAMMA1P_M1_P0 = .6116095104481415817861E-08 + private const val INV_GAMMA1P_M1_P1 = .6871674113067198736152E-08 + private const val INV_GAMMA1P_M1_P2 = .6820161668496170657918E-09 + private const val INV_GAMMA1P_M1_P3 = .4686843322948848031080E-10 + private const val INV_GAMMA1P_M1_P4 = .1572833027710446286995E-11 + private const val INV_GAMMA1P_M1_P5 = -.1249441572276366213222E-12 + private const val INV_GAMMA1P_M1_P6 = .4343529937408594255178E-14 + private const val INV_GAMMA1P_M1_Q1 = .3056961078365221025009E+00 + private const val INV_GAMMA1P_M1_Q2 = .5464213086042296536016E-01 + private const val INV_GAMMA1P_M1_Q3 = .4956830093825887312020E-02 + private const val INV_GAMMA1P_M1_Q4 = .2692369466186361192876E-03 + private const val INV_GAMMA1P_M1_C = -.422784335098467139393487909917598E+00 + private const val INV_GAMMA1P_M1_C0 = .577215664901532860606512090082402E+00 + private const val INV_GAMMA1P_M1_C1 = -.655878071520253881077019515145390E+00 + private const val INV_GAMMA1P_M1_C2 = -.420026350340952355290039348754298E-01 + private const val 
INV_GAMMA1P_M1_C3 = .166538611382291489501700795102105E+00 + private const val INV_GAMMA1P_M1_C4 = -.421977345555443367482083012891874E-01 + private const val INV_GAMMA1P_M1_C5 = -.962197152787697356211492167234820E-02 + private const val INV_GAMMA1P_M1_C6 = .721894324666309954239501034044657E-02 + private const val INV_GAMMA1P_M1_C7 = -.116516759185906511211397108401839E-02 + private const val INV_GAMMA1P_M1_C8 = -.215241674114950972815729963053648E-03 + private const val INV_GAMMA1P_M1_C9 = .128050282388116186153198626328164E-03 + private const val INV_GAMMA1P_M1_C10 = -.201348547807882386556893914210218E-04 + private const val INV_GAMMA1P_M1_C11 = -.125049348214267065734535947383309E-05 + private const val INV_GAMMA1P_M1_C12 = .113302723198169588237412962033074E-05 + private const val INV_GAMMA1P_M1_C13 = -.205633841697760710345015413002057E-06 + + fun logGamma(x: Double): Double = when { + x.isNaN() || x <= 0.0 -> Double.NaN + x < 0.5 -> logGamma1p(x) - ln(x) + x <= 2.5 -> logGamma1p(x - 0.5 - 0.5) + + x <= 8.0 -> { + val n = floor(x - 1.5).toInt() + val prod = (1..n).fold(1.0, { prod, i -> prod * (x - i) }) + logGamma1p(x - (n + 1)) + ln(prod) + } + + else -> { + val tmp = x + LANCZOS_G + .5 + (x + .5) * ln(tmp) - tmp + HALF_LOG_2_PI + ln(lanczos(x) / x) + } + } + + private fun regularizedGammaP( + a: Double, + x: Double, + maxIterations: Int = Int.MAX_VALUE + ): Double = when { + a.isNaN() || x.isNaN() || a <= 0.0 || x < 0.0 -> Double.NaN + x == 0.0 -> 0.0 + x >= a + 1 -> 1.0 - regularizedGammaQ(a, x, maxIterations) + + else -> { + // calculate series + var n = 0.0 // current element index + var an = 1.0 / a // n-th element in the series + var sum = an // partial sum + + while (abs(an / sum) > 10e-15 && n < maxIterations && sum < Double.POSITIVE_INFINITY) { + // compute next element in the series + n += 1.0 + an *= x / (a + n) + + // update partial sum + sum += an + } + + when { + n >= maxIterations -> throw error("Maximal iterations is exceeded 
$maxIterations") + sum.isInfinite() -> 1.0 + else -> exp(-x + a * ln(x) - logGamma(a)) * sum + } + } + } + + fun regularizedGammaQ( + a: Double, + x: Double, + maxIterations: Int = Int.MAX_VALUE + ): Double = when { + a.isNaN() || x.isNaN() || a <= 0.0 || x < 0.0 -> Double.NaN + x == 0.0 -> 1.0 + x < a + 1.0 -> 1.0 - regularizedGammaP(a, x, maxIterations) + + else -> 1.0 / object : ContinuedFraction() { + override fun getA(n: Int, x: Double): Double = 2.0 * n + 1.0 - a + x + override fun getB(n: Int, x: Double): Double = n * (a - n) + }.evaluate(x, maxIterations) * exp(-x + a * ln(x) - logGamma(a)) + } + + private fun lanczos(x: Double): Double = + (LANCZOS.size - 1 downTo 1).sumByDouble { LANCZOS[it] / (x + it) } + LANCZOS[0] + + private fun invGamma1pm1(x: Double): Double { + require(x >= -0.5) + require(x <= 1.5) + val ret: Double + val t = if (x <= 0.5) x else x - 0.5 - 0.5 + + if (t < 0.0) { + val a = INV_GAMMA1P_M1_A0 + t * INV_GAMMA1P_M1_A1 + var b = INV_GAMMA1P_M1_B8 + b = INV_GAMMA1P_M1_B7 + t * b + b = INV_GAMMA1P_M1_B6 + t * b + b = INV_GAMMA1P_M1_B5 + t * b + b = INV_GAMMA1P_M1_B4 + t * b + b = INV_GAMMA1P_M1_B3 + t * b + b = INV_GAMMA1P_M1_B2 + t * b + b = INV_GAMMA1P_M1_B1 + t * b + b = 1.0 + t * b + var c = INV_GAMMA1P_M1_C13 + t * (a / b) + c = INV_GAMMA1P_M1_C12 + t * c + c = INV_GAMMA1P_M1_C11 + t * c + c = INV_GAMMA1P_M1_C10 + t * c + c = INV_GAMMA1P_M1_C9 + t * c + c = INV_GAMMA1P_M1_C8 + t * c + c = INV_GAMMA1P_M1_C7 + t * c + c = INV_GAMMA1P_M1_C6 + t * c + c = INV_GAMMA1P_M1_C5 + t * c + c = INV_GAMMA1P_M1_C4 + t * c + c = INV_GAMMA1P_M1_C3 + t * c + c = INV_GAMMA1P_M1_C2 + t * c + c = INV_GAMMA1P_M1_C1 + t * c + c = INV_GAMMA1P_M1_C + t * c + ret = (if (x > 0.5) t * c / x else x * (c + 0.5 + 0.5)) + } else { + var p = INV_GAMMA1P_M1_P6 + p = INV_GAMMA1P_M1_P5 + t * p + p = INV_GAMMA1P_M1_P4 + t * p + p = INV_GAMMA1P_M1_P3 + t * p + p = INV_GAMMA1P_M1_P2 + t * p + p = INV_GAMMA1P_M1_P1 + t * p + p = INV_GAMMA1P_M1_P0 + t * p + var q = 
INV_GAMMA1P_M1_Q4 + q = INV_GAMMA1P_M1_Q3 + t * q + q = INV_GAMMA1P_M1_Q2 + t * q + q = INV_GAMMA1P_M1_Q1 + t * q + q = 1.0 + t * q + var c = INV_GAMMA1P_M1_C13 + p / q * t + c = INV_GAMMA1P_M1_C12 + t * c + c = INV_GAMMA1P_M1_C11 + t * c + c = INV_GAMMA1P_M1_C10 + t * c + c = INV_GAMMA1P_M1_C9 + t * c + c = INV_GAMMA1P_M1_C8 + t * c + c = INV_GAMMA1P_M1_C7 + t * c + c = INV_GAMMA1P_M1_C6 + t * c + c = INV_GAMMA1P_M1_C5 + t * c + c = INV_GAMMA1P_M1_C4 + t * c + c = INV_GAMMA1P_M1_C3 + t * c + c = INV_GAMMA1P_M1_C2 + t * c + c = INV_GAMMA1P_M1_C1 + t * c + c = INV_GAMMA1P_M1_C0 + t * c + ret = (if (x > 0.5) t / x * (c - 0.5 - 0.5) else x * c) + } + + return ret + } + + private fun logGamma1p(x: Double): Double { + require(x >= -0.5) + require(x <= 1.5) + return -ln1p(invGamma1pm1(x)) + } +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalUtils.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalUtils.kt new file mode 100644 index 000000000..722eee946 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalUtils.kt @@ -0,0 +1,70 @@ +package space.kscience.kmath.stat.internal + +import kotlin.math.ln +import kotlin.math.min + +internal object InternalUtils { + private val FACTORIALS = longArrayOf( + 1L, 1L, 2L, + 6L, 24L, 120L, + 720L, 5040L, 40320L, + 362880L, 3628800L, 39916800L, + 479001600L, 6227020800L, 87178291200L, + 1307674368000L, 20922789888000L, 355687428096000L, + 6402373705728000L, 121645100408832000L, 2432902008176640000L + ) + + private const val BEGIN_LOG_FACTORIALS = 2 + + fun factorial(n: Int): Long = FACTORIALS[n] + + fun validateProbabilities(probabilities: DoubleArray?): Double { + require(!(probabilities == null || probabilities.isEmpty())) { "Probabilities must not be empty." 
} + + val sumProb = probabilities.sumByDouble { prob -> + require(!(prob < 0 || prob.isInfinite() || prob.isNaN())) { "Invalid probability: $prob" } + prob + } + + require(!(sumProb.isInfinite() || sumProb <= 0)) { "Invalid sum of probabilities: $sumProb" } + return sumProb + } + + class FactorialLog private constructor(numValues: Int, cache: DoubleArray?) { + private val logFactorials: DoubleArray = DoubleArray(numValues) + + init { + val endCopy: Int + + if (cache != null && cache.size > BEGIN_LOG_FACTORIALS) { + // Copy available values. + endCopy = min(cache.size, numValues) + + cache.copyInto( + logFactorials, + BEGIN_LOG_FACTORIALS, + BEGIN_LOG_FACTORIALS, endCopy + ) + } else + // All values to be computed + endCopy = BEGIN_LOG_FACTORIALS + + // Compute remaining values. + (endCopy until numValues).forEach { i -> + if (i < FACTORIALS.size) + logFactorials[i] = ln(FACTORIALS[i].toDouble()) + else + logFactorials[i] = logFactorials[i - 1] + ln(i.toDouble()) + } + } + + fun value(n: Int): Double { + if (n < logFactorials.size) return logFactorials[n] + return if (n < FACTORIALS.size) ln(FACTORIALS[n].toDouble()) else InternalGamma.logGamma(n + 1.0) + } + + companion object { + fun create(): FactorialLog = FactorialLog(0, null) + } + } +} \ No newline at end of file diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AhrensDieterExponentialSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AhrensDieterExponentialSampler.kt new file mode 100644 index 000000000..504c6b881 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AhrensDieterExponentialSampler.kt @@ -0,0 +1,73 @@ +package space.kscience.kmath.stat.samplers + +import space.kscience.kmath.chains.Chain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler +import space.kscience.kmath.stat.chain +import space.kscience.kmath.stat.internal.InternalUtils +import kotlin.math.ln 
+import kotlin.math.pow + +/** + * Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html). + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html]. + */ +public class AhrensDieterExponentialSampler private constructor(public val mean: Double) : Sampler { + public override fun sample(generator: RandomGenerator): Chain = generator.chain { + // Step 1: + var a = 0.0 + var u = nextDouble() + + // Step 2 and 3: + while (u < 0.5) { + a += EXPONENTIAL_SA_QI[0] + u *= 2.0 + } + + // Step 4 (now u >= 0.5): + u += u - 1 + // Step 5: + if (u <= EXPONENTIAL_SA_QI[0]) return@chain mean * (a + u) + // Step 6: + var i = 0 // Should be 1, be we iterate before it in while using 0. + var u2 = nextDouble() + var umin = u2 + + // Step 7 and 8: + do { + ++i + u2 = nextDouble() + if (u2 < umin) umin = u2 + // Step 8: + } while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1. + + mean * (a + umin * EXPONENTIAL_SA_QI[0]) + } + + override fun toString(): String = "Ahrens-Dieter Exponential deviate" + + public companion object { + private val EXPONENTIAL_SA_QI by lazy { DoubleArray(16) } + + init { + /** + * Filling EXPONENTIAL_SA_QI table. + * Note that we don't want qi = 0 in the table. 
+ */ + val ln2 = ln(2.0) + var qi = 0.0 + + EXPONENTIAL_SA_QI.indices.forEach { i -> + qi += ln2.pow(i + 1.0) / InternalUtils.factorial(i + 1) + EXPONENTIAL_SA_QI[i] = qi + } + } + + public fun of(mean: Double): AhrensDieterExponentialSampler { + require(mean > 0) { "mean is not strictly positive: $mean" } + return AhrensDieterExponentialSampler(mean) + } + } +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt new file mode 100644 index 000000000..81182f6cd --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt @@ -0,0 +1,120 @@ +package space.kscience.kmath.stat.samplers + +import space.kscience.kmath.chains.Chain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler +import space.kscience.kmath.stat.chain +import space.kscience.kmath.stat.next +import kotlin.math.* + +/** + * Sampling from the [gamma distribution](http://mathworld.wolfram.com/GammaDistribution.html). + * - For 0 < alpha < 1: + * Ahrens, J. H. and Dieter, U., Computer methods for sampling from gamma, beta, Poisson and binomial distributions, Computing, 12, 223-246, 1974. + * - For alpha >= 1: + * Marsaglia and Tsang, A Simple Method for Generating Gamma Variables. ACM Transactions on Mathematical Software, Volume 26 Issue 3, September, 2000. + * + * Based on Commons RNG implementation. + * + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterMarsagliaTsangGammaSampler.html]. 
+ */ +public class AhrensDieterMarsagliaTsangGammaSampler private constructor( + alpha: Double, + theta: Double +) : Sampler { + private val delegate: BaseGammaSampler = + if (alpha < 1) AhrensDieterGammaSampler(alpha, theta) else MarsagliaTsangGammaSampler(alpha, theta) + + private abstract class BaseGammaSampler internal constructor( + protected val alpha: Double, + protected val theta: Double + ) : Sampler { + init { + require(alpha > 0) { "alpha is not strictly positive: $alpha" } + require(theta > 0) { "theta is not strictly positive: $theta" } + } + + override fun toString(): String = "Ahrens-Dieter-Marsaglia-Tsang Gamma deviate" + } + + private class AhrensDieterGammaSampler(alpha: Double, theta: Double) : + BaseGammaSampler(alpha, theta) { + private val oneOverAlpha: Double = 1.0 / alpha + private val bGSOptim: Double = 1.0 + alpha / E + + override fun sample(generator: RandomGenerator): Chain = generator.chain { + var x: Double + + // [1]: p. 228, Algorithm GS. + while (true) { + // Step 1: + val u = generator.nextDouble() + val p = bGSOptim * u + + if (p <= 1) { + // Step 2: + x = p.pow(oneOverAlpha) + val u2 = generator.nextDouble() + + if (u2 > exp(-x)) // Reject. + continue + + break + } + + // Step 3: + x = -ln((bGSOptim - p) * oneOverAlpha) + val u2: Double = generator.nextDouble() + if (u2 <= x.pow(alpha - 1.0)) break + // Reject and continue. 
+ } + + x * theta + } + } + + private class MarsagliaTsangGammaSampler(alpha: Double, theta: Double) : + BaseGammaSampler(alpha, theta) { + private val dOptim: Double + private val cOptim: Double + private val gaussian: NormalizedGaussianSampler + + init { + gaussian = ZigguratNormalizedGaussianSampler.of() + dOptim = alpha - ONE_THIRD + cOptim = ONE_THIRD / sqrt(dOptim) + } + + override fun sample(generator: RandomGenerator): Chain = generator.chain { + var v: Double + + while (true) { + val x = gaussian.next(generator) + val oPcTx = 1 + cOptim * x + v = oPcTx * oPcTx * oPcTx + if (v <= 0) continue + val x2 = x * x + val u = generator.nextDouble() + // Squeeze. + if (u < 1 - 0.0331 * x2 * x2) break + if (ln(u) < 0.5 * x2 + dOptim * (1 - v + ln(v))) break + } + + theta * dOptim * v + } + + companion object { + private const val ONE_THIRD = 1.0 / 3.0 + } + } + + public override fun sample(generator: RandomGenerator): Chain = delegate.sample(generator) + public override fun toString(): String = delegate.toString() + + public companion object { + public fun of( + alpha: Double, + theta: Double + ): Sampler = AhrensDieterMarsagliaTsangGammaSampler(alpha, theta) + } +} \ No newline at end of file diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AliasMethodDiscreteSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AliasMethodDiscreteSampler.kt new file mode 100644 index 000000000..cae97db65 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AliasMethodDiscreteSampler.kt @@ -0,0 +1,286 @@ +package space.kscience.kmath.stat.samplers + +import space.kscience.kmath.chains.Chain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler +import space.kscience.kmath.stat.chain +import space.kscience.kmath.stat.internal.InternalUtils +import kotlin.math.ceil +import kotlin.math.max +import kotlin.math.min + +/** + * Distribution sampler that uses 
the Alias method. It can be used to sample from n values each with an associated + * probability. This implementation is based on the detailed explanation of the alias method by Keith Schwarz and + * implements Vose's algorithm. + * + * Vose, M.D., A linear algorithm for generating random numbers with a given distribution, IEEE Transactions on + * Software Engineering, 17, 972-975, 1991. The algorithm will sample values in O(1) time after a pre-processing step + * of O(n) time. + * + * The alias tables are constructed using fraction probabilities with an assumed denominator of 2^53. In the generic + * case sampling uses UniformRandomProvider.nextInt(int) and the upper 53-bits from UniformRandomProvider.nextLong(). + * + * Zero padding the input probabilities can be used to make sampling more efficient. Any zero entry will always be + * aliased, removing the requirement to compute a long. Increased sampling speed comes at the cost of increased storage + * space. The algorithm requires approximately 12 bytes of storage per input probability, that is n * 12 for size n. + * Zero-padding only requires 4 bytes of storage per padded value as the probability is known to be zero. + * + * An optimisation is performed for small table sizes that are a power of 2. In this case the sampling uses 1 or 2 + * calls from UniformRandomProvider.nextInt() to generate up to 64-bits for creation of an 11-bit index and 53-bits + * for the long. This optimisation requires a generator with a high cycle length for the lower order bits. + * + * Larger table sizes that are a power of 2 will benefit from fast algorithms for UniformRandomProvider.nextInt(int) + * that exploit the power of 2. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AliasMethodDiscreteSampler.html].
+ */ +public open class AliasMethodDiscreteSampler private constructor( + // Deliberate direct storage of input arrays + protected val probability: LongArray, + protected val alias: IntArray +) : Sampler { + + private class SmallTableAliasMethodDiscreteSampler( + probability: LongArray, + alias: IntArray + ) : AliasMethodDiscreteSampler(probability, alias) { + // Assume the table size is a power of 2 and create the mask + private val mask: Int = alias.size - 1 + + override fun sample(generator: RandomGenerator): Chain = generator.chain { + val bits = generator.nextInt() + // Isolate lower bits + val j = bits and mask + + // Optimisation for zero-padded input tables + if (j >= probability.size) + // No probability must use the alias + return@chain alias[j] + + // Create a uniform random deviate as a long. + // This replicates functionality from the o.a.c.rng.core.utils.NumberFactory.makeLong + val longBits = generator.nextInt().toLong() shl 32 or (bits.toLong() and hex_ffffffff) + // Choose between the two. Use a 53-bit long for the probability. + if (longBits ushr 11 < probability[j]) j else alias[j] + } + + private companion object { + private const val hex_ffffffff = 4294967295L + } + } + + public override fun sample(generator: RandomGenerator): Chain = generator.chain { + // This implements the algorithm as per Vose (1991): + // v = uniform() in [0, 1) + // j = uniform(n) in [0, n) + // if v < prob[j] then + // return j + // else + // return alias[j] + val j = generator.nextInt(alias.size) + + // Optimisation for zero-padded input tables + // No probability must use the alias + if (j >= probability.size) return@chain alias[j] + + // Note: We could check the probability before computing a deviate. + // p(j) == 0 => alias[j] + // p(j) == 1 => j + // However it is assumed these edge cases are rare: + // + // The probability table will be 1 for approximately 1/n samples, i.e. only the + // last unpaired probability. 
This is only worth checking for when the table size (n) + // is small. But in that case the user should zero-pad the table for performance. + // + // The probability table will be 0 when an input probability was zero. We + // will assume this is also rare if modelling a discrete distribution where + // all samples are possible. The edge case for zero-padded tables is handled above. + + // Choose between the two. Use a 53-bit long for the probability. + if (generator.nextLong() ushr 11 < probability[j]) j else alias[j] + } + + public override fun toString(): String = "Alias method" + + public companion object { + private const val DEFAULT_ALPHA = 0 + private const val ZERO = 0.0 + private const val ONE_AS_NUMERATOR = 1L shl 53 + private const val CONVERT_TO_NUMERATOR: Double = ONE_AS_NUMERATOR.toDouble() + private const val MAX_SMALL_POWER_2_SIZE = 1 shl 11 + + public fun of( + probabilities: DoubleArray, + alpha: Int = DEFAULT_ALPHA + ): Sampler { + // The Alias method balances N categories with counts around the mean into N sections, + // each allocated 'mean' observations. + // + // Consider 4 categories with counts 6,3,2,1. The histogram can be balanced into a + // 2D array as 4 sections with a height of the mean: + // + // 6 + // 6 + // 6 + // 63 => 6366 -- + // 632 6326 |-- mean + // 6321 6321 -- + // + // section abcd + // + // Each section is divided as: + // a: 6=1/1 + // b: 3=1/1 + // c: 2=2/3; 6=1/3 (6 is the alias) + // d: 1=1/3; 6=2/3 (6 is the alias) + // + // The sample is obtained by randomly selecting a section, then choosing which category + // from the pair based on a uniform random deviate. + val sumProb = InternalUtils.validateProbabilities(probabilities) + // Allow zero-padding + val n = computeSize(probabilities.size, alpha) + // Partition into small and large by splitting on the average. + val mean = sumProb / n + // The cardinality of smallSize + largeSize = n. + // So fill the same array from either end. 
+ val indices = IntArray(n) + var large = n + var small = 0 + + probabilities.indices.forEach { i -> + if (probabilities[i] >= mean) indices[--large] = i else indices[small++] = i + } + + small = fillRemainingIndices(probabilities.size, indices, small) + // This may be smaller than the input length if the probabilities were already padded. + val nonZeroIndex = findLastNonZeroIndex(probabilities) + // The probabilities are modified so use a copy. + // Note: probabilities are required only up to last nonZeroIndex + val remainingProbabilities = probabilities.copyOf(nonZeroIndex + 1) + // Allocate the final tables. + // Probability table may be truncated (when zero padded). + // The alias table is full length. + val probability = LongArray(remainingProbabilities.size) + val alias = IntArray(n) + + // This loop uses each large in turn to fill the alias table for small probabilities that + // do not reach the requirement to fill an entire section alone (i.e. p < mean). + // Since the sum of the small should be less than the sum of the large it should use up + // all the small first. However floating point round-off can result in + // misclassification of items as small or large. The Vose algorithm handles this using + // a while loop conditioned on the size of both sets and a subsequent loop to use + // unpaired items. + while (large != n && small != 0) { + // Index of the small and the large probabilities. + val j = indices[--small] + val k = indices[large++] + + // Optimisation for zero-padded input: + // p(j) = 0 above the last nonZeroIndex + if (j > nonZeroIndex) + // The entire amount for the section is taken from the alias. + remainingProbabilities[k] -= mean + else { + val pj = remainingProbabilities[j] + // Item j is a small probability that is below the mean. + // Compute the weight of the section for item j: pj / mean. + // This is scaled by 2^53 and the ceiling function used to round-up + // the probability to a numerator of a fraction in the range [1,2^53]. 
+ // Ceiling ensures non-zero values. + probability[j] = ceil(CONVERT_TO_NUMERATOR * (pj / mean)).toLong() + // The remaining amount for the section is taken from the alias. + // Effectively: probabilities[k] -= (mean - pj) + remainingProbabilities[k] += pj - mean + } + + // If not j then the alias is k + alias[j] = k + + // Add the remaining probability from large to the appropriate list. + if (remainingProbabilities[k] >= mean) indices[--large] = k else indices[small++] = k + } + + // Final loop conditions to consume unpaired items. + // Note: The large set should never be non-empty but this can occur due to round-off + // error so consume from both. + fillTable(probability, alias, indices, 0, small) + fillTable(probability, alias, indices, large, n) + + // Change the algorithm for small power of 2 sized tables + return if (isSmallPowerOf2(n)) + SmallTableAliasMethodDiscreteSampler(probability, alias) + else + AliasMethodDiscreteSampler(probability, alias) + } + + private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int { + var updatedSmall = small + (length until indices.size).forEach { i -> indices[updatedSmall++] = i } + return updatedSmall + } + + private fun findLastNonZeroIndex(probabilities: DoubleArray): Int { + // No bounds check is performed when decrementing as the array contains at least one + // value above zero. + var nonZeroIndex = probabilities.size - 1 + while (probabilities[nonZeroIndex] == ZERO) nonZeroIndex-- + return nonZeroIndex + } + + private fun computeSize(length: Int, alpha: Int): Int { + // If No padding + if (alpha < 0) return length + // Use the number of leading zeros function to find the next power of 2, + // i.e. ceil(log2(x)) + var pow2 = 32 - numberOfLeadingZeros(length - 1) + // Increase by the alpha. 
Clip this to limit to a positive integer (2^30) + pow2 = min(30, pow2 + alpha) + // Use max to handle a length above the highest possible power of 2 + return max(length, 1 shl pow2) + } + + private fun fillTable( + probability: LongArray, + alias: IntArray, + indices: IntArray, + start: Int, + end: Int + ) = (start until end).forEach { i -> + val index = indices[i] + probability[index] = ONE_AS_NUMERATOR + alias[index] = index + } + + private fun isSmallPowerOf2(n: Int): Boolean = n <= MAX_SMALL_POWER_2_SIZE && n and n - 1 == 0 + + private fun numberOfLeadingZeros(i: Int): Int { + var mutI = i + if (mutI <= 0) return if (mutI == 0) 32 else 0 + var n = 31 + + if (mutI >= 1 shl 16) { + n -= 16 + mutI = mutI ushr 16 + } + + if (mutI >= 1 shl 8) { + n -= 8 + mutI = mutI ushr 8 + } + + if (mutI >= 1 shl 4) { + n -= 4 + mutI = mutI ushr 4 + } + + if (mutI >= 1 shl 2) { + n -= 2 + mutI = mutI ushr 2 + } + + return n - (mutI ushr 1) + } + } +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/BoxMullerNormalizedGaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/BoxMullerNormalizedGaussianSampler.kt new file mode 100644 index 000000000..04beb448d --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/BoxMullerNormalizedGaussianSampler.kt @@ -0,0 +1,48 @@ +package space.kscience.kmath.stat.samplers + +import space.kscience.kmath.chains.Chain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler +import space.kscience.kmath.stat.chain +import kotlin.math.* + +/** + * [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian + * distribution. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html]. 
+ */ +public class BoxMullerNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler { + private var nextGaussian: Double = Double.NaN + + public override fun sample(generator: RandomGenerator): Chain = generator.chain { + val random: Double + + if (nextGaussian.isNaN()) { + // Generate a pair of Gaussian numbers. + val x = nextDouble() + val y = nextDouble() + val alpha = 2 * PI * x + val r = sqrt(-2 * ln(y)) + // Return the first element of the generated pair. + random = r * cos(alpha) + // Keep second element of the pair for next invocation. + nextGaussian = r * sin(alpha) + } else { + // Use the second element of the pair (generated at the + // previous invocation). + random = nextGaussian + // Both elements of the pair have been used. + nextGaussian = Double.NaN + } + + random + } + + public override fun toString(): String = "Box-Muller normalized Gaussian deviate" + + public companion object { + public fun of(): BoxMullerNormalizedGaussianSampler = BoxMullerNormalizedGaussianSampler() + } +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/GaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/GaussianSampler.kt new file mode 100644 index 000000000..eba26cfb5 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/GaussianSampler.kt @@ -0,0 +1,43 @@ +package space.kscience.kmath.stat.samplers + +import space.kscience.kmath.chains.Chain +import space.kscience.kmath.chains.map +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler + +/** + * Sampling from a Gaussian distribution with given mean and standard deviation. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html]. + * + * @property mean the mean of the distribution. 
+ * @property standardDeviation the standard deviation of the distribution. + */ +public class GaussianSampler private constructor( + public val mean: Double, + public val standardDeviation: Double, + private val normalized: NormalizedGaussianSampler +) : Sampler { + public override fun sample(generator: RandomGenerator): Chain = normalized + .sample(generator) + .map { standardDeviation * it + mean } + + override fun toString(): String = "Gaussian deviate [$normalized]" + + public companion object { + public fun of( + mean: Double, + standardDeviation: Double, + normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of() + ): GaussianSampler { + require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" } + + return GaussianSampler( + mean, + standardDeviation, + normalized + ) + } + } +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/KempSmallMeanPoissonSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/KempSmallMeanPoissonSampler.kt new file mode 100644 index 000000000..1d7f90023 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/KempSmallMeanPoissonSampler.kt @@ -0,0 +1,63 @@ +package space.kscience.kmath.stat.samplers + +import space.kscience.kmath.chains.Chain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler +import space.kscience.kmath.stat.chain +import kotlin.math.exp + +/** + * Sampler for the Poisson distribution. + * - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253. + * This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead. + * + * Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability.
When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor. + * + * Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html]. + */ +public class KempSmallMeanPoissonSampler private constructor( + private val p0: Double, + private val mean: Double +) : Sampler { + public override fun sample(generator: RandomGenerator): Chain = generator.chain { + // Note on the algorithm: + // - X is the unknown sample deviate (the output of the algorithm) + // - x is the current value from the distribution + // - p is the probability of the current value x, p(X=x) + // - u is effectively the cumulative probability that the sample X + // is equal or above the current value x, p(X>=x) + // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x + var u = nextDouble() + var x = 0 + var p = p0 + + while (u > p) { + u -= p + // Compute the next probability using a recurrence relation. + // p(x+1) = p(x) * mean / (x+1) + p *= mean / ++x + // The algorithm listed in Kemp (1981) does not check that the rolling probability + // is positive. This check is added to ensure no errors when the limit of the summation + // 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic. + if (p == 0.0) return@chain x + } + + x + } + + public override fun toString(): String = "Kemp Small Mean Poisson deviate" + + public companion object { + public fun of(mean: Double): KempSmallMeanPoissonSampler { + require(mean > 0) { "Mean is not strictly positive: $mean" } + val p0 = exp(-mean) + // Probability must be positive. As mean increases then p(0) decreases. 
+ require(p0 > 0) { "No probability for mean: $mean" } + return KempSmallMeanPoissonSampler(p0, mean) + } + } +} + diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/LargeMeanPoissonSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/LargeMeanPoissonSampler.kt new file mode 100644 index 000000000..de1e7cc89 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/LargeMeanPoissonSampler.kt @@ -0,0 +1,130 @@ +package space.kscience.kmath.stat.samplers + +import space.kscience.kmath.chains.Chain +import space.kscience.kmath.chains.ConstantChain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler +import space.kscience.kmath.stat.chain +import space.kscience.kmath.stat.internal.InternalUtils +import space.kscience.kmath.stat.next +import kotlin.math.* + +/** + * Sampler for the Poisson distribution. + * - For large means, we use the rejection algorithm described in + * Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables + * Computing vol. 26 pp. 197-207. + * + * This sampler is suitable for mean >= 40. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html]. 
+ */ +public class LargeMeanPoissonSampler private constructor(public val mean: Double) : Sampler { + private val exponential: Sampler = AhrensDieterExponentialSampler.of(1.0) + private val gaussian: Sampler = ZigguratNormalizedGaussianSampler.of() + private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG + private val lambda: Double = floor(mean) + private val logLambda: Double = ln(lambda) + private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt()) + private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1)) + private val halfDelta: Double = delta / 2 + private val twolpd: Double = 2 * lambda + delta + private val c1: Double = 1 / (8 * lambda) + private val a1: Double = sqrt(PI * twolpd) * exp(c1) + private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd) + private val aSum: Double = a1 + a2 + 1 + private val p1: Double = a1 / aSum + private val p2: Double = a2 / aSum + + private val smallMeanPoissonSampler: Sampler = if (mean - lambda < Double.MIN_VALUE) + NO_SMALL_MEAN_POISSON_SAMPLER + else // Not used. + KempSmallMeanPoissonSampler.of(mean - lambda) + + public override fun sample(generator: RandomGenerator): Chain = generator.chain { + // This will never be null. It may be a no-op delegate that returns zero. 
+ val y2 = smallMeanPoissonSampler.next(generator) + var x: Double + var y: Double + var v: Double + var a: Int + var t: Double + var qr: Double + var qa: Double + + while (true) { + // Step 1: + val u = generator.nextDouble() + + if (u <= p1) { + // Step 2: + val n = gaussian.next(generator) + x = n * sqrt(lambda + halfDelta) - 0.5 + if (x > delta || x < -lambda) continue + y = if (x < 0) floor(x) else ceil(x) + val e = exponential.next(generator) + v = -e - 0.5 * n * n + c1 + } else { + // Step 3: + if (u > p1 + p2) { + y = lambda + break + } + + x = delta + twolpd / delta * exponential.next(generator) + y = ceil(x) + v = -exponential.next(generator) - delta * (x + 1) / twolpd + } + + // The Squeeze Principle + // Step 4.1: + a = if (x < 0) 1 else 0 + t = y * (y + 1) / (2 * lambda) + + // Step 4.2 + if (v < -t && a == 0) { + y += lambda + break + } + + // Step 4.3: + qr = t * ((2 * y + 1) / (6 * lambda) - 1) + qa = qr - t * t / (3 * (lambda + a * (y + 1))) + + // Step 4.4: + if (v < qa) { + y += lambda + break + } + + // Step 4.5: + if (v > qr) continue + + // Step 4.6: + if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) { + y += lambda + break + } + } + + min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt() + } + + private fun getFactorialLog(n: Int): Double = factorialLog.value(n) + public override fun toString(): String = "Large Mean Poisson deviate" + + public companion object { + private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE + private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create() + + private val NO_SMALL_MEAN_POISSON_SAMPLER: Sampler = Sampler { ConstantChain(0) } + + public fun of(mean: Double): LargeMeanPoissonSampler { + require(mean >= 1) { "mean is not >= 1: $mean" } + // The algorithm is not valid if Math.floor(mean) is not an integer. 
+ require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" } + return LargeMeanPoissonSampler(mean) + } + } +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/MarsagliaNormalizedGaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/MarsagliaNormalizedGaussianSampler.kt new file mode 100644 index 000000000..8a659642f --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/MarsagliaNormalizedGaussianSampler.kt @@ -0,0 +1,61 @@ +package space.kscience.kmath.stat.samplers + +import space.kscience.kmath.chains.Chain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler +import space.kscience.kmath.stat.chain +import kotlin.math.ln +import kotlin.math.sqrt + +/** + * [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian + * distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in + * [BoxMullerNormalizedGaussianSampler]. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html] + */ +public class MarsagliaNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler { + private var nextGaussian = Double.NaN + + public override fun sample(generator: RandomGenerator): Chain = generator.chain { + if (nextGaussian.isNaN()) { + val alpha: Double + var x: Double + + // Rejection scheme for selecting a pair that lies within the unit circle. + while (true) { + // Generate a pair of numbers within [-1 , 1). + x = 2.0 * generator.nextDouble() - 1.0 + val y = 2.0 * generator.nextDouble() - 1.0 + val r2 = x * x + y * y + + if (r2 < 1 && r2 > 0) { + // Pair (x, y) is within unit circle. + alpha = sqrt(-2 * ln(r2) / r2) + // Keep second element of the pair for next invocation. 
+ nextGaussian = alpha * y + // Return the first element of the generated pair. + break + } + // Pair is not within the unit circle: Generate another one. + } + + // Return the first element of the generated pair. + alpha * x + } else { + // Use the second element of the pair (generated at the + // previous invocation). + val r = nextGaussian + // Both elements of the pair have been used. + nextGaussian = Double.NaN + r + } + } + + public override fun toString(): String = "Box-Muller (with rejection) normalized Gaussian deviate" + + public companion object { + public fun of(): MarsagliaNormalizedGaussianSampler = MarsagliaNormalizedGaussianSampler() + } +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/NormalizedGaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/NormalizedGaussianSampler.kt new file mode 100644 index 000000000..4eb3d60e0 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/NormalizedGaussianSampler.kt @@ -0,0 +1,9 @@ +package space.kscience.kmath.stat.samplers + +import space.kscience.kmath.stat.Sampler + +/** + * Marker interface for a sampler that generates values from an N(0,1) + * [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution). + */ +public interface NormalizedGaussianSampler : Sampler diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/PoissonSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/PoissonSampler.kt new file mode 100644 index 000000000..0c0234892 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/PoissonSampler.kt @@ -0,0 +1,30 @@ +package space.kscience.kmath.stat.samplers + +import space.kscience.kmath.chains.Chain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler + +/** + * Sampler for the Poisson distribution. 
+ * - For small means, a Poisson process is simulated using uniform deviates, as described in + * Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 + * Important integer-valued distributions: The Poisson distribution. Addison Wesley. + * The Poisson process (and hence, the returned value) is bounded by 1000 * mean. + * - For large means, we use the rejection algorithm described in + * Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html]. + */ +public class PoissonSampler private constructor(mean: Double) : Sampler { + private val poissonSamplerDelegate: Sampler = of(mean) + public override fun sample(generator: RandomGenerator): Chain = poissonSamplerDelegate.sample(generator) + public override fun toString(): String = poissonSamplerDelegate.toString() + + public companion object { + private const val PIVOT = 40.0 + + public fun of(mean: Double): Sampler =// Each sampler should check the input arguments. 
+ if (mean < PIVOT) SmallMeanPoissonSampler.of(mean) else LargeMeanPoissonSampler.of(mean) + } +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/SmallMeanPoissonSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/SmallMeanPoissonSampler.kt new file mode 100644 index 000000000..0fe7ff161 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/SmallMeanPoissonSampler.kt @@ -0,0 +1,50 @@ +package space.kscience.kmath.stat.samplers + +import space.kscience.kmath.chains.Chain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler +import space.kscience.kmath.stat.chain +import kotlin.math.ceil +import kotlin.math.exp + +/** + * Sampler for the Poisson distribution. + * - For small means, a Poisson process is simulated using uniform deviates, as described in + * Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important + * integer-valued distributions: The Poisson distribution. Addison Wesley. + * - The Poisson process (and hence, the returned value) is bounded by 1000 * mean. + * This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead. + * + * Based on Commons RNG implementation. + * + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html]. 
+ */ +public class SmallMeanPoissonSampler private constructor(mean: Double) : Sampler { + private val p0: Double = exp(-mean) + + private val limit: Int = (if (p0 > 0) + ceil(1000 * mean) + else + throw IllegalArgumentException("No p(x=0) probability for mean: $mean")).toInt() + + public override fun sample(generator: RandomGenerator): Chain = generator.chain { + var n = 0 + var r = 1.0 + + while (n < limit) { + r *= nextDouble() + if (r >= p0) n++ else break + } + + n + } + + public override fun toString(): String = "Small Mean Poisson deviate" + + public companion object { + public fun of(mean: Double): SmallMeanPoissonSampler { + require(mean > 0) { "mean is not strictly positive: $mean" } + return SmallMeanPoissonSampler(mean) + } + } +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/ZigguratNormalizedGaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/ZigguratNormalizedGaussianSampler.kt new file mode 100644 index 000000000..90815209f --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/ZigguratNormalizedGaussianSampler.kt @@ -0,0 +1,88 @@ +package space.kscience.kmath.stat.samplers + +import space.kscience.kmath.chains.Chain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler +import space.kscience.kmath.stat.chain +import kotlin.math.* + +/** + * [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a + * Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in Marsaglia and Tsang, + * "The Ziggurat Method for Generating Random Variables" (2000); this implementation is adapted from the C code therein. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html].
+ */ +public class ZigguratNormalizedGaussianSampler private constructor() : + NormalizedGaussianSampler, Sampler { + + private fun sampleOne(generator: RandomGenerator): Double { + val j = generator.nextLong() + val i = (j and LAST.toLong()).toInt() + return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i) + } + + public override fun sample(generator: RandomGenerator): Chain = generator.chain { sampleOne(this) } + public override fun toString(): String = "Ziggurat normalized Gaussian deviate" + + private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double { + var x = hz * W[iz] + + return when { + iz == 0 -> { + var y: Double + + do { + y = -ln(generator.nextDouble()) + x = -ln(generator.nextDouble()) * ONE_OVER_R + } while (y + y < x * x) + + val out = R + x + if (hz > 0) out else -out + } + + F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x + else -> sampleOne(generator) + } + } + + public companion object { + private const val R: Double = 3.442619855899 + private const val ONE_OVER_R: Double = 1 / R + private const val V: Double = 9.91256303526217e-3 + private val MAX: Double = 2.0.pow(63.0) + private val ONE_OVER_MAX: Double = 1.0 / MAX + private const val LEN: Int = 128 + private const val LAST: Int = LEN - 1 + private val K: LongArray = LongArray(LEN) + private val W: DoubleArray = DoubleArray(LEN) + private val F: DoubleArray = DoubleArray(LEN) + + init { + // Filling the tables. 
+ var d = R + var t = d + var fd = gauss(d) + val q = V / fd + K[0] = (d / q * MAX).toLong() + K[1] = 0 + W[0] = q * ONE_OVER_MAX + W[LAST] = d * ONE_OVER_MAX + F[0] = 1.0 + F[LAST] = fd + + (LAST - 1 downTo 1).forEach { i -> + d = sqrt(-2 * ln(V / d + fd)) + fd = gauss(d) + K[i + 1] = (d / t * MAX).toLong() + t = d + F[i] = fd + W[i] = d * ONE_OVER_MAX + } + } + + public fun of(): ZigguratNormalizedGaussianSampler = ZigguratNormalizedGaussianSampler() + private fun gauss(x: Double): Double = exp(-0.5 * x * x) + } +} diff --git a/kmath-stat/src/jvmMain/kotlin/space/kscience/kmath/stat/RandomSourceGenerator.kt b/kmath-stat/src/jvmMain/kotlin/space/kscience/kmath/stat/RandomSourceGenerator.kt index 9e752d571..f6a5d6605 100644 --- a/kmath-stat/src/jvmMain/kotlin/space/kscience/kmath/stat/RandomSourceGenerator.kt +++ b/kmath-stat/src/jvmMain/kotlin/space/kscience/kmath/stat/RandomSourceGenerator.kt @@ -3,10 +3,14 @@ package space.kscience.kmath.stat import org.apache.commons.rng.UniformRandomProvider import org.apache.commons.rng.simple.RandomSource -public class RandomSourceGenerator(public val source: RandomSource, seed: Long?) : RandomGenerator { - internal val random: UniformRandomProvider = seed?.let { - RandomSource.create(source, seed) - } ?: RandomSource.create(source) +/** + * Implements [RandomGenerator] by delegating all operations to [RandomSource]. + * + * @property source the underlying [RandomSource] object. + */ +public class RandomSourceGenerator internal constructor(public val source: RandomSource, seed: Long?) : RandomGenerator { + internal val random: UniformRandomProvider = seed?.let { RandomSource.create(source, seed) } + ?: RandomSource.create(source) public override fun nextBoolean(): Boolean = random.nextBoolean() public override fun nextDouble(): Double = random.nextDouble() @@ -23,22 +27,84 @@ public class RandomSourceGenerator(public val source: RandomSource, seed: Long?) 
public override fun fork(): RandomGenerator = RandomSourceGenerator(source, nextLong()) } +/** + * Implements [UniformRandomProvider] by delegating all operations to [RandomGenerator]. + * + * @property generator the underlying [RandomGenerator] object. + */ public inline class RandomGeneratorProvider(public val generator: RandomGenerator) : UniformRandomProvider { + /** + * Generates a [Boolean] value. + * + * @return the next random value. + */ public override fun nextBoolean(): Boolean = generator.nextBoolean() + + /** + * Generates a [Float] value between 0 and 1. + * + * @return the next random value between 0 and 1. + */ public override fun nextFloat(): Float = generator.nextDouble().toFloat() - public override fun nextBytes(bytes: ByteArray) { - generator.fillBytes(bytes) - } + /** + * Generates [Byte] values and places them into a user-supplied array. + * + * The number of random bytes produced is equal to the length of the byte array. + * + * @param bytes byte array in which to put the random bytes. + */ + public override fun nextBytes(bytes: ByteArray): Unit = generator.fillBytes(bytes) + /** + * Generates [Byte] values and places them into a user-supplied array. + * + * The array is filled with bytes extracted from random integers. This implies that the number of random bytes + * generated may be larger than the length of the byte array. + * + * @param bytes the array in which to put the generated bytes. + * @param start the index at which to start inserting the generated bytes. + * @param len the number of bytes to insert. + */ public override fun nextBytes(bytes: ByteArray, start: Int, len: Int) { generator.fillBytes(bytes, start, start + len) } + /** + * Generates an [Int] value. + * + * @return the next random value. + */ public override fun nextInt(): Int = generator.nextInt() + + /** + * Generates an [Int] value between 0 (inclusive) and the specified value (exclusive). + * + * @param n the bound on the random number to be returned.
Must be positive. + * @return a random integer between 0 (inclusive) and [n] (exclusive). + */ public override fun nextInt(n: Int): Int = generator.nextInt(n) + + /** + * Generates a [Double] value between 0 and 1. + * + * @return the next random value between 0 and 1. + */ public override fun nextDouble(): Double = generator.nextDouble() + + /** + * Generates a [Long] value. + * + * @return the next random value. + */ public override fun nextLong(): Long = generator.nextLong() + + /** + * Generates a [Long] value between 0 (inclusive) and the specified value (exclusive). + * + * @param n Bound on the random number to be returned. Must be positive. + * @return a random long value between 0 (inclusive) and [n] (exclusive). + */ public override fun nextLong(n: Long): Long = generator.nextLong(n) } @@ -51,8 +117,14 @@ public fun RandomGenerator.asUniformRandomProvider(): UniformRandomProvider = if else RandomGeneratorProvider(this) +/** + * Returns [RandomSourceGenerator] with given [RandomSource] and [seed]. + */ public fun RandomGenerator.Companion.fromSource(source: RandomSource, seed: Long? = null): RandomSourceGenerator = RandomSourceGenerator(source, seed) +/** + * Returns [RandomSourceGenerator] with [RandomSource.MT] algorithm and given [seed]. + */ public fun RandomGenerator.Companion.mersenneTwister(seed: Long? 
= null): RandomSourceGenerator = fromSource(RandomSource.MT, seed) diff --git a/kmath-stat/src/jvmMain/kotlin/space/kscience/kmath/stat/distributions.kt b/kmath-stat/src/jvmMain/kotlin/space/kscience/kmath/stat/distributions.kt deleted file mode 100644 index c3d711789..000000000 --- a/kmath-stat/src/jvmMain/kotlin/space/kscience/kmath/stat/distributions.kt +++ /dev/null @@ -1,99 +0,0 @@ -package space.kscience.kmath.stat - -import org.apache.commons.rng.UniformRandomProvider -import org.apache.commons.rng.sampling.distribution.* -import space.kscience.kmath.chains.BlockingDoubleChain -import space.kscience.kmath.chains.BlockingIntChain -import space.kscience.kmath.chains.Chain -import kotlin.math.PI -import kotlin.math.exp -import kotlin.math.pow -import kotlin.math.sqrt - -public abstract class ContinuousSamplerDistribution : Distribution { - private inner class ContinuousSamplerChain(val generator: RandomGenerator) : BlockingDoubleChain() { - private val sampler = buildCMSampler(generator) - - override fun nextDouble(): Double = sampler.sample() - override fun fork(): Chain = ContinuousSamplerChain(generator.fork()) - } - - protected abstract fun buildCMSampler(generator: RandomGenerator): ContinuousSampler - - public override fun sample(generator: RandomGenerator): BlockingDoubleChain = ContinuousSamplerChain(generator) -} - -public abstract class DiscreteSamplerDistribution : Distribution { - private inner class ContinuousSamplerChain(val generator: RandomGenerator) : BlockingIntChain() { - private val sampler = buildSampler(generator) - - override fun nextInt(): Int = sampler.sample() - override fun fork(): Chain = ContinuousSamplerChain(generator.fork()) - } - - protected abstract fun buildSampler(generator: RandomGenerator): DiscreteSampler - - public override fun sample(generator: RandomGenerator): BlockingIntChain = ContinuousSamplerChain(generator) -} - -public enum class NormalSamplerMethod { - BoxMuller, - Marsaglia, - Ziggurat -} - -private fun 
normalSampler(method: NormalSamplerMethod, provider: UniformRandomProvider): NormalizedGaussianSampler = - when (method) { - NormalSamplerMethod.BoxMuller -> BoxMullerNormalizedGaussianSampler(provider) - NormalSamplerMethod.Marsaglia -> MarsagliaNormalizedGaussianSampler(provider) - NormalSamplerMethod.Ziggurat -> ZigguratNormalizedGaussianSampler(provider) - } - -public fun Distribution.Companion.normal( - method: NormalSamplerMethod = NormalSamplerMethod.Ziggurat, -): ContinuousSamplerDistribution = object : ContinuousSamplerDistribution() { - override fun buildCMSampler(generator: RandomGenerator): ContinuousSampler { - val provider = generator.asUniformRandomProvider() - return normalSampler(method, provider) - } - - override fun probability(arg: Double): Double = exp(-arg.pow(2) / 2) / sqrt(PI * 2) -} - -/** - * A univariate normal distribution with given [mean] and [sigma]. [method] defines commons-rng generation method - */ -public fun Distribution.Companion.normal( - mean: Double, - sigma: Double, - method: NormalSamplerMethod = NormalSamplerMethod.Ziggurat, -): ContinuousSamplerDistribution = object : ContinuousSamplerDistribution() { - private val sigma2 = sigma.pow(2) - private val norm = sigma * sqrt(PI * 2) - - override fun buildCMSampler(generator: RandomGenerator): ContinuousSampler { - val provider = generator.asUniformRandomProvider() - val normalizedSampler = normalSampler(method, provider) - return GaussianSampler(normalizedSampler, mean, sigma) - } - - override fun probability(arg: Double): Double = exp(-(arg - mean).pow(2) / 2 / sigma2) / norm -} - -public fun Distribution.Companion.poisson( - lambda: Double, -): DiscreteSamplerDistribution = object : DiscreteSamplerDistribution() { - private val computedProb: HashMap = hashMapOf(0 to exp(-lambda)) - - override fun buildSampler(generator: RandomGenerator): DiscreteSampler = - PoissonSampler.of(generator.asUniformRandomProvider(), lambda) - - override fun probability(arg: Int): Double { - 
require(arg >= 0) { "The argument must be >= 0" } - - return if (arg > 40) - exp(-(arg - lambda).pow(2) / 2 / lambda) / sqrt(2 * PI * lambda) - else - computedProb.getOrPut(arg) { probability(arg - 1) * lambda / arg } - } -} diff --git a/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/CommonsDistributionsTest.kt b/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/CommonsDistributionsTest.kt index 70708a5c8..76aac65c4 100644 --- a/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/CommonsDistributionsTest.kt +++ b/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/CommonsDistributionsTest.kt @@ -5,23 +5,22 @@ import kotlinx.coroutines.flow.toList import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.Assertions import org.junit.jupiter.api.Test +import space.kscience.kmath.stat.samplers.GaussianSampler internal class CommonsDistributionsTest { @Test fun testNormalDistributionSuspend() { - val distribution = Distribution.normal(7.0, 2.0) + val distribution = GaussianSampler.of(7.0, 2.0) val generator = RandomGenerator.default(1) - val sample = runBlocking { - distribution.sample(generator).take(1000).toList() - } + val sample = runBlocking { distribution.sample(generator).take(1000).toList() } Assertions.assertEquals(7.0, sample.average(), 0.1) } @Test fun testNormalDistributionBlocking() { - val distribution = Distribution.normal(7.0, 2.0) + val distribution = GaussianSampler.of(7.0, 2.0) val generator = RandomGenerator.default(1) - val sample = distribution.sample(generator).nextBlock(1000) + val sample = runBlocking { distribution.sample(generator).blocking().nextBlock(1000) } Assertions.assertEquals(7.0, sample.average(), 0.1) } } diff --git a/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/SamplerTest.kt b/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/SamplerTest.kt index 244b5107f..497a843c0 100644 --- a/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/SamplerTest.kt +++ 
b/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/SamplerTest.kt @@ -7,11 +7,8 @@ class SamplerTest { @Test fun bufferSamplerTest() { - val sampler: Sampler = - BasicSampler { it.chain { nextDouble() } } + val sampler = Sampler { it.chain { nextDouble() } } val data = sampler.sampleBuffer(RandomGenerator.default, 100) - runBlocking { - println(data.next()) - } + runBlocking { println(data.next()) } } } \ No newline at end of file