From c2bab5d13857ac9d6a5a00f9667d1e21ce8bee95 Mon Sep 17 00:00:00 2001 From: Alexander Nozik Date: Thu, 1 Apr 2021 18:18:54 +0300 Subject: [PATCH] Fix Samplers and distribution API --- .../kmath/commons/fit/fitWithAutoDiff.kt | 2 +- .../kmath/stat/DistributionBenchmark.kt | 6 +- .../kscience/kmath/stat/DistributionDemo.kt | 2 +- .../commons/optimization/OptimizeTest.kt | 2 +- .../kscience/kmath/chains/BlockingChain.kt | 50 ++++ .../kmath/chains/BlockingDoubleChain.kt | 22 +- .../kscience/kmath/chains/BlockingIntChain.kt | 11 +- .../space/kscience/kmath/chains/Chain.kt | 28 +-- .../kscience/kmath/streaming/BufferFlow.kt | 3 +- kmath-stat/build.gradle.kts | 4 + .../kmath/distributions/Distribution.kt | 38 +++ .../FactorizedDistribution.kt | 3 +- .../distributions/NormalDistribution.kt | 15 +- .../kmath/{stat => }/internal/InternalErf.kt | 2 +- .../{stat => }/internal/InternalGamma.kt | 2 +- .../{stat => }/internal/InternalUtils.kt | 2 +- .../AhrensDieterExponentialSampler.kt | 72 ++++++ .../AhrensDieterMarsagliaTsangGammaSampler.kt | 4 +- .../samplers/AliasMethodDiscreteSampler.kt | 220 +++++++++--------- .../kmath/samplers/BoxMullerSampler.kt | 52 +++++ .../kmath/samplers/ConstantSampler.kt | 13 ++ .../kmath/samplers/GaussianSampler.kt | 34 +++ .../samplers/KempSmallMeanPoissonSampler.kt | 68 ++++++ .../MarsagliaNormalizedGaussianSampler.kt | 61 +++++ .../samplers/NormalizedGaussianSampler.kt | 18 ++ .../kscience/kmath/samplers/PoissonSampler.kt | 203 ++++++++++++++++ .../ZigguratNormalizedGaussianSampler.kt | 88 +++++++ .../space/kscience/kmath/stat/RandomChain.kt | 24 +- .../kscience/kmath/stat/RandomGenerator.kt | 6 + .../stat/{Distribution.kt => Sampler.kt} | 46 +--- .../space/kscience/kmath/stat/Statistic.kt | 2 +- .../kmath/stat/UniformDistribution.kt | 2 + .../AhrensDieterExponentialSampler.kt | 73 ------ .../BoxMullerNormalizedGaussianSampler.kt | 48 ---- .../kmath/stat/samplers/GaussianSampler.kt | 43 ---- .../samplers/KempSmallMeanPoissonSampler.kt | 63 ----- .../stat/samplers/LargeMeanPoissonSampler.kt | 130 ----------- .../MarsagliaNormalizedGaussianSampler.kt | 61 ----- .../samplers/NormalizedGaussianSampler.kt | 9 - .../kmath/stat/samplers/PoissonSampler.kt | 30 --- .../stat/samplers/SmallMeanPoissonSampler.kt | 50 ---- .../ZigguratNormalizedGaussianSampler.kt | 88 ------- .../kmath/stat/CommonsDistributionsTest.kt | 19 +- .../kscience/kmath/stat/StatisticTest.kt | 2 +- 44 files changed, 915 insertions(+), 806 deletions(-) create mode 100644 kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingChain.kt create mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/distributions/Distribution.kt rename kmath-stat/src/commonMain/kotlin/space/kscience/kmath/{stat => distributions}/FactorizedDistribution.kt (94%) rename kmath-stat/src/commonMain/kotlin/space/kscience/kmath/{stat => }/distributions/NormalDistribution.kt (71%) rename kmath-stat/src/commonMain/kotlin/space/kscience/kmath/{stat => }/internal/InternalErf.kt (90%) rename kmath-stat/src/commonMain/kotlin/space/kscience/kmath/{stat => }/internal/InternalGamma.kt (99%) rename kmath-stat/src/commonMain/kotlin/space/kscience/kmath/{stat => }/internal/InternalUtils.kt (98%) create mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/AhrensDieterExponentialSampler.kt rename kmath-stat/src/commonMain/kotlin/space/kscience/kmath/{stat => }/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt (97%) rename kmath-stat/src/commonMain/kotlin/space/kscience/kmath/{stat => }/samplers/AliasMethodDiscreteSampler.kt (58%) create mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/BoxMullerSampler.kt create mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/ConstantSampler.kt create mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/GaussianSampler.kt create mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/KempSmallMeanPoissonSampler.kt create mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/MarsagliaNormalizedGaussianSampler.kt create mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/NormalizedGaussianSampler.kt create mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/PoissonSampler.kt create mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/ZigguratNormalizedGaussianSampler.kt rename kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/{Distribution.kt => Sampler.kt} (54%) delete mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AhrensDieterExponentialSampler.kt delete mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/BoxMullerNormalizedGaussianSampler.kt delete mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/GaussianSampler.kt delete mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/KempSmallMeanPoissonSampler.kt delete mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/LargeMeanPoissonSampler.kt delete mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/MarsagliaNormalizedGaussianSampler.kt delete mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/NormalizedGaussianSampler.kt delete mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/PoissonSampler.kt delete mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/SmallMeanPoissonSampler.kt delete mode 100644 kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/ZigguratNormalizedGaussianSampler.kt diff --git a/examples/src/main/kotlin/space/kscience/kmath/commons/fit/fitWithAutoDiff.kt b/examples/src/main/kotlin/space/kscience/kmath/commons/fit/fitWithAutoDiff.kt index 04c55b34c..02534ac98 100644 --- a/examples/src/main/kotlin/space/kscience/kmath/commons/fit/fitWithAutoDiff.kt +++ b/examples/src/main/kotlin/space/kscience/kmath/commons/fit/fitWithAutoDiff.kt @@ -7,6 +7,7 @@ import kscience.plotly.models.ScatterMode import kscience.plotly.models.TraceValues import space.kscience.kmath.commons.optimization.chiSquared import space.kscience.kmath.commons.optimization.minimize +import space.kscience.kmath.distributions.NormalDistribution import space.kscience.kmath.misc.symbol import space.kscience.kmath.optimization.FunctionOptimization import space.kscience.kmath.optimization.OptimizationResult @@ -14,7 +15,6 @@ import space.kscience.kmath.real.DoubleVector import space.kscience.kmath.real.map import space.kscience.kmath.real.step import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.distributions.NormalDistribution import space.kscience.kmath.structures.asIterable import space.kscience.kmath.structures.toList import kotlin.math.pow diff --git a/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionBenchmark.kt b/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionBenchmark.kt index bfd138502..5cf96adaa 100644 --- a/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionBenchmark.kt +++ b/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionBenchmark.kt @@ -3,8 +3,8 @@ package space.kscience.kmath.stat import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.async import kotlinx.coroutines.runBlocking -import space.kscience.kmath.stat.samplers.GaussianSampler import org.apache.commons.rng.simple.RandomSource +import space.kscience.kmath.samplers.GaussianSampler import java.time.Duration import java.time.Instant import org.apache.commons.rng.sampling.distribution.GaussianSampler as CMGaussianSampler @@ -12,8 +12,8 @@ import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSa private suspend fun runKMathChained(): Duration { val generator = RandomGenerator.fromSource(RandomSource.MT, 123L) - val normal = GaussianSampler.of(7.0, 2.0) - val chain = normal.sample(generator).blocking() + val normal = GaussianSampler(7.0, 2.0) + val chain = normal.sample(generator) val startTime = Instant.now() var sum = 0.0 diff --git a/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionDemo.kt b/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionDemo.kt index aac7d51d4..1e8542bd8 100644 --- a/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionDemo.kt +++ b/examples/src/main/kotlin/space/kscience/kmath/stat/DistributionDemo.kt @@ -3,7 +3,7 @@ package space.kscience.kmath.stat import kotlinx.coroutines.runBlocking import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.collectWithState -import space.kscience.kmath.stat.distributions.NormalDistribution +import space.kscience.kmath.distributions.NormalDistribution /** * The state of distribution averager. diff --git a/kmath-commons/src/test/kotlin/space/kscience/kmath/commons/optimization/OptimizeTest.kt b/kmath-commons/src/test/kotlin/space/kscience/kmath/commons/optimization/OptimizeTest.kt index 36f2639f4..a51c407c2 100644 --- a/kmath-commons/src/test/kotlin/space/kscience/kmath/commons/optimization/OptimizeTest.kt +++ b/kmath-commons/src/test/kotlin/space/kscience/kmath/commons/optimization/OptimizeTest.kt @@ -2,10 +2,10 @@ package space.kscience.kmath.commons.optimization import kotlinx.coroutines.runBlocking import space.kscience.kmath.commons.expressions.DerivativeStructureExpression +import space.kscience.kmath.distributions.NormalDistribution import space.kscience.kmath.misc.symbol import space.kscience.kmath.optimization.FunctionOptimization import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.distributions.NormalDistribution import kotlin.math.pow import kotlin.test.Test diff --git a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingChain.kt b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingChain.kt new file mode 100644 index 000000000..429175126 --- /dev/null +++ b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingChain.kt @@ -0,0 +1,50 @@ +package space.kscience.kmath.chains + +import space.kscience.kmath.structures.Buffer + + +public interface BufferChain : Chain { + public suspend fun nextBuffer(size: Int): Buffer + override suspend fun fork(): BufferChain +} + +/** + * A chain with blocking generator that could be used without suspension + */ +public interface BlockingChain : Chain { + /** + * Get the next value without concurrency support. Not guaranteed to be thread safe. + */ + public fun nextBlocking(): T + + override suspend fun next(): T = nextBlocking() + + override suspend fun fork(): BlockingChain +} + + +public interface BlockingBufferChain : BlockingChain, BufferChain { + + public fun nextBufferBlocking(size: Int): Buffer + + public override fun nextBlocking(): T = nextBufferBlocking(1)[0] + + public override suspend fun nextBuffer(size: Int): Buffer = nextBufferBlocking(size) + + override suspend fun fork(): BlockingBufferChain +} + + +public suspend inline fun Chain.nextBuffer(size: Int): Buffer = if (this is BufferChain) { + nextBuffer(size) +} else { + Buffer.auto(size) { next() } +} + +public inline fun BlockingChain.nextBufferBlocking( + size: Int, +): Buffer = if (this is BlockingBufferChain) { + nextBufferBlocking(size) +} else { + Buffer.auto(size) { nextBlocking() } +} \ No newline at end of file diff --git a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingDoubleChain.kt b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingDoubleChain.kt index d024147b4..c2153ff6a 100644 --- a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingDoubleChain.kt +++ b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingDoubleChain.kt @@ -1,13 +1,27 @@ package space.kscience.kmath.chains +import space.kscience.kmath.structures.DoubleBuffer + /** - * Chunked, specialized chain for real values. + * Chunked, specialized chain for double values, which supports blocking [nextBlocking] operation */ -public interface BlockingDoubleChain : Chain { - public override suspend fun next(): Double +public interface BlockingDoubleChain : BlockingBufferChain { /** * Returns an [DoubleArray] chunk of [size] values of [next]. */ - public suspend fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { next() } + public override fun nextBufferBlocking(size: Int): DoubleBuffer + + override suspend fun fork(): BlockingDoubleChain + + public companion object } + +public fun BlockingDoubleChain.map(transform: (Double) -> Double): BlockingDoubleChain = object : BlockingDoubleChain { + override fun nextBufferBlocking(size: Int): DoubleBuffer { + val block = this@map.nextBufferBlocking(size) + return DoubleBuffer(size) { transform(block[it]) } + } + + override suspend fun fork(): BlockingDoubleChain = this@map.fork().map(transform) +} \ No newline at end of file diff --git a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingIntChain.kt b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingIntChain.kt index fb2e453ad..21a498646 100644 --- a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingIntChain.kt +++ b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/BlockingIntChain.kt @@ -1,9 +1,12 @@ package space.kscience.kmath.chains +import space.kscience.kmath.structures.IntBuffer + /** * Performance optimized chain for integer values */ -public interface BlockingIntChain : Chain { - public override suspend fun next(): Int - public suspend fun nextBlock(size: Int): IntArray = IntArray(size) { next() } -} +public interface BlockingIntChain : BlockingBufferChain { + override fun nextBufferBlocking(size: Int): IntBuffer + + override suspend fun fork(): BlockingIntChain +} \ No newline at end of file diff --git a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/Chain.kt b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/Chain.kt index a961f2e09..adeaea5a7 100644 --- a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/Chain.kt +++ b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/chains/Chain.kt @@ -24,20 +24,20 @@ import kotlinx.coroutines.sync.withLock /** * A not-necessary-Markov chain of some type - * @param R - the chain element type + * @param T - the chain element type */ -public interface Chain : Flow { +public interface Chain : Flow { /** * Generate next value, changing state if needed */ - public suspend fun next(): R + public suspend fun next(): T /** * Create a copy of current chain state. Consuming resulting chain does not affect initial chain */ - public fun fork(): Chain + public suspend fun fork(): Chain - override suspend fun collect(collector: FlowCollector): Unit = + override suspend fun collect(collector: FlowCollector): Unit = flow { while (true) emit(next()) }.collect(collector) public companion object @@ -51,7 +51,7 @@ public fun Sequence.asChain(): Chain = iterator().asChain() */ public class SimpleChain(private val gen: suspend () -> R) : Chain { public override suspend fun next(): R = gen() - public override fun fork(): Chain = this + public override suspend fun fork(): Chain = this } /** @@ -69,7 +69,7 @@ public class MarkovChain(private val seed: suspend () -> R, private newValue } - public override fun fork(): Chain = MarkovChain(seed = { value ?: seed() }, gen = gen) + public override suspend fun fork(): Chain = MarkovChain(seed = { value ?: seed() }, gen = gen) } /** @@ -94,7 +94,7 @@ public class StatefulChain( newValue } - public override fun fork(): Chain = StatefulChain(forkState(state), seed, forkState, gen) + public override suspend fun fork(): Chain = StatefulChain(forkState(state), seed, forkState, gen) } /** @@ -102,7 +102,7 @@ public class StatefulChain( */ public class ConstantChain(public val value: T) : Chain { public override suspend fun next(): T = value - public override fun fork(): Chain = this + public override suspend fun fork(): Chain = this } /** @@ -111,7 +111,7 @@ public class ConstantChain(public val value: T) : Chain { */ public fun Chain.map(func: suspend (T) -> R): Chain = object : Chain { override suspend fun next(): R = func(this@map.next()) - override fun fork(): Chain = this@map.fork().map(func) + override suspend fun fork(): Chain = this@map.fork().map(func) } /** @@ -127,7 +127,7 @@ public fun Chain.filter(block: (T) -> Boolean): Chain = object : Chain return next } - override fun fork(): Chain = this@filter.fork().filter(block) + override suspend fun fork(): Chain = this@filter.fork().filter(block) } /** @@ -135,7 +135,7 @@ public fun Chain.filter(block: (T) -> Boolean): Chain = object : Chain */ public fun Chain.collect(mapper: suspend (Chain) -> R): Chain = object : Chain { override suspend fun next(): R = mapper(this@collect) - override fun fork(): Chain = this@collect.fork().collect(mapper) + override suspend fun fork(): Chain = this@collect.fork().collect(mapper) } public fun Chain.collectWithState( @@ -145,7 +145,7 @@ public fun Chain.collectWithState( ): Chain = object : Chain { override suspend fun next(): R = state.mapper(this@collectWithState) - override fun fork(): Chain = + override suspend fun fork(): Chain = this@collectWithState.fork().collectWithState(stateFork(state), stateFork, mapper) } @@ -154,5 +154,5 @@ public fun Chain.collectWithState( */ public fun Chain.zip(other: Chain, block: suspend (T, U) -> R): Chain = object : Chain { override suspend fun next(): R = block(this@zip.next(), other.next()) - override fun fork(): Chain = this@zip.fork().zip(other.fork(), block) + override suspend fun fork(): Chain = this@zip.fork().zip(other.fork(), block) } diff --git a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/streaming/BufferFlow.kt b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/streaming/BufferFlow.kt index dc1dd4757..655f94cdf 100644 --- a/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/streaming/BufferFlow.kt +++ b/kmath-coroutines/src/commonMain/kotlin/space/kscience/kmath/streaming/BufferFlow.kt @@ -6,7 +6,6 @@ import space.kscience.kmath.chains.BlockingDoubleChain import space.kscience.kmath.structures.Buffer import space.kscience.kmath.structures.BufferFactory import space.kscience.kmath.structures.DoubleBuffer -import space.kscience.kmath.structures.asBuffer /** * Create a [Flow] from buffer @@ -50,7 +49,7 @@ public fun Flow.chunked(bufferSize: Int): Flow = flow { if (this@chunked is BlockingDoubleChain) { // performance optimization for blocking primitive chain - while (true) emit(nextBlock(bufferSize).asBuffer()) + while (true) emit(nextBufferBlocking(bufferSize)) } else { val array = DoubleArray(bufferSize) var counter = 0 diff --git a/kmath-stat/build.gradle.kts b/kmath-stat/build.gradle.kts index bc3890b1e..c2ebb7ea1 100644 --- a/kmath-stat/build.gradle.kts +++ b/kmath-stat/build.gradle.kts @@ -2,6 +2,10 @@ plugins { id("ru.mipt.npm.gradle.mpp") } +kscience{ + useAtomic() +} + kotlin.sourceSets { commonMain { dependencies { diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/distributions/Distribution.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/distributions/Distribution.kt new file mode 100644 index 000000000..fcad8ef99 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/distributions/Distribution.kt @@ -0,0 +1,38 @@ +package space.kscience.kmath.distributions + +import space.kscience.kmath.chains.Chain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler + +/** + * A distribution of typed objects. + */ +public interface Distribution : Sampler { + /** + * A probability value for given argument [arg]. + * For continuous distributions returns PDF + */ + public fun probability(arg: T): Double + + public override fun sample(generator: RandomGenerator): Chain + + /** + * An empty companion. Distribution factories should be written as its extensions + */ + public companion object +} + +public interface UnivariateDistribution> : Distribution { + /** + * Cumulative distribution for ordered parameter (CDF) + */ + public fun cumulative(arg: T): Double +} + +/** + * Compute probability integral in an interval + */ +public fun > UnivariateDistribution.integral(from: T, to: T): Double { + require(to > from) + return cumulative(to) - cumulative(from) +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/FactorizedDistribution.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/distributions/FactorizedDistribution.kt similarity index 94% rename from kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/FactorizedDistribution.kt rename to kmath-stat/src/commonMain/kotlin/space/kscience/kmath/distributions/FactorizedDistribution.kt index 3dd506b67..e69086af4 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/FactorizedDistribution.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/distributions/FactorizedDistribution.kt @@ -1,7 +1,8 @@ -package space.kscience.kmath.stat +package space.kscience.kmath.distributions import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.SimpleChain +import space.kscience.kmath.stat.RandomGenerator /** * A multivariate distribution which takes a map of parameters diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/distributions/NormalDistribution.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/distributions/NormalDistribution.kt similarity index 71% rename from kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/distributions/NormalDistribution.kt rename to kmath-stat/src/commonMain/kotlin/space/kscience/kmath/distributions/NormalDistribution.kt index 6515cbaa7..15593aed5 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/distributions/NormalDistribution.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/distributions/NormalDistribution.kt @@ -1,12 +1,11 @@ -package space.kscience.kmath.stat.distributions +package space.kscience.kmath.distributions import space.kscience.kmath.chains.Chain +import space.kscience.kmath.internal.InternalErf +import space.kscience.kmath.samplers.GaussianSampler +import space.kscience.kmath.samplers.NormalizedGaussianSampler +import space.kscience.kmath.samplers.ZigguratNormalizedGaussianSampler import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.UnivariateDistribution -import space.kscience.kmath.stat.internal.InternalErf -import space.kscience.kmath.stat.samplers.GaussianSampler -import space.kscience.kmath.stat.samplers.NormalizedGaussianSampler -import space.kscience.kmath.stat.samplers.ZigguratNormalizedGaussianSampler import kotlin.math.* /** @@ -16,8 +15,8 @@ public inline class NormalDistribution(public val sampler: GaussianSampler) : Un public constructor( mean: Double, standardDeviation: Double, - normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of(), - ) : this(GaussianSampler.of(mean, standardDeviation, normalized)) + normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler, + ) : this(GaussianSampler(mean, standardDeviation, normalized)) public override fun probability(arg: Double): Double { val x1 = (arg - sampler.mean) / sampler.standardDeviation diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalErf.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/internal/InternalErf.kt similarity index 90% rename from kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalErf.kt rename to kmath-stat/src/commonMain/kotlin/space/kscience/kmath/internal/InternalErf.kt index 4e1623867..3b9110c1a 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalErf.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/internal/InternalErf.kt @@ -1,4 +1,4 @@ -package space.kscience.kmath.stat.internal +package space.kscience.kmath.internal import kotlin.math.abs diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalGamma.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/internal/InternalGamma.kt similarity index 99% rename from kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalGamma.kt rename to kmath-stat/src/commonMain/kotlin/space/kscience/kmath/internal/InternalGamma.kt index 4f5adbe97..96f5c66db 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalGamma.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/internal/InternalGamma.kt @@ -1,4 +1,4 @@ -package space.kscience.kmath.stat.internal +package space.kscience.kmath.internal import kotlin.math.* diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalUtils.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/internal/InternalUtils.kt similarity index 98% rename from kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalUtils.kt rename to kmath-stat/src/commonMain/kotlin/space/kscience/kmath/internal/InternalUtils.kt index 722eee946..832689b27 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/internal/InternalUtils.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/internal/InternalUtils.kt @@ -1,4 +1,4 @@ -package space.kscience.kmath.stat.internal +package space.kscience.kmath.internal import kotlin.math.ln import kotlin.math.min diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/AhrensDieterExponentialSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/AhrensDieterExponentialSampler.kt new file mode 100644 index 000000000..0b8ecfb31 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/AhrensDieterExponentialSampler.kt @@ -0,0 +1,72 @@ +package space.kscience.kmath.samplers + +import space.kscience.kmath.chains.BlockingDoubleChain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler +import space.kscience.kmath.structures.DoubleBuffer +import kotlin.math.ln +import kotlin.math.pow + +/** + * Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html). + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html]. + */ +public class AhrensDieterExponentialSampler(public val mean: Double) : Sampler { + + init { + require(mean > 0) { "mean is not strictly positive: $mean" } + } + + public override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain { + override fun nextBlocking(): Double { + // Step 1: + var a = 0.0 + var u = generator.nextDouble() + + // Step 2 and 3: + while (u < 0.5) { + a += EXPONENTIAL_SA_QI[0] + u *= 2.0 + } + + // Step 4 (now u >= 0.5): + u += u - 1 + // Step 5: + if (u <= EXPONENTIAL_SA_QI[0]) return mean * (a + u) + // Step 6: + var i = 0 // Should be 1, be we iterate before it in while using 0. + var u2 = generator.nextDouble() + var umin = u2 + + // Step 7 and 8: + do { + ++i + u2 = generator.nextDouble() + if (u2 < umin) umin = u2 + // Step 8: + } while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1. + + return mean * (a + umin * EXPONENTIAL_SA_QI[0]) + } + + override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { nextBlocking() } + + override suspend fun fork(): BlockingDoubleChain = sample(generator.fork()) + } + + public companion object { + private val EXPONENTIAL_SA_QI by lazy { + val ln2 = ln(2.0) + var qi = 0.0 + + DoubleArray(16) { i -> + qi += ln2.pow(i + 1.0) / space.kscience.kmath.internal.InternalUtils.factorial(i + 1) + qi + } + } + } + +} + diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt similarity index 97% rename from kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt rename to kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt index 81182f6cd..c8a49106b 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt @@ -1,4 +1,4 @@ -package space.kscience.kmath.stat.samplers +package space.kscience.kmath.samplers import space.kscience.kmath.chains.Chain import space.kscience.kmath.stat.RandomGenerator @@ -80,7 +80,7 @@ public class AhrensDieterMarsagliaTsangGammaSampler private constructor( private val gaussian: NormalizedGaussianSampler init { - gaussian = ZigguratNormalizedGaussianSampler.of() + gaussian = ZigguratNormalizedGaussianSampler dOptim = alpha - ONE_THIRD cOptim = ONE_THIRD / sqrt(dOptim) } diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AliasMethodDiscreteSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/AliasMethodDiscreteSampler.kt similarity index 58% rename from kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AliasMethodDiscreteSampler.kt rename to kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/AliasMethodDiscreteSampler.kt index cae97db65..fe670a4e4 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AliasMethodDiscreteSampler.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/AliasMethodDiscreteSampler.kt @@ -1,10 +1,10 @@ -package space.kscience.kmath.stat.samplers +package space.kscience.kmath.samplers import space.kscience.kmath.chains.Chain +import space.kscience.kmath.internal.InternalUtils import space.kscience.kmath.stat.RandomGenerator import space.kscience.kmath.stat.Sampler import space.kscience.kmath.stat.chain -import space.kscience.kmath.stat.internal.InternalUtils import kotlin.math.ceil import kotlin.math.max import kotlin.math.min @@ -39,12 +39,12 @@ import kotlin.math.min public open class AliasMethodDiscreteSampler private constructor( // Deliberate direct storage of input arrays protected val probability: LongArray, - protected val alias: IntArray + protected val alias: IntArray, ) : Sampler { private class SmallTableAliasMethodDiscreteSampler( probability: LongArray, - alias: IntArray + alias: IntArray, ) : AliasMethodDiscreteSampler(probability, alias) { // Assume the table size is a power of 2 and create the mask private val mask: Int = alias.size - 1 @@ -111,110 +111,6 @@ public open class AliasMethodDiscreteSampler private constructor( private const val CONVERT_TO_NUMERATOR: Double = ONE_AS_NUMERATOR.toDouble() private const val MAX_SMALL_POWER_2_SIZE = 1 shl 11 - public fun of( - probabilities: DoubleArray, - alpha: Int = DEFAULT_ALPHA - ): Sampler { - // The Alias method balances N categories with counts around the mean into N sections, - // each allocated 'mean' observations. - // - // Consider 4 categories with counts 6,3,2,1. The histogram can be balanced into a - // 2D array as 4 sections with a height of the mean: - // - // 6 - // 6 - // 6 - // 63 => 6366 -- - // 632 6326 |-- mean - // 6321 6321 -- - // - // section abcd - // - // Each section is divided as: - // a: 6=1/1 - // b: 3=1/1 - // c: 2=2/3; 6=1/3 (6 is the alias) - // d: 1=1/3; 6=2/3 (6 is the alias) - // - // The sample is obtained by randomly selecting a section, then choosing which category - // from the pair based on a uniform random deviate. - val sumProb = InternalUtils.validateProbabilities(probabilities) - // Allow zero-padding - val n = computeSize(probabilities.size, alpha) - // Partition into small and large by splitting on the average. - val mean = sumProb / n - // The cardinality of smallSize + largeSize = n. - // So fill the same array from either end. - val indices = IntArray(n) - var large = n - var small = 0 - - probabilities.indices.forEach { i -> - if (probabilities[i] >= mean) indices[--large] = i else indices[small++] = i - } - - small = fillRemainingIndices(probabilities.size, indices, small) - // This may be smaller than the input length if the probabilities were already padded. - val nonZeroIndex = findLastNonZeroIndex(probabilities) - // The probabilities are modified so use a copy. - // Note: probabilities are required only up to last nonZeroIndex - val remainingProbabilities = probabilities.copyOf(nonZeroIndex + 1) - // Allocate the final tables. - // Probability table may be truncated (when zero padded). - // The alias table is full length. - val probability = LongArray(remainingProbabilities.size) - val alias = IntArray(n) - - // This loop uses each large in turn to fill the alias table for small probabilities that - // do not reach the requirement to fill an entire section alone (i.e. p < mean). - // Since the sum of the small should be less than the sum of the large it should use up - // all the small first. However floating point round-off can result in - // misclassification of items as small or large. The Vose algorithm handles this using - // a while loop conditioned on the size of both sets and a subsequent loop to use - // unpaired items. - while (large != n && small != 0) { - // Index of the small and the large probabilities. - val j = indices[--small] - val k = indices[large++] - - // Optimisation for zero-padded input: - // p(j) = 0 above the last nonZeroIndex - if (j > nonZeroIndex) - // The entire amount for the section is taken from the alias. - remainingProbabilities[k] -= mean - else { - val pj = remainingProbabilities[j] - // Item j is a small probability that is below the mean. - // Compute the weight of the section for item j: pj / mean. - // This is scaled by 2^53 and the ceiling function used to round-up - // the probability to a numerator of a fraction in the range [1,2^53]. - // Ceiling ensures non-zero values. - probability[j] = ceil(CONVERT_TO_NUMERATOR * (pj / mean)).toLong() - // The remaining amount for the section is taken from the alias. - // Effectively: probabilities[k] -= (mean - pj) - remainingProbabilities[k] += pj - mean - } - - // If not j then the alias is k - alias[j] = k - - // Add the remaining probability from large to the appropriate list. - if (remainingProbabilities[k] >= mean) indices[--large] = k else indices[small++] = k - } - - // Final loop conditions to consume unpaired items. - // Note: The large set should never be non-empty but this can occur due to round-off - // error so consume from both. - fillTable(probability, alias, indices, 0, small) - fillTable(probability, alias, indices, large, n) - - // Change the algorithm for small power of 2 sized tables - return if (isSmallPowerOf2(n)) - SmallTableAliasMethodDiscreteSampler(probability, alias) - else - AliasMethodDiscreteSampler(probability, alias) - } - private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int { var updatedSmall = small (length until indices.size).forEach { i -> indices[updatedSmall++] = i } @@ -246,7 +142,7 @@ public open class AliasMethodDiscreteSampler private constructor( alias: IntArray, indices: IntArray, start: Int, - end: Int + end: Int, ) = (start until end).forEach { i -> val index = indices[i] probability[index] = ONE_AS_NUMERATOR @@ -283,4 +179,110 @@ public open class AliasMethodDiscreteSampler private constructor( return n - (mutI ushr 1) } } + + @Suppress("FunctionName") + public fun AliasMethodDiscreteSampler( + probabilities: DoubleArray, + alpha: Int = DEFAULT_ALPHA, + ): Sampler { + // The Alias method balances N categories with counts around the mean into N sections, + // each allocated 'mean' observations. + // + // Consider 4 categories with counts 6,3,2,1. The histogram can be balanced into a + // 2D array as 4 sections with a height of the mean: + // + // 6 + // 6 + // 6 + // 63 => 6366 -- + // 632 6326 |-- mean + // 6321 6321 -- + // + // section abcd + // + // Each section is divided as: + // a: 6=1/1 + // b: 3=1/1 + // c: 2=2/3; 6=1/3 (6 is the alias) + // d: 1=1/3; 6=2/3 (6 is the alias) + // + // The sample is obtained by randomly selecting a section, then choosing which category + // from the pair based on a uniform random deviate. + val sumProb = InternalUtils.validateProbabilities(probabilities) + // Allow zero-padding + val n = computeSize(probabilities.size, alpha) + // Partition into small and large by splitting on the average. + val mean = sumProb / n + // The cardinality of smallSize + largeSize = n. + // So fill the same array from either end. + val indices = IntArray(n) + var large = n + var small = 0 + + probabilities.indices.forEach { i -> + if (probabilities[i] >= mean) indices[--large] = i else indices[small++] = i + } + + small = fillRemainingIndices(probabilities.size, indices, small) + // This may be smaller than the input length if the probabilities were already padded. + val nonZeroIndex = findLastNonZeroIndex(probabilities) + // The probabilities are modified so use a copy. + // Note: probabilities are required only up to last nonZeroIndex + val remainingProbabilities = probabilities.copyOf(nonZeroIndex + 1) + // Allocate the final tables. + // Probability table may be truncated (when zero padded). + // The alias table is full length. + val probability = LongArray(remainingProbabilities.size) + val alias = IntArray(n) + + // This loop uses each large in turn to fill the alias table for small probabilities that + // do not reach the requirement to fill an entire section alone (i.e. p < mean). + // Since the sum of the small should be less than the sum of the large it should use up + // all the small first. However floating point round-off can result in + // misclassification of items as small or large. The Vose algorithm handles this using + // a while loop conditioned on the size of both sets and a subsequent loop to use + // unpaired items. + while (large != n && small != 0) { + // Index of the small and the large probabilities. + val j = indices[--small] + val k = indices[large++] + + // Optimisation for zero-padded input: + // p(j) = 0 above the last nonZeroIndex + if (j > nonZeroIndex) + // The entire amount for the section is taken from the alias. + remainingProbabilities[k] -= mean + else { + val pj = remainingProbabilities[j] + // Item j is a small probability that is below the mean. + // Compute the weight of the section for item j: pj / mean. + // This is scaled by 2^53 and the ceiling function used to round-up + // the probability to a numerator of a fraction in the range [1,2^53]. + // Ceiling ensures non-zero values. + probability[j] = ceil(CONVERT_TO_NUMERATOR * (pj / mean)).toLong() + // The remaining amount for the section is taken from the alias. + // Effectively: probabilities[k] -= (mean - pj) + remainingProbabilities[k] += pj - mean + } + + // If not j then the alias is k + alias[j] = k + + // Add the remaining probability from large to the appropriate list. + if (remainingProbabilities[k] >= mean) indices[--large] = k else indices[small++] = k + } + + // Final loop conditions to consume unpaired items. + // Note: The large set should never be non-empty but this can occur due to round-off + // error so consume from both. + fillTable(probability, alias, indices, 0, small) + fillTable(probability, alias, indices, large, n) + + // Change the algorithm for small power of 2 sized tables + return if (isSmallPowerOf2(n)) { + SmallTableAliasMethodDiscreteSampler(probability, alias) + } else { + AliasMethodDiscreteSampler(probability, alias) + } + } } diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/BoxMullerSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/BoxMullerSampler.kt new file mode 100644 index 000000000..1f1871cbb --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/BoxMullerSampler.kt @@ -0,0 +1,52 @@ +package space.kscience.kmath.samplers + +import space.kscience.kmath.chains.BlockingDoubleChain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.structures.DoubleBuffer +import kotlin.math.* + +/** + * [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian + * distribution. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html]. + */ + +public object BoxMullerSampler : NormalizedGaussianSampler { + override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain { + var state = Double.NaN + + override fun nextBufferBlocking(size: Int): DoubleBuffer { + val xs = generator.nextDoubleBuffer(size) + val ys = generator.nextDoubleBuffer(size) + + return DoubleBuffer(size) { index -> + if (state.isNaN()) { + // Generate a pair of Gaussian numbers. + val x = xs[index] + val y = ys[index] + val alpha = 2 * PI * x + val r = sqrt(-2 * ln(y)) + + // Keep second element of the pair for next invocation. + state = r * sin(alpha) + + // Return the first element of the generated pair. + r * cos(alpha) + } else { + // Use the second element of the pair (generated at the + // previous invocation). + state.also { + // Both elements of the pair have been used. + state = Double.NaN + } + } + } + } + + + override suspend fun fork(): BlockingDoubleChain = sample(generator.fork()) + } + +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/ConstantSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/ConstantSampler.kt new file mode 100644 index 000000000..0f8d13305 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/ConstantSampler.kt @@ -0,0 +1,13 @@ +package space.kscience.kmath.samplers + +import space.kscience.kmath.chains.BlockingBufferChain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler +import space.kscience.kmath.structures.Buffer + +public class ConstantSampler(public val const: T) : Sampler { + override fun sample(generator: RandomGenerator): BlockingBufferChain = object : BlockingBufferChain { + override fun nextBufferBlocking(size: Int): Buffer = Buffer.boxing(size) { const } + override suspend fun fork(): BlockingBufferChain = this + } +} \ No newline at end of file diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/GaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/GaussianSampler.kt new file mode 100644 index 000000000..26047830c --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/GaussianSampler.kt @@ -0,0 +1,34 @@ +package space.kscience.kmath.samplers + +import space.kscience.kmath.chains.BlockingDoubleChain +import space.kscience.kmath.chains.map +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler + +/** + * Sampling from a Gaussian distribution with given mean and standard deviation. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html]. + * + * @property mean the mean of the distribution. + * @property standardDeviation the variance of the distribution. + */ +public class GaussianSampler( + public val mean: Double, + public val standardDeviation: Double, + private val normalized: NormalizedGaussianSampler = BoxMullerSampler +) : Sampler { + + init { + require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" } + } + + public override fun sample(generator: RandomGenerator): BlockingDoubleChain = normalized + .sample(generator) + .map { standardDeviation * it + mean } + + override fun toString(): String = "N($mean, $standardDeviation)" + + public companion object +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/KempSmallMeanPoissonSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/KempSmallMeanPoissonSampler.kt new file mode 100644 index 000000000..0f8e6b089 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/KempSmallMeanPoissonSampler.kt @@ -0,0 +1,68 @@ +package space.kscience.kmath.samplers + +import space.kscience.kmath.chains.BlockingIntChain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler +import space.kscience.kmath.structures.IntBuffer +import kotlin.math.exp + +/** + * Sampler for the Poisson distribution. + * - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253. + * This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead. + * + * Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor. + * + * Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html]. + */ +public class KempSmallMeanPoissonSampler internal constructor( + private val p0: Double, + private val mean: Double, +) : Sampler { + public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain { + override fun nextBlocking(): Int { + //TODO move to nextBufferBlocking + // Note on the algorithm: + // - X is the unknown sample deviate (the output of the algorithm) + // - x is the current value from the distribution + // - p is the probability of the current value x, p(X=x) + // - u is effectively the cumulative probability that the sample X + // is equal or above the current value x, p(X>=x) + // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x + var u = generator.nextDouble() + var x = 0 + var p = p0 + + while (u > p) { + u -= p + // Compute the next probability using a recurrence relation. + // p(x+1) = p(x) * mean / (x+1) + p *= mean / ++x + // The algorithm listed in Kemp (1981) does not check that the rolling probability + // is positive. This check is added to ensure no errors when the limit of the summation + // 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic. + if (p == 0.0) return x + } + + return x + } + + override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() } + + override suspend fun fork(): BlockingIntChain = sample(generator.fork()) + } + + public override fun toString(): String = "Kemp Small Mean Poisson deviate" +} + +public fun KempSmallMeanPoissonSampler(mean: Double): KempSmallMeanPoissonSampler { + require(mean > 0) { "Mean is not strictly positive: $mean" } + val p0 = exp(-mean) + // Probability must be positive. As mean increases then p(0) decreases. + require(p0 > 0) { "No probability for mean: $mean" } + return KempSmallMeanPoissonSampler(p0, mean) +} + diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/MarsagliaNormalizedGaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/MarsagliaNormalizedGaussianSampler.kt new file mode 100644 index 000000000..b93bcc106 --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/MarsagliaNormalizedGaussianSampler.kt @@ -0,0 +1,61 @@ +package space.kscience.kmath.samplers + +import space.kscience.kmath.chains.BlockingDoubleChain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.structures.DoubleBuffer +import kotlin.math.ln +import kotlin.math.sqrt + +/** + * [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian + * distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in + * [BoxMullerNormalizedGaussianSampler]. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html] + */ +public object MarsagliaNormalizedGaussianSampler : NormalizedGaussianSampler { + + override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain { + var nextGaussian = Double.NaN + + override fun nextBlocking(): Double { + return if (nextGaussian.isNaN()) { + val alpha: Double + var x: Double + + // Rejection scheme for selecting a pair that lies within the unit circle. + while (true) { + // Generate a pair of numbers within [-1 , 1). + x = 2.0 * generator.nextDouble() - 1.0 + val y = 2.0 * generator.nextDouble() - 1.0 + val r2 = x * x + y * y + + if (r2 < 1 && r2 > 0) { + // Pair (x, y) is within unit circle. + alpha = sqrt(-2 * ln(r2) / r2) + // Keep second element of the pair for next invocation. + nextGaussian = alpha * y + // Return the first element of the generated pair. + break + } + // Pair is not within the unit circle: Generate another one. + } + + // Return the first element of the generated pair. + alpha * x + } else { + // Use the second element of the pair (generated at the + // previous invocation). + val r = nextGaussian + // Both elements of the pair have been used. + nextGaussian = Double.NaN + r + } + } + + override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { nextBlocking() } + + override suspend fun fork(): BlockingDoubleChain = sample(generator.fork()) + } +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/NormalizedGaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/NormalizedGaussianSampler.kt new file mode 100644 index 000000000..6d3daadab --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/NormalizedGaussianSampler.kt @@ -0,0 +1,18 @@ +package space.kscience.kmath.samplers + +import space.kscience.kmath.chains.BlockingDoubleChain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler + +public interface BlockingDoubleSampler: Sampler{ + override fun sample(generator: RandomGenerator): BlockingDoubleChain +} + + +/** + * Marker interface for a sampler that generates values from an N(0,1) + * [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution). + */ +public fun interface NormalizedGaussianSampler : BlockingDoubleSampler{ + public companion object +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/PoissonSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/PoissonSampler.kt new file mode 100644 index 000000000..c2e8e2c1c --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/PoissonSampler.kt @@ -0,0 +1,203 @@ +package space.kscience.kmath.samplers + +import space.kscience.kmath.chains.BlockingIntChain +import space.kscience.kmath.internal.InternalUtils +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.stat.Sampler +import space.kscience.kmath.structures.IntBuffer +import kotlin.math.* + + +private const val PIVOT = 40.0 + +/** + * Sampler for the Poisson distribution. + * - For small means, a Poisson process is simulated using uniform deviates, as described in + * Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 + * Important integer-valued distributions: The Poisson distribution. Addison Wesley. + * The Poisson process (and hence, the returned value) is bounded by 1000 * mean. + * - For large means, we use the rejection algorithm described in + * Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html]. + */ +@Suppress("FunctionName") +public fun PoissonSampler(mean: Double): Sampler { + return if (mean < PIVOT) SmallMeanPoissonSampler(mean) else LargeMeanPoissonSampler(mean) +} + +/** + * Sampler for the Poisson distribution. + * - For small means, a Poisson process is simulated using uniform deviates, as described in + * Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important + * integer-valued distributions: The Poisson distribution. Addison Wesley. + * - The Poisson process (and hence, the returned value) is bounded by 1000 * mean. + * This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead. + * + * Based on Commons RNG implementation. + * + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html]. + */ +public class SmallMeanPoissonSampler(public val mean: Double) : Sampler { + + init { + require(mean > 0) { "mean is not strictly positive: $mean" } + } + + private val p0: Double = exp(-mean) + + private val limit: Int = if (p0 > 0) { + ceil(1000 * mean) + } else { + throw IllegalArgumentException("No p(x=0) probability for mean: $mean") + }.toInt() + + public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain { + override fun nextBlocking(): Int { + var n = 0 + var r = 1.0 + + while (n < limit) { + r *= generator.nextDouble() + if (r >= p0) n++ else break + } + + return n + } + + override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() } + + override suspend fun fork(): BlockingIntChain = sample(generator.fork()) + } + + public override fun toString(): String = "Small Mean Poisson deviate" +} + + +/** + * Sampler for the Poisson distribution. + * - For large means, we use the rejection algorithm described in + * Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables + * Computing vol. 26 pp. 197-207. + * + * This sampler is suitable for mean >= 40. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html]. + */ +public class LargeMeanPoissonSampler(public val mean: Double) : Sampler { + + init { + require(mean >= 1) { "mean is not >= 1: $mean" } + // The algorithm is not valid if Math.floor(mean) is not an integer. + require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" } + } + + private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG + private val lambda: Double = floor(mean) + private val logLambda: Double = ln(lambda) + private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt()) + private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1)) + private val halfDelta: Double = delta / 2 + private val twolpd: Double = 2 * lambda + delta + private val c1: Double = 1 / (8 * lambda) + private val a1: Double = sqrt(PI * twolpd) * exp(c1) + private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd) + private val aSum: Double = a1 + a2 + 1 + private val p1: Double = a1 / aSum + private val p2: Double = a2 / aSum + + public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain { + override fun nextBlocking(): Int { + val exponential = AhrensDieterExponentialSampler(1.0).sample(generator) + val gaussian = ZigguratNormalizedGaussianSampler.sample(generator) + + val smallMeanPoissonSampler = if (mean - lambda < Double.MIN_VALUE) { + null + } else { + KempSmallMeanPoissonSampler(mean - lambda).sample(generator) + } + + val y2 = smallMeanPoissonSampler?.nextBlocking() ?: 0 + var x: Double + var y: Double + var v: Double + var a: Int + var t: Double + var qr: Double + var qa: Double + + while (true) { + // Step 1: + val u = generator.nextDouble() + + if (u <= p1) { + // Step 2: + val n = gaussian.nextBlocking() + x = n * sqrt(lambda + halfDelta) - 0.5 + if (x > delta || x < -lambda) continue + y = if (x < 0) floor(x) else ceil(x) + val e = exponential.nextBlocking() + v = -e - 0.5 * n * n + c1 + } else { + // Step 3: + if (u > p1 + p2) { + y = lambda + break + } + + x = delta + twolpd / delta * exponential.nextBlocking() + y = ceil(x) + v = -exponential.nextBlocking() - delta * (x + 1) / twolpd + } + + // The Squeeze Principle + // Step 4.1: + a = if (x < 0) 1 else 0 + t = y * (y + 1) / (2 * lambda) + + // Step 4.2 + if (v < -t && a == 0) { + y += lambda + break + } + + // Step 4.3: + qr = t * ((2 * y + 1) / (6 * lambda) - 1) + qa = qr - t * t / (3 * (lambda + a * (y + 1))) + + // Step 4.4: + if (v < qa) { + y += lambda + break + } + + // Step 4.5: + if (v > qr) continue + + // Step 4.6: + if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) { + y += lambda + break + } + } + + return min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt() + } + + override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() } + + override suspend fun fork(): BlockingIntChain = sample(generator.fork()) + } + + private fun getFactorialLog(n: Int): Double = factorialLog.value(n) + public override fun toString(): String = "Large Mean Poisson deviate" + + public companion object { + private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE + private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create() + } +} + + diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/ZigguratNormalizedGaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/ZigguratNormalizedGaussianSampler.kt new file mode 100644 index 000000000..70f5c248d --- /dev/null +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/samplers/ZigguratNormalizedGaussianSampler.kt @@ -0,0 +1,88 @@ +package space.kscience.kmath.samplers + +import space.kscience.kmath.chains.BlockingDoubleChain +import space.kscience.kmath.stat.RandomGenerator +import space.kscience.kmath.structures.DoubleBuffer +import kotlin.math.* + +/** + * [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a + * Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this + * implementation has been adapted from the C code provided therein. + * + * Based on Commons RNG implementation. + * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html]. + */ +public object ZigguratNormalizedGaussianSampler : NormalizedGaussianSampler { + + private const val R: Double = 3.442619855899 + private const val ONE_OVER_R: Double = 1 / R + private const val V: Double = 9.91256303526217e-3 + private val MAX: Double = 2.0.pow(63.0) + private val ONE_OVER_MAX: Double = 1.0 / MAX + private const val LEN: Int = 128 + private const val LAST: Int = LEN - 1 + private val K: LongArray = LongArray(LEN) + private val W: DoubleArray = DoubleArray(LEN) + private val F: DoubleArray = DoubleArray(LEN) + + init { + // Filling the tables. + var d = R + var t = d + var fd = gauss(d) + val q = V / fd + K[0] = (d / q * MAX).toLong() + K[1] = 0 + W[0] = q * ONE_OVER_MAX + W[LAST] = d * ONE_OVER_MAX + F[0] = 1.0 + F[LAST] = fd + + (LAST - 1 downTo 1).forEach { i -> + d = sqrt(-2 * ln(V / d + fd)) + fd = gauss(d) + K[i + 1] = (d / t * MAX).toLong() + t = d + F[i] = fd + W[i] = d * ONE_OVER_MAX + } + } + + private fun gauss(x: Double): Double = exp(-0.5 * x * x) + + private fun sampleOne(generator: RandomGenerator): Double { + val j = generator.nextLong() + val i = (j and LAST.toLong()).toInt() + return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i) + } + + override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain { + override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { sampleOne(generator) } + + override suspend fun fork(): BlockingDoubleChain = sample(generator.fork()) + } + + + private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double { + var x = hz * W[iz] + + return when { + iz == 0 -> { + var y: Double + + do { + y = -ln(generator.nextDouble()) + x = -ln(generator.nextDouble()) * ONE_OVER_R + } while (y + y < x * x) + + val out = R + x + if (hz > 0) out else -out + } + + F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x + else -> sampleOne(generator) + } + } + +} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomChain.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomChain.kt index 2f117a035..9e3e265dc 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomChain.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomChain.kt @@ -1,8 +1,8 @@ package space.kscience.kmath.stat import space.kscience.kmath.chains.BlockingDoubleChain -import space.kscience.kmath.chains.BlockingIntChain import space.kscience.kmath.chains.Chain +import space.kscience.kmath.structures.DoubleBuffer /** * A possibly stateful chain producing random values. @@ -11,12 +11,24 @@ import space.kscience.kmath.chains.Chain */ public class RandomChain( public val generator: RandomGenerator, - private val gen: suspend RandomGenerator.() -> R + private val gen: suspend RandomGenerator.() -> R, ) : Chain { override suspend fun next(): R = generator.gen() - override fun fork(): Chain = RandomChain(generator.fork(), gen) + override suspend fun fork(): Chain = RandomChain(generator.fork(), gen) +} + +/** + * Create a generic random chain with provided [generator] + */ +public fun RandomGenerator.chain(generator: suspend RandomGenerator.() -> R): RandomChain = RandomChain(this, generator) + +/** + * A type-specific double chunk random chain + */ +public class UniformDoubleChain(public val generator: RandomGenerator) : BlockingDoubleChain { + public override fun nextBufferBlocking(size: Int): DoubleBuffer = generator.nextDoubleBuffer(size) + override suspend fun nextBuffer(size: Int): DoubleBuffer = nextBufferBlocking(size) + + override suspend fun fork(): UniformDoubleChain = UniformDoubleChain(generator.fork()) } -public fun RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain = RandomChain(this, gen) -public fun Chain.blocking(): BlockingDoubleChain = object : Chain by this, BlockingDoubleChain {} -public fun Chain.blocking(): BlockingIntChain = object : Chain by this, BlockingIntChain {} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomGenerator.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomGenerator.kt index bad2334e9..c40513efc 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomGenerator.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/RandomGenerator.kt @@ -1,5 +1,6 @@ package space.kscience.kmath.stat +import space.kscience.kmath.structures.DoubleBuffer import kotlin.random.Random /** @@ -16,6 +17,11 @@ public interface RandomGenerator { */ public fun nextDouble(): Double + /** + * A chunk of doubles of given [size] + */ + public fun nextDoubleBuffer(size: Int): DoubleBuffer = DoubleBuffer(size) { nextDouble() } + /** * Gets the next random `Int` from the random number generator. * diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Distribution.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Sampler.kt similarity index 54% rename from kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Distribution.kt rename to kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Sampler.kt index 095182160..8d024b2b9 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Distribution.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Sampler.kt @@ -3,16 +3,13 @@ package space.kscience.kmath.stat import kotlinx.coroutines.flow.first import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.collect -import space.kscience.kmath.structures.Buffer -import space.kscience.kmath.structures.BufferFactory -import space.kscience.kmath.structures.IntBuffer -import space.kscience.kmath.structures.MutableBuffer +import space.kscience.kmath.structures.* import kotlin.jvm.JvmName /** - * Sampler that generates chains of values of type [T]. + * Sampler that generates chains of values of type [T] in a chain of type [C]. */ -public fun interface Sampler { +public fun interface Sampler { /** * Generates a chain of samples. * @@ -22,39 +19,6 @@ public fun interface Sampler { public fun sample(generator: RandomGenerator): Chain } -/** - * A distribution of typed objects. - */ -public interface Distribution : Sampler { - /** - * A probability value for given argument [arg]. - * For continuous distributions returns PDF - */ - public fun probability(arg: T): Double - - public override fun sample(generator: RandomGenerator): Chain - - /** - * An empty companion. Distribution factories should be written as its extensions - */ - public companion object -} - -public interface UnivariateDistribution> : Distribution { - /** - * Cumulative distribution for ordered parameter (CDF) - */ - public fun cumulative(arg: T): Double -} - -/** - * Compute probability integral in an interval - */ -public fun > UnivariateDistribution.integral(from: T, to: T): Double { - require(to > from) - return cumulative(to) - cumulative(from) -} - /** * Sample a bunch of values */ @@ -71,7 +35,7 @@ public fun Sampler.sampleBuffer( //clear list from previous run tmp.clear() //Fill list - repeat(size) { tmp += chain.next() } + repeat(size) { tmp.add(chain.next()) } //return new buffer with elements from tmp bufferFactory(size) { tmp[it] } } @@ -87,7 +51,7 @@ public suspend fun Sampler.next(generator: RandomGenerator): T = sa */ @JvmName("sampleRealBuffer") public fun Sampler.sampleBuffer(generator: RandomGenerator, size: Int): Chain> = - sampleBuffer(generator, size, MutableBuffer.Companion::double) + sampleBuffer(generator, size, ::DoubleBuffer) /** * Generates [size] integer samples and chunks them into some buffers. diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Statistic.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Statistic.kt index 689182115..67f55aea6 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Statistic.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/Statistic.kt @@ -81,7 +81,7 @@ public class Mean( public companion object { //TODO replace with optimized version which respects overflow - public val real: Mean = Mean(DoubleField) { sum, count -> sum / count } + public val double: Mean = Mean(DoubleField) { sum, count -> sum / count } public val int: Mean = Mean(IntRing) { sum, count -> sum / count } public val long: Mean = Mean(LongRing) { sum, count -> sum / count } } diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/UniformDistribution.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/UniformDistribution.kt index 4fc0905b8..4fc759e0c 100644 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/UniformDistribution.kt +++ b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/UniformDistribution.kt @@ -2,6 +2,8 @@ package space.kscience.kmath.stat import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.SimpleChain +import space.kscience.kmath.distributions.Distribution +import space.kscience.kmath.distributions.UnivariateDistribution public class UniformDistribution(public val range: ClosedFloatingPointRange) : UnivariateDistribution { private val length: Double = range.endInclusive - range.start diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AhrensDieterExponentialSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AhrensDieterExponentialSampler.kt deleted file mode 100644 index 504c6b881..000000000 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/AhrensDieterExponentialSampler.kt +++ /dev/null @@ -1,73 +0,0 @@ -package space.kscience.kmath.stat.samplers - -import space.kscience.kmath.chains.Chain -import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.Sampler -import space.kscience.kmath.stat.chain -import space.kscience.kmath.stat.internal.InternalUtils -import kotlin.math.ln -import kotlin.math.pow - -/** - * Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html). - * - * Based on Commons RNG implementation. - * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html]. - */ -public class AhrensDieterExponentialSampler private constructor(public val mean: Double) : Sampler { - public override fun sample(generator: RandomGenerator): Chain = generator.chain { - // Step 1: - var a = 0.0 - var u = nextDouble() - - // Step 2 and 3: - while (u < 0.5) { - a += EXPONENTIAL_SA_QI[0] - u *= 2.0 - } - - // Step 4 (now u >= 0.5): - u += u - 1 - // Step 5: - if (u <= EXPONENTIAL_SA_QI[0]) return@chain mean * (a + u) - // Step 6: - var i = 0 // Should be 1, be we iterate before it in while using 0. - var u2 = nextDouble() - var umin = u2 - - // Step 7 and 8: - do { - ++i - u2 = nextDouble() - if (u2 < umin) umin = u2 - // Step 8: - } while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1. - - mean * (a + umin * EXPONENTIAL_SA_QI[0]) - } - - override fun toString(): String = "Ahrens-Dieter Exponential deviate" - - public companion object { - private val EXPONENTIAL_SA_QI by lazy { DoubleArray(16) } - - init { - /** - * Filling EXPONENTIAL_SA_QI table. - * Note that we don't want qi = 0 in the table. - */ - val ln2 = ln(2.0) - var qi = 0.0 - - EXPONENTIAL_SA_QI.indices.forEach { i -> - qi += ln2.pow(i + 1.0) / InternalUtils.factorial(i + 1) - EXPONENTIAL_SA_QI[i] = qi - } - } - - public fun of(mean: Double): AhrensDieterExponentialSampler { - require(mean > 0) { "mean is not strictly positive: $mean" } - return AhrensDieterExponentialSampler(mean) - } - } -} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/BoxMullerNormalizedGaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/BoxMullerNormalizedGaussianSampler.kt deleted file mode 100644 index 04beb448d..000000000 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/BoxMullerNormalizedGaussianSampler.kt +++ /dev/null @@ -1,48 +0,0 @@ -package space.kscience.kmath.stat.samplers - -import space.kscience.kmath.chains.Chain -import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.Sampler -import space.kscience.kmath.stat.chain -import kotlin.math.* - -/** - * [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian - * distribution. - * - * Based on Commons RNG implementation. - * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html]. - */ -public class BoxMullerNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler { - private var nextGaussian: Double = Double.NaN - - public override fun sample(generator: RandomGenerator): Chain = generator.chain { - val random: Double - - if (nextGaussian.isNaN()) { - // Generate a pair of Gaussian numbers. - val x = nextDouble() - val y = nextDouble() - val alpha = 2 * PI * x - val r = sqrt(-2 * ln(y)) - // Return the first element of the generated pair. - random = r * cos(alpha) - // Keep second element of the pair for next invocation. - nextGaussian = r * sin(alpha) - } else { - // Use the second element of the pair (generated at the - // previous invocation). - random = nextGaussian - // Both elements of the pair have been used. - nextGaussian = Double.NaN - } - - random - } - - public override fun toString(): String = "Box-Muller normalized Gaussian deviate" - - public companion object { - public fun of(): BoxMullerNormalizedGaussianSampler = BoxMullerNormalizedGaussianSampler() - } -} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/GaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/GaussianSampler.kt deleted file mode 100644 index eba26cfb5..000000000 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/GaussianSampler.kt +++ /dev/null @@ -1,43 +0,0 @@ -package space.kscience.kmath.stat.samplers - -import space.kscience.kmath.chains.Chain -import space.kscience.kmath.chains.map -import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.Sampler - -/** - * Sampling from a Gaussian distribution with given mean and standard deviation. - * - * Based on Commons RNG implementation. - * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html]. - * - * @property mean the mean of the distribution. - * @property standardDeviation the variance of the distribution. - */ -public class GaussianSampler private constructor( - public val mean: Double, - public val standardDeviation: Double, - private val normalized: NormalizedGaussianSampler -) : Sampler { - public override fun sample(generator: RandomGenerator): Chain = normalized - .sample(generator) - .map { standardDeviation * it + mean } - - override fun toString(): String = "Gaussian deviate [$normalized]" - - public companion object { - public fun of( - mean: Double, - standardDeviation: Double, - normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of() - ): GaussianSampler { - require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" } - - return GaussianSampler( - mean, - standardDeviation, - normalized - ) - } - } -} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/KempSmallMeanPoissonSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/KempSmallMeanPoissonSampler.kt deleted file mode 100644 index 1d7f90023..000000000 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/KempSmallMeanPoissonSampler.kt +++ /dev/null @@ -1,63 +0,0 @@ -package space.kscience.kmath.stat.samplers - -import space.kscience.kmath.chains.Chain -import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.Sampler -import space.kscience.kmath.stat.chain -import kotlin.math.exp - -/** - * Sampler for the Poisson distribution. - * - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253. - * This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead. - * - * Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor. - * - * Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double. - * - * Based on Commons RNG implementation. - * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html]. - */ -public class KempSmallMeanPoissonSampler private constructor( - private val p0: Double, - private val mean: Double -) : Sampler { - public override fun sample(generator: RandomGenerator): Chain = generator.chain { - // Note on the algorithm: - // - X is the unknown sample deviate (the output of the algorithm) - // - x is the current value from the distribution - // - p is the probability of the current value x, p(X=x) - // - u is effectively the cumulative probability that the sample X - // is equal or above the current value x, p(X>=x) - // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x - var u = nextDouble() - var x = 0 - var p = p0 - - while (u > p) { - u -= p - // Compute the next probability using a recurrence relation. - // p(x+1) = p(x) * mean / (x+1) - p *= mean / ++x - // The algorithm listed in Kemp (1981) does not check that the rolling probability - // is positive. This check is added to ensure no errors when the limit of the summation - // 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic. - if (p == 0.0) return@chain x - } - - x - } - - public override fun toString(): String = "Kemp Small Mean Poisson deviate" - - public companion object { - public fun of(mean: Double): KempSmallMeanPoissonSampler { - require(mean > 0) { "Mean is not strictly positive: $mean" } - val p0 = exp(-mean) - // Probability must be positive. As mean increases then p(0) decreases. - require(p0 > 0) { "No probability for mean: $mean" } - return KempSmallMeanPoissonSampler(p0, mean) - } - } -} - diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/LargeMeanPoissonSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/LargeMeanPoissonSampler.kt deleted file mode 100644 index de1e7cc89..000000000 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/LargeMeanPoissonSampler.kt +++ /dev/null @@ -1,130 +0,0 @@ -package space.kscience.kmath.stat.samplers - -import space.kscience.kmath.chains.Chain -import space.kscience.kmath.chains.ConstantChain -import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.Sampler -import space.kscience.kmath.stat.chain -import space.kscience.kmath.stat.internal.InternalUtils -import space.kscience.kmath.stat.next -import kotlin.math.* - -/** - * Sampler for the Poisson distribution. - * - For large means, we use the rejection algorithm described in - * Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables - * Computing vol. 26 pp. 197-207. - * - * This sampler is suitable for mean >= 40. - * - * Based on Commons RNG implementation. - * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html]. - */ -public class LargeMeanPoissonSampler private constructor(public val mean: Double) : Sampler { - private val exponential: Sampler = AhrensDieterExponentialSampler.of(1.0) - private val gaussian: Sampler = ZigguratNormalizedGaussianSampler.of() - private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG - private val lambda: Double = floor(mean) - private val logLambda: Double = ln(lambda) - private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt()) - private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1)) - private val halfDelta: Double = delta / 2 - private val twolpd: Double = 2 * lambda + delta - private val c1: Double = 1 / (8 * lambda) - private val a1: Double = sqrt(PI * twolpd) * exp(c1) - private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd) - private val aSum: Double = a1 + a2 + 1 - private val p1: Double = a1 / aSum - private val p2: Double = a2 / aSum - - private val smallMeanPoissonSampler: Sampler = if (mean - lambda < Double.MIN_VALUE) - NO_SMALL_MEAN_POISSON_SAMPLER - else // Not used. - KempSmallMeanPoissonSampler.of(mean - lambda) - - public override fun sample(generator: RandomGenerator): Chain = generator.chain { - // This will never be null. It may be a no-op delegate that returns zero. - val y2 = smallMeanPoissonSampler.next(generator) - var x: Double - var y: Double - var v: Double - var a: Int - var t: Double - var qr: Double - var qa: Double - - while (true) { - // Step 1: - val u = generator.nextDouble() - - if (u <= p1) { - // Step 2: - val n = gaussian.next(generator) - x = n * sqrt(lambda + halfDelta) - 0.5 - if (x > delta || x < -lambda) continue - y = if (x < 0) floor(x) else ceil(x) - val e = exponential.next(generator) - v = -e - 0.5 * n * n + c1 - } else { - // Step 3: - if (u > p1 + p2) { - y = lambda - break - } - - x = delta + twolpd / delta * exponential.next(generator) - y = ceil(x) - v = -exponential.next(generator) - delta * (x + 1) / twolpd - } - - // The Squeeze Principle - // Step 4.1: - a = if (x < 0) 1 else 0 - t = y * (y + 1) / (2 * lambda) - - // Step 4.2 - if (v < -t && a == 0) { - y += lambda - break - } - - // Step 4.3: - qr = t * ((2 * y + 1) / (6 * lambda) - 1) - qa = qr - t * t / (3 * (lambda + a * (y + 1))) - - // Step 4.4: - if (v < qa) { - y += lambda - break - } - - // Step 4.5: - if (v > qr) continue - - // Step 4.6: - if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) { - y += lambda - break - } - } - - min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt() - } - - private fun getFactorialLog(n: Int): Double = factorialLog.value(n) - public override fun toString(): String = "Large Mean Poisson deviate" - - public companion object { - private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE - private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create() - - private val NO_SMALL_MEAN_POISSON_SAMPLER: Sampler = Sampler { ConstantChain(0) } - - public fun of(mean: Double): LargeMeanPoissonSampler { - require(mean >= 1) { "mean is not >= 1: $mean" } - // The algorithm is not valid if Math.floor(mean) is not an integer. - require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" } - return LargeMeanPoissonSampler(mean) - } - } -} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/MarsagliaNormalizedGaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/MarsagliaNormalizedGaussianSampler.kt deleted file mode 100644 index 8a659642f..000000000 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/MarsagliaNormalizedGaussianSampler.kt +++ /dev/null @@ -1,61 +0,0 @@ -package space.kscience.kmath.stat.samplers - -import space.kscience.kmath.chains.Chain -import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.Sampler -import space.kscience.kmath.stat.chain -import kotlin.math.ln -import kotlin.math.sqrt - -/** - * [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian - * distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in - * [BoxMullerNormalizedGaussianSampler]. - * - * Based on Commons RNG implementation. - * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html] - */ -public class MarsagliaNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler { - private var nextGaussian = Double.NaN - - public override fun sample(generator: RandomGenerator): Chain = generator.chain { - if (nextGaussian.isNaN()) { - val alpha: Double - var x: Double - - // Rejection scheme for selecting a pair that lies within the unit circle. - while (true) { - // Generate a pair of numbers within [-1 , 1). - x = 2.0 * generator.nextDouble() - 1.0 - val y = 2.0 * generator.nextDouble() - 1.0 - val r2 = x * x + y * y - - if (r2 < 1 && r2 > 0) { - // Pair (x, y) is within unit circle. - alpha = sqrt(-2 * ln(r2) / r2) - // Keep second element of the pair for next invocation. - nextGaussian = alpha * y - // Return the first element of the generated pair. - break - } - // Pair is not within the unit circle: Generate another one. - } - - // Return the first element of the generated pair. - alpha * x - } else { - // Use the second element of the pair (generated at the - // previous invocation). - val r = nextGaussian - // Both elements of the pair have been used. - nextGaussian = Double.NaN - r - } - } - - public override fun toString(): String = "Box-Muller (with rejection) normalized Gaussian deviate" - - public companion object { - public fun of(): MarsagliaNormalizedGaussianSampler = MarsagliaNormalizedGaussianSampler() - } -} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/NormalizedGaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/NormalizedGaussianSampler.kt deleted file mode 100644 index 4eb3d60e0..000000000 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/NormalizedGaussianSampler.kt +++ /dev/null @@ -1,9 +0,0 @@ -package space.kscience.kmath.stat.samplers - -import space.kscience.kmath.stat.Sampler - -/** - * Marker interface for a sampler that generates values from an N(0,1) - * [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution). - */ -public interface NormalizedGaussianSampler : Sampler diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/PoissonSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/PoissonSampler.kt deleted file mode 100644 index 0c0234892..000000000 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/PoissonSampler.kt +++ /dev/null @@ -1,30 +0,0 @@ -package space.kscience.kmath.stat.samplers - -import space.kscience.kmath.chains.Chain -import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.Sampler - -/** - * Sampler for the Poisson distribution. - * - For small means, a Poisson process is simulated using uniform deviates, as described in - * Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 - * Important integer-valued distributions: The Poisson distribution. Addison Wesley. - * The Poisson process (and hence, the returned value) is bounded by 1000 * mean. - * - For large means, we use the rejection algorithm described in - * Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207. - * - * Based on Commons RNG implementation. - * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html]. - */ -public class PoissonSampler private constructor(mean: Double) : Sampler { - private val poissonSamplerDelegate: Sampler = of(mean) - public override fun sample(generator: RandomGenerator): Chain = poissonSamplerDelegate.sample(generator) - public override fun toString(): String = poissonSamplerDelegate.toString() - - public companion object { - private const val PIVOT = 40.0 - - public fun of(mean: Double): Sampler =// Each sampler should check the input arguments. - if (mean < PIVOT) SmallMeanPoissonSampler.of(mean) else LargeMeanPoissonSampler.of(mean) - } -} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/SmallMeanPoissonSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/SmallMeanPoissonSampler.kt deleted file mode 100644 index 0fe7ff161..000000000 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/SmallMeanPoissonSampler.kt +++ /dev/null @@ -1,50 +0,0 @@ -package space.kscience.kmath.stat.samplers - -import space.kscience.kmath.chains.Chain -import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.Sampler -import space.kscience.kmath.stat.chain -import kotlin.math.ceil -import kotlin.math.exp - -/** - * Sampler for the Poisson distribution. - * - For small means, a Poisson process is simulated using uniform deviates, as described in - * Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important - * integer-valued distributions: The Poisson distribution. Addison Wesley. - * - The Poisson process (and hence, the returned value) is bounded by 1000 * mean. - * This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead. - * - * Based on Commons RNG implementation. - * - * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html]. - */ -public class SmallMeanPoissonSampler private constructor(mean: Double) : Sampler { - private val p0: Double = exp(-mean) - - private val limit: Int = (if (p0 > 0) - ceil(1000 * mean) - else - throw IllegalArgumentException("No p(x=0) probability for mean: $mean")).toInt() - - public override fun sample(generator: RandomGenerator): Chain = generator.chain { - var n = 0 - var r = 1.0 - - while (n < limit) { - r *= nextDouble() - if (r >= p0) n++ else break - } - - n - } - - public override fun toString(): String = "Small Mean Poisson deviate" - - public companion object { - public fun of(mean: Double): SmallMeanPoissonSampler { - require(mean > 0) { "mean is not strictly positive: $mean" } - return SmallMeanPoissonSampler(mean) - } - } -} diff --git a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/ZigguratNormalizedGaussianSampler.kt b/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/ZigguratNormalizedGaussianSampler.kt deleted file mode 100644 index 90815209f..000000000 --- a/kmath-stat/src/commonMain/kotlin/space/kscience/kmath/stat/samplers/ZigguratNormalizedGaussianSampler.kt +++ /dev/null @@ -1,88 +0,0 @@ -package space.kscience.kmath.stat.samplers - -import space.kscience.kmath.chains.Chain -import space.kscience.kmath.stat.RandomGenerator -import space.kscience.kmath.stat.Sampler -import space.kscience.kmath.stat.chain -import kotlin.math.* - -/** - * [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a - * Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this - * implementation has been adapted from the C code provided therein. - * - * Based on Commons RNG implementation. - * See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html]. - */ -public class ZigguratNormalizedGaussianSampler private constructor() : - NormalizedGaussianSampler, Sampler { - - private fun sampleOne(generator: RandomGenerator): Double { - val j = generator.nextLong() - val i = (j and LAST.toLong()).toInt() - return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i) - } - - public override fun sample(generator: RandomGenerator): Chain = generator.chain { sampleOne(this) } - public override fun toString(): String = "Ziggurat normalized Gaussian deviate" - - private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double { - var x = hz * W[iz] - - return when { - iz == 0 -> { - var y: Double - - do { - y = -ln(generator.nextDouble()) - x = -ln(generator.nextDouble()) * ONE_OVER_R - } while (y + y < x * x) - - val out = R + x - if (hz > 0) out else -out - } - - F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x - else -> sampleOne(generator) - } - } - - public companion object { - private const val R: Double = 3.442619855899 - private const val ONE_OVER_R: Double = 1 / R - private const val V: Double = 9.91256303526217e-3 - private val MAX: Double = 2.0.pow(63.0) - private val ONE_OVER_MAX: Double = 1.0 / MAX - private const val LEN: Int = 128 - private const val LAST: Int = LEN - 1 - private val K: LongArray = LongArray(LEN) - private val W: DoubleArray = DoubleArray(LEN) - private val F: DoubleArray = DoubleArray(LEN) - - init { - // Filling the tables. - var d = R - var t = d - var fd = gauss(d) - val q = V / fd - K[0] = (d / q * MAX).toLong() - K[1] = 0 - W[0] = q * ONE_OVER_MAX - W[LAST] = d * ONE_OVER_MAX - F[0] = 1.0 - F[LAST] = fd - - (LAST - 1 downTo 1).forEach { i -> - d = sqrt(-2 * ln(V / d + fd)) - fd = gauss(d) - K[i + 1] = (d / t * MAX).toLong() - t = d - F[i] = fd - W[i] = d * ONE_OVER_MAX - } - } - - public fun of(): ZigguratNormalizedGaussianSampler = ZigguratNormalizedGaussianSampler() - private fun gauss(x: Double): Double = exp(-0.5 * x * x) - } -} diff --git a/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/CommonsDistributionsTest.kt b/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/CommonsDistributionsTest.kt index 76aac65c4..c6b9cb17a 100644 --- a/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/CommonsDistributionsTest.kt +++ b/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/CommonsDistributionsTest.kt @@ -5,22 +5,23 @@ import kotlinx.coroutines.flow.toList import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.Assertions import org.junit.jupiter.api.Test -import space.kscience.kmath.stat.samplers.GaussianSampler +import space.kscience.kmath.samplers.GaussianSampler +import space.kscience.kmath.structures.asBuffer internal class CommonsDistributionsTest { @Test - fun testNormalDistributionSuspend() { - val distribution = GaussianSampler.of(7.0, 2.0) + fun testNormalDistributionSuspend() = runBlocking { + val distribution = GaussianSampler(7.0, 2.0) val generator = RandomGenerator.default(1) - val sample = runBlocking { distribution.sample(generator).take(1000).toList() } - Assertions.assertEquals(7.0, sample.average(), 0.1) + val sample = distribution.sample(generator).take(1000).toList().asBuffer() + Assertions.assertEquals(7.0, Mean.double(sample), 0.2) } @Test - fun testNormalDistributionBlocking() { - val distribution = GaussianSampler.of(7.0, 2.0) + fun testNormalDistributionBlocking() = runBlocking { + val distribution = GaussianSampler(7.0, 2.0) val generator = RandomGenerator.default(1) - val sample = runBlocking { distribution.sample(generator).blocking().nextBlock(1000) } - Assertions.assertEquals(7.0, sample.average(), 0.1) + val sample = distribution.sample(generator).nextBufferBlocking(1000) + Assertions.assertEquals(7.0, Mean.double(sample), 0.2) } } diff --git a/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/StatisticTest.kt b/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/StatisticTest.kt index 156e618f9..3c9d6a2e4 100644 --- a/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/StatisticTest.kt +++ b/kmath-stat/src/jvmTest/kotlin/space/kscience/kmath/stat/StatisticTest.kt @@ -20,7 +20,7 @@ internal class StatisticTest { @Test fun testParallelMean() { runBlocking { - val average = Mean.real + val average = Mean.double .flow(chunked) //create a flow with results .drop(99) // Skip first 99 values and use one with total data .first() //get 1e5 data samples average