Fix Samplers and distribution API

This commit is contained in:
Alexander Nozik 2021-04-01 18:18:54 +03:00
parent ae911fcc2f
commit c2bab5d138
44 changed files with 915 additions and 806 deletions

View File

@ -7,6 +7,7 @@ import kscience.plotly.models.ScatterMode
import kscience.plotly.models.TraceValues
import space.kscience.kmath.commons.optimization.chiSquared
import space.kscience.kmath.commons.optimization.minimize
import space.kscience.kmath.distributions.NormalDistribution
import space.kscience.kmath.misc.symbol
import space.kscience.kmath.optimization.FunctionOptimization
import space.kscience.kmath.optimization.OptimizationResult
@ -14,7 +15,6 @@ import space.kscience.kmath.real.DoubleVector
import space.kscience.kmath.real.map
import space.kscience.kmath.real.step
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.distributions.NormalDistribution
import space.kscience.kmath.structures.asIterable
import space.kscience.kmath.structures.toList
import kotlin.math.pow

View File

@ -3,8 +3,8 @@ package space.kscience.kmath.stat
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.runBlocking
import space.kscience.kmath.stat.samplers.GaussianSampler
import org.apache.commons.rng.simple.RandomSource
import space.kscience.kmath.samplers.GaussianSampler
import java.time.Duration
import java.time.Instant
import org.apache.commons.rng.sampling.distribution.GaussianSampler as CMGaussianSampler
@ -12,8 +12,8 @@ import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSa
private suspend fun runKMathChained(): Duration {
val generator = RandomGenerator.fromSource(RandomSource.MT, 123L)
val normal = GaussianSampler.of(7.0, 2.0)
val chain = normal.sample(generator).blocking()
val normal = GaussianSampler(7.0, 2.0)
val chain = normal.sample(generator)
val startTime = Instant.now()
var sum = 0.0

View File

@ -3,7 +3,7 @@ package space.kscience.kmath.stat
import kotlinx.coroutines.runBlocking
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.collectWithState
import space.kscience.kmath.stat.distributions.NormalDistribution
import space.kscience.kmath.distributions.NormalDistribution
/**
* The state of distribution averager.

View File

@ -2,10 +2,10 @@ package space.kscience.kmath.commons.optimization
import kotlinx.coroutines.runBlocking
import space.kscience.kmath.commons.expressions.DerivativeStructureExpression
import space.kscience.kmath.distributions.NormalDistribution
import space.kscience.kmath.misc.symbol
import space.kscience.kmath.optimization.FunctionOptimization
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.distributions.NormalDistribution
import kotlin.math.pow
import kotlin.test.Test

View File

@ -0,0 +1,50 @@
package space.kscience.kmath.chains
import space.kscience.kmath.structures.Buffer
public interface BufferChain<out T> : Chain<T> {
public suspend fun nextBuffer(size: Int): Buffer<T>
override suspend fun fork(): BufferChain<T>
}
/**
* A chain with blocking generator that could be used without suspension
*/
public interface BlockingChain<out T> : Chain<T> {
/**
* Get the next value without concurrency support. Not guaranteed to be thread safe.
*/
public fun nextBlocking(): T
override suspend fun next(): T = nextBlocking()
override suspend fun fork(): BlockingChain<T>
}
public interface BlockingBufferChain<out T> : BlockingChain<T>, BufferChain<T> {
public fun nextBufferBlocking(size: Int): Buffer<T>
public override fun nextBlocking(): T = nextBufferBlocking(1)[0]
public override suspend fun nextBuffer(size: Int): Buffer<T> = nextBufferBlocking(size)
override suspend fun fork(): BlockingBufferChain<T>
}
public suspend inline fun <reified T : Any> Chain<T>.nextBuffer(size: Int): Buffer<T> = if (this is BufferChain) {
nextBuffer(size)
} else {
Buffer.auto(size) { next() }
}
public inline fun <reified T : Any> BlockingChain<T>.nextBufferBlocking(
size: Int,
): Buffer<T> = if (this is BlockingBufferChain) {
nextBufferBlocking(size)
} else {
Buffer.auto(size) { nextBlocking() }
}

View File

@ -1,13 +1,27 @@
package space.kscience.kmath.chains
import space.kscience.kmath.structures.DoubleBuffer
/**
* Chunked, specialized chain for real values.
* Chunked, specialized chain for double values, which supports blocking [nextBlocking] operation
*/
public interface BlockingDoubleChain : Chain<Double> {
public override suspend fun next(): Double
public interface BlockingDoubleChain : BlockingBufferChain<Double> {
/**
* Returns an [DoubleArray] chunk of [size] values of [next].
*/
public suspend fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { next() }
public override fun nextBufferBlocking(size: Int): DoubleBuffer
override suspend fun fork(): BlockingDoubleChain
public companion object
}
public fun BlockingDoubleChain.map(transform: (Double) -> Double): BlockingDoubleChain = object : BlockingDoubleChain {
override fun nextBufferBlocking(size: Int): DoubleBuffer {
val block = this@map.nextBufferBlocking(size)
return DoubleBuffer(size) { transform(block[it]) }
}
override suspend fun fork(): BlockingDoubleChain = this@map.fork().map(transform)
}

View File

@ -1,9 +1,12 @@
package space.kscience.kmath.chains
import space.kscience.kmath.structures.IntBuffer
/**
* Performance optimized chain for integer values
*/
public interface BlockingIntChain : Chain<Int> {
public override suspend fun next(): Int
public suspend fun nextBlock(size: Int): IntArray = IntArray(size) { next() }
public interface BlockingIntChain : BlockingBufferChain<Int> {
override fun nextBufferBlocking(size: Int): IntBuffer
override suspend fun fork(): BlockingIntChain
}

View File

@ -24,20 +24,20 @@ import kotlinx.coroutines.sync.withLock
/**
* A not-necessary-Markov chain of some type
* @param R - the chain element type
* @param T - the chain element type
*/
public interface Chain<out R> : Flow<R> {
public interface Chain<out T> : Flow<T> {
/**
* Generate next value, changing state if needed
*/
public suspend fun next(): R
public suspend fun next(): T
/**
* Create a copy of current chain state. Consuming resulting chain does not affect initial chain
*/
public fun fork(): Chain<R>
public suspend fun fork(): Chain<T>
override suspend fun collect(collector: FlowCollector<R>): Unit =
override suspend fun collect(collector: FlowCollector<T>): Unit =
flow { while (true) emit(next()) }.collect(collector)
public companion object
@ -51,7 +51,7 @@ public fun <T> Sequence<T>.asChain(): Chain<T> = iterator().asChain()
*/
public class SimpleChain<out R>(private val gen: suspend () -> R) : Chain<R> {
public override suspend fun next(): R = gen()
public override fun fork(): Chain<R> = this
public override suspend fun fork(): Chain<R> = this
}
/**
@ -69,7 +69,7 @@ public class MarkovChain<out R : Any>(private val seed: suspend () -> R, private
newValue
}
public override fun fork(): Chain<R> = MarkovChain(seed = { value ?: seed() }, gen = gen)
public override suspend fun fork(): Chain<R> = MarkovChain(seed = { value ?: seed() }, gen = gen)
}
/**
@ -94,7 +94,7 @@ public class StatefulChain<S, out R>(
newValue
}
public override fun fork(): Chain<R> = StatefulChain(forkState(state), seed, forkState, gen)
public override suspend fun fork(): Chain<R> = StatefulChain(forkState(state), seed, forkState, gen)
}
/**
@ -102,7 +102,7 @@ public class StatefulChain<S, out R>(
*/
public class ConstantChain<out T>(public val value: T) : Chain<T> {
public override suspend fun next(): T = value
public override fun fork(): Chain<T> = this
public override suspend fun fork(): Chain<T> = this
}
/**
@ -111,7 +111,7 @@ public class ConstantChain<out T>(public val value: T) : Chain<T> {
*/
public fun <T, R> Chain<T>.map(func: suspend (T) -> R): Chain<R> = object : Chain<R> {
override suspend fun next(): R = func(this@map.next())
override fun fork(): Chain<R> = this@map.fork().map(func)
override suspend fun fork(): Chain<R> = this@map.fork().map(func)
}
/**
@ -127,7 +127,7 @@ public fun <T> Chain<T>.filter(block: (T) -> Boolean): Chain<T> = object : Chain
return next
}
override fun fork(): Chain<T> = this@filter.fork().filter(block)
override suspend fun fork(): Chain<T> = this@filter.fork().filter(block)
}
/**
@ -135,7 +135,7 @@ public fun <T> Chain<T>.filter(block: (T) -> Boolean): Chain<T> = object : Chain
*/
public fun <T, R> Chain<T>.collect(mapper: suspend (Chain<T>) -> R): Chain<R> = object : Chain<R> {
override suspend fun next(): R = mapper(this@collect)
override fun fork(): Chain<R> = this@collect.fork().collect(mapper)
override suspend fun fork(): Chain<R> = this@collect.fork().collect(mapper)
}
public fun <T, S, R> Chain<T>.collectWithState(
@ -145,7 +145,7 @@ public fun <T, S, R> Chain<T>.collectWithState(
): Chain<R> = object : Chain<R> {
override suspend fun next(): R = state.mapper(this@collectWithState)
override fun fork(): Chain<R> =
override suspend fun fork(): Chain<R> =
this@collectWithState.fork().collectWithState(stateFork(state), stateFork, mapper)
}
@ -154,5 +154,5 @@ public fun <T, S, R> Chain<T>.collectWithState(
*/
public fun <T, U, R> Chain<T>.zip(other: Chain<U>, block: suspend (T, U) -> R): Chain<R> = object : Chain<R> {
override suspend fun next(): R = block(this@zip.next(), other.next())
override fun fork(): Chain<R> = this@zip.fork().zip(other.fork(), block)
override suspend fun fork(): Chain<R> = this@zip.fork().zip(other.fork(), block)
}

View File

@ -6,7 +6,6 @@ import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.structures.Buffer
import space.kscience.kmath.structures.BufferFactory
import space.kscience.kmath.structures.DoubleBuffer
import space.kscience.kmath.structures.asBuffer
/**
* Create a [Flow] from buffer
@ -50,7 +49,7 @@ public fun Flow<Double>.chunked(bufferSize: Int): Flow<DoubleBuffer> = flow {
if (this@chunked is BlockingDoubleChain) {
// performance optimization for blocking primitive chain
while (true) emit(nextBlock(bufferSize).asBuffer())
while (true) emit(nextBufferBlocking(bufferSize))
} else {
val array = DoubleArray(bufferSize)
var counter = 0

View File

@ -2,6 +2,10 @@ plugins {
id("ru.mipt.npm.gradle.mpp")
}
kscience{
useAtomic()
}
kotlin.sourceSets {
commonMain {
dependencies {

View File

@ -0,0 +1,38 @@
package space.kscience.kmath.distributions
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
/**
* A distribution of typed objects.
*/
public interface Distribution<T : Any> : Sampler<T> {
/**
* A probability value for given argument [arg].
* For continuous distributions returns PDF
*/
public fun probability(arg: T): Double
public override fun sample(generator: RandomGenerator): Chain<T>
/**
* An empty companion. Distribution factories should be written as its extensions
*/
public companion object
}
public interface UnivariateDistribution<T : Comparable<T>> : Distribution<T> {
/**
* Cumulative distribution for ordered parameter (CDF)
*/
public fun cumulative(arg: T): Double
}
/**
* Compute probability integral in an interval
*/
public fun <T : Comparable<T>> UnivariateDistribution<T>.integral(from: T, to: T): Double {
require(to > from)
return cumulative(to) - cumulative(from)
}

View File

@ -1,7 +1,8 @@
package space.kscience.kmath.stat
package space.kscience.kmath.distributions
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.SimpleChain
import space.kscience.kmath.stat.RandomGenerator
/**
* A multivariate distribution which takes a map of parameters

View File

@ -1,12 +1,11 @@
package space.kscience.kmath.stat.distributions
package space.kscience.kmath.distributions
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.internal.InternalErf
import space.kscience.kmath.samplers.GaussianSampler
import space.kscience.kmath.samplers.NormalizedGaussianSampler
import space.kscience.kmath.samplers.ZigguratNormalizedGaussianSampler
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.UnivariateDistribution
import space.kscience.kmath.stat.internal.InternalErf
import space.kscience.kmath.stat.samplers.GaussianSampler
import space.kscience.kmath.stat.samplers.NormalizedGaussianSampler
import space.kscience.kmath.stat.samplers.ZigguratNormalizedGaussianSampler
import kotlin.math.*
/**
@ -16,8 +15,8 @@ public inline class NormalDistribution(public val sampler: GaussianSampler) : Un
public constructor(
mean: Double,
standardDeviation: Double,
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of(),
) : this(GaussianSampler.of(mean, standardDeviation, normalized))
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler,
) : this(GaussianSampler(mean, standardDeviation, normalized))
public override fun probability(arg: Double): Double {
val x1 = (arg - sampler.mean) / sampler.standardDeviation

View File

@ -1,4 +1,4 @@
package space.kscience.kmath.stat.internal
package space.kscience.kmath.internal
import kotlin.math.abs

View File

@ -1,4 +1,4 @@
package space.kscience.kmath.stat.internal
package space.kscience.kmath.internal
import kotlin.math.*

View File

@ -1,4 +1,4 @@
package space.kscience.kmath.stat.internal
package space.kscience.kmath.internal
import kotlin.math.ln
import kotlin.math.min

View File

@ -0,0 +1,72 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.structures.DoubleBuffer
import kotlin.math.ln
import kotlin.math.pow
/**
* Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html).
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html].
*/
public class AhrensDieterExponentialSampler(public val mean: Double) : Sampler<Double> {
init {
require(mean > 0) { "mean is not strictly positive: $mean" }
}
public override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
override fun nextBlocking(): Double {
// Step 1:
var a = 0.0
var u = generator.nextDouble()
// Step 2 and 3:
while (u < 0.5) {
a += EXPONENTIAL_SA_QI[0]
u *= 2.0
}
// Step 4 (now u >= 0.5):
u += u - 1
// Step 5:
if (u <= EXPONENTIAL_SA_QI[0]) return mean * (a + u)
// Step 6:
var i = 0 // Should be 1, be we iterate before it in while using 0.
var u2 = generator.nextDouble()
var umin = u2
// Step 7 and 8:
do {
++i
u2 = generator.nextDouble()
if (u2 < umin) umin = u2
// Step 8:
} while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1.
return mean * (a + umin * EXPONENTIAL_SA_QI[0])
}
override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { nextBlocking() }
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
}
public companion object {
private val EXPONENTIAL_SA_QI by lazy {
val ln2 = ln(2.0)
var qi = 0.0
DoubleArray(16) { i ->
qi += ln2.pow(i + 1.0) / space.kscience.kmath.internal.InternalUtils.factorial(i + 1)
qi
}
}
}
}

View File

@ -1,4 +1,4 @@
package space.kscience.kmath.stat.samplers
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
@ -80,7 +80,7 @@ public class AhrensDieterMarsagliaTsangGammaSampler private constructor(
private val gaussian: NormalizedGaussianSampler
init {
gaussian = ZigguratNormalizedGaussianSampler.of()
gaussian = ZigguratNormalizedGaussianSampler
dOptim = alpha - ONE_THIRD
cOptim = ONE_THIRD / sqrt(dOptim)
}

View File

@ -1,10 +1,10 @@
package space.kscience.kmath.stat.samplers
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.internal.InternalUtils
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.internal.InternalUtils
import kotlin.math.ceil
import kotlin.math.max
import kotlin.math.min
@ -39,12 +39,12 @@ import kotlin.math.min
public open class AliasMethodDiscreteSampler private constructor(
// Deliberate direct storage of input arrays
protected val probability: LongArray,
protected val alias: IntArray
protected val alias: IntArray,
) : Sampler<Int> {
private class SmallTableAliasMethodDiscreteSampler(
probability: LongArray,
alias: IntArray
alias: IntArray,
) : AliasMethodDiscreteSampler(probability, alias) {
// Assume the table size is a power of 2 and create the mask
private val mask: Int = alias.size - 1
@ -111,9 +111,79 @@ public open class AliasMethodDiscreteSampler private constructor(
private const val CONVERT_TO_NUMERATOR: Double = ONE_AS_NUMERATOR.toDouble()
private const val MAX_SMALL_POWER_2_SIZE = 1 shl 11
public fun of(
private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int {
var updatedSmall = small
(length until indices.size).forEach { i -> indices[updatedSmall++] = i }
return updatedSmall
}
private fun findLastNonZeroIndex(probabilities: DoubleArray): Int {
// No bounds check is performed when decrementing as the array contains at least one
// value above zero.
var nonZeroIndex = probabilities.size - 1
while (probabilities[nonZeroIndex] == ZERO) nonZeroIndex--
return nonZeroIndex
}
private fun computeSize(length: Int, alpha: Int): Int {
// If No padding
if (alpha < 0) return length
// Use the number of leading zeros function to find the next power of 2,
// i.e. ceil(log2(x))
var pow2 = 32 - numberOfLeadingZeros(length - 1)
// Increase by the alpha. Clip this to limit to a positive integer (2^30)
pow2 = min(30, pow2 + alpha)
// Use max to handle a length above the highest possible power of 2
return max(length, 1 shl pow2)
}
private fun fillTable(
probability: LongArray,
alias: IntArray,
indices: IntArray,
start: Int,
end: Int,
) = (start until end).forEach { i ->
val index = indices[i]
probability[index] = ONE_AS_NUMERATOR
alias[index] = index
}
private fun isSmallPowerOf2(n: Int): Boolean = n <= MAX_SMALL_POWER_2_SIZE && n and n - 1 == 0
private fun numberOfLeadingZeros(i: Int): Int {
var mutI = i
if (mutI <= 0) return if (mutI == 0) 32 else 0
var n = 31
if (mutI >= 1 shl 16) {
n -= 16
mutI = mutI ushr 16
}
if (mutI >= 1 shl 8) {
n -= 8
mutI = mutI ushr 8
}
if (mutI >= 1 shl 4) {
n -= 4
mutI = mutI ushr 4
}
if (mutI >= 1 shl 2) {
n -= 2
mutI = mutI ushr 2
}
return n - (mutI ushr 1)
}
}
@Suppress("FunctionName")
public fun AliasMethodDiscreteSampler(
probabilities: DoubleArray,
alpha: Int = DEFAULT_ALPHA
alpha: Int = DEFAULT_ALPHA,
): Sampler<Int> {
// The Alias method balances N categories with counts around the mean into N sections,
// each allocated 'mean' observations.
@ -209,78 +279,10 @@ public open class AliasMethodDiscreteSampler private constructor(
fillTable(probability, alias, indices, large, n)
// Change the algorithm for small power of 2 sized tables
return if (isSmallPowerOf2(n))
return if (isSmallPowerOf2(n)) {
SmallTableAliasMethodDiscreteSampler(probability, alias)
else
} else {
AliasMethodDiscreteSampler(probability, alias)
}
private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int {
var updatedSmall = small
(length until indices.size).forEach { i -> indices[updatedSmall++] = i }
return updatedSmall
}
private fun findLastNonZeroIndex(probabilities: DoubleArray): Int {
// No bounds check is performed when decrementing as the array contains at least one
// value above zero.
var nonZeroIndex = probabilities.size - 1
while (probabilities[nonZeroIndex] == ZERO) nonZeroIndex--
return nonZeroIndex
}
private fun computeSize(length: Int, alpha: Int): Int {
// If No padding
if (alpha < 0) return length
// Use the number of leading zeros function to find the next power of 2,
// i.e. ceil(log2(x))
var pow2 = 32 - numberOfLeadingZeros(length - 1)
// Increase by the alpha. Clip this to limit to a positive integer (2^30)
pow2 = min(30, pow2 + alpha)
// Use max to handle a length above the highest possible power of 2
return max(length, 1 shl pow2)
}
private fun fillTable(
probability: LongArray,
alias: IntArray,
indices: IntArray,
start: Int,
end: Int
) = (start until end).forEach { i ->
val index = indices[i]
probability[index] = ONE_AS_NUMERATOR
alias[index] = index
}
private fun isSmallPowerOf2(n: Int): Boolean = n <= MAX_SMALL_POWER_2_SIZE && n and n - 1 == 0
private fun numberOfLeadingZeros(i: Int): Int {
var mutI = i
if (mutI <= 0) return if (mutI == 0) 32 else 0
var n = 31
if (mutI >= 1 shl 16) {
n -= 16
mutI = mutI ushr 16
}
if (mutI >= 1 shl 8) {
n -= 8
mutI = mutI ushr 8
}
if (mutI >= 1 shl 4) {
n -= 4
mutI = mutI ushr 4
}
if (mutI >= 1 shl 2) {
n -= 2
mutI = mutI ushr 2
}
return n - (mutI ushr 1)
}
}
}

View File

@ -0,0 +1,52 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.structures.DoubleBuffer
import kotlin.math.*
/**
* [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian
* distribution.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html].
*/
public object BoxMullerSampler : NormalizedGaussianSampler {
override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
var state = Double.NaN
override fun nextBufferBlocking(size: Int): DoubleBuffer {
val xs = generator.nextDoubleBuffer(size)
val ys = generator.nextDoubleBuffer(size)
return DoubleBuffer(size) { index ->
if (state.isNaN()) {
// Generate a pair of Gaussian numbers.
val x = xs[index]
val y = ys[index]
val alpha = 2 * PI * x
val r = sqrt(-2 * ln(y))
// Keep second element of the pair for next invocation.
state = r * sin(alpha)
// Return the first element of the generated pair.
r * cos(alpha)
} else {
// Use the second element of the pair (generated at the
// previous invocation).
state.also {
// Both elements of the pair have been used.
state = Double.NaN
}
}
}
}
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
}
}

View File

@ -0,0 +1,13 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingBufferChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.structures.Buffer
public class ConstantSampler<T : Any>(public val const: T) : Sampler<T> {
override fun sample(generator: RandomGenerator): BlockingBufferChain<T> = object : BlockingBufferChain<T> {
override fun nextBufferBlocking(size: Int): Buffer<T> = Buffer.boxing(size) { const }
override suspend fun fork(): BlockingBufferChain<T> = this
}
}

View File

@ -0,0 +1,34 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.chains.map
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
/**
* Sampling from a Gaussian distribution with given mean and standard deviation.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html].
*
* @property mean the mean of the distribution.
* @property standardDeviation the variance of the distribution.
*/
public class GaussianSampler(
public val mean: Double,
public val standardDeviation: Double,
private val normalized: NormalizedGaussianSampler = BoxMullerSampler
) : Sampler<Double> {
init {
require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" }
}
public override fun sample(generator: RandomGenerator): BlockingDoubleChain = normalized
.sample(generator)
.map { standardDeviation * it + mean }
override fun toString(): String = "N($mean, $standardDeviation)"
public companion object
}

View File

@ -0,0 +1,68 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingIntChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.structures.IntBuffer
import kotlin.math.exp
/**
* Sampler for the Poisson distribution.
* - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
* This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead.
*
* Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor.
*
* Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html].
*/
public class KempSmallMeanPoissonSampler internal constructor(
private val p0: Double,
private val mean: Double,
) : Sampler<Int> {
public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain {
override fun nextBlocking(): Int {
//TODO move to nextBufferBlocking
// Note on the algorithm:
// - X is the unknown sample deviate (the output of the algorithm)
// - x is the current value from the distribution
// - p is the probability of the current value x, p(X=x)
// - u is effectively the cumulative probability that the sample X
// is equal or above the current value x, p(X>=x)
// So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
var u = generator.nextDouble()
var x = 0
var p = p0
while (u > p) {
u -= p
// Compute the next probability using a recurrence relation.
// p(x+1) = p(x) * mean / (x+1)
p *= mean / ++x
// The algorithm listed in Kemp (1981) does not check that the rolling probability
// is positive. This check is added to ensure no errors when the limit of the summation
// 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic.
if (p == 0.0) return x
}
return x
}
override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() }
override suspend fun fork(): BlockingIntChain = sample(generator.fork())
}
public override fun toString(): String = "Kemp Small Mean Poisson deviate"
}
public fun KempSmallMeanPoissonSampler(mean: Double): KempSmallMeanPoissonSampler {
require(mean > 0) { "Mean is not strictly positive: $mean" }
val p0 = exp(-mean)
// Probability must be positive. As mean increases then p(0) decreases.
require(p0 > 0) { "No probability for mean: $mean" }
return KempSmallMeanPoissonSampler(p0, mean)
}

View File

@ -0,0 +1,61 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.structures.DoubleBuffer
import kotlin.math.ln
import kotlin.math.sqrt
/**
* [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian
* distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in
* [BoxMullerNormalizedGaussianSampler].
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html]
*/
public object MarsagliaNormalizedGaussianSampler : NormalizedGaussianSampler {
override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
var nextGaussian = Double.NaN
override fun nextBlocking(): Double {
return if (nextGaussian.isNaN()) {
val alpha: Double
var x: Double
// Rejection scheme for selecting a pair that lies within the unit circle.
while (true) {
// Generate a pair of numbers within [-1 , 1).
x = 2.0 * generator.nextDouble() - 1.0
val y = 2.0 * generator.nextDouble() - 1.0
val r2 = x * x + y * y
if (r2 < 1 && r2 > 0) {
// Pair (x, y) is within unit circle.
alpha = sqrt(-2 * ln(r2) / r2)
// Keep second element of the pair for next invocation.
nextGaussian = alpha * y
// Return the first element of the generated pair.
break
}
// Pair is not within the unit circle: Generate another one.
}
// Return the first element of the generated pair.
alpha * x
} else {
// Use the second element of the pair (generated at the
// previous invocation).
val r = nextGaussian
// Both elements of the pair have been used.
nextGaussian = Double.NaN
r
}
}
override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { nextBlocking() }
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
}
}

View File

@ -0,0 +1,18 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
public interface BlockingDoubleSampler: Sampler<Double>{
override fun sample(generator: RandomGenerator): BlockingDoubleChain
}
/**
* Marker interface for a sampler that generates values from an N(0,1)
* [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution).
*/
public fun interface NormalizedGaussianSampler : BlockingDoubleSampler{
public companion object
}

View File

@ -0,0 +1,203 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingIntChain
import space.kscience.kmath.internal.InternalUtils
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.structures.IntBuffer
import kotlin.math.*
private const val PIVOT = 40.0
/**
* Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3
* Important integer-valued distributions: The Poisson distribution. Addison Wesley.
* The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html].
*/
@Suppress("FunctionName")
public fun PoissonSampler(mean: Double): Sampler<Int> {
return if (mean < PIVOT) SmallMeanPoissonSampler(mean) else LargeMeanPoissonSampler(mean)
}
/**
* Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important
* integer-valued distributions: The Poisson distribution. Addison Wesley.
* - The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead.
*
* Based on Commons RNG implementation.
*
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html].
*/
public class SmallMeanPoissonSampler(public val mean: Double) : Sampler<Int> {
init {
require(mean > 0) { "mean is not strictly positive: $mean" }
}
private val p0: Double = exp(-mean)
private val limit: Int = if (p0 > 0) {
ceil(1000 * mean)
} else {
throw IllegalArgumentException("No p(x=0) probability for mean: $mean")
}.toInt()
public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain {
override fun nextBlocking(): Int {
var n = 0
var r = 1.0
while (n < limit) {
r *= generator.nextDouble()
if (r >= p0) n++ else break
}
return n
}
override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() }
override suspend fun fork(): BlockingIntChain = sample(generator.fork())
}
public override fun toString(): String = "Small Mean Poisson deviate"
}
/**
* Sampler for the Poisson distribution.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables
* Computing vol. 26 pp. 197-207.
*
* This sampler is suitable for mean >= 40.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html].
*/
public class LargeMeanPoissonSampler(public val mean: Double) : Sampler<Int> {
init {
require(mean >= 1) { "mean is not >= 1: $mean" }
// The algorithm is not valid if Math.floor(mean) is not an integer.
require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" }
}
private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG
private val lambda: Double = floor(mean)
private val logLambda: Double = ln(lambda)
private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt())
private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1))
private val halfDelta: Double = delta / 2
private val twolpd: Double = 2 * lambda + delta
private val c1: Double = 1 / (8 * lambda)
private val a1: Double = sqrt(PI * twolpd) * exp(c1)
private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd)
private val aSum: Double = a1 + a2 + 1
private val p1: Double = a1 / aSum
private val p2: Double = a2 / aSum
public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain {
override fun nextBlocking(): Int {
val exponential = AhrensDieterExponentialSampler(1.0).sample(generator)
val gaussian = ZigguratNormalizedGaussianSampler.sample(generator)
val smallMeanPoissonSampler = if (mean - lambda < Double.MIN_VALUE) {
null
} else {
KempSmallMeanPoissonSampler(mean - lambda).sample(generator)
}
val y2 = smallMeanPoissonSampler?.nextBlocking() ?: 0
var x: Double
var y: Double
var v: Double
var a: Int
var t: Double
var qr: Double
var qa: Double
while (true) {
// Step 1:
val u = generator.nextDouble()
if (u <= p1) {
// Step 2:
val n = gaussian.nextBlocking()
x = n * sqrt(lambda + halfDelta) - 0.5
if (x > delta || x < -lambda) continue
y = if (x < 0) floor(x) else ceil(x)
val e = exponential.nextBlocking()
v = -e - 0.5 * n * n + c1
} else {
// Step 3:
if (u > p1 + p2) {
y = lambda
break
}
x = delta + twolpd / delta * exponential.nextBlocking()
y = ceil(x)
v = -exponential.nextBlocking() - delta * (x + 1) / twolpd
}
// The Squeeze Principle
// Step 4.1:
a = if (x < 0) 1 else 0
t = y * (y + 1) / (2 * lambda)
// Step 4.2
if (v < -t && a == 0) {
y += lambda
break
}
// Step 4.3:
qr = t * ((2 * y + 1) / (6 * lambda) - 1)
qa = qr - t * t / (3 * (lambda + a * (y + 1)))
// Step 4.4:
if (v < qa) {
y += lambda
break
}
// Step 4.5:
if (v > qr) continue
// Step 4.6:
if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) {
y += lambda
break
}
}
return min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt()
}
override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() }
override suspend fun fork(): BlockingIntChain = sample(generator.fork())
}
private fun getFactorialLog(n: Int): Double = factorialLog.value(n)
public override fun toString(): String = "Large Mean Poisson deviate"
public companion object {
private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE
private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create()
}
}

View File

@ -0,0 +1,88 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.structures.DoubleBuffer
import kotlin.math.*
/**
* [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a
* Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this
* implementation has been adapted from the C code provided therein.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html].
*/
public object ZigguratNormalizedGaussianSampler : NormalizedGaussianSampler {
private const val R: Double = 3.442619855899
private const val ONE_OVER_R: Double = 1 / R
private const val V: Double = 9.91256303526217e-3
private val MAX: Double = 2.0.pow(63.0)
private val ONE_OVER_MAX: Double = 1.0 / MAX
private const val LEN: Int = 128
private const val LAST: Int = LEN - 1
private val K: LongArray = LongArray(LEN)
private val W: DoubleArray = DoubleArray(LEN)
private val F: DoubleArray = DoubleArray(LEN)
init {
// Filling the tables.
var d = R
var t = d
var fd = gauss(d)
val q = V / fd
K[0] = (d / q * MAX).toLong()
K[1] = 0
W[0] = q * ONE_OVER_MAX
W[LAST] = d * ONE_OVER_MAX
F[0] = 1.0
F[LAST] = fd
(LAST - 1 downTo 1).forEach { i ->
d = sqrt(-2 * ln(V / d + fd))
fd = gauss(d)
K[i + 1] = (d / t * MAX).toLong()
t = d
F[i] = fd
W[i] = d * ONE_OVER_MAX
}
}
private fun gauss(x: Double): Double = exp(-0.5 * x * x)
private fun sampleOne(generator: RandomGenerator): Double {
val j = generator.nextLong()
val i = (j and LAST.toLong()).toInt()
return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i)
}
override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { sampleOne(generator) }
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
}
private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double {
var x = hz * W[iz]
return when {
iz == 0 -> {
var y: Double
do {
y = -ln(generator.nextDouble())
x = -ln(generator.nextDouble()) * ONE_OVER_R
} while (y + y < x * x)
val out = R + x
if (hz > 0) out else -out
}
F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x
else -> sampleOne(generator)
}
}
}

View File

@ -1,8 +1,8 @@
package space.kscience.kmath.stat
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.chains.BlockingIntChain
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.structures.DoubleBuffer
/**
* A possibly stateful chain producing random values.
@ -11,12 +11,24 @@ import space.kscience.kmath.chains.Chain
*/
public class RandomChain<out R>(
public val generator: RandomGenerator,
private val gen: suspend RandomGenerator.() -> R
private val gen: suspend RandomGenerator.() -> R,
) : Chain<R> {
override suspend fun next(): R = generator.gen()
override fun fork(): Chain<R> = RandomChain(generator.fork(), gen)
override suspend fun fork(): Chain<R> = RandomChain(generator.fork(), gen)
}
/**
* Create a generic random chain with provided [generator]
*/
public fun <R> RandomGenerator.chain(generator: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, generator)
/**
* A type-specific double chunk random chain
*/
public class UniformDoubleChain(public val generator: RandomGenerator) : BlockingDoubleChain {
public override fun nextBufferBlocking(size: Int): DoubleBuffer = generator.nextDoubleBuffer(size)
override suspend fun nextBuffer(size: Int): DoubleBuffer = nextBufferBlocking(size)
override suspend fun fork(): UniformDoubleChain = UniformDoubleChain(generator.fork())
}
public fun <R> RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, gen)
public fun Chain<Double>.blocking(): BlockingDoubleChain = object : Chain<Double> by this, BlockingDoubleChain {}
public fun Chain<Int>.blocking(): BlockingIntChain = object : Chain<Int> by this, BlockingIntChain {}

View File

@ -1,5 +1,6 @@
package space.kscience.kmath.stat
import space.kscience.kmath.structures.DoubleBuffer
import kotlin.random.Random
/**
@ -16,6 +17,11 @@ public interface RandomGenerator {
*/
public fun nextDouble(): Double
/**
* A chunk of doubles of given [size]
*/
public fun nextDoubleBuffer(size: Int): DoubleBuffer = DoubleBuffer(size) { nextDouble() }
/**
* Gets the next random `Int` from the random number generator.
*

View File

@ -3,16 +3,13 @@ package space.kscience.kmath.stat
import kotlinx.coroutines.flow.first
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.collect
import space.kscience.kmath.structures.Buffer
import space.kscience.kmath.structures.BufferFactory
import space.kscience.kmath.structures.IntBuffer
import space.kscience.kmath.structures.MutableBuffer
import space.kscience.kmath.structures.*
import kotlin.jvm.JvmName
/**
* Sampler that generates chains of values of type [T].
* Sampler that generates chains of values of type [T] in a chain of type [C].
*/
public fun interface Sampler<T : Any> {
public fun interface Sampler<out T : Any> {
/**
* Generates a chain of samples.
*
@ -22,39 +19,6 @@ public fun interface Sampler<T : Any> {
public fun sample(generator: RandomGenerator): Chain<T>
}
/**
* A distribution of typed objects.
*/
public interface Distribution<T : Any> : Sampler<T> {
/**
* A probability value for given argument [arg].
* For continuous distributions returns PDF
*/
public fun probability(arg: T): Double
public override fun sample(generator: RandomGenerator): Chain<T>
/**
* An empty companion. Distribution factories should be written as its extensions
*/
public companion object
}
public interface UnivariateDistribution<T : Comparable<T>> : Distribution<T> {
/**
* Cumulative distribution for ordered parameter (CDF)
*/
public fun cumulative(arg: T): Double
}
/**
* Compute probability integral in an interval
*/
public fun <T : Comparable<T>> UnivariateDistribution<T>.integral(from: T, to: T): Double {
require(to > from)
return cumulative(to) - cumulative(from)
}
/**
* Sample a bunch of values
*/
@ -71,7 +35,7 @@ public fun <T : Any> Sampler<T>.sampleBuffer(
//clear list from previous run
tmp.clear()
//Fill list
repeat(size) { tmp += chain.next() }
repeat(size) { tmp.add(chain.next()) }
//return new buffer with elements from tmp
bufferFactory(size) { tmp[it] }
}
@ -87,7 +51,7 @@ public suspend fun <T : Any> Sampler<T>.next(generator: RandomGenerator): T = sa
*/
@JvmName("sampleRealBuffer")
public fun Sampler<Double>.sampleBuffer(generator: RandomGenerator, size: Int): Chain<Buffer<Double>> =
sampleBuffer(generator, size, MutableBuffer.Companion::double)
sampleBuffer(generator, size, ::DoubleBuffer)
/**
* Generates [size] integer samples and chunks them into some buffers.

View File

@ -81,7 +81,7 @@ public class Mean<T>(
public companion object {
//TODO replace with optimized version which respects overflow
public val real: Mean<Double> = Mean(DoubleField) { sum, count -> sum / count }
public val double: Mean<Double> = Mean(DoubleField) { sum, count -> sum / count }
public val int: Mean<Int> = Mean(IntRing) { sum, count -> sum / count }
public val long: Mean<Long> = Mean(LongRing) { sum, count -> sum / count }
}

View File

@ -2,6 +2,8 @@ package space.kscience.kmath.stat
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.SimpleChain
import space.kscience.kmath.distributions.Distribution
import space.kscience.kmath.distributions.UnivariateDistribution
public class UniformDistribution(public val range: ClosedFloatingPointRange<Double>) : UnivariateDistribution<Double> {
private val length: Double = range.endInclusive - range.start

View File

@ -1,73 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.internal.InternalUtils
import kotlin.math.ln
import kotlin.math.pow
/**
* Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html).
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html].
*/
public class AhrensDieterExponentialSampler private constructor(public val mean: Double) : Sampler<Double> {
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
// Step 1:
var a = 0.0
var u = nextDouble()
// Step 2 and 3:
while (u < 0.5) {
a += EXPONENTIAL_SA_QI[0]
u *= 2.0
}
// Step 4 (now u >= 0.5):
u += u - 1
// Step 5:
if (u <= EXPONENTIAL_SA_QI[0]) return@chain mean * (a + u)
// Step 6:
var i = 0 // Should be 1, be we iterate before it in while using 0.
var u2 = nextDouble()
var umin = u2
// Step 7 and 8:
do {
++i
u2 = nextDouble()
if (u2 < umin) umin = u2
// Step 8:
} while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1.
mean * (a + umin * EXPONENTIAL_SA_QI[0])
}
override fun toString(): String = "Ahrens-Dieter Exponential deviate"
public companion object {
private val EXPONENTIAL_SA_QI by lazy { DoubleArray(16) }
init {
/**
* Filling EXPONENTIAL_SA_QI table.
* Note that we don't want qi = 0 in the table.
*/
val ln2 = ln(2.0)
var qi = 0.0
EXPONENTIAL_SA_QI.indices.forEach { i ->
qi += ln2.pow(i + 1.0) / InternalUtils.factorial(i + 1)
EXPONENTIAL_SA_QI[i] = qi
}
}
public fun of(mean: Double): AhrensDieterExponentialSampler {
require(mean > 0) { "mean is not strictly positive: $mean" }
return AhrensDieterExponentialSampler(mean)
}
}
}

View File

@ -1,48 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.*
/**
* [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian
* distribution.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html].
*/
public class BoxMullerNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
private var nextGaussian: Double = Double.NaN
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
val random: Double
if (nextGaussian.isNaN()) {
// Generate a pair of Gaussian numbers.
val x = nextDouble()
val y = nextDouble()
val alpha = 2 * PI * x
val r = sqrt(-2 * ln(y))
// Return the first element of the generated pair.
random = r * cos(alpha)
// Keep second element of the pair for next invocation.
nextGaussian = r * sin(alpha)
} else {
// Use the second element of the pair (generated at the
// previous invocation).
random = nextGaussian
// Both elements of the pair have been used.
nextGaussian = Double.NaN
}
random
}
public override fun toString(): String = "Box-Muller normalized Gaussian deviate"
public companion object {
public fun of(): BoxMullerNormalizedGaussianSampler = BoxMullerNormalizedGaussianSampler()
}
}

View File

@ -1,43 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.map
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
/**
* Sampling from a Gaussian distribution with given mean and standard deviation.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html].
*
* @property mean the mean of the distribution.
* @property standardDeviation the variance of the distribution.
*/
public class GaussianSampler private constructor(
public val mean: Double,
public val standardDeviation: Double,
private val normalized: NormalizedGaussianSampler
) : Sampler<Double> {
public override fun sample(generator: RandomGenerator): Chain<Double> = normalized
.sample(generator)
.map { standardDeviation * it + mean }
override fun toString(): String = "Gaussian deviate [$normalized]"
public companion object {
public fun of(
mean: Double,
standardDeviation: Double,
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of()
): GaussianSampler {
require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" }
return GaussianSampler(
mean,
standardDeviation,
normalized
)
}
}
}

View File

@ -1,63 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.exp
/**
* Sampler for the Poisson distribution.
* - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
* This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead.
*
* Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor.
*
* Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html].
*/
public class KempSmallMeanPoissonSampler private constructor(
private val p0: Double,
private val mean: Double
) : Sampler<Int> {
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
// Note on the algorithm:
// - X is the unknown sample deviate (the output of the algorithm)
// - x is the current value from the distribution
// - p is the probability of the current value x, p(X=x)
// - u is effectively the cumulative probability that the sample X
// is equal or above the current value x, p(X>=x)
// So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
var u = nextDouble()
var x = 0
var p = p0
while (u > p) {
u -= p
// Compute the next probability using a recurrence relation.
// p(x+1) = p(x) * mean / (x+1)
p *= mean / ++x
// The algorithm listed in Kemp (1981) does not check that the rolling probability
// is positive. This check is added to ensure no errors when the limit of the summation
// 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic.
if (p == 0.0) return@chain x
}
x
}
public override fun toString(): String = "Kemp Small Mean Poisson deviate"
public companion object {
public fun of(mean: Double): KempSmallMeanPoissonSampler {
require(mean > 0) { "Mean is not strictly positive: $mean" }
val p0 = exp(-mean)
// Probability must be positive. As mean increases then p(0) decreases.
require(p0 > 0) { "No probability for mean: $mean" }
return KempSmallMeanPoissonSampler(p0, mean)
}
}
}

View File

@ -1,130 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.ConstantChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.internal.InternalUtils
import space.kscience.kmath.stat.next
import kotlin.math.*
/**
* Sampler for the Poisson distribution.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables
* Computing vol. 26 pp. 197-207.
*
* This sampler is suitable for mean >= 40.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html].
*/
public class LargeMeanPoissonSampler private constructor(public val mean: Double) : Sampler<Int> {
private val exponential: Sampler<Double> = AhrensDieterExponentialSampler.of(1.0)
private val gaussian: Sampler<Double> = ZigguratNormalizedGaussianSampler.of()
private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG
private val lambda: Double = floor(mean)
private val logLambda: Double = ln(lambda)
private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt())
private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1))
private val halfDelta: Double = delta / 2
private val twolpd: Double = 2 * lambda + delta
private val c1: Double = 1 / (8 * lambda)
private val a1: Double = sqrt(PI * twolpd) * exp(c1)
private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd)
private val aSum: Double = a1 + a2 + 1
private val p1: Double = a1 / aSum
private val p2: Double = a2 / aSum
private val smallMeanPoissonSampler: Sampler<Int> = if (mean - lambda < Double.MIN_VALUE)
NO_SMALL_MEAN_POISSON_SAMPLER
else // Not used.
KempSmallMeanPoissonSampler.of(mean - lambda)
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
// This will never be null. It may be a no-op delegate that returns zero.
val y2 = smallMeanPoissonSampler.next(generator)
var x: Double
var y: Double
var v: Double
var a: Int
var t: Double
var qr: Double
var qa: Double
while (true) {
// Step 1:
val u = generator.nextDouble()
if (u <= p1) {
// Step 2:
val n = gaussian.next(generator)
x = n * sqrt(lambda + halfDelta) - 0.5
if (x > delta || x < -lambda) continue
y = if (x < 0) floor(x) else ceil(x)
val e = exponential.next(generator)
v = -e - 0.5 * n * n + c1
} else {
// Step 3:
if (u > p1 + p2) {
y = lambda
break
}
x = delta + twolpd / delta * exponential.next(generator)
y = ceil(x)
v = -exponential.next(generator) - delta * (x + 1) / twolpd
}
// The Squeeze Principle
// Step 4.1:
a = if (x < 0) 1 else 0
t = y * (y + 1) / (2 * lambda)
// Step 4.2
if (v < -t && a == 0) {
y += lambda
break
}
// Step 4.3:
qr = t * ((2 * y + 1) / (6 * lambda) - 1)
qa = qr - t * t / (3 * (lambda + a * (y + 1)))
// Step 4.4:
if (v < qa) {
y += lambda
break
}
// Step 4.5:
if (v > qr) continue
// Step 4.6:
if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) {
y += lambda
break
}
}
min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt()
}
private fun getFactorialLog(n: Int): Double = factorialLog.value(n)
public override fun toString(): String = "Large Mean Poisson deviate"
public companion object {
private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE
private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create()
private val NO_SMALL_MEAN_POISSON_SAMPLER: Sampler<Int> = Sampler { ConstantChain(0) }
public fun of(mean: Double): LargeMeanPoissonSampler {
require(mean >= 1) { "mean is not >= 1: $mean" }
// The algorithm is not valid if Math.floor(mean) is not an integer.
require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" }
return LargeMeanPoissonSampler(mean)
}
}
}

View File

@ -1,61 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.ln
import kotlin.math.sqrt
/**
* [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian
* distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in
* [BoxMullerNormalizedGaussianSampler].
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html]
*/
public class MarsagliaNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
private var nextGaussian = Double.NaN
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
if (nextGaussian.isNaN()) {
val alpha: Double
var x: Double
// Rejection scheme for selecting a pair that lies within the unit circle.
while (true) {
// Generate a pair of numbers within [-1 , 1).
x = 2.0 * generator.nextDouble() - 1.0
val y = 2.0 * generator.nextDouble() - 1.0
val r2 = x * x + y * y
if (r2 < 1 && r2 > 0) {
// Pair (x, y) is within unit circle.
alpha = sqrt(-2 * ln(r2) / r2)
// Keep second element of the pair for next invocation.
nextGaussian = alpha * y
// Return the first element of the generated pair.
break
}
// Pair is not within the unit circle: Generate another one.
}
// Return the first element of the generated pair.
alpha * x
} else {
// Use the second element of the pair (generated at the
// previous invocation).
val r = nextGaussian
// Both elements of the pair have been used.
nextGaussian = Double.NaN
r
}
}
public override fun toString(): String = "Box-Muller (with rejection) normalized Gaussian deviate"
public companion object {
public fun of(): MarsagliaNormalizedGaussianSampler = MarsagliaNormalizedGaussianSampler()
}
}

View File

@ -1,9 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.stat.Sampler
/**
* Marker interface for a sampler that generates values from an N(0,1)
* [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution).
*/
public interface NormalizedGaussianSampler : Sampler<Double>

View File

@ -1,30 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
/**
* Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3
* Important integer-valued distributions: The Poisson distribution. Addison Wesley.
* The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html].
*/
public class PoissonSampler private constructor(mean: Double) : Sampler<Int> {
private val poissonSamplerDelegate: Sampler<Int> = of(mean)
public override fun sample(generator: RandomGenerator): Chain<Int> = poissonSamplerDelegate.sample(generator)
public override fun toString(): String = poissonSamplerDelegate.toString()
public companion object {
private const val PIVOT = 40.0
public fun of(mean: Double): Sampler<Int> =// Each sampler should check the input arguments.
if (mean < PIVOT) SmallMeanPoissonSampler.of(mean) else LargeMeanPoissonSampler.of(mean)
}
}

View File

@ -1,50 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.ceil
import kotlin.math.exp
/**
* Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important
* integer-valued distributions: The Poisson distribution. Addison Wesley.
* - The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead.
*
* Based on Commons RNG implementation.
*
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html].
*/
public class SmallMeanPoissonSampler private constructor(mean: Double) : Sampler<Int> {
private val p0: Double = exp(-mean)
private val limit: Int = (if (p0 > 0)
ceil(1000 * mean)
else
throw IllegalArgumentException("No p(x=0) probability for mean: $mean")).toInt()
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
var n = 0
var r = 1.0
while (n < limit) {
r *= nextDouble()
if (r >= p0) n++ else break
}
n
}
public override fun toString(): String = "Small Mean Poisson deviate"
public companion object {
public fun of(mean: Double): SmallMeanPoissonSampler {
require(mean > 0) { "mean is not strictly positive: $mean" }
return SmallMeanPoissonSampler(mean)
}
}
}

View File

@ -1,88 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.*
/**
* [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a
* Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this
* implementation has been adapted from the C code provided therein.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html].
*/
public class ZigguratNormalizedGaussianSampler private constructor() :
NormalizedGaussianSampler, Sampler<Double> {
private fun sampleOne(generator: RandomGenerator): Double {
val j = generator.nextLong()
val i = (j and LAST.toLong()).toInt()
return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i)
}
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain { sampleOne(this) }
public override fun toString(): String = "Ziggurat normalized Gaussian deviate"
private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double {
var x = hz * W[iz]
return when {
iz == 0 -> {
var y: Double
do {
y = -ln(generator.nextDouble())
x = -ln(generator.nextDouble()) * ONE_OVER_R
} while (y + y < x * x)
val out = R + x
if (hz > 0) out else -out
}
F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x
else -> sampleOne(generator)
}
}
public companion object {
private const val R: Double = 3.442619855899
private const val ONE_OVER_R: Double = 1 / R
private const val V: Double = 9.91256303526217e-3
private val MAX: Double = 2.0.pow(63.0)
private val ONE_OVER_MAX: Double = 1.0 / MAX
private const val LEN: Int = 128
private const val LAST: Int = LEN - 1
private val K: LongArray = LongArray(LEN)
private val W: DoubleArray = DoubleArray(LEN)
private val F: DoubleArray = DoubleArray(LEN)
init {
// Filling the tables.
var d = R
var t = d
var fd = gauss(d)
val q = V / fd
K[0] = (d / q * MAX).toLong()
K[1] = 0
W[0] = q * ONE_OVER_MAX
W[LAST] = d * ONE_OVER_MAX
F[0] = 1.0
F[LAST] = fd
(LAST - 1 downTo 1).forEach { i ->
d = sqrt(-2 * ln(V / d + fd))
fd = gauss(d)
K[i + 1] = (d / t * MAX).toLong()
t = d
F[i] = fd
W[i] = d * ONE_OVER_MAX
}
}
public fun of(): ZigguratNormalizedGaussianSampler = ZigguratNormalizedGaussianSampler()
private fun gauss(x: Double): Double = exp(-0.5 * x * x)
}
}

View File

@ -5,22 +5,23 @@ import kotlinx.coroutines.flow.toList
import kotlinx.coroutines.runBlocking
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
import space.kscience.kmath.stat.samplers.GaussianSampler
import space.kscience.kmath.samplers.GaussianSampler
import space.kscience.kmath.structures.asBuffer
internal class CommonsDistributionsTest {
@Test
fun testNormalDistributionSuspend() {
val distribution = GaussianSampler.of(7.0, 2.0)
fun testNormalDistributionSuspend() = runBlocking {
val distribution = GaussianSampler(7.0, 2.0)
val generator = RandomGenerator.default(1)
val sample = runBlocking { distribution.sample(generator).take(1000).toList() }
Assertions.assertEquals(7.0, sample.average(), 0.1)
val sample = distribution.sample(generator).take(1000).toList().asBuffer()
Assertions.assertEquals(7.0, Mean.double(sample), 0.2)
}
@Test
fun testNormalDistributionBlocking() {
val distribution = GaussianSampler.of(7.0, 2.0)
fun testNormalDistributionBlocking() = runBlocking {
val distribution = GaussianSampler(7.0, 2.0)
val generator = RandomGenerator.default(1)
val sample = runBlocking { distribution.sample(generator).blocking().nextBlock(1000) }
Assertions.assertEquals(7.0, sample.average(), 0.1)
val sample = distribution.sample(generator).nextBufferBlocking(1000)
Assertions.assertEquals(7.0, Mean.double(sample), 0.2)
}
}

View File

@ -20,7 +20,7 @@ internal class StatisticTest {
@Test
fun testParallelMean() {
runBlocking {
val average = Mean.real
val average = Mean.double
.flow(chunked) //create a flow with results
.drop(99) // Skip first 99 values and use one with total data
.first() //get 1e5 data samples average