Fix Samplers and distribution API

This commit is contained in:
Alexander Nozik 2021-04-01 18:18:54 +03:00
parent ae911fcc2f
commit c2bab5d138
44 changed files with 915 additions and 806 deletions

View File

@ -7,6 +7,7 @@ import kscience.plotly.models.ScatterMode
import kscience.plotly.models.TraceValues import kscience.plotly.models.TraceValues
import space.kscience.kmath.commons.optimization.chiSquared import space.kscience.kmath.commons.optimization.chiSquared
import space.kscience.kmath.commons.optimization.minimize import space.kscience.kmath.commons.optimization.minimize
import space.kscience.kmath.distributions.NormalDistribution
import space.kscience.kmath.misc.symbol import space.kscience.kmath.misc.symbol
import space.kscience.kmath.optimization.FunctionOptimization import space.kscience.kmath.optimization.FunctionOptimization
import space.kscience.kmath.optimization.OptimizationResult import space.kscience.kmath.optimization.OptimizationResult
@ -14,7 +15,6 @@ import space.kscience.kmath.real.DoubleVector
import space.kscience.kmath.real.map import space.kscience.kmath.real.map
import space.kscience.kmath.real.step import space.kscience.kmath.real.step
import space.kscience.kmath.stat.RandomGenerator import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.distributions.NormalDistribution
import space.kscience.kmath.structures.asIterable import space.kscience.kmath.structures.asIterable
import space.kscience.kmath.structures.toList import space.kscience.kmath.structures.toList
import kotlin.math.pow import kotlin.math.pow

View File

@ -3,8 +3,8 @@ package space.kscience.kmath.stat
import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async import kotlinx.coroutines.async
import kotlinx.coroutines.runBlocking import kotlinx.coroutines.runBlocking
import space.kscience.kmath.stat.samplers.GaussianSampler
import org.apache.commons.rng.simple.RandomSource import org.apache.commons.rng.simple.RandomSource
import space.kscience.kmath.samplers.GaussianSampler
import java.time.Duration import java.time.Duration
import java.time.Instant import java.time.Instant
import org.apache.commons.rng.sampling.distribution.GaussianSampler as CMGaussianSampler import org.apache.commons.rng.sampling.distribution.GaussianSampler as CMGaussianSampler
@ -12,8 +12,8 @@ import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSa
private suspend fun runKMathChained(): Duration { private suspend fun runKMathChained(): Duration {
val generator = RandomGenerator.fromSource(RandomSource.MT, 123L) val generator = RandomGenerator.fromSource(RandomSource.MT, 123L)
val normal = GaussianSampler.of(7.0, 2.0) val normal = GaussianSampler(7.0, 2.0)
val chain = normal.sample(generator).blocking() val chain = normal.sample(generator)
val startTime = Instant.now() val startTime = Instant.now()
var sum = 0.0 var sum = 0.0

View File

@ -3,7 +3,7 @@ package space.kscience.kmath.stat
import kotlinx.coroutines.runBlocking import kotlinx.coroutines.runBlocking
import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.collectWithState import space.kscience.kmath.chains.collectWithState
import space.kscience.kmath.stat.distributions.NormalDistribution import space.kscience.kmath.distributions.NormalDistribution
/** /**
* The state of distribution averager. * The state of distribution averager.

View File

@ -2,10 +2,10 @@ package space.kscience.kmath.commons.optimization
import kotlinx.coroutines.runBlocking import kotlinx.coroutines.runBlocking
import space.kscience.kmath.commons.expressions.DerivativeStructureExpression import space.kscience.kmath.commons.expressions.DerivativeStructureExpression
import space.kscience.kmath.distributions.NormalDistribution
import space.kscience.kmath.misc.symbol import space.kscience.kmath.misc.symbol
import space.kscience.kmath.optimization.FunctionOptimization import space.kscience.kmath.optimization.FunctionOptimization
import space.kscience.kmath.stat.RandomGenerator import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.distributions.NormalDistribution
import kotlin.math.pow import kotlin.math.pow
import kotlin.test.Test import kotlin.test.Test

View File

@ -0,0 +1,50 @@
package space.kscience.kmath.chains
import space.kscience.kmath.structures.Buffer
public interface BufferChain<out T> : Chain<T> {
public suspend fun nextBuffer(size: Int): Buffer<T>
override suspend fun fork(): BufferChain<T>
}
/**
* A chain with blocking generator that could be used without suspension
*/
public interface BlockingChain<out T> : Chain<T> {
/**
* Get the next value without concurrency support. Not guaranteed to be thread safe.
*/
public fun nextBlocking(): T
override suspend fun next(): T = nextBlocking()
override suspend fun fork(): BlockingChain<T>
}
public interface BlockingBufferChain<out T> : BlockingChain<T>, BufferChain<T> {
public fun nextBufferBlocking(size: Int): Buffer<T>
public override fun nextBlocking(): T = nextBufferBlocking(1)[0]
public override suspend fun nextBuffer(size: Int): Buffer<T> = nextBufferBlocking(size)
override suspend fun fork(): BlockingBufferChain<T>
}
public suspend inline fun <reified T : Any> Chain<T>.nextBuffer(size: Int): Buffer<T> = if (this is BufferChain) {
nextBuffer(size)
} else {
Buffer.auto(size) { next() }
}
public inline fun <reified T : Any> BlockingChain<T>.nextBufferBlocking(
size: Int,
): Buffer<T> = if (this is BlockingBufferChain) {
nextBufferBlocking(size)
} else {
Buffer.auto(size) { nextBlocking() }
}

View File

@ -1,13 +1,27 @@
package space.kscience.kmath.chains package space.kscience.kmath.chains
import space.kscience.kmath.structures.DoubleBuffer
/** /**
* Chunked, specialized chain for real values. * Chunked, specialized chain for double values, which supports blocking [nextBlocking] operation
*/ */
public interface BlockingDoubleChain : Chain<Double> { public interface BlockingDoubleChain : BlockingBufferChain<Double> {
public override suspend fun next(): Double
/** /**
* Returns an [DoubleArray] chunk of [size] values of [next]. * Returns an [DoubleArray] chunk of [size] values of [next].
*/ */
public suspend fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { next() } public override fun nextBufferBlocking(size: Int): DoubleBuffer
override suspend fun fork(): BlockingDoubleChain
public companion object
} }
public fun BlockingDoubleChain.map(transform: (Double) -> Double): BlockingDoubleChain = object : BlockingDoubleChain {
override fun nextBufferBlocking(size: Int): DoubleBuffer {
val block = this@map.nextBufferBlocking(size)
return DoubleBuffer(size) { transform(block[it]) }
}
override suspend fun fork(): BlockingDoubleChain = this@map.fork().map(transform)
}

View File

@ -1,9 +1,12 @@
package space.kscience.kmath.chains package space.kscience.kmath.chains
import space.kscience.kmath.structures.IntBuffer
/** /**
* Performance optimized chain for integer values * Performance optimized chain for integer values
*/ */
public interface BlockingIntChain : Chain<Int> { public interface BlockingIntChain : BlockingBufferChain<Int> {
public override suspend fun next(): Int override fun nextBufferBlocking(size: Int): IntBuffer
public suspend fun nextBlock(size: Int): IntArray = IntArray(size) { next() }
} override suspend fun fork(): BlockingIntChain
}

View File

@ -24,20 +24,20 @@ import kotlinx.coroutines.sync.withLock
/** /**
* A not-necessary-Markov chain of some type * A not-necessary-Markov chain of some type
* @param R - the chain element type * @param T - the chain element type
*/ */
public interface Chain<out R> : Flow<R> { public interface Chain<out T> : Flow<T> {
/** /**
* Generate next value, changing state if needed * Generate next value, changing state if needed
*/ */
public suspend fun next(): R public suspend fun next(): T
/** /**
* Create a copy of current chain state. Consuming resulting chain does not affect initial chain * Create a copy of current chain state. Consuming resulting chain does not affect initial chain
*/ */
public fun fork(): Chain<R> public suspend fun fork(): Chain<T>
override suspend fun collect(collector: FlowCollector<R>): Unit = override suspend fun collect(collector: FlowCollector<T>): Unit =
flow { while (true) emit(next()) }.collect(collector) flow { while (true) emit(next()) }.collect(collector)
public companion object public companion object
@ -51,7 +51,7 @@ public fun <T> Sequence<T>.asChain(): Chain<T> = iterator().asChain()
*/ */
public class SimpleChain<out R>(private val gen: suspend () -> R) : Chain<R> { public class SimpleChain<out R>(private val gen: suspend () -> R) : Chain<R> {
public override suspend fun next(): R = gen() public override suspend fun next(): R = gen()
public override fun fork(): Chain<R> = this public override suspend fun fork(): Chain<R> = this
} }
/** /**
@ -69,7 +69,7 @@ public class MarkovChain<out R : Any>(private val seed: suspend () -> R, private
newValue newValue
} }
public override fun fork(): Chain<R> = MarkovChain(seed = { value ?: seed() }, gen = gen) public override suspend fun fork(): Chain<R> = MarkovChain(seed = { value ?: seed() }, gen = gen)
} }
/** /**
@ -94,7 +94,7 @@ public class StatefulChain<S, out R>(
newValue newValue
} }
public override fun fork(): Chain<R> = StatefulChain(forkState(state), seed, forkState, gen) public override suspend fun fork(): Chain<R> = StatefulChain(forkState(state), seed, forkState, gen)
} }
/** /**
@ -102,7 +102,7 @@ public class StatefulChain<S, out R>(
*/ */
public class ConstantChain<out T>(public val value: T) : Chain<T> { public class ConstantChain<out T>(public val value: T) : Chain<T> {
public override suspend fun next(): T = value public override suspend fun next(): T = value
public override fun fork(): Chain<T> = this public override suspend fun fork(): Chain<T> = this
} }
/** /**
@ -111,7 +111,7 @@ public class ConstantChain<out T>(public val value: T) : Chain<T> {
*/ */
public fun <T, R> Chain<T>.map(func: suspend (T) -> R): Chain<R> = object : Chain<R> { public fun <T, R> Chain<T>.map(func: suspend (T) -> R): Chain<R> = object : Chain<R> {
override suspend fun next(): R = func(this@map.next()) override suspend fun next(): R = func(this@map.next())
override fun fork(): Chain<R> = this@map.fork().map(func) override suspend fun fork(): Chain<R> = this@map.fork().map(func)
} }
/** /**
@ -127,7 +127,7 @@ public fun <T> Chain<T>.filter(block: (T) -> Boolean): Chain<T> = object : Chain
return next return next
} }
override fun fork(): Chain<T> = this@filter.fork().filter(block) override suspend fun fork(): Chain<T> = this@filter.fork().filter(block)
} }
/** /**
@ -135,7 +135,7 @@ public fun <T> Chain<T>.filter(block: (T) -> Boolean): Chain<T> = object : Chain
*/ */
public fun <T, R> Chain<T>.collect(mapper: suspend (Chain<T>) -> R): Chain<R> = object : Chain<R> { public fun <T, R> Chain<T>.collect(mapper: suspend (Chain<T>) -> R): Chain<R> = object : Chain<R> {
override suspend fun next(): R = mapper(this@collect) override suspend fun next(): R = mapper(this@collect)
override fun fork(): Chain<R> = this@collect.fork().collect(mapper) override suspend fun fork(): Chain<R> = this@collect.fork().collect(mapper)
} }
public fun <T, S, R> Chain<T>.collectWithState( public fun <T, S, R> Chain<T>.collectWithState(
@ -145,7 +145,7 @@ public fun <T, S, R> Chain<T>.collectWithState(
): Chain<R> = object : Chain<R> { ): Chain<R> = object : Chain<R> {
override suspend fun next(): R = state.mapper(this@collectWithState) override suspend fun next(): R = state.mapper(this@collectWithState)
override fun fork(): Chain<R> = override suspend fun fork(): Chain<R> =
this@collectWithState.fork().collectWithState(stateFork(state), stateFork, mapper) this@collectWithState.fork().collectWithState(stateFork(state), stateFork, mapper)
} }
@ -154,5 +154,5 @@ public fun <T, S, R> Chain<T>.collectWithState(
*/ */
public fun <T, U, R> Chain<T>.zip(other: Chain<U>, block: suspend (T, U) -> R): Chain<R> = object : Chain<R> { public fun <T, U, R> Chain<T>.zip(other: Chain<U>, block: suspend (T, U) -> R): Chain<R> = object : Chain<R> {
override suspend fun next(): R = block(this@zip.next(), other.next()) override suspend fun next(): R = block(this@zip.next(), other.next())
override fun fork(): Chain<R> = this@zip.fork().zip(other.fork(), block) override suspend fun fork(): Chain<R> = this@zip.fork().zip(other.fork(), block)
} }

View File

@ -6,7 +6,6 @@ import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.structures.Buffer import space.kscience.kmath.structures.Buffer
import space.kscience.kmath.structures.BufferFactory import space.kscience.kmath.structures.BufferFactory
import space.kscience.kmath.structures.DoubleBuffer import space.kscience.kmath.structures.DoubleBuffer
import space.kscience.kmath.structures.asBuffer
/** /**
* Create a [Flow] from buffer * Create a [Flow] from buffer
@ -50,7 +49,7 @@ public fun Flow<Double>.chunked(bufferSize: Int): Flow<DoubleBuffer> = flow {
if (this@chunked is BlockingDoubleChain) { if (this@chunked is BlockingDoubleChain) {
// performance optimization for blocking primitive chain // performance optimization for blocking primitive chain
while (true) emit(nextBlock(bufferSize).asBuffer()) while (true) emit(nextBufferBlocking(bufferSize))
} else { } else {
val array = DoubleArray(bufferSize) val array = DoubleArray(bufferSize)
var counter = 0 var counter = 0

View File

@ -2,6 +2,10 @@ plugins {
id("ru.mipt.npm.gradle.mpp") id("ru.mipt.npm.gradle.mpp")
} }
kscience{
useAtomic()
}
kotlin.sourceSets { kotlin.sourceSets {
commonMain { commonMain {
dependencies { dependencies {

View File

@ -0,0 +1,38 @@
package space.kscience.kmath.distributions
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
/**
* A distribution of typed objects.
*/
public interface Distribution<T : Any> : Sampler<T> {
/**
* A probability value for given argument [arg].
* For continuous distributions returns PDF
*/
public fun probability(arg: T): Double
public override fun sample(generator: RandomGenerator): Chain<T>
/**
* An empty companion. Distribution factories should be written as its extensions
*/
public companion object
}
public interface UnivariateDistribution<T : Comparable<T>> : Distribution<T> {
/**
* Cumulative distribution for ordered parameter (CDF)
*/
public fun cumulative(arg: T): Double
}
/**
* Compute probability integral in an interval
*/
public fun <T : Comparable<T>> UnivariateDistribution<T>.integral(from: T, to: T): Double {
require(to > from)
return cumulative(to) - cumulative(from)
}

View File

@ -1,7 +1,8 @@
package space.kscience.kmath.stat package space.kscience.kmath.distributions
import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.SimpleChain import space.kscience.kmath.chains.SimpleChain
import space.kscience.kmath.stat.RandomGenerator
/** /**
* A multivariate distribution which takes a map of parameters * A multivariate distribution which takes a map of parameters

View File

@ -1,12 +1,11 @@
package space.kscience.kmath.stat.distributions package space.kscience.kmath.distributions
import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.Chain
import space.kscience.kmath.internal.InternalErf
import space.kscience.kmath.samplers.GaussianSampler
import space.kscience.kmath.samplers.NormalizedGaussianSampler
import space.kscience.kmath.samplers.ZigguratNormalizedGaussianSampler
import space.kscience.kmath.stat.RandomGenerator import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.UnivariateDistribution
import space.kscience.kmath.stat.internal.InternalErf
import space.kscience.kmath.stat.samplers.GaussianSampler
import space.kscience.kmath.stat.samplers.NormalizedGaussianSampler
import space.kscience.kmath.stat.samplers.ZigguratNormalizedGaussianSampler
import kotlin.math.* import kotlin.math.*
/** /**
@ -16,8 +15,8 @@ public inline class NormalDistribution(public val sampler: GaussianSampler) : Un
public constructor( public constructor(
mean: Double, mean: Double,
standardDeviation: Double, standardDeviation: Double,
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of(), normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler,
) : this(GaussianSampler.of(mean, standardDeviation, normalized)) ) : this(GaussianSampler(mean, standardDeviation, normalized))
public override fun probability(arg: Double): Double { public override fun probability(arg: Double): Double {
val x1 = (arg - sampler.mean) / sampler.standardDeviation val x1 = (arg - sampler.mean) / sampler.standardDeviation

View File

@ -1,4 +1,4 @@
package space.kscience.kmath.stat.internal package space.kscience.kmath.internal
import kotlin.math.abs import kotlin.math.abs

View File

@ -1,4 +1,4 @@
package space.kscience.kmath.stat.internal package space.kscience.kmath.internal
import kotlin.math.* import kotlin.math.*

View File

@ -1,4 +1,4 @@
package space.kscience.kmath.stat.internal package space.kscience.kmath.internal
import kotlin.math.ln import kotlin.math.ln
import kotlin.math.min import kotlin.math.min

View File

@ -0,0 +1,72 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.structures.DoubleBuffer
import kotlin.math.ln
import kotlin.math.pow
/**
* Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html).
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html].
*/
public class AhrensDieterExponentialSampler(public val mean: Double) : Sampler<Double> {
init {
require(mean > 0) { "mean is not strictly positive: $mean" }
}
public override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
override fun nextBlocking(): Double {
// Step 1:
var a = 0.0
var u = generator.nextDouble()
// Step 2 and 3:
while (u < 0.5) {
a += EXPONENTIAL_SA_QI[0]
u *= 2.0
}
// Step 4 (now u >= 0.5):
u += u - 1
// Step 5:
if (u <= EXPONENTIAL_SA_QI[0]) return mean * (a + u)
// Step 6:
var i = 0 // Should be 1, be we iterate before it in while using 0.
var u2 = generator.nextDouble()
var umin = u2
// Step 7 and 8:
do {
++i
u2 = generator.nextDouble()
if (u2 < umin) umin = u2
// Step 8:
} while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1.
return mean * (a + umin * EXPONENTIAL_SA_QI[0])
}
override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { nextBlocking() }
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
}
public companion object {
private val EXPONENTIAL_SA_QI by lazy {
val ln2 = ln(2.0)
var qi = 0.0
DoubleArray(16) { i ->
qi += ln2.pow(i + 1.0) / space.kscience.kmath.internal.InternalUtils.factorial(i + 1)
qi
}
}
}
}

View File

@ -1,4 +1,4 @@
package space.kscience.kmath.stat.samplers package space.kscience.kmath.samplers
import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator import space.kscience.kmath.stat.RandomGenerator
@ -80,7 +80,7 @@ public class AhrensDieterMarsagliaTsangGammaSampler private constructor(
private val gaussian: NormalizedGaussianSampler private val gaussian: NormalizedGaussianSampler
init { init {
gaussian = ZigguratNormalizedGaussianSampler.of() gaussian = ZigguratNormalizedGaussianSampler
dOptim = alpha - ONE_THIRD dOptim = alpha - ONE_THIRD
cOptim = ONE_THIRD / sqrt(dOptim) cOptim = ONE_THIRD / sqrt(dOptim)
} }

View File

@ -1,10 +1,10 @@
package space.kscience.kmath.stat.samplers package space.kscience.kmath.samplers
import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.Chain
import space.kscience.kmath.internal.InternalUtils
import space.kscience.kmath.stat.RandomGenerator import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.internal.InternalUtils
import kotlin.math.ceil import kotlin.math.ceil
import kotlin.math.max import kotlin.math.max
import kotlin.math.min import kotlin.math.min
@ -39,12 +39,12 @@ import kotlin.math.min
public open class AliasMethodDiscreteSampler private constructor( public open class AliasMethodDiscreteSampler private constructor(
// Deliberate direct storage of input arrays // Deliberate direct storage of input arrays
protected val probability: LongArray, protected val probability: LongArray,
protected val alias: IntArray protected val alias: IntArray,
) : Sampler<Int> { ) : Sampler<Int> {
private class SmallTableAliasMethodDiscreteSampler( private class SmallTableAliasMethodDiscreteSampler(
probability: LongArray, probability: LongArray,
alias: IntArray alias: IntArray,
) : AliasMethodDiscreteSampler(probability, alias) { ) : AliasMethodDiscreteSampler(probability, alias) {
// Assume the table size is a power of 2 and create the mask // Assume the table size is a power of 2 and create the mask
private val mask: Int = alias.size - 1 private val mask: Int = alias.size - 1
@ -111,110 +111,6 @@ public open class AliasMethodDiscreteSampler private constructor(
private const val CONVERT_TO_NUMERATOR: Double = ONE_AS_NUMERATOR.toDouble() private const val CONVERT_TO_NUMERATOR: Double = ONE_AS_NUMERATOR.toDouble()
private const val MAX_SMALL_POWER_2_SIZE = 1 shl 11 private const val MAX_SMALL_POWER_2_SIZE = 1 shl 11
public fun of(
probabilities: DoubleArray,
alpha: Int = DEFAULT_ALPHA
): Sampler<Int> {
// The Alias method balances N categories with counts around the mean into N sections,
// each allocated 'mean' observations.
//
// Consider 4 categories with counts 6,3,2,1. The histogram can be balanced into a
// 2D array as 4 sections with a height of the mean:
//
// 6
// 6
// 6
// 63 => 6366 --
// 632 6326 |-- mean
// 6321 6321 --
//
// section abcd
//
// Each section is divided as:
// a: 6=1/1
// b: 3=1/1
// c: 2=2/3; 6=1/3 (6 is the alias)
// d: 1=1/3; 6=2/3 (6 is the alias)
//
// The sample is obtained by randomly selecting a section, then choosing which category
// from the pair based on a uniform random deviate.
val sumProb = InternalUtils.validateProbabilities(probabilities)
// Allow zero-padding
val n = computeSize(probabilities.size, alpha)
// Partition into small and large by splitting on the average.
val mean = sumProb / n
// The cardinality of smallSize + largeSize = n.
// So fill the same array from either end.
val indices = IntArray(n)
var large = n
var small = 0
probabilities.indices.forEach { i ->
if (probabilities[i] >= mean) indices[--large] = i else indices[small++] = i
}
small = fillRemainingIndices(probabilities.size, indices, small)
// This may be smaller than the input length if the probabilities were already padded.
val nonZeroIndex = findLastNonZeroIndex(probabilities)
// The probabilities are modified so use a copy.
// Note: probabilities are required only up to last nonZeroIndex
val remainingProbabilities = probabilities.copyOf(nonZeroIndex + 1)
// Allocate the final tables.
// Probability table may be truncated (when zero padded).
// The alias table is full length.
val probability = LongArray(remainingProbabilities.size)
val alias = IntArray(n)
// This loop uses each large in turn to fill the alias table for small probabilities that
// do not reach the requirement to fill an entire section alone (i.e. p < mean).
// Since the sum of the small should be less than the sum of the large it should use up
// all the small first. However floating point round-off can result in
// misclassification of items as small or large. The Vose algorithm handles this using
// a while loop conditioned on the size of both sets and a subsequent loop to use
// unpaired items.
while (large != n && small != 0) {
// Index of the small and the large probabilities.
val j = indices[--small]
val k = indices[large++]
// Optimisation for zero-padded input:
// p(j) = 0 above the last nonZeroIndex
if (j > nonZeroIndex)
// The entire amount for the section is taken from the alias.
remainingProbabilities[k] -= mean
else {
val pj = remainingProbabilities[j]
// Item j is a small probability that is below the mean.
// Compute the weight of the section for item j: pj / mean.
// This is scaled by 2^53 and the ceiling function used to round-up
// the probability to a numerator of a fraction in the range [1,2^53].
// Ceiling ensures non-zero values.
probability[j] = ceil(CONVERT_TO_NUMERATOR * (pj / mean)).toLong()
// The remaining amount for the section is taken from the alias.
// Effectively: probabilities[k] -= (mean - pj)
remainingProbabilities[k] += pj - mean
}
// If not j then the alias is k
alias[j] = k
// Add the remaining probability from large to the appropriate list.
if (remainingProbabilities[k] >= mean) indices[--large] = k else indices[small++] = k
}
// Final loop conditions to consume unpaired items.
// Note: The large set should never be non-empty but this can occur due to round-off
// error so consume from both.
fillTable(probability, alias, indices, 0, small)
fillTable(probability, alias, indices, large, n)
// Change the algorithm for small power of 2 sized tables
return if (isSmallPowerOf2(n))
SmallTableAliasMethodDiscreteSampler(probability, alias)
else
AliasMethodDiscreteSampler(probability, alias)
}
private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int { private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int {
var updatedSmall = small var updatedSmall = small
(length until indices.size).forEach { i -> indices[updatedSmall++] = i } (length until indices.size).forEach { i -> indices[updatedSmall++] = i }
@ -246,7 +142,7 @@ public open class AliasMethodDiscreteSampler private constructor(
alias: IntArray, alias: IntArray,
indices: IntArray, indices: IntArray,
start: Int, start: Int,
end: Int end: Int,
) = (start until end).forEach { i -> ) = (start until end).forEach { i ->
val index = indices[i] val index = indices[i]
probability[index] = ONE_AS_NUMERATOR probability[index] = ONE_AS_NUMERATOR
@ -283,4 +179,110 @@ public open class AliasMethodDiscreteSampler private constructor(
return n - (mutI ushr 1) return n - (mutI ushr 1)
} }
} }
@Suppress("FunctionName")
public fun AliasMethodDiscreteSampler(
probabilities: DoubleArray,
alpha: Int = DEFAULT_ALPHA,
): Sampler<Int> {
// The Alias method balances N categories with counts around the mean into N sections,
// each allocated 'mean' observations.
//
// Consider 4 categories with counts 6,3,2,1. The histogram can be balanced into a
// 2D array as 4 sections with a height of the mean:
//
// 6
// 6
// 6
// 63 => 6366 --
// 632 6326 |-- mean
// 6321 6321 --
//
// section abcd
//
// Each section is divided as:
// a: 6=1/1
// b: 3=1/1
// c: 2=2/3; 6=1/3 (6 is the alias)
// d: 1=1/3; 6=2/3 (6 is the alias)
//
// The sample is obtained by randomly selecting a section, then choosing which category
// from the pair based on a uniform random deviate.
val sumProb = InternalUtils.validateProbabilities(probabilities)
// Allow zero-padding
val n = computeSize(probabilities.size, alpha)
// Partition into small and large by splitting on the average.
val mean = sumProb / n
// The cardinality of smallSize + largeSize = n.
// So fill the same array from either end.
val indices = IntArray(n)
var large = n
var small = 0
probabilities.indices.forEach { i ->
if (probabilities[i] >= mean) indices[--large] = i else indices[small++] = i
}
small = fillRemainingIndices(probabilities.size, indices, small)
// This may be smaller than the input length if the probabilities were already padded.
val nonZeroIndex = findLastNonZeroIndex(probabilities)
// The probabilities are modified so use a copy.
// Note: probabilities are required only up to last nonZeroIndex
val remainingProbabilities = probabilities.copyOf(nonZeroIndex + 1)
// Allocate the final tables.
// Probability table may be truncated (when zero padded).
// The alias table is full length.
val probability = LongArray(remainingProbabilities.size)
val alias = IntArray(n)
// This loop uses each large in turn to fill the alias table for small probabilities that
// do not reach the requirement to fill an entire section alone (i.e. p < mean).
// Since the sum of the small should be less than the sum of the large it should use up
// all the small first. However floating point round-off can result in
// misclassification of items as small or large. The Vose algorithm handles this using
// a while loop conditioned on the size of both sets and a subsequent loop to use
// unpaired items.
while (large != n && small != 0) {
// Index of the small and the large probabilities.
val j = indices[--small]
val k = indices[large++]
// Optimisation for zero-padded input:
// p(j) = 0 above the last nonZeroIndex
if (j > nonZeroIndex)
// The entire amount for the section is taken from the alias.
remainingProbabilities[k] -= mean
else {
val pj = remainingProbabilities[j]
// Item j is a small probability that is below the mean.
// Compute the weight of the section for item j: pj / mean.
// This is scaled by 2^53 and the ceiling function used to round-up
// the probability to a numerator of a fraction in the range [1,2^53].
// Ceiling ensures non-zero values.
probability[j] = ceil(CONVERT_TO_NUMERATOR * (pj / mean)).toLong()
// The remaining amount for the section is taken from the alias.
// Effectively: probabilities[k] -= (mean - pj)
remainingProbabilities[k] += pj - mean
}
// If not j then the alias is k
alias[j] = k
// Add the remaining probability from large to the appropriate list.
if (remainingProbabilities[k] >= mean) indices[--large] = k else indices[small++] = k
}
// Final loop conditions to consume unpaired items.
// Note: The large set should never be non-empty but this can occur due to round-off
// error so consume from both.
fillTable(probability, alias, indices, 0, small)
fillTable(probability, alias, indices, large, n)
// Change the algorithm for small power of 2 sized tables
return if (isSmallPowerOf2(n)) {
SmallTableAliasMethodDiscreteSampler(probability, alias)
} else {
AliasMethodDiscreteSampler(probability, alias)
}
}
} }

View File

@ -0,0 +1,52 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.structures.DoubleBuffer
import kotlin.math.*
/**
* [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian
* distribution.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html].
*/
public object BoxMullerSampler : NormalizedGaussianSampler {
override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
var state = Double.NaN
override fun nextBufferBlocking(size: Int): DoubleBuffer {
val xs = generator.nextDoubleBuffer(size)
val ys = generator.nextDoubleBuffer(size)
return DoubleBuffer(size) { index ->
if (state.isNaN()) {
// Generate a pair of Gaussian numbers.
val x = xs[index]
val y = ys[index]
val alpha = 2 * PI * x
val r = sqrt(-2 * ln(y))
// Keep second element of the pair for next invocation.
state = r * sin(alpha)
// Return the first element of the generated pair.
r * cos(alpha)
} else {
// Use the second element of the pair (generated at the
// previous invocation).
state.also {
// Both elements of the pair have been used.
state = Double.NaN
}
}
}
}
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
}
}

View File

@ -0,0 +1,13 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingBufferChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.structures.Buffer
public class ConstantSampler<T : Any>(public val const: T) : Sampler<T> {
override fun sample(generator: RandomGenerator): BlockingBufferChain<T> = object : BlockingBufferChain<T> {
override fun nextBufferBlocking(size: Int): Buffer<T> = Buffer.boxing(size) { const }
override suspend fun fork(): BlockingBufferChain<T> = this
}
}

View File

@ -0,0 +1,34 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.chains.map
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
/**
* Sampling from a Gaussian distribution with given mean and standard deviation.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html].
*
* @property mean the mean of the distribution.
* @property standardDeviation the variance of the distribution.
*/
public class GaussianSampler(
public val mean: Double,
public val standardDeviation: Double,
private val normalized: NormalizedGaussianSampler = BoxMullerSampler
) : Sampler<Double> {
init {
require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" }
}
public override fun sample(generator: RandomGenerator): BlockingDoubleChain = normalized
.sample(generator)
.map { standardDeviation * it + mean }
override fun toString(): String = "N($mean, $standardDeviation)"
public companion object
}

View File

@ -0,0 +1,68 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingIntChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.structures.IntBuffer
import kotlin.math.exp
/**
* Sampler for the Poisson distribution.
* - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
* This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead.
*
* Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor.
*
* Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html].
*/
public class KempSmallMeanPoissonSampler internal constructor(
private val p0: Double,
private val mean: Double,
) : Sampler<Int> {
public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain {
override fun nextBlocking(): Int {
//TODO move to nextBufferBlocking
// Note on the algorithm:
// - X is the unknown sample deviate (the output of the algorithm)
// - x is the current value from the distribution
// - p is the probability of the current value x, p(X=x)
// - u is effectively the cumulative probability that the sample X
// is equal or above the current value x, p(X>=x)
// So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
var u = generator.nextDouble()
var x = 0
var p = p0
while (u > p) {
u -= p
// Compute the next probability using a recurrence relation.
// p(x+1) = p(x) * mean / (x+1)
p *= mean / ++x
// The algorithm listed in Kemp (1981) does not check that the rolling probability
// is positive. This check is added to ensure no errors when the limit of the summation
// 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic.
if (p == 0.0) return x
}
return x
}
override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() }
override suspend fun fork(): BlockingIntChain = sample(generator.fork())
}
public override fun toString(): String = "Kemp Small Mean Poisson deviate"
}
public fun KempSmallMeanPoissonSampler(mean: Double): KempSmallMeanPoissonSampler {
require(mean > 0) { "Mean is not strictly positive: $mean" }
val p0 = exp(-mean)
// Probability must be positive. As mean increases then p(0) decreases.
require(p0 > 0) { "No probability for mean: $mean" }
return KempSmallMeanPoissonSampler(p0, mean)
}

View File

@ -0,0 +1,61 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.structures.DoubleBuffer
import kotlin.math.ln
import kotlin.math.sqrt
/**
* [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian
* distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in
* [BoxMullerNormalizedGaussianSampler].
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html]
*/
public object MarsagliaNormalizedGaussianSampler : NormalizedGaussianSampler {
override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
var nextGaussian = Double.NaN
override fun nextBlocking(): Double {
return if (nextGaussian.isNaN()) {
val alpha: Double
var x: Double
// Rejection scheme for selecting a pair that lies within the unit circle.
while (true) {
// Generate a pair of numbers within [-1 , 1).
x = 2.0 * generator.nextDouble() - 1.0
val y = 2.0 * generator.nextDouble() - 1.0
val r2 = x * x + y * y
if (r2 < 1 && r2 > 0) {
// Pair (x, y) is within unit circle.
alpha = sqrt(-2 * ln(r2) / r2)
// Keep second element of the pair for next invocation.
nextGaussian = alpha * y
// Return the first element of the generated pair.
break
}
// Pair is not within the unit circle: Generate another one.
}
// Return the first element of the generated pair.
alpha * x
} else {
// Use the second element of the pair (generated at the
// previous invocation).
val r = nextGaussian
// Both elements of the pair have been used.
nextGaussian = Double.NaN
r
}
}
override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { nextBlocking() }
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
}
}

View File

@ -0,0 +1,18 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
public interface BlockingDoubleSampler: Sampler<Double>{
override fun sample(generator: RandomGenerator): BlockingDoubleChain
}
/**
* Marker interface for a sampler that generates values from an N(0,1)
* [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution).
*/
public fun interface NormalizedGaussianSampler : BlockingDoubleSampler{
public companion object
}

View File

@ -0,0 +1,203 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingIntChain
import space.kscience.kmath.internal.InternalUtils
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.structures.IntBuffer
import kotlin.math.*
private const val PIVOT = 40.0
/**
* Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3
* Important integer-valued distributions: The Poisson distribution. Addison Wesley.
* The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html].
*/
@Suppress("FunctionName")
public fun PoissonSampler(mean: Double): Sampler<Int> {
return if (mean < PIVOT) SmallMeanPoissonSampler(mean) else LargeMeanPoissonSampler(mean)
}
/**
* Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important
* integer-valued distributions: The Poisson distribution. Addison Wesley.
* - The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead.
*
* Based on Commons RNG implementation.
*
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html].
*/
public class SmallMeanPoissonSampler(public val mean: Double) : Sampler<Int> {
init {
require(mean > 0) { "mean is not strictly positive: $mean" }
}
private val p0: Double = exp(-mean)
private val limit: Int = if (p0 > 0) {
ceil(1000 * mean)
} else {
throw IllegalArgumentException("No p(x=0) probability for mean: $mean")
}.toInt()
public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain {
override fun nextBlocking(): Int {
var n = 0
var r = 1.0
while (n < limit) {
r *= generator.nextDouble()
if (r >= p0) n++ else break
}
return n
}
override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() }
override suspend fun fork(): BlockingIntChain = sample(generator.fork())
}
public override fun toString(): String = "Small Mean Poisson deviate"
}
/**
* Sampler for the Poisson distribution.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables
* Computing vol. 26 pp. 197-207.
*
* This sampler is suitable for mean >= 40.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html].
*/
public class LargeMeanPoissonSampler(public val mean: Double) : Sampler<Int> {
init {
require(mean >= 1) { "mean is not >= 1: $mean" }
// The algorithm is not valid if Math.floor(mean) is not an integer.
require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" }
}
private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG
private val lambda: Double = floor(mean)
private val logLambda: Double = ln(lambda)
private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt())
private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1))
private val halfDelta: Double = delta / 2
private val twolpd: Double = 2 * lambda + delta
private val c1: Double = 1 / (8 * lambda)
private val a1: Double = sqrt(PI * twolpd) * exp(c1)
private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd)
private val aSum: Double = a1 + a2 + 1
private val p1: Double = a1 / aSum
private val p2: Double = a2 / aSum
public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain {
override fun nextBlocking(): Int {
val exponential = AhrensDieterExponentialSampler(1.0).sample(generator)
val gaussian = ZigguratNormalizedGaussianSampler.sample(generator)
val smallMeanPoissonSampler = if (mean - lambda < Double.MIN_VALUE) {
null
} else {
KempSmallMeanPoissonSampler(mean - lambda).sample(generator)
}
val y2 = smallMeanPoissonSampler?.nextBlocking() ?: 0
var x: Double
var y: Double
var v: Double
var a: Int
var t: Double
var qr: Double
var qa: Double
while (true) {
// Step 1:
val u = generator.nextDouble()
if (u <= p1) {
// Step 2:
val n = gaussian.nextBlocking()
x = n * sqrt(lambda + halfDelta) - 0.5
if (x > delta || x < -lambda) continue
y = if (x < 0) floor(x) else ceil(x)
val e = exponential.nextBlocking()
v = -e - 0.5 * n * n + c1
} else {
// Step 3:
if (u > p1 + p2) {
y = lambda
break
}
x = delta + twolpd / delta * exponential.nextBlocking()
y = ceil(x)
v = -exponential.nextBlocking() - delta * (x + 1) / twolpd
}
// The Squeeze Principle
// Step 4.1:
a = if (x < 0) 1 else 0
t = y * (y + 1) / (2 * lambda)
// Step 4.2
if (v < -t && a == 0) {
y += lambda
break
}
// Step 4.3:
qr = t * ((2 * y + 1) / (6 * lambda) - 1)
qa = qr - t * t / (3 * (lambda + a * (y + 1)))
// Step 4.4:
if (v < qa) {
y += lambda
break
}
// Step 4.5:
if (v > qr) continue
// Step 4.6:
if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) {
y += lambda
break
}
}
return min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt()
}
override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() }
override suspend fun fork(): BlockingIntChain = sample(generator.fork())
}
private fun getFactorialLog(n: Int): Double = factorialLog.value(n)
public override fun toString(): String = "Large Mean Poisson deviate"
public companion object {
private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE
private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create()
}
}

View File

@ -0,0 +1,88 @@
package space.kscience.kmath.samplers
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.structures.DoubleBuffer
import kotlin.math.*
/**
* [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a
* Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this
* implementation has been adapted from the C code provided therein.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html].
*/
public object ZigguratNormalizedGaussianSampler : NormalizedGaussianSampler {
private const val R: Double = 3.442619855899
private const val ONE_OVER_R: Double = 1 / R
private const val V: Double = 9.91256303526217e-3
private val MAX: Double = 2.0.pow(63.0)
private val ONE_OVER_MAX: Double = 1.0 / MAX
private const val LEN: Int = 128
private const val LAST: Int = LEN - 1
private val K: LongArray = LongArray(LEN)
private val W: DoubleArray = DoubleArray(LEN)
private val F: DoubleArray = DoubleArray(LEN)
init {
// Filling the tables.
var d = R
var t = d
var fd = gauss(d)
val q = V / fd
K[0] = (d / q * MAX).toLong()
K[1] = 0
W[0] = q * ONE_OVER_MAX
W[LAST] = d * ONE_OVER_MAX
F[0] = 1.0
F[LAST] = fd
(LAST - 1 downTo 1).forEach { i ->
d = sqrt(-2 * ln(V / d + fd))
fd = gauss(d)
K[i + 1] = (d / t * MAX).toLong()
t = d
F[i] = fd
W[i] = d * ONE_OVER_MAX
}
}
private fun gauss(x: Double): Double = exp(-0.5 * x * x)
private fun sampleOne(generator: RandomGenerator): Double {
val j = generator.nextLong()
val i = (j and LAST.toLong()).toInt()
return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i)
}
override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { sampleOne(generator) }
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
}
private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double {
var x = hz * W[iz]
return when {
iz == 0 -> {
var y: Double
do {
y = -ln(generator.nextDouble())
x = -ln(generator.nextDouble()) * ONE_OVER_R
} while (y + y < x * x)
val out = R + x
if (hz > 0) out else -out
}
F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x
else -> sampleOne(generator)
}
}
}

View File

@ -1,8 +1,8 @@
package space.kscience.kmath.stat package space.kscience.kmath.stat
import space.kscience.kmath.chains.BlockingDoubleChain import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.chains.BlockingIntChain
import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.Chain
import space.kscience.kmath.structures.DoubleBuffer
/** /**
* A possibly stateful chain producing random values. * A possibly stateful chain producing random values.
@ -11,12 +11,24 @@ import space.kscience.kmath.chains.Chain
*/ */
public class RandomChain<out R>( public class RandomChain<out R>(
public val generator: RandomGenerator, public val generator: RandomGenerator,
private val gen: suspend RandomGenerator.() -> R private val gen: suspend RandomGenerator.() -> R,
) : Chain<R> { ) : Chain<R> {
override suspend fun next(): R = generator.gen() override suspend fun next(): R = generator.gen()
override fun fork(): Chain<R> = RandomChain(generator.fork(), gen) override suspend fun fork(): Chain<R> = RandomChain(generator.fork(), gen)
}
/**
* Create a generic random chain with provided [generator]
*/
public fun <R> RandomGenerator.chain(generator: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, generator)
/**
* A type-specific double chunk random chain
*/
public class UniformDoubleChain(public val generator: RandomGenerator) : BlockingDoubleChain {
public override fun nextBufferBlocking(size: Int): DoubleBuffer = generator.nextDoubleBuffer(size)
override suspend fun nextBuffer(size: Int): DoubleBuffer = nextBufferBlocking(size)
override suspend fun fork(): UniformDoubleChain = UniformDoubleChain(generator.fork())
} }
public fun <R> RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, gen)
public fun Chain<Double>.blocking(): BlockingDoubleChain = object : Chain<Double> by this, BlockingDoubleChain {}
public fun Chain<Int>.blocking(): BlockingIntChain = object : Chain<Int> by this, BlockingIntChain {}

View File

@ -1,5 +1,6 @@
package space.kscience.kmath.stat package space.kscience.kmath.stat
import space.kscience.kmath.structures.DoubleBuffer
import kotlin.random.Random import kotlin.random.Random
/** /**
@ -16,6 +17,11 @@ public interface RandomGenerator {
*/ */
public fun nextDouble(): Double public fun nextDouble(): Double
/**
* A chunk of doubles of given [size]
*/
public fun nextDoubleBuffer(size: Int): DoubleBuffer = DoubleBuffer(size) { nextDouble() }
/** /**
* Gets the next random `Int` from the random number generator. * Gets the next random `Int` from the random number generator.
* *

View File

@ -3,16 +3,13 @@ package space.kscience.kmath.stat
import kotlinx.coroutines.flow.first import kotlinx.coroutines.flow.first
import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.collect import space.kscience.kmath.chains.collect
import space.kscience.kmath.structures.Buffer import space.kscience.kmath.structures.*
import space.kscience.kmath.structures.BufferFactory
import space.kscience.kmath.structures.IntBuffer
import space.kscience.kmath.structures.MutableBuffer
import kotlin.jvm.JvmName import kotlin.jvm.JvmName
/** /**
* Sampler that generates chains of values of type [T]. * Sampler that generates chains of values of type [T] in a chain of type [C].
*/ */
public fun interface Sampler<T : Any> { public fun interface Sampler<out T : Any> {
/** /**
* Generates a chain of samples. * Generates a chain of samples.
* *
@ -22,39 +19,6 @@ public fun interface Sampler<T : Any> {
public fun sample(generator: RandomGenerator): Chain<T> public fun sample(generator: RandomGenerator): Chain<T>
} }
/**
* A distribution of typed objects.
*/
public interface Distribution<T : Any> : Sampler<T> {
/**
* A probability value for given argument [arg].
* For continuous distributions returns PDF
*/
public fun probability(arg: T): Double
public override fun sample(generator: RandomGenerator): Chain<T>
/**
* An empty companion. Distribution factories should be written as its extensions
*/
public companion object
}
public interface UnivariateDistribution<T : Comparable<T>> : Distribution<T> {
/**
* Cumulative distribution for ordered parameter (CDF)
*/
public fun cumulative(arg: T): Double
}
/**
* Compute probability integral in an interval
*/
public fun <T : Comparable<T>> UnivariateDistribution<T>.integral(from: T, to: T): Double {
require(to > from)
return cumulative(to) - cumulative(from)
}
/** /**
* Sample a bunch of values * Sample a bunch of values
*/ */
@ -71,7 +35,7 @@ public fun <T : Any> Sampler<T>.sampleBuffer(
//clear list from previous run //clear list from previous run
tmp.clear() tmp.clear()
//Fill list //Fill list
repeat(size) { tmp += chain.next() } repeat(size) { tmp.add(chain.next()) }
//return new buffer with elements from tmp //return new buffer with elements from tmp
bufferFactory(size) { tmp[it] } bufferFactory(size) { tmp[it] }
} }
@ -87,7 +51,7 @@ public suspend fun <T : Any> Sampler<T>.next(generator: RandomGenerator): T = sa
*/ */
@JvmName("sampleRealBuffer") @JvmName("sampleRealBuffer")
public fun Sampler<Double>.sampleBuffer(generator: RandomGenerator, size: Int): Chain<Buffer<Double>> = public fun Sampler<Double>.sampleBuffer(generator: RandomGenerator, size: Int): Chain<Buffer<Double>> =
sampleBuffer(generator, size, MutableBuffer.Companion::double) sampleBuffer(generator, size, ::DoubleBuffer)
/** /**
* Generates [size] integer samples and chunks them into some buffers. * Generates [size] integer samples and chunks them into some buffers.

View File

@ -81,7 +81,7 @@ public class Mean<T>(
public companion object { public companion object {
//TODO replace with optimized version which respects overflow //TODO replace with optimized version which respects overflow
public val real: Mean<Double> = Mean(DoubleField) { sum, count -> sum / count } public val double: Mean<Double> = Mean(DoubleField) { sum, count -> sum / count }
public val int: Mean<Int> = Mean(IntRing) { sum, count -> sum / count } public val int: Mean<Int> = Mean(IntRing) { sum, count -> sum / count }
public val long: Mean<Long> = Mean(LongRing) { sum, count -> sum / count } public val long: Mean<Long> = Mean(LongRing) { sum, count -> sum / count }
} }

View File

@ -2,6 +2,8 @@ package space.kscience.kmath.stat
import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.SimpleChain import space.kscience.kmath.chains.SimpleChain
import space.kscience.kmath.distributions.Distribution
import space.kscience.kmath.distributions.UnivariateDistribution
public class UniformDistribution(public val range: ClosedFloatingPointRange<Double>) : UnivariateDistribution<Double> { public class UniformDistribution(public val range: ClosedFloatingPointRange<Double>) : UnivariateDistribution<Double> {
private val length: Double = range.endInclusive - range.start private val length: Double = range.endInclusive - range.start

View File

@ -1,73 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.internal.InternalUtils
import kotlin.math.ln
import kotlin.math.pow
/**
* Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html).
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html].
*/
public class AhrensDieterExponentialSampler private constructor(public val mean: Double) : Sampler<Double> {
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
// Step 1:
var a = 0.0
var u = nextDouble()
// Step 2 and 3:
while (u < 0.5) {
a += EXPONENTIAL_SA_QI[0]
u *= 2.0
}
// Step 4 (now u >= 0.5):
u += u - 1
// Step 5:
if (u <= EXPONENTIAL_SA_QI[0]) return@chain mean * (a + u)
// Step 6:
var i = 0 // Should be 1, be we iterate before it in while using 0.
var u2 = nextDouble()
var umin = u2
// Step 7 and 8:
do {
++i
u2 = nextDouble()
if (u2 < umin) umin = u2
// Step 8:
} while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1.
mean * (a + umin * EXPONENTIAL_SA_QI[0])
}
override fun toString(): String = "Ahrens-Dieter Exponential deviate"
public companion object {
private val EXPONENTIAL_SA_QI by lazy { DoubleArray(16) }
init {
/**
* Filling EXPONENTIAL_SA_QI table.
* Note that we don't want qi = 0 in the table.
*/
val ln2 = ln(2.0)
var qi = 0.0
EXPONENTIAL_SA_QI.indices.forEach { i ->
qi += ln2.pow(i + 1.0) / InternalUtils.factorial(i + 1)
EXPONENTIAL_SA_QI[i] = qi
}
}
public fun of(mean: Double): AhrensDieterExponentialSampler {
require(mean > 0) { "mean is not strictly positive: $mean" }
return AhrensDieterExponentialSampler(mean)
}
}
}

View File

@ -1,48 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.*
/**
* [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian
* distribution.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html].
*/
public class BoxMullerNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
private var nextGaussian: Double = Double.NaN
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
val random: Double
if (nextGaussian.isNaN()) {
// Generate a pair of Gaussian numbers.
val x = nextDouble()
val y = nextDouble()
val alpha = 2 * PI * x
val r = sqrt(-2 * ln(y))
// Return the first element of the generated pair.
random = r * cos(alpha)
// Keep second element of the pair for next invocation.
nextGaussian = r * sin(alpha)
} else {
// Use the second element of the pair (generated at the
// previous invocation).
random = nextGaussian
// Both elements of the pair have been used.
nextGaussian = Double.NaN
}
random
}
public override fun toString(): String = "Box-Muller normalized Gaussian deviate"
public companion object {
public fun of(): BoxMullerNormalizedGaussianSampler = BoxMullerNormalizedGaussianSampler()
}
}

View File

@ -1,43 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.map
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
/**
* Sampling from a Gaussian distribution with given mean and standard deviation.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html].
*
* @property mean the mean of the distribution.
* @property standardDeviation the variance of the distribution.
*/
public class GaussianSampler private constructor(
public val mean: Double,
public val standardDeviation: Double,
private val normalized: NormalizedGaussianSampler
) : Sampler<Double> {
public override fun sample(generator: RandomGenerator): Chain<Double> = normalized
.sample(generator)
.map { standardDeviation * it + mean }
override fun toString(): String = "Gaussian deviate [$normalized]"
public companion object {
public fun of(
mean: Double,
standardDeviation: Double,
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of()
): GaussianSampler {
require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" }
return GaussianSampler(
mean,
standardDeviation,
normalized
)
}
}
}

View File

@ -1,63 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.exp
/**
* Sampler for the Poisson distribution.
* - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
* This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead.
*
* Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor.
*
* Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html].
*/
public class KempSmallMeanPoissonSampler private constructor(
private val p0: Double,
private val mean: Double
) : Sampler<Int> {
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
// Note on the algorithm:
// - X is the unknown sample deviate (the output of the algorithm)
// - x is the current value from the distribution
// - p is the probability of the current value x, p(X=x)
// - u is effectively the cumulative probability that the sample X
// is equal or above the current value x, p(X>=x)
// So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
var u = nextDouble()
var x = 0
var p = p0
while (u > p) {
u -= p
// Compute the next probability using a recurrence relation.
// p(x+1) = p(x) * mean / (x+1)
p *= mean / ++x
// The algorithm listed in Kemp (1981) does not check that the rolling probability
// is positive. This check is added to ensure no errors when the limit of the summation
// 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic.
if (p == 0.0) return@chain x
}
x
}
public override fun toString(): String = "Kemp Small Mean Poisson deviate"
public companion object {
public fun of(mean: Double): KempSmallMeanPoissonSampler {
require(mean > 0) { "Mean is not strictly positive: $mean" }
val p0 = exp(-mean)
// Probability must be positive. As mean increases then p(0) decreases.
require(p0 > 0) { "No probability for mean: $mean" }
return KempSmallMeanPoissonSampler(p0, mean)
}
}
}

View File

@ -1,130 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.ConstantChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.internal.InternalUtils
import space.kscience.kmath.stat.next
import kotlin.math.*
/**
* Sampler for the Poisson distribution.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables
* Computing vol. 26 pp. 197-207.
*
* This sampler is suitable for mean >= 40.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html].
*/
public class LargeMeanPoissonSampler private constructor(public val mean: Double) : Sampler<Int> {
private val exponential: Sampler<Double> = AhrensDieterExponentialSampler.of(1.0)
private val gaussian: Sampler<Double> = ZigguratNormalizedGaussianSampler.of()
private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG
private val lambda: Double = floor(mean)
private val logLambda: Double = ln(lambda)
private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt())
private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1))
private val halfDelta: Double = delta / 2
private val twolpd: Double = 2 * lambda + delta
private val c1: Double = 1 / (8 * lambda)
private val a1: Double = sqrt(PI * twolpd) * exp(c1)
private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd)
private val aSum: Double = a1 + a2 + 1
private val p1: Double = a1 / aSum
private val p2: Double = a2 / aSum
private val smallMeanPoissonSampler: Sampler<Int> = if (mean - lambda < Double.MIN_VALUE)
NO_SMALL_MEAN_POISSON_SAMPLER
else // Not used.
KempSmallMeanPoissonSampler.of(mean - lambda)
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
// This will never be null. It may be a no-op delegate that returns zero.
val y2 = smallMeanPoissonSampler.next(generator)
var x: Double
var y: Double
var v: Double
var a: Int
var t: Double
var qr: Double
var qa: Double
while (true) {
// Step 1:
val u = generator.nextDouble()
if (u <= p1) {
// Step 2:
val n = gaussian.next(generator)
x = n * sqrt(lambda + halfDelta) - 0.5
if (x > delta || x < -lambda) continue
y = if (x < 0) floor(x) else ceil(x)
val e = exponential.next(generator)
v = -e - 0.5 * n * n + c1
} else {
// Step 3:
if (u > p1 + p2) {
y = lambda
break
}
x = delta + twolpd / delta * exponential.next(generator)
y = ceil(x)
v = -exponential.next(generator) - delta * (x + 1) / twolpd
}
// The Squeeze Principle
// Step 4.1:
a = if (x < 0) 1 else 0
t = y * (y + 1) / (2 * lambda)
// Step 4.2
if (v < -t && a == 0) {
y += lambda
break
}
// Step 4.3:
qr = t * ((2 * y + 1) / (6 * lambda) - 1)
qa = qr - t * t / (3 * (lambda + a * (y + 1)))
// Step 4.4:
if (v < qa) {
y += lambda
break
}
// Step 4.5:
if (v > qr) continue
// Step 4.6:
if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) {
y += lambda
break
}
}
min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt()
}
private fun getFactorialLog(n: Int): Double = factorialLog.value(n)
public override fun toString(): String = "Large Mean Poisson deviate"
public companion object {
private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE
private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create()
private val NO_SMALL_MEAN_POISSON_SAMPLER: Sampler<Int> = Sampler { ConstantChain(0) }
public fun of(mean: Double): LargeMeanPoissonSampler {
require(mean >= 1) { "mean is not >= 1: $mean" }
// The algorithm is not valid if Math.floor(mean) is not an integer.
require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" }
return LargeMeanPoissonSampler(mean)
}
}
}

View File

@ -1,61 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.ln
import kotlin.math.sqrt
/**
* [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian
* distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in
* [BoxMullerNormalizedGaussianSampler].
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html]
*/
public class MarsagliaNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
private var nextGaussian = Double.NaN
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
if (nextGaussian.isNaN()) {
val alpha: Double
var x: Double
// Rejection scheme for selecting a pair that lies within the unit circle.
while (true) {
// Generate a pair of numbers within [-1 , 1).
x = 2.0 * generator.nextDouble() - 1.0
val y = 2.0 * generator.nextDouble() - 1.0
val r2 = x * x + y * y
if (r2 < 1 && r2 > 0) {
// Pair (x, y) is within unit circle.
alpha = sqrt(-2 * ln(r2) / r2)
// Keep second element of the pair for next invocation.
nextGaussian = alpha * y
// Return the first element of the generated pair.
break
}
// Pair is not within the unit circle: Generate another one.
}
// Return the first element of the generated pair.
alpha * x
} else {
// Use the second element of the pair (generated at the
// previous invocation).
val r = nextGaussian
// Both elements of the pair have been used.
nextGaussian = Double.NaN
r
}
}
public override fun toString(): String = "Box-Muller (with rejection) normalized Gaussian deviate"
public companion object {
public fun of(): MarsagliaNormalizedGaussianSampler = MarsagliaNormalizedGaussianSampler()
}
}

View File

@ -1,9 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.stat.Sampler
/**
* Marker interface for a sampler that generates values from an N(0,1)
* [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution).
*/
public interface NormalizedGaussianSampler : Sampler<Double>

View File

@ -1,30 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
/**
* Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3
* Important integer-valued distributions: The Poisson distribution. Addison Wesley.
* The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html].
*/
public class PoissonSampler private constructor(mean: Double) : Sampler<Int> {
private val poissonSamplerDelegate: Sampler<Int> = of(mean)
public override fun sample(generator: RandomGenerator): Chain<Int> = poissonSamplerDelegate.sample(generator)
public override fun toString(): String = poissonSamplerDelegate.toString()
public companion object {
private const val PIVOT = 40.0
public fun of(mean: Double): Sampler<Int> =// Each sampler should check the input arguments.
if (mean < PIVOT) SmallMeanPoissonSampler.of(mean) else LargeMeanPoissonSampler.of(mean)
}
}

View File

@ -1,50 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.ceil
import kotlin.math.exp
/**
* Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important
* integer-valued distributions: The Poisson distribution. Addison Wesley.
* - The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead.
*
* Based on Commons RNG implementation.
*
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html].
*/
public class SmallMeanPoissonSampler private constructor(mean: Double) : Sampler<Int> {
private val p0: Double = exp(-mean)
private val limit: Int = (if (p0 > 0)
ceil(1000 * mean)
else
throw IllegalArgumentException("No p(x=0) probability for mean: $mean")).toInt()
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
var n = 0
var r = 1.0
while (n < limit) {
r *= nextDouble()
if (r >= p0) n++ else break
}
n
}
public override fun toString(): String = "Small Mean Poisson deviate"
public companion object {
public fun of(mean: Double): SmallMeanPoissonSampler {
require(mean > 0) { "mean is not strictly positive: $mean" }
return SmallMeanPoissonSampler(mean)
}
}
}

View File

@ -1,88 +0,0 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.*
/**
* [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a
* Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this
* implementation has been adapted from the C code provided therein.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html].
*/
public class ZigguratNormalizedGaussianSampler private constructor() :
NormalizedGaussianSampler, Sampler<Double> {
private fun sampleOne(generator: RandomGenerator): Double {
val j = generator.nextLong()
val i = (j and LAST.toLong()).toInt()
return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i)
}
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain { sampleOne(this) }
public override fun toString(): String = "Ziggurat normalized Gaussian deviate"
private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double {
var x = hz * W[iz]
return when {
iz == 0 -> {
var y: Double
do {
y = -ln(generator.nextDouble())
x = -ln(generator.nextDouble()) * ONE_OVER_R
} while (y + y < x * x)
val out = R + x
if (hz > 0) out else -out
}
F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x
else -> sampleOne(generator)
}
}
public companion object {
private const val R: Double = 3.442619855899
private const val ONE_OVER_R: Double = 1 / R
private const val V: Double = 9.91256303526217e-3
private val MAX: Double = 2.0.pow(63.0)
private val ONE_OVER_MAX: Double = 1.0 / MAX
private const val LEN: Int = 128
private const val LAST: Int = LEN - 1
private val K: LongArray = LongArray(LEN)
private val W: DoubleArray = DoubleArray(LEN)
private val F: DoubleArray = DoubleArray(LEN)
init {
// Filling the tables.
var d = R
var t = d
var fd = gauss(d)
val q = V / fd
K[0] = (d / q * MAX).toLong()
K[1] = 0
W[0] = q * ONE_OVER_MAX
W[LAST] = d * ONE_OVER_MAX
F[0] = 1.0
F[LAST] = fd
(LAST - 1 downTo 1).forEach { i ->
d = sqrt(-2 * ln(V / d + fd))
fd = gauss(d)
K[i + 1] = (d / t * MAX).toLong()
t = d
F[i] = fd
W[i] = d * ONE_OVER_MAX
}
}
public fun of(): ZigguratNormalizedGaussianSampler = ZigguratNormalizedGaussianSampler()
private fun gauss(x: Double): Double = exp(-0.5 * x * x)
}
}

View File

@ -5,22 +5,23 @@ import kotlinx.coroutines.flow.toList
import kotlinx.coroutines.runBlocking import kotlinx.coroutines.runBlocking
import org.junit.jupiter.api.Assertions import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
import space.kscience.kmath.stat.samplers.GaussianSampler import space.kscience.kmath.samplers.GaussianSampler
import space.kscience.kmath.structures.asBuffer
internal class CommonsDistributionsTest { internal class CommonsDistributionsTest {
@Test @Test
fun testNormalDistributionSuspend() { fun testNormalDistributionSuspend() = runBlocking {
val distribution = GaussianSampler.of(7.0, 2.0) val distribution = GaussianSampler(7.0, 2.0)
val generator = RandomGenerator.default(1) val generator = RandomGenerator.default(1)
val sample = runBlocking { distribution.sample(generator).take(1000).toList() } val sample = distribution.sample(generator).take(1000).toList().asBuffer()
Assertions.assertEquals(7.0, sample.average(), 0.1) Assertions.assertEquals(7.0, Mean.double(sample), 0.2)
} }
@Test @Test
fun testNormalDistributionBlocking() { fun testNormalDistributionBlocking() = runBlocking {
val distribution = GaussianSampler.of(7.0, 2.0) val distribution = GaussianSampler(7.0, 2.0)
val generator = RandomGenerator.default(1) val generator = RandomGenerator.default(1)
val sample = runBlocking { distribution.sample(generator).blocking().nextBlock(1000) } val sample = distribution.sample(generator).nextBufferBlocking(1000)
Assertions.assertEquals(7.0, sample.average(), 0.1) Assertions.assertEquals(7.0, Mean.double(sample), 0.2)
} }
} }

View File

@ -20,7 +20,7 @@ internal class StatisticTest {
@Test @Test
fun testParallelMean() { fun testParallelMean() {
runBlocking { runBlocking {
val average = Mean.real val average = Mean.double
.flow(chunked) //create a flow with results .flow(chunked) //create a flow with results
.drop(99) // Skip first 99 values and use one with total data .drop(99) // Skip first 99 values and use one with total data
.first() //get 1e5 data samples average .first() //get 1e5 data samples average