Fix Samplers and distribution API
This commit is contained in:
parent
ae911fcc2f
commit
c2bab5d138
@ -7,6 +7,7 @@ import kscience.plotly.models.ScatterMode
|
||||
import kscience.plotly.models.TraceValues
|
||||
import space.kscience.kmath.commons.optimization.chiSquared
|
||||
import space.kscience.kmath.commons.optimization.minimize
|
||||
import space.kscience.kmath.distributions.NormalDistribution
|
||||
import space.kscience.kmath.misc.symbol
|
||||
import space.kscience.kmath.optimization.FunctionOptimization
|
||||
import space.kscience.kmath.optimization.OptimizationResult
|
||||
@ -14,7 +15,6 @@ import space.kscience.kmath.real.DoubleVector
|
||||
import space.kscience.kmath.real.map
|
||||
import space.kscience.kmath.real.step
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.distributions.NormalDistribution
|
||||
import space.kscience.kmath.structures.asIterable
|
||||
import space.kscience.kmath.structures.toList
|
||||
import kotlin.math.pow
|
||||
|
@ -3,8 +3,8 @@ package space.kscience.kmath.stat
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.async
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import space.kscience.kmath.stat.samplers.GaussianSampler
|
||||
import org.apache.commons.rng.simple.RandomSource
|
||||
import space.kscience.kmath.samplers.GaussianSampler
|
||||
import java.time.Duration
|
||||
import java.time.Instant
|
||||
import org.apache.commons.rng.sampling.distribution.GaussianSampler as CMGaussianSampler
|
||||
@ -12,8 +12,8 @@ import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSa
|
||||
|
||||
private suspend fun runKMathChained(): Duration {
|
||||
val generator = RandomGenerator.fromSource(RandomSource.MT, 123L)
|
||||
val normal = GaussianSampler.of(7.0, 2.0)
|
||||
val chain = normal.sample(generator).blocking()
|
||||
val normal = GaussianSampler(7.0, 2.0)
|
||||
val chain = normal.sample(generator)
|
||||
val startTime = Instant.now()
|
||||
var sum = 0.0
|
||||
|
||||
|
@ -3,7 +3,7 @@ package space.kscience.kmath.stat
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.chains.collectWithState
|
||||
import space.kscience.kmath.stat.distributions.NormalDistribution
|
||||
import space.kscience.kmath.distributions.NormalDistribution
|
||||
|
||||
/**
|
||||
* The state of distribution averager.
|
||||
|
@ -2,10 +2,10 @@ package space.kscience.kmath.commons.optimization
|
||||
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import space.kscience.kmath.commons.expressions.DerivativeStructureExpression
|
||||
import space.kscience.kmath.distributions.NormalDistribution
|
||||
import space.kscience.kmath.misc.symbol
|
||||
import space.kscience.kmath.optimization.FunctionOptimization
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.distributions.NormalDistribution
|
||||
import kotlin.math.pow
|
||||
import kotlin.test.Test
|
||||
|
||||
|
@ -0,0 +1,50 @@
|
||||
package space.kscience.kmath.chains
|
||||
|
||||
import space.kscience.kmath.structures.Buffer
|
||||
|
||||
|
||||
public interface BufferChain<out T> : Chain<T> {
|
||||
public suspend fun nextBuffer(size: Int): Buffer<T>
|
||||
override suspend fun fork(): BufferChain<T>
|
||||
}
|
||||
|
||||
/**
|
||||
* A chain with blocking generator that could be used without suspension
|
||||
*/
|
||||
public interface BlockingChain<out T> : Chain<T> {
|
||||
/**
|
||||
* Get the next value without concurrency support. Not guaranteed to be thread safe.
|
||||
*/
|
||||
public fun nextBlocking(): T
|
||||
|
||||
override suspend fun next(): T = nextBlocking()
|
||||
|
||||
override suspend fun fork(): BlockingChain<T>
|
||||
}
|
||||
|
||||
|
||||
public interface BlockingBufferChain<out T> : BlockingChain<T>, BufferChain<T> {
|
||||
|
||||
public fun nextBufferBlocking(size: Int): Buffer<T>
|
||||
|
||||
public override fun nextBlocking(): T = nextBufferBlocking(1)[0]
|
||||
|
||||
public override suspend fun nextBuffer(size: Int): Buffer<T> = nextBufferBlocking(size)
|
||||
|
||||
override suspend fun fork(): BlockingBufferChain<T>
|
||||
}
|
||||
|
||||
|
||||
public suspend inline fun <reified T : Any> Chain<T>.nextBuffer(size: Int): Buffer<T> = if (this is BufferChain) {
|
||||
nextBuffer(size)
|
||||
} else {
|
||||
Buffer.auto(size) { next() }
|
||||
}
|
||||
|
||||
public inline fun <reified T : Any> BlockingChain<T>.nextBufferBlocking(
|
||||
size: Int,
|
||||
): Buffer<T> = if (this is BlockingBufferChain) {
|
||||
nextBufferBlocking(size)
|
||||
} else {
|
||||
Buffer.auto(size) { nextBlocking() }
|
||||
}
|
@ -1,13 +1,27 @@
|
||||
package space.kscience.kmath.chains
|
||||
|
||||
import space.kscience.kmath.structures.DoubleBuffer
|
||||
|
||||
/**
|
||||
* Chunked, specialized chain for real values.
|
||||
* Chunked, specialized chain for double values, which supports blocking [nextBlocking] operation
|
||||
*/
|
||||
public interface BlockingDoubleChain : Chain<Double> {
|
||||
public override suspend fun next(): Double
|
||||
public interface BlockingDoubleChain : BlockingBufferChain<Double> {
|
||||
|
||||
/**
|
||||
* Returns an [DoubleArray] chunk of [size] values of [next].
|
||||
*/
|
||||
public suspend fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { next() }
|
||||
public override fun nextBufferBlocking(size: Int): DoubleBuffer
|
||||
|
||||
override suspend fun fork(): BlockingDoubleChain
|
||||
|
||||
public companion object
|
||||
}
|
||||
|
||||
public fun BlockingDoubleChain.map(transform: (Double) -> Double): BlockingDoubleChain = object : BlockingDoubleChain {
|
||||
override fun nextBufferBlocking(size: Int): DoubleBuffer {
|
||||
val block = this@map.nextBufferBlocking(size)
|
||||
return DoubleBuffer(size) { transform(block[it]) }
|
||||
}
|
||||
|
||||
override suspend fun fork(): BlockingDoubleChain = this@map.fork().map(transform)
|
||||
}
|
@ -1,9 +1,12 @@
|
||||
package space.kscience.kmath.chains
|
||||
|
||||
import space.kscience.kmath.structures.IntBuffer
|
||||
|
||||
/**
|
||||
* Performance optimized chain for integer values
|
||||
*/
|
||||
public interface BlockingIntChain : Chain<Int> {
|
||||
public override suspend fun next(): Int
|
||||
public suspend fun nextBlock(size: Int): IntArray = IntArray(size) { next() }
|
||||
public interface BlockingIntChain : BlockingBufferChain<Int> {
|
||||
override fun nextBufferBlocking(size: Int): IntBuffer
|
||||
|
||||
override suspend fun fork(): BlockingIntChain
|
||||
}
|
@ -24,20 +24,20 @@ import kotlinx.coroutines.sync.withLock
|
||||
|
||||
/**
|
||||
* A not-necessary-Markov chain of some type
|
||||
* @param R - the chain element type
|
||||
* @param T - the chain element type
|
||||
*/
|
||||
public interface Chain<out R> : Flow<R> {
|
||||
public interface Chain<out T> : Flow<T> {
|
||||
/**
|
||||
* Generate next value, changing state if needed
|
||||
*/
|
||||
public suspend fun next(): R
|
||||
public suspend fun next(): T
|
||||
|
||||
/**
|
||||
* Create a copy of current chain state. Consuming resulting chain does not affect initial chain
|
||||
*/
|
||||
public fun fork(): Chain<R>
|
||||
public suspend fun fork(): Chain<T>
|
||||
|
||||
override suspend fun collect(collector: FlowCollector<R>): Unit =
|
||||
override suspend fun collect(collector: FlowCollector<T>): Unit =
|
||||
flow { while (true) emit(next()) }.collect(collector)
|
||||
|
||||
public companion object
|
||||
@ -51,7 +51,7 @@ public fun <T> Sequence<T>.asChain(): Chain<T> = iterator().asChain()
|
||||
*/
|
||||
public class SimpleChain<out R>(private val gen: suspend () -> R) : Chain<R> {
|
||||
public override suspend fun next(): R = gen()
|
||||
public override fun fork(): Chain<R> = this
|
||||
public override suspend fun fork(): Chain<R> = this
|
||||
}
|
||||
|
||||
/**
|
||||
@ -69,7 +69,7 @@ public class MarkovChain<out R : Any>(private val seed: suspend () -> R, private
|
||||
newValue
|
||||
}
|
||||
|
||||
public override fun fork(): Chain<R> = MarkovChain(seed = { value ?: seed() }, gen = gen)
|
||||
public override suspend fun fork(): Chain<R> = MarkovChain(seed = { value ?: seed() }, gen = gen)
|
||||
}
|
||||
|
||||
/**
|
||||
@ -94,7 +94,7 @@ public class StatefulChain<S, out R>(
|
||||
newValue
|
||||
}
|
||||
|
||||
public override fun fork(): Chain<R> = StatefulChain(forkState(state), seed, forkState, gen)
|
||||
public override suspend fun fork(): Chain<R> = StatefulChain(forkState(state), seed, forkState, gen)
|
||||
}
|
||||
|
||||
/**
|
||||
@ -102,7 +102,7 @@ public class StatefulChain<S, out R>(
|
||||
*/
|
||||
public class ConstantChain<out T>(public val value: T) : Chain<T> {
|
||||
public override suspend fun next(): T = value
|
||||
public override fun fork(): Chain<T> = this
|
||||
public override suspend fun fork(): Chain<T> = this
|
||||
}
|
||||
|
||||
/**
|
||||
@ -111,7 +111,7 @@ public class ConstantChain<out T>(public val value: T) : Chain<T> {
|
||||
*/
|
||||
public fun <T, R> Chain<T>.map(func: suspend (T) -> R): Chain<R> = object : Chain<R> {
|
||||
override suspend fun next(): R = func(this@map.next())
|
||||
override fun fork(): Chain<R> = this@map.fork().map(func)
|
||||
override suspend fun fork(): Chain<R> = this@map.fork().map(func)
|
||||
}
|
||||
|
||||
/**
|
||||
@ -127,7 +127,7 @@ public fun <T> Chain<T>.filter(block: (T) -> Boolean): Chain<T> = object : Chain
|
||||
return next
|
||||
}
|
||||
|
||||
override fun fork(): Chain<T> = this@filter.fork().filter(block)
|
||||
override suspend fun fork(): Chain<T> = this@filter.fork().filter(block)
|
||||
}
|
||||
|
||||
/**
|
||||
@ -135,7 +135,7 @@ public fun <T> Chain<T>.filter(block: (T) -> Boolean): Chain<T> = object : Chain
|
||||
*/
|
||||
public fun <T, R> Chain<T>.collect(mapper: suspend (Chain<T>) -> R): Chain<R> = object : Chain<R> {
|
||||
override suspend fun next(): R = mapper(this@collect)
|
||||
override fun fork(): Chain<R> = this@collect.fork().collect(mapper)
|
||||
override suspend fun fork(): Chain<R> = this@collect.fork().collect(mapper)
|
||||
}
|
||||
|
||||
public fun <T, S, R> Chain<T>.collectWithState(
|
||||
@ -145,7 +145,7 @@ public fun <T, S, R> Chain<T>.collectWithState(
|
||||
): Chain<R> = object : Chain<R> {
|
||||
override suspend fun next(): R = state.mapper(this@collectWithState)
|
||||
|
||||
override fun fork(): Chain<R> =
|
||||
override suspend fun fork(): Chain<R> =
|
||||
this@collectWithState.fork().collectWithState(stateFork(state), stateFork, mapper)
|
||||
}
|
||||
|
||||
@ -154,5 +154,5 @@ public fun <T, S, R> Chain<T>.collectWithState(
|
||||
*/
|
||||
public fun <T, U, R> Chain<T>.zip(other: Chain<U>, block: suspend (T, U) -> R): Chain<R> = object : Chain<R> {
|
||||
override suspend fun next(): R = block(this@zip.next(), other.next())
|
||||
override fun fork(): Chain<R> = this@zip.fork().zip(other.fork(), block)
|
||||
override suspend fun fork(): Chain<R> = this@zip.fork().zip(other.fork(), block)
|
||||
}
|
||||
|
@ -6,7 +6,6 @@ import space.kscience.kmath.chains.BlockingDoubleChain
|
||||
import space.kscience.kmath.structures.Buffer
|
||||
import space.kscience.kmath.structures.BufferFactory
|
||||
import space.kscience.kmath.structures.DoubleBuffer
|
||||
import space.kscience.kmath.structures.asBuffer
|
||||
|
||||
/**
|
||||
* Create a [Flow] from buffer
|
||||
@ -50,7 +49,7 @@ public fun Flow<Double>.chunked(bufferSize: Int): Flow<DoubleBuffer> = flow {
|
||||
|
||||
if (this@chunked is BlockingDoubleChain) {
|
||||
// performance optimization for blocking primitive chain
|
||||
while (true) emit(nextBlock(bufferSize).asBuffer())
|
||||
while (true) emit(nextBufferBlocking(bufferSize))
|
||||
} else {
|
||||
val array = DoubleArray(bufferSize)
|
||||
var counter = 0
|
||||
|
@ -2,6 +2,10 @@ plugins {
|
||||
id("ru.mipt.npm.gradle.mpp")
|
||||
}
|
||||
|
||||
kscience{
|
||||
useAtomic()
|
||||
}
|
||||
|
||||
kotlin.sourceSets {
|
||||
commonMain {
|
||||
dependencies {
|
||||
|
@ -0,0 +1,38 @@
|
||||
package space.kscience.kmath.distributions
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
|
||||
/**
|
||||
* A distribution of typed objects.
|
||||
*/
|
||||
public interface Distribution<T : Any> : Sampler<T> {
|
||||
/**
|
||||
* A probability value for given argument [arg].
|
||||
* For continuous distributions returns PDF
|
||||
*/
|
||||
public fun probability(arg: T): Double
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<T>
|
||||
|
||||
/**
|
||||
* An empty companion. Distribution factories should be written as its extensions
|
||||
*/
|
||||
public companion object
|
||||
}
|
||||
|
||||
public interface UnivariateDistribution<T : Comparable<T>> : Distribution<T> {
|
||||
/**
|
||||
* Cumulative distribution for ordered parameter (CDF)
|
||||
*/
|
||||
public fun cumulative(arg: T): Double
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute probability integral in an interval
|
||||
*/
|
||||
public fun <T : Comparable<T>> UnivariateDistribution<T>.integral(from: T, to: T): Double {
|
||||
require(to > from)
|
||||
return cumulative(to) - cumulative(from)
|
||||
}
|
@ -1,7 +1,8 @@
|
||||
package space.kscience.kmath.stat
|
||||
package space.kscience.kmath.distributions
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.chains.SimpleChain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
|
||||
/**
|
||||
* A multivariate distribution which takes a map of parameters
|
@ -1,12 +1,11 @@
|
||||
package space.kscience.kmath.stat.distributions
|
||||
package space.kscience.kmath.distributions
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.internal.InternalErf
|
||||
import space.kscience.kmath.samplers.GaussianSampler
|
||||
import space.kscience.kmath.samplers.NormalizedGaussianSampler
|
||||
import space.kscience.kmath.samplers.ZigguratNormalizedGaussianSampler
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.UnivariateDistribution
|
||||
import space.kscience.kmath.stat.internal.InternalErf
|
||||
import space.kscience.kmath.stat.samplers.GaussianSampler
|
||||
import space.kscience.kmath.stat.samplers.NormalizedGaussianSampler
|
||||
import space.kscience.kmath.stat.samplers.ZigguratNormalizedGaussianSampler
|
||||
import kotlin.math.*
|
||||
|
||||
/**
|
||||
@ -16,8 +15,8 @@ public inline class NormalDistribution(public val sampler: GaussianSampler) : Un
|
||||
public constructor(
|
||||
mean: Double,
|
||||
standardDeviation: Double,
|
||||
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of(),
|
||||
) : this(GaussianSampler.of(mean, standardDeviation, normalized))
|
||||
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler,
|
||||
) : this(GaussianSampler(mean, standardDeviation, normalized))
|
||||
|
||||
public override fun probability(arg: Double): Double {
|
||||
val x1 = (arg - sampler.mean) / sampler.standardDeviation
|
@ -1,4 +1,4 @@
|
||||
package space.kscience.kmath.stat.internal
|
||||
package space.kscience.kmath.internal
|
||||
|
||||
import kotlin.math.abs
|
||||
|
@ -1,4 +1,4 @@
|
||||
package space.kscience.kmath.stat.internal
|
||||
package space.kscience.kmath.internal
|
||||
|
||||
import kotlin.math.*
|
||||
|
@ -1,4 +1,4 @@
|
||||
package space.kscience.kmath.stat.internal
|
||||
package space.kscience.kmath.internal
|
||||
|
||||
import kotlin.math.ln
|
||||
import kotlin.math.min
|
@ -0,0 +1,72 @@
|
||||
package space.kscience.kmath.samplers
|
||||
|
||||
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.structures.DoubleBuffer
|
||||
import kotlin.math.ln
|
||||
import kotlin.math.pow
|
||||
|
||||
/**
|
||||
* Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html).
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html].
|
||||
*/
|
||||
public class AhrensDieterExponentialSampler(public val mean: Double) : Sampler<Double> {
|
||||
|
||||
init {
|
||||
require(mean > 0) { "mean is not strictly positive: $mean" }
|
||||
}
|
||||
|
||||
public override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
|
||||
override fun nextBlocking(): Double {
|
||||
// Step 1:
|
||||
var a = 0.0
|
||||
var u = generator.nextDouble()
|
||||
|
||||
// Step 2 and 3:
|
||||
while (u < 0.5) {
|
||||
a += EXPONENTIAL_SA_QI[0]
|
||||
u *= 2.0
|
||||
}
|
||||
|
||||
// Step 4 (now u >= 0.5):
|
||||
u += u - 1
|
||||
// Step 5:
|
||||
if (u <= EXPONENTIAL_SA_QI[0]) return mean * (a + u)
|
||||
// Step 6:
|
||||
var i = 0 // Should be 1, be we iterate before it in while using 0.
|
||||
var u2 = generator.nextDouble()
|
||||
var umin = u2
|
||||
|
||||
// Step 7 and 8:
|
||||
do {
|
||||
++i
|
||||
u2 = generator.nextDouble()
|
||||
if (u2 < umin) umin = u2
|
||||
// Step 8:
|
||||
} while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1.
|
||||
|
||||
return mean * (a + umin * EXPONENTIAL_SA_QI[0])
|
||||
}
|
||||
|
||||
override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { nextBlocking() }
|
||||
|
||||
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
|
||||
}
|
||||
|
||||
public companion object {
|
||||
private val EXPONENTIAL_SA_QI by lazy {
|
||||
val ln2 = ln(2.0)
|
||||
var qi = 0.0
|
||||
|
||||
DoubleArray(16) { i ->
|
||||
qi += ln2.pow(i + 1.0) / space.kscience.kmath.internal.InternalUtils.factorial(i + 1)
|
||||
qi
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
package space.kscience.kmath.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
@ -80,7 +80,7 @@ public class AhrensDieterMarsagliaTsangGammaSampler private constructor(
|
||||
private val gaussian: NormalizedGaussianSampler
|
||||
|
||||
init {
|
||||
gaussian = ZigguratNormalizedGaussianSampler.of()
|
||||
gaussian = ZigguratNormalizedGaussianSampler
|
||||
dOptim = alpha - ONE_THIRD
|
||||
cOptim = ONE_THIRD / sqrt(dOptim)
|
||||
}
|
@ -1,10 +1,10 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
package space.kscience.kmath.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.internal.InternalUtils
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import space.kscience.kmath.stat.internal.InternalUtils
|
||||
import kotlin.math.ceil
|
||||
import kotlin.math.max
|
||||
import kotlin.math.min
|
||||
@ -39,12 +39,12 @@ import kotlin.math.min
|
||||
public open class AliasMethodDiscreteSampler private constructor(
|
||||
// Deliberate direct storage of input arrays
|
||||
protected val probability: LongArray,
|
||||
protected val alias: IntArray
|
||||
protected val alias: IntArray,
|
||||
) : Sampler<Int> {
|
||||
|
||||
private class SmallTableAliasMethodDiscreteSampler(
|
||||
probability: LongArray,
|
||||
alias: IntArray
|
||||
alias: IntArray,
|
||||
) : AliasMethodDiscreteSampler(probability, alias) {
|
||||
// Assume the table size is a power of 2 and create the mask
|
||||
private val mask: Int = alias.size - 1
|
||||
@ -111,9 +111,79 @@ public open class AliasMethodDiscreteSampler private constructor(
|
||||
private const val CONVERT_TO_NUMERATOR: Double = ONE_AS_NUMERATOR.toDouble()
|
||||
private const val MAX_SMALL_POWER_2_SIZE = 1 shl 11
|
||||
|
||||
public fun of(
|
||||
private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int {
|
||||
var updatedSmall = small
|
||||
(length until indices.size).forEach { i -> indices[updatedSmall++] = i }
|
||||
return updatedSmall
|
||||
}
|
||||
|
||||
private fun findLastNonZeroIndex(probabilities: DoubleArray): Int {
|
||||
// No bounds check is performed when decrementing as the array contains at least one
|
||||
// value above zero.
|
||||
var nonZeroIndex = probabilities.size - 1
|
||||
while (probabilities[nonZeroIndex] == ZERO) nonZeroIndex--
|
||||
return nonZeroIndex
|
||||
}
|
||||
|
||||
private fun computeSize(length: Int, alpha: Int): Int {
|
||||
// If No padding
|
||||
if (alpha < 0) return length
|
||||
// Use the number of leading zeros function to find the next power of 2,
|
||||
// i.e. ceil(log2(x))
|
||||
var pow2 = 32 - numberOfLeadingZeros(length - 1)
|
||||
// Increase by the alpha. Clip this to limit to a positive integer (2^30)
|
||||
pow2 = min(30, pow2 + alpha)
|
||||
// Use max to handle a length above the highest possible power of 2
|
||||
return max(length, 1 shl pow2)
|
||||
}
|
||||
|
||||
private fun fillTable(
|
||||
probability: LongArray,
|
||||
alias: IntArray,
|
||||
indices: IntArray,
|
||||
start: Int,
|
||||
end: Int,
|
||||
) = (start until end).forEach { i ->
|
||||
val index = indices[i]
|
||||
probability[index] = ONE_AS_NUMERATOR
|
||||
alias[index] = index
|
||||
}
|
||||
|
||||
private fun isSmallPowerOf2(n: Int): Boolean = n <= MAX_SMALL_POWER_2_SIZE && n and n - 1 == 0
|
||||
|
||||
private fun numberOfLeadingZeros(i: Int): Int {
|
||||
var mutI = i
|
||||
if (mutI <= 0) return if (mutI == 0) 32 else 0
|
||||
var n = 31
|
||||
|
||||
if (mutI >= 1 shl 16) {
|
||||
n -= 16
|
||||
mutI = mutI ushr 16
|
||||
}
|
||||
|
||||
if (mutI >= 1 shl 8) {
|
||||
n -= 8
|
||||
mutI = mutI ushr 8
|
||||
}
|
||||
|
||||
if (mutI >= 1 shl 4) {
|
||||
n -= 4
|
||||
mutI = mutI ushr 4
|
||||
}
|
||||
|
||||
if (mutI >= 1 shl 2) {
|
||||
n -= 2
|
||||
mutI = mutI ushr 2
|
||||
}
|
||||
|
||||
return n - (mutI ushr 1)
|
||||
}
|
||||
}
|
||||
|
||||
@Suppress("FunctionName")
|
||||
public fun AliasMethodDiscreteSampler(
|
||||
probabilities: DoubleArray,
|
||||
alpha: Int = DEFAULT_ALPHA
|
||||
alpha: Int = DEFAULT_ALPHA,
|
||||
): Sampler<Int> {
|
||||
// The Alias method balances N categories with counts around the mean into N sections,
|
||||
// each allocated 'mean' observations.
|
||||
@ -209,78 +279,10 @@ public open class AliasMethodDiscreteSampler private constructor(
|
||||
fillTable(probability, alias, indices, large, n)
|
||||
|
||||
// Change the algorithm for small power of 2 sized tables
|
||||
return if (isSmallPowerOf2(n))
|
||||
return if (isSmallPowerOf2(n)) {
|
||||
SmallTableAliasMethodDiscreteSampler(probability, alias)
|
||||
else
|
||||
} else {
|
||||
AliasMethodDiscreteSampler(probability, alias)
|
||||
}
|
||||
|
||||
private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int {
|
||||
var updatedSmall = small
|
||||
(length until indices.size).forEach { i -> indices[updatedSmall++] = i }
|
||||
return updatedSmall
|
||||
}
|
||||
|
||||
private fun findLastNonZeroIndex(probabilities: DoubleArray): Int {
|
||||
// No bounds check is performed when decrementing as the array contains at least one
|
||||
// value above zero.
|
||||
var nonZeroIndex = probabilities.size - 1
|
||||
while (probabilities[nonZeroIndex] == ZERO) nonZeroIndex--
|
||||
return nonZeroIndex
|
||||
}
|
||||
|
||||
private fun computeSize(length: Int, alpha: Int): Int {
|
||||
// If No padding
|
||||
if (alpha < 0) return length
|
||||
// Use the number of leading zeros function to find the next power of 2,
|
||||
// i.e. ceil(log2(x))
|
||||
var pow2 = 32 - numberOfLeadingZeros(length - 1)
|
||||
// Increase by the alpha. Clip this to limit to a positive integer (2^30)
|
||||
pow2 = min(30, pow2 + alpha)
|
||||
// Use max to handle a length above the highest possible power of 2
|
||||
return max(length, 1 shl pow2)
|
||||
}
|
||||
|
||||
private fun fillTable(
|
||||
probability: LongArray,
|
||||
alias: IntArray,
|
||||
indices: IntArray,
|
||||
start: Int,
|
||||
end: Int
|
||||
) = (start until end).forEach { i ->
|
||||
val index = indices[i]
|
||||
probability[index] = ONE_AS_NUMERATOR
|
||||
alias[index] = index
|
||||
}
|
||||
|
||||
private fun isSmallPowerOf2(n: Int): Boolean = n <= MAX_SMALL_POWER_2_SIZE && n and n - 1 == 0
|
||||
|
||||
private fun numberOfLeadingZeros(i: Int): Int {
|
||||
var mutI = i
|
||||
if (mutI <= 0) return if (mutI == 0) 32 else 0
|
||||
var n = 31
|
||||
|
||||
if (mutI >= 1 shl 16) {
|
||||
n -= 16
|
||||
mutI = mutI ushr 16
|
||||
}
|
||||
|
||||
if (mutI >= 1 shl 8) {
|
||||
n -= 8
|
||||
mutI = mutI ushr 8
|
||||
}
|
||||
|
||||
if (mutI >= 1 shl 4) {
|
||||
n -= 4
|
||||
mutI = mutI ushr 4
|
||||
}
|
||||
|
||||
if (mutI >= 1 shl 2) {
|
||||
n -= 2
|
||||
mutI = mutI ushr 2
|
||||
}
|
||||
|
||||
return n - (mutI ushr 1)
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,52 @@
|
||||
package space.kscience.kmath.samplers
|
||||
|
||||
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.structures.DoubleBuffer
|
||||
import kotlin.math.*
|
||||
|
||||
/**
|
||||
* [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian
|
||||
* distribution.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html].
|
||||
*/
|
||||
|
||||
public object BoxMullerSampler : NormalizedGaussianSampler {
|
||||
override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
|
||||
var state = Double.NaN
|
||||
|
||||
override fun nextBufferBlocking(size: Int): DoubleBuffer {
|
||||
val xs = generator.nextDoubleBuffer(size)
|
||||
val ys = generator.nextDoubleBuffer(size)
|
||||
|
||||
return DoubleBuffer(size) { index ->
|
||||
if (state.isNaN()) {
|
||||
// Generate a pair of Gaussian numbers.
|
||||
val x = xs[index]
|
||||
val y = ys[index]
|
||||
val alpha = 2 * PI * x
|
||||
val r = sqrt(-2 * ln(y))
|
||||
|
||||
// Keep second element of the pair for next invocation.
|
||||
state = r * sin(alpha)
|
||||
|
||||
// Return the first element of the generated pair.
|
||||
r * cos(alpha)
|
||||
} else {
|
||||
// Use the second element of the pair (generated at the
|
||||
// previous invocation).
|
||||
state.also {
|
||||
// Both elements of the pair have been used.
|
||||
state = Double.NaN
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,13 @@
|
||||
package space.kscience.kmath.samplers
|
||||
|
||||
import space.kscience.kmath.chains.BlockingBufferChain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.structures.Buffer
|
||||
|
||||
public class ConstantSampler<T : Any>(public val const: T) : Sampler<T> {
|
||||
override fun sample(generator: RandomGenerator): BlockingBufferChain<T> = object : BlockingBufferChain<T> {
|
||||
override fun nextBufferBlocking(size: Int): Buffer<T> = Buffer.boxing(size) { const }
|
||||
override suspend fun fork(): BlockingBufferChain<T> = this
|
||||
}
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
package space.kscience.kmath.samplers
|
||||
|
||||
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||
import space.kscience.kmath.chains.map
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
|
||||
/**
|
||||
* Sampling from a Gaussian distribution with given mean and standard deviation.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html].
|
||||
*
|
||||
* @property mean the mean of the distribution.
|
||||
* @property standardDeviation the variance of the distribution.
|
||||
*/
|
||||
public class GaussianSampler(
|
||||
public val mean: Double,
|
||||
public val standardDeviation: Double,
|
||||
private val normalized: NormalizedGaussianSampler = BoxMullerSampler
|
||||
) : Sampler<Double> {
|
||||
|
||||
init {
|
||||
require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" }
|
||||
}
|
||||
|
||||
public override fun sample(generator: RandomGenerator): BlockingDoubleChain = normalized
|
||||
.sample(generator)
|
||||
.map { standardDeviation * it + mean }
|
||||
|
||||
override fun toString(): String = "N($mean, $standardDeviation)"
|
||||
|
||||
public companion object
|
||||
}
|
@ -0,0 +1,68 @@
|
||||
package space.kscience.kmath.samplers
|
||||
|
||||
import space.kscience.kmath.chains.BlockingIntChain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.structures.IntBuffer
|
||||
import kotlin.math.exp
|
||||
|
||||
/**
|
||||
* Sampler for the Poisson distribution.
|
||||
* - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
|
||||
* This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead.
|
||||
*
|
||||
* Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor.
|
||||
*
|
||||
* Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html].
|
||||
*/
|
||||
public class KempSmallMeanPoissonSampler internal constructor(
|
||||
private val p0: Double,
|
||||
private val mean: Double,
|
||||
) : Sampler<Int> {
|
||||
public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain {
|
||||
override fun nextBlocking(): Int {
|
||||
//TODO move to nextBufferBlocking
|
||||
// Note on the algorithm:
|
||||
// - X is the unknown sample deviate (the output of the algorithm)
|
||||
// - x is the current value from the distribution
|
||||
// - p is the probability of the current value x, p(X=x)
|
||||
// - u is effectively the cumulative probability that the sample X
|
||||
// is equal or above the current value x, p(X>=x)
|
||||
// So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
|
||||
var u = generator.nextDouble()
|
||||
var x = 0
|
||||
var p = p0
|
||||
|
||||
while (u > p) {
|
||||
u -= p
|
||||
// Compute the next probability using a recurrence relation.
|
||||
// p(x+1) = p(x) * mean / (x+1)
|
||||
p *= mean / ++x
|
||||
// The algorithm listed in Kemp (1981) does not check that the rolling probability
|
||||
// is positive. This check is added to ensure no errors when the limit of the summation
|
||||
// 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic.
|
||||
if (p == 0.0) return x
|
||||
}
|
||||
|
||||
return x
|
||||
}
|
||||
|
||||
override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() }
|
||||
|
||||
override suspend fun fork(): BlockingIntChain = sample(generator.fork())
|
||||
}
|
||||
|
||||
public override fun toString(): String = "Kemp Small Mean Poisson deviate"
|
||||
}
|
||||
|
||||
public fun KempSmallMeanPoissonSampler(mean: Double): KempSmallMeanPoissonSampler {
|
||||
require(mean > 0) { "Mean is not strictly positive: $mean" }
|
||||
val p0 = exp(-mean)
|
||||
// Probability must be positive. As mean increases then p(0) decreases.
|
||||
require(p0 > 0) { "No probability for mean: $mean" }
|
||||
return KempSmallMeanPoissonSampler(p0, mean)
|
||||
}
|
||||
|
@ -0,0 +1,61 @@
|
||||
package space.kscience.kmath.samplers
|
||||
|
||||
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.structures.DoubleBuffer
|
||||
import kotlin.math.ln
|
||||
import kotlin.math.sqrt
|
||||
|
||||
/**
|
||||
* [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian
|
||||
* distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in
|
||||
* [BoxMullerNormalizedGaussianSampler].
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html]
|
||||
*/
|
||||
public object MarsagliaNormalizedGaussianSampler : NormalizedGaussianSampler {
|
||||
|
||||
override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
|
||||
var nextGaussian = Double.NaN
|
||||
|
||||
override fun nextBlocking(): Double {
|
||||
return if (nextGaussian.isNaN()) {
|
||||
val alpha: Double
|
||||
var x: Double
|
||||
|
||||
// Rejection scheme for selecting a pair that lies within the unit circle.
|
||||
while (true) {
|
||||
// Generate a pair of numbers within [-1 , 1).
|
||||
x = 2.0 * generator.nextDouble() - 1.0
|
||||
val y = 2.0 * generator.nextDouble() - 1.0
|
||||
val r2 = x * x + y * y
|
||||
|
||||
if (r2 < 1 && r2 > 0) {
|
||||
// Pair (x, y) is within unit circle.
|
||||
alpha = sqrt(-2 * ln(r2) / r2)
|
||||
// Keep second element of the pair for next invocation.
|
||||
nextGaussian = alpha * y
|
||||
// Return the first element of the generated pair.
|
||||
break
|
||||
}
|
||||
// Pair is not within the unit circle: Generate another one.
|
||||
}
|
||||
|
||||
// Return the first element of the generated pair.
|
||||
alpha * x
|
||||
} else {
|
||||
// Use the second element of the pair (generated at the
|
||||
// previous invocation).
|
||||
val r = nextGaussian
|
||||
// Both elements of the pair have been used.
|
||||
nextGaussian = Double.NaN
|
||||
r
|
||||
}
|
||||
}
|
||||
|
||||
override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { nextBlocking() }
|
||||
|
||||
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
|
||||
}
|
||||
}
|
@ -0,0 +1,18 @@
|
||||
package space.kscience.kmath.samplers
|
||||
|
||||
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
|
||||
public interface BlockingDoubleSampler: Sampler<Double>{
|
||||
override fun sample(generator: RandomGenerator): BlockingDoubleChain
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Marker interface for a sampler that generates values from an N(0,1)
|
||||
* [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution).
|
||||
*/
|
||||
public fun interface NormalizedGaussianSampler : BlockingDoubleSampler{
|
||||
public companion object
|
||||
}
|
@ -0,0 +1,203 @@
|
||||
package space.kscience.kmath.samplers
|
||||
|
||||
import space.kscience.kmath.chains.BlockingIntChain
|
||||
import space.kscience.kmath.internal.InternalUtils
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.structures.IntBuffer
|
||||
import kotlin.math.*
|
||||
|
||||
|
||||
private const val PIVOT = 40.0
|
||||
|
||||
/**
|
||||
* Sampler for the Poisson distribution.
|
||||
* - For small means, a Poisson process is simulated using uniform deviates, as described in
|
||||
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3
|
||||
* Important integer-valued distributions: The Poisson distribution. Addison Wesley.
|
||||
* The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
|
||||
* - For large means, we use the rejection algorithm described in
|
||||
* Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html].
|
||||
*/
|
||||
@Suppress("FunctionName")
|
||||
public fun PoissonSampler(mean: Double): Sampler<Int> {
|
||||
return if (mean < PIVOT) SmallMeanPoissonSampler(mean) else LargeMeanPoissonSampler(mean)
|
||||
}
|
||||
|
||||
/**
|
||||
* Sampler for the Poisson distribution.
|
||||
* - For small means, a Poisson process is simulated using uniform deviates, as described in
|
||||
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important
|
||||
* integer-valued distributions: The Poisson distribution. Addison Wesley.
|
||||
* - The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
|
||||
* This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
*
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html].
|
||||
*/
|
||||
public class SmallMeanPoissonSampler(public val mean: Double) : Sampler<Int> {
|
||||
|
||||
init {
|
||||
require(mean > 0) { "mean is not strictly positive: $mean" }
|
||||
}
|
||||
|
||||
private val p0: Double = exp(-mean)
|
||||
|
||||
private val limit: Int = if (p0 > 0) {
|
||||
ceil(1000 * mean)
|
||||
} else {
|
||||
throw IllegalArgumentException("No p(x=0) probability for mean: $mean")
|
||||
}.toInt()
|
||||
|
||||
public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain {
|
||||
override fun nextBlocking(): Int {
|
||||
var n = 0
|
||||
var r = 1.0
|
||||
|
||||
while (n < limit) {
|
||||
r *= generator.nextDouble()
|
||||
if (r >= p0) n++ else break
|
||||
}
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() }
|
||||
|
||||
override suspend fun fork(): BlockingIntChain = sample(generator.fork())
|
||||
}
|
||||
|
||||
public override fun toString(): String = "Small Mean Poisson deviate"
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sampler for the Poisson distribution.
|
||||
* - For large means, we use the rejection algorithm described in
|
||||
* Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables
|
||||
* Computing vol. 26 pp. 197-207.
|
||||
*
|
||||
* This sampler is suitable for mean >= 40.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html].
|
||||
*/
|
||||
public class LargeMeanPoissonSampler(public val mean: Double) : Sampler<Int> {
|
||||
|
||||
init {
|
||||
require(mean >= 1) { "mean is not >= 1: $mean" }
|
||||
// The algorithm is not valid if Math.floor(mean) is not an integer.
|
||||
require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" }
|
||||
}
|
||||
|
||||
private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG
|
||||
private val lambda: Double = floor(mean)
|
||||
private val logLambda: Double = ln(lambda)
|
||||
private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt())
|
||||
private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1))
|
||||
private val halfDelta: Double = delta / 2
|
||||
private val twolpd: Double = 2 * lambda + delta
|
||||
private val c1: Double = 1 / (8 * lambda)
|
||||
private val a1: Double = sqrt(PI * twolpd) * exp(c1)
|
||||
private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd)
|
||||
private val aSum: Double = a1 + a2 + 1
|
||||
private val p1: Double = a1 / aSum
|
||||
private val p2: Double = a2 / aSum
|
||||
|
||||
public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain {
|
||||
override fun nextBlocking(): Int {
|
||||
val exponential = AhrensDieterExponentialSampler(1.0).sample(generator)
|
||||
val gaussian = ZigguratNormalizedGaussianSampler.sample(generator)
|
||||
|
||||
val smallMeanPoissonSampler = if (mean - lambda < Double.MIN_VALUE) {
|
||||
null
|
||||
} else {
|
||||
KempSmallMeanPoissonSampler(mean - lambda).sample(generator)
|
||||
}
|
||||
|
||||
val y2 = smallMeanPoissonSampler?.nextBlocking() ?: 0
|
||||
var x: Double
|
||||
var y: Double
|
||||
var v: Double
|
||||
var a: Int
|
||||
var t: Double
|
||||
var qr: Double
|
||||
var qa: Double
|
||||
|
||||
while (true) {
|
||||
// Step 1:
|
||||
val u = generator.nextDouble()
|
||||
|
||||
if (u <= p1) {
|
||||
// Step 2:
|
||||
val n = gaussian.nextBlocking()
|
||||
x = n * sqrt(lambda + halfDelta) - 0.5
|
||||
if (x > delta || x < -lambda) continue
|
||||
y = if (x < 0) floor(x) else ceil(x)
|
||||
val e = exponential.nextBlocking()
|
||||
v = -e - 0.5 * n * n + c1
|
||||
} else {
|
||||
// Step 3:
|
||||
if (u > p1 + p2) {
|
||||
y = lambda
|
||||
break
|
||||
}
|
||||
|
||||
x = delta + twolpd / delta * exponential.nextBlocking()
|
||||
y = ceil(x)
|
||||
v = -exponential.nextBlocking() - delta * (x + 1) / twolpd
|
||||
}
|
||||
|
||||
// The Squeeze Principle
|
||||
// Step 4.1:
|
||||
a = if (x < 0) 1 else 0
|
||||
t = y * (y + 1) / (2 * lambda)
|
||||
|
||||
// Step 4.2
|
||||
if (v < -t && a == 0) {
|
||||
y += lambda
|
||||
break
|
||||
}
|
||||
|
||||
// Step 4.3:
|
||||
qr = t * ((2 * y + 1) / (6 * lambda) - 1)
|
||||
qa = qr - t * t / (3 * (lambda + a * (y + 1)))
|
||||
|
||||
// Step 4.4:
|
||||
if (v < qa) {
|
||||
y += lambda
|
||||
break
|
||||
}
|
||||
|
||||
// Step 4.5:
|
||||
if (v > qr) continue
|
||||
|
||||
// Step 4.6:
|
||||
if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) {
|
||||
y += lambda
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt()
|
||||
}
|
||||
|
||||
override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() }
|
||||
|
||||
override suspend fun fork(): BlockingIntChain = sample(generator.fork())
|
||||
}
|
||||
|
||||
private fun getFactorialLog(n: Int): Double = factorialLog.value(n)
|
||||
public override fun toString(): String = "Large Mean Poisson deviate"
|
||||
|
||||
public companion object {
|
||||
private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE
|
||||
private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,88 @@
|
||||
package space.kscience.kmath.samplers
|
||||
|
||||
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.structures.DoubleBuffer
|
||||
import kotlin.math.*
|
||||
|
||||
/**
|
||||
* [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a
|
||||
* Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this
|
||||
* implementation has been adapted from the C code provided therein.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html].
|
||||
*/
|
||||
public object ZigguratNormalizedGaussianSampler : NormalizedGaussianSampler {
|
||||
|
||||
private const val R: Double = 3.442619855899
|
||||
private const val ONE_OVER_R: Double = 1 / R
|
||||
private const val V: Double = 9.91256303526217e-3
|
||||
private val MAX: Double = 2.0.pow(63.0)
|
||||
private val ONE_OVER_MAX: Double = 1.0 / MAX
|
||||
private const val LEN: Int = 128
|
||||
private const val LAST: Int = LEN - 1
|
||||
private val K: LongArray = LongArray(LEN)
|
||||
private val W: DoubleArray = DoubleArray(LEN)
|
||||
private val F: DoubleArray = DoubleArray(LEN)
|
||||
|
||||
init {
|
||||
// Filling the tables.
|
||||
var d = R
|
||||
var t = d
|
||||
var fd = gauss(d)
|
||||
val q = V / fd
|
||||
K[0] = (d / q * MAX).toLong()
|
||||
K[1] = 0
|
||||
W[0] = q * ONE_OVER_MAX
|
||||
W[LAST] = d * ONE_OVER_MAX
|
||||
F[0] = 1.0
|
||||
F[LAST] = fd
|
||||
|
||||
(LAST - 1 downTo 1).forEach { i ->
|
||||
d = sqrt(-2 * ln(V / d + fd))
|
||||
fd = gauss(d)
|
||||
K[i + 1] = (d / t * MAX).toLong()
|
||||
t = d
|
||||
F[i] = fd
|
||||
W[i] = d * ONE_OVER_MAX
|
||||
}
|
||||
}
|
||||
|
||||
private fun gauss(x: Double): Double = exp(-0.5 * x * x)
|
||||
|
||||
private fun sampleOne(generator: RandomGenerator): Double {
|
||||
val j = generator.nextLong()
|
||||
val i = (j and LAST.toLong()).toInt()
|
||||
return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i)
|
||||
}
|
||||
|
||||
override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
|
||||
override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { sampleOne(generator) }
|
||||
|
||||
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
|
||||
}
|
||||
|
||||
|
||||
private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double {
|
||||
var x = hz * W[iz]
|
||||
|
||||
return when {
|
||||
iz == 0 -> {
|
||||
var y: Double
|
||||
|
||||
do {
|
||||
y = -ln(generator.nextDouble())
|
||||
x = -ln(generator.nextDouble()) * ONE_OVER_R
|
||||
} while (y + y < x * x)
|
||||
|
||||
val out = R + x
|
||||
if (hz > 0) out else -out
|
||||
}
|
||||
|
||||
F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x
|
||||
else -> sampleOne(generator)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,8 +1,8 @@
|
||||
package space.kscience.kmath.stat
|
||||
|
||||
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||
import space.kscience.kmath.chains.BlockingIntChain
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.structures.DoubleBuffer
|
||||
|
||||
/**
|
||||
* A possibly stateful chain producing random values.
|
||||
@ -11,12 +11,24 @@ import space.kscience.kmath.chains.Chain
|
||||
*/
|
||||
public class RandomChain<out R>(
|
||||
public val generator: RandomGenerator,
|
||||
private val gen: suspend RandomGenerator.() -> R
|
||||
private val gen: suspend RandomGenerator.() -> R,
|
||||
) : Chain<R> {
|
||||
override suspend fun next(): R = generator.gen()
|
||||
override fun fork(): Chain<R> = RandomChain(generator.fork(), gen)
|
||||
override suspend fun fork(): Chain<R> = RandomChain(generator.fork(), gen)
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a generic random chain with provided [generator]
|
||||
*/
|
||||
public fun <R> RandomGenerator.chain(generator: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, generator)
|
||||
|
||||
/**
|
||||
* A type-specific double chunk random chain
|
||||
*/
|
||||
public class UniformDoubleChain(public val generator: RandomGenerator) : BlockingDoubleChain {
|
||||
public override fun nextBufferBlocking(size: Int): DoubleBuffer = generator.nextDoubleBuffer(size)
|
||||
override suspend fun nextBuffer(size: Int): DoubleBuffer = nextBufferBlocking(size)
|
||||
|
||||
override suspend fun fork(): UniformDoubleChain = UniformDoubleChain(generator.fork())
|
||||
}
|
||||
|
||||
public fun <R> RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, gen)
|
||||
public fun Chain<Double>.blocking(): BlockingDoubleChain = object : Chain<Double> by this, BlockingDoubleChain {}
|
||||
public fun Chain<Int>.blocking(): BlockingIntChain = object : Chain<Int> by this, BlockingIntChain {}
|
||||
|
@ -1,5 +1,6 @@
|
||||
package space.kscience.kmath.stat
|
||||
|
||||
import space.kscience.kmath.structures.DoubleBuffer
|
||||
import kotlin.random.Random
|
||||
|
||||
/**
|
||||
@ -16,6 +17,11 @@ public interface RandomGenerator {
|
||||
*/
|
||||
public fun nextDouble(): Double
|
||||
|
||||
/**
|
||||
* A chunk of doubles of given [size]
|
||||
*/
|
||||
public fun nextDoubleBuffer(size: Int): DoubleBuffer = DoubleBuffer(size) { nextDouble() }
|
||||
|
||||
/**
|
||||
* Gets the next random `Int` from the random number generator.
|
||||
*
|
||||
|
@ -3,16 +3,13 @@ package space.kscience.kmath.stat
|
||||
import kotlinx.coroutines.flow.first
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.chains.collect
|
||||
import space.kscience.kmath.structures.Buffer
|
||||
import space.kscience.kmath.structures.BufferFactory
|
||||
import space.kscience.kmath.structures.IntBuffer
|
||||
import space.kscience.kmath.structures.MutableBuffer
|
||||
import space.kscience.kmath.structures.*
|
||||
import kotlin.jvm.JvmName
|
||||
|
||||
/**
|
||||
* Sampler that generates chains of values of type [T].
|
||||
* Sampler that generates chains of values of type [T] in a chain of type [C].
|
||||
*/
|
||||
public fun interface Sampler<T : Any> {
|
||||
public fun interface Sampler<out T : Any> {
|
||||
/**
|
||||
* Generates a chain of samples.
|
||||
*
|
||||
@ -22,39 +19,6 @@ public fun interface Sampler<T : Any> {
|
||||
public fun sample(generator: RandomGenerator): Chain<T>
|
||||
}
|
||||
|
||||
/**
|
||||
* A distribution of typed objects.
|
||||
*/
|
||||
public interface Distribution<T : Any> : Sampler<T> {
|
||||
/**
|
||||
* A probability value for given argument [arg].
|
||||
* For continuous distributions returns PDF
|
||||
*/
|
||||
public fun probability(arg: T): Double
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<T>
|
||||
|
||||
/**
|
||||
* An empty companion. Distribution factories should be written as its extensions
|
||||
*/
|
||||
public companion object
|
||||
}
|
||||
|
||||
public interface UnivariateDistribution<T : Comparable<T>> : Distribution<T> {
|
||||
/**
|
||||
* Cumulative distribution for ordered parameter (CDF)
|
||||
*/
|
||||
public fun cumulative(arg: T): Double
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute probability integral in an interval
|
||||
*/
|
||||
public fun <T : Comparable<T>> UnivariateDistribution<T>.integral(from: T, to: T): Double {
|
||||
require(to > from)
|
||||
return cumulative(to) - cumulative(from)
|
||||
}
|
||||
|
||||
/**
|
||||
* Sample a bunch of values
|
||||
*/
|
||||
@ -71,7 +35,7 @@ public fun <T : Any> Sampler<T>.sampleBuffer(
|
||||
//clear list from previous run
|
||||
tmp.clear()
|
||||
//Fill list
|
||||
repeat(size) { tmp += chain.next() }
|
||||
repeat(size) { tmp.add(chain.next()) }
|
||||
//return new buffer with elements from tmp
|
||||
bufferFactory(size) { tmp[it] }
|
||||
}
|
||||
@ -87,7 +51,7 @@ public suspend fun <T : Any> Sampler<T>.next(generator: RandomGenerator): T = sa
|
||||
*/
|
||||
@JvmName("sampleRealBuffer")
|
||||
public fun Sampler<Double>.sampleBuffer(generator: RandomGenerator, size: Int): Chain<Buffer<Double>> =
|
||||
sampleBuffer(generator, size, MutableBuffer.Companion::double)
|
||||
sampleBuffer(generator, size, ::DoubleBuffer)
|
||||
|
||||
/**
|
||||
* Generates [size] integer samples and chunks them into some buffers.
|
@ -81,7 +81,7 @@ public class Mean<T>(
|
||||
|
||||
public companion object {
|
||||
//TODO replace with optimized version which respects overflow
|
||||
public val real: Mean<Double> = Mean(DoubleField) { sum, count -> sum / count }
|
||||
public val double: Mean<Double> = Mean(DoubleField) { sum, count -> sum / count }
|
||||
public val int: Mean<Int> = Mean(IntRing) { sum, count -> sum / count }
|
||||
public val long: Mean<Long> = Mean(LongRing) { sum, count -> sum / count }
|
||||
}
|
||||
|
@ -2,6 +2,8 @@ package space.kscience.kmath.stat
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.chains.SimpleChain
|
||||
import space.kscience.kmath.distributions.Distribution
|
||||
import space.kscience.kmath.distributions.UnivariateDistribution
|
||||
|
||||
public class UniformDistribution(public val range: ClosedFloatingPointRange<Double>) : UnivariateDistribution<Double> {
|
||||
private val length: Double = range.endInclusive - range.start
|
||||
|
@ -1,73 +0,0 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import space.kscience.kmath.stat.internal.InternalUtils
|
||||
import kotlin.math.ln
|
||||
import kotlin.math.pow
|
||||
|
||||
/**
|
||||
* Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html).
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html].
|
||||
*/
|
||||
public class AhrensDieterExponentialSampler private constructor(public val mean: Double) : Sampler<Double> {
|
||||
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
|
||||
// Step 1:
|
||||
var a = 0.0
|
||||
var u = nextDouble()
|
||||
|
||||
// Step 2 and 3:
|
||||
while (u < 0.5) {
|
||||
a += EXPONENTIAL_SA_QI[0]
|
||||
u *= 2.0
|
||||
}
|
||||
|
||||
// Step 4 (now u >= 0.5):
|
||||
u += u - 1
|
||||
// Step 5:
|
||||
if (u <= EXPONENTIAL_SA_QI[0]) return@chain mean * (a + u)
|
||||
// Step 6:
|
||||
var i = 0 // Should be 1, be we iterate before it in while using 0.
|
||||
var u2 = nextDouble()
|
||||
var umin = u2
|
||||
|
||||
// Step 7 and 8:
|
||||
do {
|
||||
++i
|
||||
u2 = nextDouble()
|
||||
if (u2 < umin) umin = u2
|
||||
// Step 8:
|
||||
} while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1.
|
||||
|
||||
mean * (a + umin * EXPONENTIAL_SA_QI[0])
|
||||
}
|
||||
|
||||
override fun toString(): String = "Ahrens-Dieter Exponential deviate"
|
||||
|
||||
public companion object {
|
||||
private val EXPONENTIAL_SA_QI by lazy { DoubleArray(16) }
|
||||
|
||||
init {
|
||||
/**
|
||||
* Filling EXPONENTIAL_SA_QI table.
|
||||
* Note that we don't want qi = 0 in the table.
|
||||
*/
|
||||
val ln2 = ln(2.0)
|
||||
var qi = 0.0
|
||||
|
||||
EXPONENTIAL_SA_QI.indices.forEach { i ->
|
||||
qi += ln2.pow(i + 1.0) / InternalUtils.factorial(i + 1)
|
||||
EXPONENTIAL_SA_QI[i] = qi
|
||||
}
|
||||
}
|
||||
|
||||
public fun of(mean: Double): AhrensDieterExponentialSampler {
|
||||
require(mean > 0) { "mean is not strictly positive: $mean" }
|
||||
return AhrensDieterExponentialSampler(mean)
|
||||
}
|
||||
}
|
||||
}
|
@ -1,48 +0,0 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import kotlin.math.*
|
||||
|
||||
/**
|
||||
* [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian
|
||||
* distribution.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html].
|
||||
*/
|
||||
public class BoxMullerNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
|
||||
private var nextGaussian: Double = Double.NaN
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
|
||||
val random: Double
|
||||
|
||||
if (nextGaussian.isNaN()) {
|
||||
// Generate a pair of Gaussian numbers.
|
||||
val x = nextDouble()
|
||||
val y = nextDouble()
|
||||
val alpha = 2 * PI * x
|
||||
val r = sqrt(-2 * ln(y))
|
||||
// Return the first element of the generated pair.
|
||||
random = r * cos(alpha)
|
||||
// Keep second element of the pair for next invocation.
|
||||
nextGaussian = r * sin(alpha)
|
||||
} else {
|
||||
// Use the second element of the pair (generated at the
|
||||
// previous invocation).
|
||||
random = nextGaussian
|
||||
// Both elements of the pair have been used.
|
||||
nextGaussian = Double.NaN
|
||||
}
|
||||
|
||||
random
|
||||
}
|
||||
|
||||
public override fun toString(): String = "Box-Muller normalized Gaussian deviate"
|
||||
|
||||
public companion object {
|
||||
public fun of(): BoxMullerNormalizedGaussianSampler = BoxMullerNormalizedGaussianSampler()
|
||||
}
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.chains.map
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
|
||||
/**
|
||||
* Sampling from a Gaussian distribution with given mean and standard deviation.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html].
|
||||
*
|
||||
* @property mean the mean of the distribution.
|
||||
* @property standardDeviation the variance of the distribution.
|
||||
*/
|
||||
public class GaussianSampler private constructor(
|
||||
public val mean: Double,
|
||||
public val standardDeviation: Double,
|
||||
private val normalized: NormalizedGaussianSampler
|
||||
) : Sampler<Double> {
|
||||
public override fun sample(generator: RandomGenerator): Chain<Double> = normalized
|
||||
.sample(generator)
|
||||
.map { standardDeviation * it + mean }
|
||||
|
||||
override fun toString(): String = "Gaussian deviate [$normalized]"
|
||||
|
||||
public companion object {
|
||||
public fun of(
|
||||
mean: Double,
|
||||
standardDeviation: Double,
|
||||
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of()
|
||||
): GaussianSampler {
|
||||
require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" }
|
||||
|
||||
return GaussianSampler(
|
||||
mean,
|
||||
standardDeviation,
|
||||
normalized
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
@ -1,63 +0,0 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import kotlin.math.exp
|
||||
|
||||
/**
|
||||
* Sampler for the Poisson distribution.
|
||||
* - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
|
||||
* This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead.
|
||||
*
|
||||
* Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor.
|
||||
*
|
||||
* Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html].
|
||||
*/
|
||||
public class KempSmallMeanPoissonSampler private constructor(
|
||||
private val p0: Double,
|
||||
private val mean: Double
|
||||
) : Sampler<Int> {
|
||||
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
|
||||
// Note on the algorithm:
|
||||
// - X is the unknown sample deviate (the output of the algorithm)
|
||||
// - x is the current value from the distribution
|
||||
// - p is the probability of the current value x, p(X=x)
|
||||
// - u is effectively the cumulative probability that the sample X
|
||||
// is equal or above the current value x, p(X>=x)
|
||||
// So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
|
||||
var u = nextDouble()
|
||||
var x = 0
|
||||
var p = p0
|
||||
|
||||
while (u > p) {
|
||||
u -= p
|
||||
// Compute the next probability using a recurrence relation.
|
||||
// p(x+1) = p(x) * mean / (x+1)
|
||||
p *= mean / ++x
|
||||
// The algorithm listed in Kemp (1981) does not check that the rolling probability
|
||||
// is positive. This check is added to ensure no errors when the limit of the summation
|
||||
// 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic.
|
||||
if (p == 0.0) return@chain x
|
||||
}
|
||||
|
||||
x
|
||||
}
|
||||
|
||||
public override fun toString(): String = "Kemp Small Mean Poisson deviate"
|
||||
|
||||
public companion object {
|
||||
public fun of(mean: Double): KempSmallMeanPoissonSampler {
|
||||
require(mean > 0) { "Mean is not strictly positive: $mean" }
|
||||
val p0 = exp(-mean)
|
||||
// Probability must be positive. As mean increases then p(0) decreases.
|
||||
require(p0 > 0) { "No probability for mean: $mean" }
|
||||
return KempSmallMeanPoissonSampler(p0, mean)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,130 +0,0 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.chains.ConstantChain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import space.kscience.kmath.stat.internal.InternalUtils
|
||||
import space.kscience.kmath.stat.next
|
||||
import kotlin.math.*
|
||||
|
||||
/**
|
||||
* Sampler for the Poisson distribution.
|
||||
* - For large means, we use the rejection algorithm described in
|
||||
* Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables
|
||||
* Computing vol. 26 pp. 197-207.
|
||||
*
|
||||
* This sampler is suitable for mean >= 40.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html].
|
||||
*/
|
||||
public class LargeMeanPoissonSampler private constructor(public val mean: Double) : Sampler<Int> {
|
||||
private val exponential: Sampler<Double> = AhrensDieterExponentialSampler.of(1.0)
|
||||
private val gaussian: Sampler<Double> = ZigguratNormalizedGaussianSampler.of()
|
||||
private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG
|
||||
private val lambda: Double = floor(mean)
|
||||
private val logLambda: Double = ln(lambda)
|
||||
private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt())
|
||||
private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1))
|
||||
private val halfDelta: Double = delta / 2
|
||||
private val twolpd: Double = 2 * lambda + delta
|
||||
private val c1: Double = 1 / (8 * lambda)
|
||||
private val a1: Double = sqrt(PI * twolpd) * exp(c1)
|
||||
private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd)
|
||||
private val aSum: Double = a1 + a2 + 1
|
||||
private val p1: Double = a1 / aSum
|
||||
private val p2: Double = a2 / aSum
|
||||
|
||||
private val smallMeanPoissonSampler: Sampler<Int> = if (mean - lambda < Double.MIN_VALUE)
|
||||
NO_SMALL_MEAN_POISSON_SAMPLER
|
||||
else // Not used.
|
||||
KempSmallMeanPoissonSampler.of(mean - lambda)
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
|
||||
// This will never be null. It may be a no-op delegate that returns zero.
|
||||
val y2 = smallMeanPoissonSampler.next(generator)
|
||||
var x: Double
|
||||
var y: Double
|
||||
var v: Double
|
||||
var a: Int
|
||||
var t: Double
|
||||
var qr: Double
|
||||
var qa: Double
|
||||
|
||||
while (true) {
|
||||
// Step 1:
|
||||
val u = generator.nextDouble()
|
||||
|
||||
if (u <= p1) {
|
||||
// Step 2:
|
||||
val n = gaussian.next(generator)
|
||||
x = n * sqrt(lambda + halfDelta) - 0.5
|
||||
if (x > delta || x < -lambda) continue
|
||||
y = if (x < 0) floor(x) else ceil(x)
|
||||
val e = exponential.next(generator)
|
||||
v = -e - 0.5 * n * n + c1
|
||||
} else {
|
||||
// Step 3:
|
||||
if (u > p1 + p2) {
|
||||
y = lambda
|
||||
break
|
||||
}
|
||||
|
||||
x = delta + twolpd / delta * exponential.next(generator)
|
||||
y = ceil(x)
|
||||
v = -exponential.next(generator) - delta * (x + 1) / twolpd
|
||||
}
|
||||
|
||||
// The Squeeze Principle
|
||||
// Step 4.1:
|
||||
a = if (x < 0) 1 else 0
|
||||
t = y * (y + 1) / (2 * lambda)
|
||||
|
||||
// Step 4.2
|
||||
if (v < -t && a == 0) {
|
||||
y += lambda
|
||||
break
|
||||
}
|
||||
|
||||
// Step 4.3:
|
||||
qr = t * ((2 * y + 1) / (6 * lambda) - 1)
|
||||
qa = qr - t * t / (3 * (lambda + a * (y + 1)))
|
||||
|
||||
// Step 4.4:
|
||||
if (v < qa) {
|
||||
y += lambda
|
||||
break
|
||||
}
|
||||
|
||||
// Step 4.5:
|
||||
if (v > qr) continue
|
||||
|
||||
// Step 4.6:
|
||||
if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) {
|
||||
y += lambda
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt()
|
||||
}
|
||||
|
||||
private fun getFactorialLog(n: Int): Double = factorialLog.value(n)
|
||||
public override fun toString(): String = "Large Mean Poisson deviate"
|
||||
|
||||
public companion object {
|
||||
private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE
|
||||
private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create()
|
||||
|
||||
private val NO_SMALL_MEAN_POISSON_SAMPLER: Sampler<Int> = Sampler { ConstantChain(0) }
|
||||
|
||||
public fun of(mean: Double): LargeMeanPoissonSampler {
|
||||
require(mean >= 1) { "mean is not >= 1: $mean" }
|
||||
// The algorithm is not valid if Math.floor(mean) is not an integer.
|
||||
require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" }
|
||||
return LargeMeanPoissonSampler(mean)
|
||||
}
|
||||
}
|
||||
}
|
@ -1,61 +0,0 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import kotlin.math.ln
|
||||
import kotlin.math.sqrt
|
||||
|
||||
/**
|
||||
* [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian
|
||||
* distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in
|
||||
* [BoxMullerNormalizedGaussianSampler].
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html]
|
||||
*/
|
||||
public class MarsagliaNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
|
||||
private var nextGaussian = Double.NaN
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
|
||||
if (nextGaussian.isNaN()) {
|
||||
val alpha: Double
|
||||
var x: Double
|
||||
|
||||
// Rejection scheme for selecting a pair that lies within the unit circle.
|
||||
while (true) {
|
||||
// Generate a pair of numbers within [-1 , 1).
|
||||
x = 2.0 * generator.nextDouble() - 1.0
|
||||
val y = 2.0 * generator.nextDouble() - 1.0
|
||||
val r2 = x * x + y * y
|
||||
|
||||
if (r2 < 1 && r2 > 0) {
|
||||
// Pair (x, y) is within unit circle.
|
||||
alpha = sqrt(-2 * ln(r2) / r2)
|
||||
// Keep second element of the pair for next invocation.
|
||||
nextGaussian = alpha * y
|
||||
// Return the first element of the generated pair.
|
||||
break
|
||||
}
|
||||
// Pair is not within the unit circle: Generate another one.
|
||||
}
|
||||
|
||||
// Return the first element of the generated pair.
|
||||
alpha * x
|
||||
} else {
|
||||
// Use the second element of the pair (generated at the
|
||||
// previous invocation).
|
||||
val r = nextGaussian
|
||||
// Both elements of the pair have been used.
|
||||
nextGaussian = Double.NaN
|
||||
r
|
||||
}
|
||||
}
|
||||
|
||||
public override fun toString(): String = "Box-Muller (with rejection) normalized Gaussian deviate"
|
||||
|
||||
public companion object {
|
||||
public fun of(): MarsagliaNormalizedGaussianSampler = MarsagliaNormalizedGaussianSampler()
|
||||
}
|
||||
}
|
@ -1,9 +0,0 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
|
||||
/**
|
||||
* Marker interface for a sampler that generates values from an N(0,1)
|
||||
* [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution).
|
||||
*/
|
||||
public interface NormalizedGaussianSampler : Sampler<Double>
|
@ -1,30 +0,0 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
|
||||
/**
|
||||
* Sampler for the Poisson distribution.
|
||||
* - For small means, a Poisson process is simulated using uniform deviates, as described in
|
||||
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3
|
||||
* Important integer-valued distributions: The Poisson distribution. Addison Wesley.
|
||||
* The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
|
||||
* - For large means, we use the rejection algorithm described in
|
||||
* Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html].
|
||||
*/
|
||||
public class PoissonSampler private constructor(mean: Double) : Sampler<Int> {
|
||||
private val poissonSamplerDelegate: Sampler<Int> = of(mean)
|
||||
public override fun sample(generator: RandomGenerator): Chain<Int> = poissonSamplerDelegate.sample(generator)
|
||||
public override fun toString(): String = poissonSamplerDelegate.toString()
|
||||
|
||||
public companion object {
|
||||
private const val PIVOT = 40.0
|
||||
|
||||
public fun of(mean: Double): Sampler<Int> =// Each sampler should check the input arguments.
|
||||
if (mean < PIVOT) SmallMeanPoissonSampler.of(mean) else LargeMeanPoissonSampler.of(mean)
|
||||
}
|
||||
}
|
@ -1,50 +0,0 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import kotlin.math.ceil
|
||||
import kotlin.math.exp
|
||||
|
||||
/**
|
||||
* Sampler for the Poisson distribution.
|
||||
* - For small means, a Poisson process is simulated using uniform deviates, as described in
|
||||
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important
|
||||
* integer-valued distributions: The Poisson distribution. Addison Wesley.
|
||||
* - The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
|
||||
* This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
*
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html].
|
||||
*/
|
||||
public class SmallMeanPoissonSampler private constructor(mean: Double) : Sampler<Int> {
|
||||
private val p0: Double = exp(-mean)
|
||||
|
||||
private val limit: Int = (if (p0 > 0)
|
||||
ceil(1000 * mean)
|
||||
else
|
||||
throw IllegalArgumentException("No p(x=0) probability for mean: $mean")).toInt()
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
|
||||
var n = 0
|
||||
var r = 1.0
|
||||
|
||||
while (n < limit) {
|
||||
r *= nextDouble()
|
||||
if (r >= p0) n++ else break
|
||||
}
|
||||
|
||||
n
|
||||
}
|
||||
|
||||
public override fun toString(): String = "Small Mean Poisson deviate"
|
||||
|
||||
public companion object {
|
||||
public fun of(mean: Double): SmallMeanPoissonSampler {
|
||||
require(mean > 0) { "mean is not strictly positive: $mean" }
|
||||
return SmallMeanPoissonSampler(mean)
|
||||
}
|
||||
}
|
||||
}
|
@ -1,88 +0,0 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import kotlin.math.*
|
||||
|
||||
/**
|
||||
* [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a
|
||||
* Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this
|
||||
* implementation has been adapted from the C code provided therein.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html].
|
||||
*/
|
||||
public class ZigguratNormalizedGaussianSampler private constructor() :
|
||||
NormalizedGaussianSampler, Sampler<Double> {
|
||||
|
||||
private fun sampleOne(generator: RandomGenerator): Double {
|
||||
val j = generator.nextLong()
|
||||
val i = (j and LAST.toLong()).toInt()
|
||||
return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i)
|
||||
}
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain { sampleOne(this) }
|
||||
public override fun toString(): String = "Ziggurat normalized Gaussian deviate"
|
||||
|
||||
private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double {
|
||||
var x = hz * W[iz]
|
||||
|
||||
return when {
|
||||
iz == 0 -> {
|
||||
var y: Double
|
||||
|
||||
do {
|
||||
y = -ln(generator.nextDouble())
|
||||
x = -ln(generator.nextDouble()) * ONE_OVER_R
|
||||
} while (y + y < x * x)
|
||||
|
||||
val out = R + x
|
||||
if (hz > 0) out else -out
|
||||
}
|
||||
|
||||
F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x
|
||||
else -> sampleOne(generator)
|
||||
}
|
||||
}
|
||||
|
||||
public companion object {
|
||||
private const val R: Double = 3.442619855899
|
||||
private const val ONE_OVER_R: Double = 1 / R
|
||||
private const val V: Double = 9.91256303526217e-3
|
||||
private val MAX: Double = 2.0.pow(63.0)
|
||||
private val ONE_OVER_MAX: Double = 1.0 / MAX
|
||||
private const val LEN: Int = 128
|
||||
private const val LAST: Int = LEN - 1
|
||||
private val K: LongArray = LongArray(LEN)
|
||||
private val W: DoubleArray = DoubleArray(LEN)
|
||||
private val F: DoubleArray = DoubleArray(LEN)
|
||||
|
||||
init {
|
||||
// Filling the tables.
|
||||
var d = R
|
||||
var t = d
|
||||
var fd = gauss(d)
|
||||
val q = V / fd
|
||||
K[0] = (d / q * MAX).toLong()
|
||||
K[1] = 0
|
||||
W[0] = q * ONE_OVER_MAX
|
||||
W[LAST] = d * ONE_OVER_MAX
|
||||
F[0] = 1.0
|
||||
F[LAST] = fd
|
||||
|
||||
(LAST - 1 downTo 1).forEach { i ->
|
||||
d = sqrt(-2 * ln(V / d + fd))
|
||||
fd = gauss(d)
|
||||
K[i + 1] = (d / t * MAX).toLong()
|
||||
t = d
|
||||
F[i] = fd
|
||||
W[i] = d * ONE_OVER_MAX
|
||||
}
|
||||
}
|
||||
|
||||
public fun of(): ZigguratNormalizedGaussianSampler = ZigguratNormalizedGaussianSampler()
|
||||
private fun gauss(x: Double): Double = exp(-0.5 * x * x)
|
||||
}
|
||||
}
|
@ -5,22 +5,23 @@ import kotlinx.coroutines.flow.toList
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import org.junit.jupiter.api.Assertions
|
||||
import org.junit.jupiter.api.Test
|
||||
import space.kscience.kmath.stat.samplers.GaussianSampler
|
||||
import space.kscience.kmath.samplers.GaussianSampler
|
||||
import space.kscience.kmath.structures.asBuffer
|
||||
|
||||
internal class CommonsDistributionsTest {
|
||||
@Test
|
||||
fun testNormalDistributionSuspend() {
|
||||
val distribution = GaussianSampler.of(7.0, 2.0)
|
||||
fun testNormalDistributionSuspend() = runBlocking {
|
||||
val distribution = GaussianSampler(7.0, 2.0)
|
||||
val generator = RandomGenerator.default(1)
|
||||
val sample = runBlocking { distribution.sample(generator).take(1000).toList() }
|
||||
Assertions.assertEquals(7.0, sample.average(), 0.1)
|
||||
val sample = distribution.sample(generator).take(1000).toList().asBuffer()
|
||||
Assertions.assertEquals(7.0, Mean.double(sample), 0.2)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testNormalDistributionBlocking() {
|
||||
val distribution = GaussianSampler.of(7.0, 2.0)
|
||||
fun testNormalDistributionBlocking() = runBlocking {
|
||||
val distribution = GaussianSampler(7.0, 2.0)
|
||||
val generator = RandomGenerator.default(1)
|
||||
val sample = runBlocking { distribution.sample(generator).blocking().nextBlock(1000) }
|
||||
Assertions.assertEquals(7.0, sample.average(), 0.1)
|
||||
val sample = distribution.sample(generator).nextBufferBlocking(1000)
|
||||
Assertions.assertEquals(7.0, Mean.double(sample), 0.2)
|
||||
}
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ internal class StatisticTest {
|
||||
@Test
|
||||
fun testParallelMean() {
|
||||
runBlocking {
|
||||
val average = Mean.real
|
||||
val average = Mean.double
|
||||
.flow(chunked) //create a flow with results
|
||||
.drop(99) // Skip first 99 values and use one with total data
|
||||
.first() //get 1e5 data samples average
|
||||
|
Loading…
Reference in New Issue
Block a user