Merge pull request #164 from mipt-npm/feature/mp-samplers

Implement Commons RNG-like samplers in kmath-prob module for Multiplatform
This commit is contained in:
Alexander Nozik 2021-03-31 09:25:44 +03:00 committed by GitHub
commit b4fc311668
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
41 changed files with 1596 additions and 227 deletions

View File

@ -106,6 +106,7 @@ kotlin.sourceSets.all {
with(languageSettings) {
useExperimentalAnnotation("kotlin.contracts.ExperimentalContracts")
useExperimentalAnnotation("kotlin.ExperimentalUnsignedTypes")
useExperimentalAnnotation("space.kscience.kmath.misc.UnstableKMathAPI")
}
}

View File

@ -13,9 +13,8 @@ import space.kscience.kmath.optimization.OptimizationResult
import space.kscience.kmath.real.DoubleVector
import space.kscience.kmath.real.map
import space.kscience.kmath.real.step
import space.kscience.kmath.stat.Distribution
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.normal
import space.kscience.kmath.stat.distributions.NormalDistribution
import space.kscience.kmath.structures.asIterable
import space.kscience.kmath.structures.toList
import kotlin.math.pow
@ -37,10 +36,9 @@ operator fun TraceValues.invoke(vector: DoubleVector) {
/**
* Least squares fie with auto-differentiation. Uses `kmath-commons` and `kmath-for-real` modules.
*/
fun main() {
suspend fun main() {
//A generator for a normally distributed values
val generator = Distribution.normal()
val generator = NormalDistribution(2.0, 7.0)
//A chain/flow of random values with the given seed
val chain = generator.sample(RandomGenerator.default(112667))
@ -53,7 +51,7 @@ fun main() {
//Perform an operation on each x value (much more effective, than numpy)
val y = x.map {
val value = it.pow(2) + it + 1
value + chain.nextDouble() * sqrt(value)
value + chain.next() * sqrt(value)
}
// this will also work, but less effective:
// val y = x.pow(2)+ x + 1 + chain.nextDouble()

View File

@ -1,23 +1,24 @@
package kscience.kmath.commons.prob
package space.kscience.kmath.stat
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.runBlocking
import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSampler
import space.kscience.kmath.stat.samplers.GaussianSampler
import org.apache.commons.rng.simple.RandomSource
import space.kscience.kmath.stat.*
import java.time.Duration
import java.time.Instant
import org.apache.commons.rng.sampling.distribution.GaussianSampler as CMGaussianSampler
import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSampler as CMZigguratNormalizedGaussianSampler
private fun runChain(): Duration {
private suspend fun runKMathChained(): Duration {
val generator = RandomGenerator.fromSource(RandomSource.MT, 123L)
val normal = Distribution.normal(NormalSamplerMethod.Ziggurat)
val chain = normal.sample(generator)
val normal = GaussianSampler.of(7.0, 2.0)
val chain = normal.sample(generator).blocking()
val startTime = Instant.now()
var sum = 0.0
repeat(10000001) { counter ->
sum += chain.nextDouble()
sum += chain.next()
if (counter % 100000 == 0) {
val duration = Duration.between(startTime, Instant.now())
@ -29,9 +30,15 @@ private fun runChain(): Duration {
return Duration.between(startTime, Instant.now())
}
private fun runDirect(): Duration {
val provider = RandomSource.create(RandomSource.MT, 123L)
val sampler = ZigguratNormalizedGaussianSampler(provider)
private fun runApacheDirect(): Duration {
val rng = RandomSource.create(RandomSource.MT, 123L)
val sampler = CMGaussianSampler.of(
CMZigguratNormalizedGaussianSampler.of(rng),
7.0,
2.0
)
val startTime = Instant.now()
var sum = 0.0
@ -51,11 +58,9 @@ private fun runDirect(): Duration {
/**
* Comparing chain sampling performance with direct sampling performance
*/
fun main() {
runBlocking(Dispatchers.Default) {
val chainJob = async { runChain() }
val directJob = async { runDirect() }
println("Chain: ${chainJob.await()}")
println("Direct: ${directJob.await()}")
}
fun main(): Unit = runBlocking(Dispatchers.Default) {
val chainJob = async { runKMathChained() }
val directJob = async { runApacheDirect() }
println("KMath Chained: ${chainJob.await()}")
println("Apache Direct: ${directJob.await()}")
}

View File

@ -3,14 +3,15 @@ package space.kscience.kmath.stat
import kotlinx.coroutines.runBlocking
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.collectWithState
import space.kscience.kmath.stat.distributions.NormalDistribution
/**
* The state of distribution averager
* The state of distribution averager.
*/
private data class AveragingChainState(var num: Int = 0, var value: Double = 0.0)
/**
* Averaging
* Averaging.
*/
private fun Chain<Double>.mean(): Chain<Double> = collectWithState(AveragingChainState(), { it.copy() }) { chain ->
val next = chain.next()
@ -21,7 +22,7 @@ private fun Chain<Double>.mean(): Chain<Double> = collectWithState(AveragingChai
fun main() {
val normal = Distribution.normal()
val normal = NormalDistribution(0.0, 2.0)
val chain = normal.sample(RandomGenerator.default).mean()
runBlocking {

View File

@ -1,13 +1,13 @@
package space.kscience.kmath.commons.optimization
import org.junit.jupiter.api.Test
import kotlinx.coroutines.runBlocking
import space.kscience.kmath.commons.expressions.DerivativeStructureExpression
import space.kscience.kmath.misc.symbol
import space.kscience.kmath.optimization.FunctionOptimization
import space.kscience.kmath.stat.Distribution
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.normal
import space.kscience.kmath.stat.distributions.NormalDistribution
import kotlin.math.pow
import kotlin.test.Test
internal class OptimizeTest {
val x by symbol
@ -34,23 +34,24 @@ internal class OptimizeTest {
simplexSteps(x to 2.0, y to 0.5)
//this sets simplex optimizer
}
println(result.point)
println(result.value)
}
@Test
fun testCmFit() {
fun testCmFit() = runBlocking {
val a by symbol
val b by symbol
val c by symbol
val sigma = 1.0
val generator = Distribution.normal(0.0, sigma)
val generator = NormalDistribution(0.0, sigma)
val chain = generator.sample(RandomGenerator.default(112667))
val x = (1..100).map(Int::toDouble)
val y = x.map {
it.pow(2) + it + 1 + chain.nextDouble()
it.pow(2) + it + 1 + chain.next()
}
val yErr = List(x.size) { sigma }
@ -64,5 +65,4 @@ internal class OptimizeTest {
println(result)
println("Chi2/dof = ${result.value / (x.size - 3)}")
}
}

View File

@ -241,18 +241,18 @@ public class BigInt internal constructor(
)
private fun compareMagnitudes(mag1: Magnitude, mag2: Magnitude): Int {
when {
mag1.size > mag2.size -> return 1
mag1.size < mag2.size -> return -1
return when {
mag1.size > mag2.size -> 1
mag1.size < mag2.size -> -1
else -> {
for (i in mag1.size - 1 downTo 0) {
if (mag1[i] > mag2[i]) {
return 1
} else if (mag1[i] < mag2[i]) {
return -1
}
for (i in mag1.size - 1 downTo 0) return when {
mag1[i] > mag2[i] -> 1
mag1[i] < mag2[i] -> -1
else -> continue
}
return 0
0
}
}
}
@ -302,10 +302,11 @@ public class BigInt internal constructor(
var carry = 0uL
for (i in mag.indices) {
val cur: ULong = carry + mag[i].toULong() * x.toULong()
val cur = carry + mag[i].toULong() * x.toULong()
result[i] = (cur and BASE).toUInt()
carry = cur shr BASE_SIZE
}
result[resultLength - 1] = (carry and BASE).toUInt()
return stripLeadingZeros(result)

View File

@ -40,7 +40,6 @@ public interface Buffer<out T> {
public operator fun iterator(): Iterator<T>
public companion object {
/**
* Check the element-by-element match of content of two buffers.
*/
@ -110,7 +109,6 @@ public interface MutableBuffer<T> : Buffer<T> {
public fun copy(): MutableBuffer<T>
public companion object {
/**
* Creates a [DoubleBuffer] with the specified [size], where each element is calculated by calling the specified
* [initializer] function.

View File

@ -38,12 +38,11 @@ class NumberNDFieldTest {
(i * 10 + j).toDouble()
}
for (i in 0..2) {
for (i in 0..2)
for (j in 0..2) {
val expected = (i * 10 + j).toDouble()
assertEquals(expected, array[i, j], "Error at index [$i, $j]")
}
}
}
@Test

View File

@ -1,12 +1,13 @@
package space.kscience.kmath.chains
/**
* Performance optimized chain for real values
* Chunked, specialized chain for real values.
*/
public abstract class BlockingDoubleChain : Chain<Double> {
public abstract fun nextDouble(): Double
public interface BlockingDoubleChain : Chain<Double> {
public override suspend fun next(): Double
override suspend fun next(): Double = nextDouble()
public open fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { nextDouble() }
/**
* Returns an [DoubleArray] chunk of [size] values of [next].
*/
public suspend fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { next() }
}

View File

@ -3,10 +3,7 @@ package space.kscience.kmath.chains
/**
* Performance optimized chain for integer values
*/
public abstract class BlockingIntChain : Chain<Int> {
public abstract fun nextInt(): Int
override suspend fun next(): Int = nextInt()
public fun nextBlock(size: Int): IntArray = IntArray(size) { nextInt() }
public interface BlockingIntChain : Chain<Int> {
public override suspend fun next(): Int
public suspend fun nextBlock(size: Int): IntArray = IntArray(size) { next() }
}

View File

@ -63,12 +63,10 @@ public class MarkovChain<out R : Any>(private val seed: suspend () -> R, private
public fun value(): R? = value
public override suspend fun next(): R {
mutex.withLock {
val newValue = gen(value ?: seed())
value = newValue
return newValue
}
public override suspend fun next(): R = mutex.withLock {
val newValue = gen(value ?: seed())
value = newValue
newValue
}
public override fun fork(): Chain<R> = MarkovChain(seed = { value ?: seed() }, gen = gen)
@ -90,12 +88,10 @@ public class StatefulChain<S, out R>(
public fun value(): R? = value
public override suspend fun next(): R {
mutex.withLock {
val newValue = state.gen(value ?: state.seed())
value = newValue
return newValue
}
public override suspend fun next(): R = mutex.withLock {
val newValue = state.gen(value ?: state.seed())
value = newValue
newValue
}
public override fun fork(): Chain<R> = StatefulChain(forkState(state), seed, forkState, gen)

View File

@ -28,7 +28,7 @@ public fun <T> Flow<T>.chunked(bufferSize: Int, bufferFactory: BufferFactory<T>)
var counter = 0
this@chunked.collect { element ->
list.add(element)
list += element
counter++
if (counter == bufferSize) {

View File

@ -48,11 +48,9 @@ public class RingBuffer<T>(
/**
* A safe snapshot operation
*/
public suspend fun snapshot(): Buffer<T> {
mutex.withLock {
val copy = buffer.copy()
return VirtualBuffer(size) { i -> copy[startIndex.forward(i)] as T }
}
public suspend fun snapshot(): Buffer<T> = mutex.withLock {
val copy = buffer.copy()
VirtualBuffer(size) { i -> copy[startIndex.forward(i)] as T }
}
public suspend fun push(element: T) {

View File

@ -1,4 +1,4 @@
package kscience.dimensions
package space.kscience.dimensions
import space.kscience.kmath.dimensions.D2
import space.kscience.kmath.dimensions.D3

View File

@ -38,8 +38,8 @@ public class OrderedPiecewisePolynomial<T : Comparable<T>>(delimiter: T) :
*/
public fun putRight(right: T, piece: Polynomial<T>) {
require(right > delimiters.last()) { "New delimiter should be to the right of old one" }
delimiters.add(right)
pieces.add(piece)
delimiters += right
pieces += piece
}
/**

View File

@ -1,17 +1,29 @@
package space.kscience.kmath.stat
import kotlinx.coroutines.flow.first
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.collect
import space.kscience.kmath.structures.Buffer
import space.kscience.kmath.structures.BufferFactory
import space.kscience.kmath.structures.DoubleBuffer
import space.kscience.kmath.structures.IntBuffer
import space.kscience.kmath.structures.MutableBuffer
import kotlin.jvm.JvmName
public interface Sampler<T : Any> {
/**
* Sampler that generates chains of values of type [T].
*/
public fun interface Sampler<T : Any> {
/**
* Generates a chain of samples.
*
* @param generator the randomness provider.
* @return the new chain.
*/
public fun sample(generator: RandomGenerator): Chain<T>
}
/**
* A distribution of typed objects
* A distribution of typed objects.
*/
public interface Distribution<T : Any> : Sampler<T> {
/**
@ -20,11 +32,7 @@ public interface Distribution<T : Any> : Sampler<T> {
*/
public fun probability(arg: T): Double
/**
* Create a chain of samples from this distribution.
* The chain is not guaranteed to be stateless, but different sample chains should be independent.
*/
override fun sample(generator: RandomGenerator): Chain<T>
public override fun sample(generator: RandomGenerator): Chain<T>
/**
* An empty companion. Distribution factories should be written as its extensions
@ -63,16 +71,27 @@ public fun <T : Any> Sampler<T>.sampleBuffer(
//clear list from previous run
tmp.clear()
//Fill list
repeat(size) {
tmp.add(chain.next())
}
repeat(size) { tmp += chain.next() }
//return new buffer with elements from tmp
bufferFactory(size) { tmp[it] }
}
}
/**
* Generate a bunch of samples from real distributions
* Samples one value from this [Sampler].
*/
public suspend fun <T : Any> Sampler<T>.next(generator: RandomGenerator): T = sample(generator).first()
/**
* Generates [size] real samples and chunks them into some buffers.
*/
@JvmName("sampleRealBuffer")
public fun Sampler<Double>.sampleBuffer(generator: RandomGenerator, size: Int): Chain<Buffer<Double>> =
sampleBuffer(generator, size, ::DoubleBuffer)
sampleBuffer(generator, size, MutableBuffer.Companion::double)
/**
* Generates [size] integer samples and chunks them into some buffers.
*/
@JvmName("sampleIntBuffer")
public fun Sampler<Int>.sampleBuffer(generator: RandomGenerator, size: Int): Chain<Buffer<Int>> =
sampleBuffer(generator, size, ::IntBuffer)

View File

@ -14,7 +14,7 @@ public interface NamedDistribution<T> : Distribution<Map<String, T>>
public class FactorizedDistribution<T>(public val distributions: Collection<NamedDistribution<T>>) :
NamedDistribution<T> {
override fun probability(arg: Map<String, T>): Double =
distributions.fold(1.0) { acc, distr -> acc * distr.probability(arg) }
distributions.fold(1.0) { acc, dist -> acc * dist.probability(arg) }
override fun sample(generator: RandomGenerator): Chain<Map<String, T>> {
val chains = distributions.map { it.sample(generator) }
@ -38,6 +38,6 @@ public class DistributionBuilder<T : Any> {
private val distributions = ArrayList<NamedDistribution<T>>()
public infix fun String.to(distribution: Distribution<T>) {
distributions.add(NamedDistributionWrapper(this, distribution))
distributions += NamedDistributionWrapper(this, distribution)
}
}

View File

@ -1,17 +1,22 @@
package space.kscience.kmath.stat
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.chains.BlockingIntChain
import space.kscience.kmath.chains.Chain
/**
* A possibly stateful chain producing random values.
*
* @property generator the underlying [RandomGenerator] instance.
*/
public class RandomChain<out R>(
public val generator: RandomGenerator,
private val gen: suspend RandomGenerator.() -> R,
private val gen: suspend RandomGenerator.() -> R
) : Chain<R> {
override suspend fun next(): R = generator.gen()
override fun fork(): Chain<R> = RandomChain(generator.fork(), gen)
}
public fun <R> RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, gen)
public fun Chain<Double>.blocking(): BlockingDoubleChain = object : Chain<Double> by this, BlockingDoubleChain {}
public fun Chain<Int>.blocking(): BlockingIntChain = object : Chain<Int> by this, BlockingIntChain {}

View File

@ -82,6 +82,8 @@ public interface RandomGenerator {
/**
* Implements [RandomGenerator] by delegating all operations to [Random].
*
* @property random the underlying [Random] object.
*/
public class DefaultGenerator(public val random: Random = Random) : RandomGenerator {
public override fun nextBoolean(): Boolean = random.nextBoolean()

View File

@ -8,16 +8,28 @@ import space.kscience.kmath.operations.Group
import space.kscience.kmath.operations.ScaleOperations
import space.kscience.kmath.operations.invoke
public class BasicSampler<T : Any>(public val chainBuilder: (RandomGenerator) -> Chain<T>) : Sampler<T> {
public override fun sample(generator: RandomGenerator): Chain<T> = chainBuilder(generator)
}
/**
* Implements [Sampler] by sampling only certain [value].
*
* @property value the value to sample.
*/
public class ConstantSampler<T : Any>(public val value: T) : Sampler<T> {
public override fun sample(generator: RandomGenerator): Chain<T> = ConstantChain(value)
}
/**
* A space for samplers. Allows to perform simple operations on distributions
* Implements [Sampler] by delegating sampling to value of [chainBuilder].
*
* @property chainBuilder the provider of [Chain].
*/
public class BasicSampler<T : Any>(public val chainBuilder: (RandomGenerator) -> Chain<T>) : Sampler<T> {
public override fun sample(generator: RandomGenerator): Chain<T> = chainBuilder(generator)
}
/**
* A space of samplers. Allows to perform simple operations on distributions.
*
* @property algebra the space to provide addition and scalar multiplication for [T].
*/
public class SamplerSpace<T : Any, S>(public val algebra: S) : Group<Sampler<T>>,
ScaleOperations<Sampler<T>> where S : Group<T>, S : ScaleOperations<T> {
@ -29,8 +41,10 @@ public class SamplerSpace<T : Any, S>(public val algebra: S) : Group<Sampler<T>>
}
public override fun scale(a: Sampler<T>, value: Double): Sampler<T> = BasicSampler { generator ->
a.sample(generator).map { algebra { it * value } }
a.sample(generator).map { a ->
algebra { a * value }
}
}
override fun Sampler<T>.unaryMinus(): Sampler<T> = scale(this, -1.0)
public override fun Sampler<T>.unaryMinus(): Sampler<T> = scale(this, -1.0)
}

View File

@ -0,0 +1,41 @@
package space.kscience.kmath.stat.distributions
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.UnivariateDistribution
import space.kscience.kmath.stat.internal.InternalErf
import space.kscience.kmath.stat.samplers.GaussianSampler
import space.kscience.kmath.stat.samplers.NormalizedGaussianSampler
import space.kscience.kmath.stat.samplers.ZigguratNormalizedGaussianSampler
import kotlin.math.*
/**
* Implements [UnivariateDistribution] for the normal (gaussian) distribution.
*/
public inline class NormalDistribution(public val sampler: GaussianSampler) : UnivariateDistribution<Double> {
public constructor(
mean: Double,
standardDeviation: Double,
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of(),
) : this(GaussianSampler.of(mean, standardDeviation, normalized))
public override fun probability(arg: Double): Double {
val x1 = (arg - sampler.mean) / sampler.standardDeviation
return exp(-0.5 * x1 * x1 - (ln(sampler.standardDeviation) + 0.5 * ln(2 * PI)))
}
public override fun sample(generator: RandomGenerator): Chain<Double> = sampler.sample(generator)
public override fun cumulative(arg: Double): Double {
val dev = arg - sampler.mean
return when {
abs(dev) > 40 * sampler.standardDeviation -> if (dev < 0) 0.0 else 1.0
else -> 0.5 * InternalErf.erfc(-dev / (sampler.standardDeviation * SQRT2))
}
}
private companion object {
private val SQRT2 = sqrt(2.0)
}
}

View File

@ -0,0 +1,15 @@
package space.kscience.kmath.stat.internal
import kotlin.math.abs
/**
* Based on Commons Math implementation.
* See [https://commons.apache.org/proper/commons-math/javadocs/api-3.3/org/apache/commons/math3/special/Erf.html].
*/
internal object InternalErf {
fun erfc(x: Double): Double {
if (abs(x) > 40) return if (x > 0) 0.0 else 2.0
val ret = InternalGamma.regularizedGammaQ(0.5, x * x, 10000)
return if (x < 0) 2 - ret else ret
}
}

View File

@ -0,0 +1,238 @@
package space.kscience.kmath.stat.internal
import kotlin.math.*
private abstract class ContinuedFraction protected constructor() {
protected abstract fun getA(n: Int, x: Double): Double
protected abstract fun getB(n: Int, x: Double): Double
fun evaluate(x: Double, maxIterations: Int): Double {
val small = 1e-50
var hPrev = getA(0, x)
if (hPrev == 0.0 || abs(0.0 - hPrev) <= small) hPrev = small
var n = 1
var dPrev = 0.0
var cPrev = hPrev
var hN = hPrev
while (n < maxIterations) {
val a = getA(n, x)
val b = getB(n, x)
var dN = a + b * dPrev
if (dN == 0.0 || abs(0.0 - dN) <= small) dN = small
var cN = a + b / cPrev
if (cN == 0.0 || abs(0.0 - cN) <= small) cN = small
dN = 1 / dN
val deltaN = cN * dN
hN = hPrev * deltaN
check(!hN.isInfinite()) { "hN is infinite" }
check(!hN.isNaN()) { "hN is NaN" }
if (abs(deltaN - 1.0) < 10e-9) break
dPrev = dN
cPrev = cN
hPrev = hN
n++
}
check(n < maxIterations) { "n is more than maxIterations" }
return hN
}
}
internal object InternalGamma {
const val LANCZOS_G = 607.0 / 128.0
private val LANCZOS = doubleArrayOf(
0.99999999999999709182,
57.156235665862923517,
-59.597960355475491248,
14.136097974741747174,
-0.49191381609762019978,
.33994649984811888699e-4,
.46523628927048575665e-4,
-.98374475304879564677e-4,
.15808870322491248884e-3,
-.21026444172410488319e-3,
.21743961811521264320e-3,
-.16431810653676389022e-3,
.84418223983852743293e-4,
-.26190838401581408670e-4,
.36899182659531622704e-5
)
private val HALF_LOG_2_PI = 0.5 * ln(2.0 * PI)
private const val INV_GAMMA1P_M1_A0 = .611609510448141581788E-08
private const val INV_GAMMA1P_M1_A1 = .624730830116465516210E-08
private const val INV_GAMMA1P_M1_B1 = .203610414066806987300E+00
private const val INV_GAMMA1P_M1_B2 = .266205348428949217746E-01
private const val INV_GAMMA1P_M1_B3 = .493944979382446875238E-03
private const val INV_GAMMA1P_M1_B4 = -.851419432440314906588E-05
private const val INV_GAMMA1P_M1_B5 = -.643045481779353022248E-05
private const val INV_GAMMA1P_M1_B6 = .992641840672773722196E-06
private const val INV_GAMMA1P_M1_B7 = -.607761895722825260739E-07
private const val INV_GAMMA1P_M1_B8 = .195755836614639731882E-09
private const val INV_GAMMA1P_M1_P0 = .6116095104481415817861E-08
private const val INV_GAMMA1P_M1_P1 = .6871674113067198736152E-08
private const val INV_GAMMA1P_M1_P2 = .6820161668496170657918E-09
private const val INV_GAMMA1P_M1_P3 = .4686843322948848031080E-10
private const val INV_GAMMA1P_M1_P4 = .1572833027710446286995E-11
private const val INV_GAMMA1P_M1_P5 = -.1249441572276366213222E-12
private const val INV_GAMMA1P_M1_P6 = .4343529937408594255178E-14
private const val INV_GAMMA1P_M1_Q1 = .3056961078365221025009E+00
private const val INV_GAMMA1P_M1_Q2 = .5464213086042296536016E-01
private const val INV_GAMMA1P_M1_Q3 = .4956830093825887312020E-02
private const val INV_GAMMA1P_M1_Q4 = .2692369466186361192876E-03
private const val INV_GAMMA1P_M1_C = -.422784335098467139393487909917598E+00
private const val INV_GAMMA1P_M1_C0 = .577215664901532860606512090082402E+00
private const val INV_GAMMA1P_M1_C1 = -.655878071520253881077019515145390E+00
private const val INV_GAMMA1P_M1_C2 = -.420026350340952355290039348754298E-01
private const val INV_GAMMA1P_M1_C3 = .166538611382291489501700795102105E+00
private const val INV_GAMMA1P_M1_C4 = -.421977345555443367482083012891874E-01
private const val INV_GAMMA1P_M1_C5 = -.962197152787697356211492167234820E-02
private const val INV_GAMMA1P_M1_C6 = .721894324666309954239501034044657E-02
private const val INV_GAMMA1P_M1_C7 = -.116516759185906511211397108401839E-02
private const val INV_GAMMA1P_M1_C8 = -.215241674114950972815729963053648E-03
private const val INV_GAMMA1P_M1_C9 = .128050282388116186153198626328164E-03
private const val INV_GAMMA1P_M1_C10 = -.201348547807882386556893914210218E-04
private const val INV_GAMMA1P_M1_C11 = -.125049348214267065734535947383309E-05
private const val INV_GAMMA1P_M1_C12 = .113302723198169588237412962033074E-05
private const val INV_GAMMA1P_M1_C13 = -.205633841697760710345015413002057E-06
fun logGamma(x: Double): Double = when {
x.isNaN() || x <= 0.0 -> Double.NaN
x < 0.5 -> logGamma1p(x) - ln(x)
x <= 2.5 -> logGamma1p(x - 0.5 - 0.5)
x <= 8.0 -> {
val n = floor(x - 1.5).toInt()
val prod = (1..n).fold(1.0, { prod, i -> prod * (x - i) })
logGamma1p(x - (n + 1)) + ln(prod)
}
else -> {
val tmp = x + LANCZOS_G + .5
(x + .5) * ln(tmp) - tmp + HALF_LOG_2_PI + ln(lanczos(x) / x)
}
}
private fun regularizedGammaP(
a: Double,
x: Double,
maxIterations: Int = Int.MAX_VALUE
): Double = when {
a.isNaN() || x.isNaN() || a <= 0.0 || x < 0.0 -> Double.NaN
x == 0.0 -> 0.0
x >= a + 1 -> 1.0 - regularizedGammaQ(a, x, maxIterations)
else -> {
// calculate series
var n = 0.0 // current element index
var an = 1.0 / a // n-th element in the series
var sum = an // partial sum
while (abs(an / sum) > 10e-15 && n < maxIterations && sum < Double.POSITIVE_INFINITY) {
// compute next element in the series
n += 1.0
an *= x / (a + n)
// update partial sum
sum += an
}
when {
n >= maxIterations -> throw error("Maximal iterations is exceeded $maxIterations")
sum.isInfinite() -> 1.0
else -> exp(-x + a * ln(x) - logGamma(a)) * sum
}
}
}
fun regularizedGammaQ(
a: Double,
x: Double,
maxIterations: Int = Int.MAX_VALUE
): Double = when {
a.isNaN() || x.isNaN() || a <= 0.0 || x < 0.0 -> Double.NaN
x == 0.0 -> 1.0
x < a + 1.0 -> 1.0 - regularizedGammaP(a, x, maxIterations)
else -> 1.0 / object : ContinuedFraction() {
override fun getA(n: Int, x: Double): Double = 2.0 * n + 1.0 - a + x
override fun getB(n: Int, x: Double): Double = n * (a - n)
}.evaluate(x, maxIterations) * exp(-x + a * ln(x) - logGamma(a))
}
private fun lanczos(x: Double): Double =
(LANCZOS.size - 1 downTo 1).sumByDouble { LANCZOS[it] / (x + it) } + LANCZOS[0]
private fun invGamma1pm1(x: Double): Double {
require(x >= -0.5)
require(x <= 1.5)
val ret: Double
val t = if (x <= 0.5) x else x - 0.5 - 0.5
if (t < 0.0) {
val a = INV_GAMMA1P_M1_A0 + t * INV_GAMMA1P_M1_A1
var b = INV_GAMMA1P_M1_B8
b = INV_GAMMA1P_M1_B7 + t * b
b = INV_GAMMA1P_M1_B6 + t * b
b = INV_GAMMA1P_M1_B5 + t * b
b = INV_GAMMA1P_M1_B4 + t * b
b = INV_GAMMA1P_M1_B3 + t * b
b = INV_GAMMA1P_M1_B2 + t * b
b = INV_GAMMA1P_M1_B1 + t * b
b = 1.0 + t * b
var c = INV_GAMMA1P_M1_C13 + t * (a / b)
c = INV_GAMMA1P_M1_C12 + t * c
c = INV_GAMMA1P_M1_C11 + t * c
c = INV_GAMMA1P_M1_C10 + t * c
c = INV_GAMMA1P_M1_C9 + t * c
c = INV_GAMMA1P_M1_C8 + t * c
c = INV_GAMMA1P_M1_C7 + t * c
c = INV_GAMMA1P_M1_C6 + t * c
c = INV_GAMMA1P_M1_C5 + t * c
c = INV_GAMMA1P_M1_C4 + t * c
c = INV_GAMMA1P_M1_C3 + t * c
c = INV_GAMMA1P_M1_C2 + t * c
c = INV_GAMMA1P_M1_C1 + t * c
c = INV_GAMMA1P_M1_C + t * c
ret = (if (x > 0.5) t * c / x else x * (c + 0.5 + 0.5))
} else {
var p = INV_GAMMA1P_M1_P6
p = INV_GAMMA1P_M1_P5 + t * p
p = INV_GAMMA1P_M1_P4 + t * p
p = INV_GAMMA1P_M1_P3 + t * p
p = INV_GAMMA1P_M1_P2 + t * p
p = INV_GAMMA1P_M1_P1 + t * p
p = INV_GAMMA1P_M1_P0 + t * p
var q = INV_GAMMA1P_M1_Q4
q = INV_GAMMA1P_M1_Q3 + t * q
q = INV_GAMMA1P_M1_Q2 + t * q
q = INV_GAMMA1P_M1_Q1 + t * q
q = 1.0 + t * q
var c = INV_GAMMA1P_M1_C13 + p / q * t
c = INV_GAMMA1P_M1_C12 + t * c
c = INV_GAMMA1P_M1_C11 + t * c
c = INV_GAMMA1P_M1_C10 + t * c
c = INV_GAMMA1P_M1_C9 + t * c
c = INV_GAMMA1P_M1_C8 + t * c
c = INV_GAMMA1P_M1_C7 + t * c
c = INV_GAMMA1P_M1_C6 + t * c
c = INV_GAMMA1P_M1_C5 + t * c
c = INV_GAMMA1P_M1_C4 + t * c
c = INV_GAMMA1P_M1_C3 + t * c
c = INV_GAMMA1P_M1_C2 + t * c
c = INV_GAMMA1P_M1_C1 + t * c
c = INV_GAMMA1P_M1_C0 + t * c
ret = (if (x > 0.5) t / x * (c - 0.5 - 0.5) else x * c)
}
return ret
}
private fun logGamma1p(x: Double): Double {
require(x >= -0.5)
require(x <= 1.5)
return -ln1p(invGamma1pm1(x))
}
}

View File

@ -0,0 +1,70 @@
package space.kscience.kmath.stat.internal
import kotlin.math.ln
import kotlin.math.min
internal object InternalUtils {
private val FACTORIALS = longArrayOf(
1L, 1L, 2L,
6L, 24L, 120L,
720L, 5040L, 40320L,
362880L, 3628800L, 39916800L,
479001600L, 6227020800L, 87178291200L,
1307674368000L, 20922789888000L, 355687428096000L,
6402373705728000L, 121645100408832000L, 2432902008176640000L
)
private const val BEGIN_LOG_FACTORIALS = 2
fun factorial(n: Int): Long = FACTORIALS[n]
fun validateProbabilities(probabilities: DoubleArray?): Double {
require(!(probabilities == null || probabilities.isEmpty())) { "Probabilities must not be empty." }
val sumProb = probabilities.sumByDouble { prob ->
require(!(prob < 0 || prob.isInfinite() || prob.isNaN())) { "Invalid probability: $prob" }
prob
}
require(!(sumProb.isInfinite() || sumProb <= 0)) { "Invalid sum of probabilities: $sumProb" }
return sumProb
}
class FactorialLog private constructor(numValues: Int, cache: DoubleArray?) {
private val logFactorials: DoubleArray = DoubleArray(numValues)
init {
val endCopy: Int
if (cache != null && cache.size > BEGIN_LOG_FACTORIALS) {
// Copy available values.
endCopy = min(cache.size, numValues)
cache.copyInto(
logFactorials,
BEGIN_LOG_FACTORIALS,
BEGIN_LOG_FACTORIALS, endCopy
)
} else
// All values to be computed
endCopy = BEGIN_LOG_FACTORIALS
// Compute remaining values.
(endCopy until numValues).forEach { i ->
if (i < FACTORIALS.size)
logFactorials[i] = ln(FACTORIALS[i].toDouble())
else
logFactorials[i] = logFactorials[i - 1] + ln(i.toDouble())
}
}
fun value(n: Int): Double {
if (n < logFactorials.size) return logFactorials[n]
return if (n < FACTORIALS.size) ln(FACTORIALS[n].toDouble()) else InternalGamma.logGamma(n + 1.0)
}
companion object {
fun create(): FactorialLog = FactorialLog(0, null)
}
}
}

View File

@ -0,0 +1,73 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.internal.InternalUtils
import kotlin.math.ln
import kotlin.math.pow
/**
* Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html).
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html].
*/
public class AhrensDieterExponentialSampler private constructor(public val mean: Double) : Sampler<Double> {
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
// Step 1:
var a = 0.0
var u = nextDouble()
// Step 2 and 3:
while (u < 0.5) {
a += EXPONENTIAL_SA_QI[0]
u *= 2.0
}
// Step 4 (now u >= 0.5):
u += u - 1
// Step 5:
if (u <= EXPONENTIAL_SA_QI[0]) return@chain mean * (a + u)
// Step 6:
var i = 0 // Should be 1, be we iterate before it in while using 0.
var u2 = nextDouble()
var umin = u2
// Step 7 and 8:
do {
++i
u2 = nextDouble()
if (u2 < umin) umin = u2
// Step 8:
} while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1.
mean * (a + umin * EXPONENTIAL_SA_QI[0])
}
override fun toString(): String = "Ahrens-Dieter Exponential deviate"
public companion object {
private val EXPONENTIAL_SA_QI by lazy { DoubleArray(16) }
init {
/**
* Filling EXPONENTIAL_SA_QI table.
* Note that we don't want qi = 0 in the table.
*/
val ln2 = ln(2.0)
var qi = 0.0
EXPONENTIAL_SA_QI.indices.forEach { i ->
qi += ln2.pow(i + 1.0) / InternalUtils.factorial(i + 1)
EXPONENTIAL_SA_QI[i] = qi
}
}
public fun of(mean: Double): AhrensDieterExponentialSampler {
require(mean > 0) { "mean is not strictly positive: $mean" }
return AhrensDieterExponentialSampler(mean)
}
}
}

View File

@ -0,0 +1,120 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.next
import kotlin.math.*
/**
* Sampling from the [gamma distribution](http://mathworld.wolfram.com/GammaDistribution.html).
* - For 0 < alpha < 1:
* Ahrens, J. H. and Dieter, U., Computer methods for sampling from gamma, beta, Poisson and binomial distributions, Computing, 12, 223-246, 1974.
* - For alpha >= 1:
* Marsaglia and Tsang, A Simple Method for Generating Gamma Variables. ACM Transactions on Mathematical Software, Volume 26 Issue 3, September, 2000.
*
* Based on Commons RNG implementation.
*
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterMarsagliaTsangGammaSampler.html].
*/
public class AhrensDieterMarsagliaTsangGammaSampler private constructor(
alpha: Double,
theta: Double
) : Sampler<Double> {
private val delegate: BaseGammaSampler =
if (alpha < 1) AhrensDieterGammaSampler(alpha, theta) else MarsagliaTsangGammaSampler(alpha, theta)
private abstract class BaseGammaSampler internal constructor(
protected val alpha: Double,
protected val theta: Double
) : Sampler<Double> {
init {
require(alpha > 0) { "alpha is not strictly positive: $alpha" }
require(theta > 0) { "theta is not strictly positive: $theta" }
}
override fun toString(): String = "Ahrens-Dieter-Marsaglia-Tsang Gamma deviate"
}
private class AhrensDieterGammaSampler(alpha: Double, theta: Double) :
BaseGammaSampler(alpha, theta) {
private val oneOverAlpha: Double = 1.0 / alpha
private val bGSOptim: Double = 1.0 + alpha / E
override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
var x: Double
// [1]: p. 228, Algorithm GS.
while (true) {
// Step 1:
val u = generator.nextDouble()
val p = bGSOptim * u
if (p <= 1) {
// Step 2:
x = p.pow(oneOverAlpha)
val u2 = generator.nextDouble()
if (u2 > exp(-x)) // Reject.
continue
break
}
// Step 3:
x = -ln((bGSOptim - p) * oneOverAlpha)
val u2: Double = generator.nextDouble()
if (u2 <= x.pow(alpha - 1.0)) break
// Reject and continue.
}
x * theta
}
}
private class MarsagliaTsangGammaSampler(alpha: Double, theta: Double) :
BaseGammaSampler(alpha, theta) {
private val dOptim: Double
private val cOptim: Double
private val gaussian: NormalizedGaussianSampler
init {
gaussian = ZigguratNormalizedGaussianSampler.of()
dOptim = alpha - ONE_THIRD
cOptim = ONE_THIRD / sqrt(dOptim)
}
override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
var v: Double
while (true) {
val x = gaussian.next(generator)
val oPcTx = 1 + cOptim * x
v = oPcTx * oPcTx * oPcTx
if (v <= 0) continue
val x2 = x * x
val u = generator.nextDouble()
// Squeeze.
if (u < 1 - 0.0331 * x2 * x2) break
if (ln(u) < 0.5 * x2 + dOptim * (1 - v + ln(v))) break
}
theta * dOptim * v
}
companion object {
private const val ONE_THIRD = 1.0 / 3.0
}
}
public override fun sample(generator: RandomGenerator): Chain<Double> = delegate.sample(generator)
public override fun toString(): String = delegate.toString()
public companion object {
public fun of(
alpha: Double,
theta: Double
): Sampler<Double> = AhrensDieterMarsagliaTsangGammaSampler(alpha, theta)
}
}

View File

@ -0,0 +1,286 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.internal.InternalUtils
import kotlin.math.ceil
import kotlin.math.max
import kotlin.math.min
/**
* Distribution sampler that uses the Alias method. It can be used to sample from n values each with an associated
* probability. This implementation is based on the detailed explanation of the alias method by Keith Schartz and
* implements Vose's algorithm.
*
* Vose, M.D., A linear algorithm for generating random numbers with a given distribution, IEEE Transactions on
* Software Engineering, 17, 972-975, 1991. he algorithm will sample values in O(1) time after a pre-processing step
* of O(n) time.
*
* The alias tables are constructed using fraction probabilities with an assumed denominator of 253. In the generic
* case sampling uses UniformRandomProvider.nextInt(int) and the upper 53-bits from UniformRandomProvider.nextLong().
*
* Zero padding the input probabilities can be used to make more sampling more efficient. Any zero entry will always be
* aliased removing the requirement to compute a long. Increased sampling speed comes at the cost of increased storage
* space. The algorithm requires approximately 12 bytes of storage per input probability, that is n * 12 for size n.
* Zero-padding only requires 4 bytes of storage per padded value as the probability is known to be zero.
*
* An optimisation is performed for small table sizes that are a power of 2. In this case the sampling uses 1 or 2
* calls from UniformRandomProvider.nextInt() to generate up to 64-bits for creation of an 11-bit index and 53-bits
* for the long. This optimisation requires a generator with a high cycle length for the lower order bits.
*
* Larger table sizes that are a power of 2 will benefit from fast algorithms for UniformRandomProvider.nextInt(int)
* that exploit the power of 2.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AliasMethodDiscreteSampler.html].
*/
public open class AliasMethodDiscreteSampler private constructor(
// Deliberate direct storage of input arrays
protected val probability: LongArray,
protected val alias: IntArray
) : Sampler<Int> {
private class SmallTableAliasMethodDiscreteSampler(
probability: LongArray,
alias: IntArray
) : AliasMethodDiscreteSampler(probability, alias) {
// Assume the table size is a power of 2 and create the mask
private val mask: Int = alias.size - 1
override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
val bits = generator.nextInt()
// Isolate lower bits
val j = bits and mask
// Optimisation for zero-padded input tables
if (j >= probability.size)
// No probability must use the alias
return@chain alias[j]
// Create a uniform random deviate as a long.
// This replicates functionality from the o.a.c.rng.core.utils.NumberFactory.makeLong
val longBits = generator.nextInt().toLong() shl 32 or (bits.toLong() and hex_ffffffff)
// Choose between the two. Use a 53-bit long for the probability.
if (longBits ushr 11 < probability[j]) j else alias[j]
}
private companion object {
private const val hex_ffffffff = 4294967295L
}
}
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
// This implements the algorithm as per Vose (1991):
// v = uniform() in [0, 1)
// j = uniform(n) in [0, n)
// if v < prob[j] then
// return j
// else
// return alias[j]
val j = generator.nextInt(alias.size)
// Optimisation for zero-padded input tables
// No probability must use the alias
if (j >= probability.size) return@chain alias[j]
// Note: We could check the probability before computing a deviate.
// p(j) == 0 => alias[j]
// p(j) == 1 => j
// However it is assumed these edge cases are rare:
//
// The probability table will be 1 for approximately 1/n samples, i.e. only the
// last unpaired probability. This is only worth checking for when the table size (n)
// is small. But in that case the user should zero-pad the table for performance.
//
// The probability table will be 0 when an input probability was zero. We
// will assume this is also rare if modelling a discrete distribution where
// all samples are possible. The edge case for zero-padded tables is handled above.
// Choose between the two. Use a 53-bit long for the probability.
if (generator.nextLong() ushr 11 < probability[j]) j else alias[j]
}
public override fun toString(): String = "Alias method"
public companion object {
private const val DEFAULT_ALPHA = 0
private const val ZERO = 0.0
private const val ONE_AS_NUMERATOR = 1L shl 53
private const val CONVERT_TO_NUMERATOR: Double = ONE_AS_NUMERATOR.toDouble()
private const val MAX_SMALL_POWER_2_SIZE = 1 shl 11
public fun of(
probabilities: DoubleArray,
alpha: Int = DEFAULT_ALPHA
): Sampler<Int> {
// The Alias method balances N categories with counts around the mean into N sections,
// each allocated 'mean' observations.
//
// Consider 4 categories with counts 6,3,2,1. The histogram can be balanced into a
// 2D array as 4 sections with a height of the mean:
//
// 6
// 6
// 6
// 63 => 6366 --
// 632 6326 |-- mean
// 6321 6321 --
//
// section abcd
//
// Each section is divided as:
// a: 6=1/1
// b: 3=1/1
// c: 2=2/3; 6=1/3 (6 is the alias)
// d: 1=1/3; 6=2/3 (6 is the alias)
//
// The sample is obtained by randomly selecting a section, then choosing which category
// from the pair based on a uniform random deviate.
val sumProb = InternalUtils.validateProbabilities(probabilities)
// Allow zero-padding
val n = computeSize(probabilities.size, alpha)
// Partition into small and large by splitting on the average.
val mean = sumProb / n
// The cardinality of smallSize + largeSize = n.
// So fill the same array from either end.
val indices = IntArray(n)
var large = n
var small = 0
probabilities.indices.forEach { i ->
if (probabilities[i] >= mean) indices[--large] = i else indices[small++] = i
}
small = fillRemainingIndices(probabilities.size, indices, small)
// This may be smaller than the input length if the probabilities were already padded.
val nonZeroIndex = findLastNonZeroIndex(probabilities)
// The probabilities are modified so use a copy.
// Note: probabilities are required only up to last nonZeroIndex
val remainingProbabilities = probabilities.copyOf(nonZeroIndex + 1)
// Allocate the final tables.
// Probability table may be truncated (when zero padded).
// The alias table is full length.
val probability = LongArray(remainingProbabilities.size)
val alias = IntArray(n)
// This loop uses each large in turn to fill the alias table for small probabilities that
// do not reach the requirement to fill an entire section alone (i.e. p < mean).
// Since the sum of the small should be less than the sum of the large it should use up
// all the small first. However floating point round-off can result in
// misclassification of items as small or large. The Vose algorithm handles this using
// a while loop conditioned on the size of both sets and a subsequent loop to use
// unpaired items.
while (large != n && small != 0) {
// Index of the small and the large probabilities.
val j = indices[--small]
val k = indices[large++]
// Optimisation for zero-padded input:
// p(j) = 0 above the last nonZeroIndex
if (j > nonZeroIndex)
// The entire amount for the section is taken from the alias.
remainingProbabilities[k] -= mean
else {
val pj = remainingProbabilities[j]
// Item j is a small probability that is below the mean.
// Compute the weight of the section for item j: pj / mean.
// This is scaled by 2^53 and the ceiling function used to round-up
// the probability to a numerator of a fraction in the range [1,2^53].
// Ceiling ensures non-zero values.
probability[j] = ceil(CONVERT_TO_NUMERATOR * (pj / mean)).toLong()
// The remaining amount for the section is taken from the alias.
// Effectively: probabilities[k] -= (mean - pj)
remainingProbabilities[k] += pj - mean
}
// If not j then the alias is k
alias[j] = k
// Add the remaining probability from large to the appropriate list.
if (remainingProbabilities[k] >= mean) indices[--large] = k else indices[small++] = k
}
// Final loop conditions to consume unpaired items.
// Note: The large set should never be non-empty but this can occur due to round-off
// error so consume from both.
fillTable(probability, alias, indices, 0, small)
fillTable(probability, alias, indices, large, n)
// Change the algorithm for small power of 2 sized tables
return if (isSmallPowerOf2(n))
SmallTableAliasMethodDiscreteSampler(probability, alias)
else
AliasMethodDiscreteSampler(probability, alias)
}
private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int {
var updatedSmall = small
(length until indices.size).forEach { i -> indices[updatedSmall++] = i }
return updatedSmall
}
private fun findLastNonZeroIndex(probabilities: DoubleArray): Int {
// No bounds check is performed when decrementing as the array contains at least one
// value above zero.
var nonZeroIndex = probabilities.size - 1
while (probabilities[nonZeroIndex] == ZERO) nonZeroIndex--
return nonZeroIndex
}
private fun computeSize(length: Int, alpha: Int): Int {
// If No padding
if (alpha < 0) return length
// Use the number of leading zeros function to find the next power of 2,
// i.e. ceil(log2(x))
var pow2 = 32 - numberOfLeadingZeros(length - 1)
// Increase by the alpha. Clip this to limit to a positive integer (2^30)
pow2 = min(30, pow2 + alpha)
// Use max to handle a length above the highest possible power of 2
return max(length, 1 shl pow2)
}
private fun fillTable(
probability: LongArray,
alias: IntArray,
indices: IntArray,
start: Int,
end: Int
) = (start until end).forEach { i ->
val index = indices[i]
probability[index] = ONE_AS_NUMERATOR
alias[index] = index
}
private fun isSmallPowerOf2(n: Int): Boolean = n <= MAX_SMALL_POWER_2_SIZE && n and n - 1 == 0
private fun numberOfLeadingZeros(i: Int): Int {
var mutI = i
if (mutI <= 0) return if (mutI == 0) 32 else 0
var n = 31
if (mutI >= 1 shl 16) {
n -= 16
mutI = mutI ushr 16
}
if (mutI >= 1 shl 8) {
n -= 8
mutI = mutI ushr 8
}
if (mutI >= 1 shl 4) {
n -= 4
mutI = mutI ushr 4
}
if (mutI >= 1 shl 2) {
n -= 2
mutI = mutI ushr 2
}
return n - (mutI ushr 1)
}
}
}

View File

@ -0,0 +1,48 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.*
/**
* [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian
* distribution.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html].
*/
public class BoxMullerNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
private var nextGaussian: Double = Double.NaN
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
val random: Double
if (nextGaussian.isNaN()) {
// Generate a pair of Gaussian numbers.
val x = nextDouble()
val y = nextDouble()
val alpha = 2 * PI * x
val r = sqrt(-2 * ln(y))
// Return the first element of the generated pair.
random = r * cos(alpha)
// Keep second element of the pair for next invocation.
nextGaussian = r * sin(alpha)
} else {
// Use the second element of the pair (generated at the
// previous invocation).
random = nextGaussian
// Both elements of the pair have been used.
nextGaussian = Double.NaN
}
random
}
public override fun toString(): String = "Box-Muller normalized Gaussian deviate"
public companion object {
public fun of(): BoxMullerNormalizedGaussianSampler = BoxMullerNormalizedGaussianSampler()
}
}

View File

@ -0,0 +1,43 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.map
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
/**
* Sampling from a Gaussian distribution with given mean and standard deviation.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html].
*
* @property mean the mean of the distribution.
* @property standardDeviation the variance of the distribution.
*/
public class GaussianSampler private constructor(
public val mean: Double,
public val standardDeviation: Double,
private val normalized: NormalizedGaussianSampler
) : Sampler<Double> {
public override fun sample(generator: RandomGenerator): Chain<Double> = normalized
.sample(generator)
.map { standardDeviation * it + mean }
override fun toString(): String = "Gaussian deviate [$normalized]"
public companion object {
public fun of(
mean: Double,
standardDeviation: Double,
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of()
): GaussianSampler {
require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" }
return GaussianSampler(
mean,
standardDeviation,
normalized
)
}
}
}

View File

@ -0,0 +1,63 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.exp
/**
* Sampler for the Poisson distribution.
* - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
* This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead.
*
* Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor.
*
* Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html].
*/
public class KempSmallMeanPoissonSampler private constructor(
private val p0: Double,
private val mean: Double
) : Sampler<Int> {
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
// Note on the algorithm:
// - X is the unknown sample deviate (the output of the algorithm)
// - x is the current value from the distribution
// - p is the probability of the current value x, p(X=x)
// - u is effectively the cumulative probability that the sample X
// is equal or above the current value x, p(X>=x)
// So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
var u = nextDouble()
var x = 0
var p = p0
while (u > p) {
u -= p
// Compute the next probability using a recurrence relation.
// p(x+1) = p(x) * mean / (x+1)
p *= mean / ++x
// The algorithm listed in Kemp (1981) does not check that the rolling probability
// is positive. This check is added to ensure no errors when the limit of the summation
// 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic.
if (p == 0.0) return@chain x
}
x
}
public override fun toString(): String = "Kemp Small Mean Poisson deviate"
public companion object {
public fun of(mean: Double): KempSmallMeanPoissonSampler {
require(mean > 0) { "Mean is not strictly positive: $mean" }
val p0 = exp(-mean)
// Probability must be positive. As mean increases then p(0) decreases.
require(p0 > 0) { "No probability for mean: $mean" }
return KempSmallMeanPoissonSampler(p0, mean)
}
}
}

View File

@ -0,0 +1,130 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.ConstantChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.internal.InternalUtils
import space.kscience.kmath.stat.next
import kotlin.math.*
/**
* Sampler for the Poisson distribution.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables
* Computing vol. 26 pp. 197-207.
*
* This sampler is suitable for mean >= 40.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html].
*/
public class LargeMeanPoissonSampler private constructor(public val mean: Double) : Sampler<Int> {
private val exponential: Sampler<Double> = AhrensDieterExponentialSampler.of(1.0)
private val gaussian: Sampler<Double> = ZigguratNormalizedGaussianSampler.of()
private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG
private val lambda: Double = floor(mean)
private val logLambda: Double = ln(lambda)
private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt())
private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1))
private val halfDelta: Double = delta / 2
private val twolpd: Double = 2 * lambda + delta
private val c1: Double = 1 / (8 * lambda)
private val a1: Double = sqrt(PI * twolpd) * exp(c1)
private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd)
private val aSum: Double = a1 + a2 + 1
private val p1: Double = a1 / aSum
private val p2: Double = a2 / aSum
private val smallMeanPoissonSampler: Sampler<Int> = if (mean - lambda < Double.MIN_VALUE)
NO_SMALL_MEAN_POISSON_SAMPLER
else // Not used.
KempSmallMeanPoissonSampler.of(mean - lambda)
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
// This will never be null. It may be a no-op delegate that returns zero.
val y2 = smallMeanPoissonSampler.next(generator)
var x: Double
var y: Double
var v: Double
var a: Int
var t: Double
var qr: Double
var qa: Double
while (true) {
// Step 1:
val u = generator.nextDouble()
if (u <= p1) {
// Step 2:
val n = gaussian.next(generator)
x = n * sqrt(lambda + halfDelta) - 0.5
if (x > delta || x < -lambda) continue
y = if (x < 0) floor(x) else ceil(x)
val e = exponential.next(generator)
v = -e - 0.5 * n * n + c1
} else {
// Step 3:
if (u > p1 + p2) {
y = lambda
break
}
x = delta + twolpd / delta * exponential.next(generator)
y = ceil(x)
v = -exponential.next(generator) - delta * (x + 1) / twolpd
}
// The Squeeze Principle
// Step 4.1:
a = if (x < 0) 1 else 0
t = y * (y + 1) / (2 * lambda)
// Step 4.2
if (v < -t && a == 0) {
y += lambda
break
}
// Step 4.3:
qr = t * ((2 * y + 1) / (6 * lambda) - 1)
qa = qr - t * t / (3 * (lambda + a * (y + 1)))
// Step 4.4:
if (v < qa) {
y += lambda
break
}
// Step 4.5:
if (v > qr) continue
// Step 4.6:
if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) {
y += lambda
break
}
}
min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt()
}
private fun getFactorialLog(n: Int): Double = factorialLog.value(n)
public override fun toString(): String = "Large Mean Poisson deviate"
public companion object {
private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE
private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create()
private val NO_SMALL_MEAN_POISSON_SAMPLER: Sampler<Int> = Sampler { ConstantChain(0) }
public fun of(mean: Double): LargeMeanPoissonSampler {
require(mean >= 1) { "mean is not >= 1: $mean" }
// The algorithm is not valid if Math.floor(mean) is not an integer.
require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" }
return LargeMeanPoissonSampler(mean)
}
}
}

View File

@ -0,0 +1,61 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.ln
import kotlin.math.sqrt
/**
* [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian
* distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in
* [BoxMullerNormalizedGaussianSampler].
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html]
*/
public class MarsagliaNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
private var nextGaussian = Double.NaN
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
if (nextGaussian.isNaN()) {
val alpha: Double
var x: Double
// Rejection scheme for selecting a pair that lies within the unit circle.
while (true) {
// Generate a pair of numbers within [-1 , 1).
x = 2.0 * generator.nextDouble() - 1.0
val y = 2.0 * generator.nextDouble() - 1.0
val r2 = x * x + y * y
if (r2 < 1 && r2 > 0) {
// Pair (x, y) is within unit circle.
alpha = sqrt(-2 * ln(r2) / r2)
// Keep second element of the pair for next invocation.
nextGaussian = alpha * y
// Return the first element of the generated pair.
break
}
// Pair is not within the unit circle: Generate another one.
}
// Return the first element of the generated pair.
alpha * x
} else {
// Use the second element of the pair (generated at the
// previous invocation).
val r = nextGaussian
// Both elements of the pair have been used.
nextGaussian = Double.NaN
r
}
}
public override fun toString(): String = "Box-Muller (with rejection) normalized Gaussian deviate"
public companion object {
public fun of(): MarsagliaNormalizedGaussianSampler = MarsagliaNormalizedGaussianSampler()
}
}

View File

@ -0,0 +1,9 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.stat.Sampler
/**
* Marker interface for a sampler that generates values from an N(0,1)
* [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution).
*/
public interface NormalizedGaussianSampler : Sampler<Double>

View File

@ -0,0 +1,30 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
/**
* Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3
* Important integer-valued distributions: The Poisson distribution. Addison Wesley.
* The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html].
*/
public class PoissonSampler private constructor(mean: Double) : Sampler<Int> {
private val poissonSamplerDelegate: Sampler<Int> = of(mean)
public override fun sample(generator: RandomGenerator): Chain<Int> = poissonSamplerDelegate.sample(generator)
public override fun toString(): String = poissonSamplerDelegate.toString()
public companion object {
private const val PIVOT = 40.0
public fun of(mean: Double): Sampler<Int> =// Each sampler should check the input arguments.
if (mean < PIVOT) SmallMeanPoissonSampler.of(mean) else LargeMeanPoissonSampler.of(mean)
}
}

View File

@ -0,0 +1,50 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.ceil
import kotlin.math.exp
/**
* Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important
* integer-valued distributions: The Poisson distribution. Addison Wesley.
* - The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead.
*
* Based on Commons RNG implementation.
*
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html].
*/
public class SmallMeanPoissonSampler private constructor(mean: Double) : Sampler<Int> {
private val p0: Double = exp(-mean)
private val limit: Int = (if (p0 > 0)
ceil(1000 * mean)
else
throw IllegalArgumentException("No p(x=0) probability for mean: $mean")).toInt()
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
var n = 0
var r = 1.0
while (n < limit) {
r *= nextDouble()
if (r >= p0) n++ else break
}
n
}
public override fun toString(): String = "Small Mean Poisson deviate"
public companion object {
public fun of(mean: Double): SmallMeanPoissonSampler {
require(mean > 0) { "mean is not strictly positive: $mean" }
return SmallMeanPoissonSampler(mean)
}
}
}

View File

@ -0,0 +1,88 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.*
/**
* [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a
* Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this
* implementation has been adapted from the C code provided therein.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html].
*/
public class ZigguratNormalizedGaussianSampler private constructor() :
NormalizedGaussianSampler, Sampler<Double> {
private fun sampleOne(generator: RandomGenerator): Double {
val j = generator.nextLong()
val i = (j and LAST.toLong()).toInt()
return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i)
}
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain { sampleOne(this) }
public override fun toString(): String = "Ziggurat normalized Gaussian deviate"
private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double {
var x = hz * W[iz]
return when {
iz == 0 -> {
var y: Double
do {
y = -ln(generator.nextDouble())
x = -ln(generator.nextDouble()) * ONE_OVER_R
} while (y + y < x * x)
val out = R + x
if (hz > 0) out else -out
}
F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x
else -> sampleOne(generator)
}
}
public companion object {
private const val R: Double = 3.442619855899
private const val ONE_OVER_R: Double = 1 / R
private const val V: Double = 9.91256303526217e-3
private val MAX: Double = 2.0.pow(63.0)
private val ONE_OVER_MAX: Double = 1.0 / MAX
private const val LEN: Int = 128
private const val LAST: Int = LEN - 1
private val K: LongArray = LongArray(LEN)
private val W: DoubleArray = DoubleArray(LEN)
private val F: DoubleArray = DoubleArray(LEN)
init {
// Filling the tables.
var d = R
var t = d
var fd = gauss(d)
val q = V / fd
K[0] = (d / q * MAX).toLong()
K[1] = 0
W[0] = q * ONE_OVER_MAX
W[LAST] = d * ONE_OVER_MAX
F[0] = 1.0
F[LAST] = fd
(LAST - 1 downTo 1).forEach { i ->
d = sqrt(-2 * ln(V / d + fd))
fd = gauss(d)
K[i + 1] = (d / t * MAX).toLong()
t = d
F[i] = fd
W[i] = d * ONE_OVER_MAX
}
}
public fun of(): ZigguratNormalizedGaussianSampler = ZigguratNormalizedGaussianSampler()
private fun gauss(x: Double): Double = exp(-0.5 * x * x)
}
}

View File

@ -3,10 +3,14 @@ package space.kscience.kmath.stat
import org.apache.commons.rng.UniformRandomProvider
import org.apache.commons.rng.simple.RandomSource
public class RandomSourceGenerator(public val source: RandomSource, seed: Long?) : RandomGenerator {
internal val random: UniformRandomProvider = seed?.let {
RandomSource.create(source, seed)
} ?: RandomSource.create(source)
/**
* Implements [RandomGenerator] by delegating all operations to [RandomSource].
*
* @property source the underlying [RandomSource] object.
*/
public class RandomSourceGenerator internal constructor(public val source: RandomSource, seed: Long?) : RandomGenerator {
internal val random: UniformRandomProvider = seed?.let { RandomSource.create(source, seed) }
?: RandomSource.create(source)
public override fun nextBoolean(): Boolean = random.nextBoolean()
public override fun nextDouble(): Double = random.nextDouble()
@ -23,22 +27,84 @@ public class RandomSourceGenerator(public val source: RandomSource, seed: Long?)
public override fun fork(): RandomGenerator = RandomSourceGenerator(source, nextLong())
}
/**
* Implements [UniformRandomProvider] by delegating all operations to [RandomGenerator].
*
* @property generator the underlying [RandomGenerator] object.
*/
public inline class RandomGeneratorProvider(public val generator: RandomGenerator) : UniformRandomProvider {
/**
* Generates a [Boolean] value.
*
* @return the next random value.
*/
public override fun nextBoolean(): Boolean = generator.nextBoolean()
/**
* Generates a [Float] value between 0 and 1.
*
* @return the next random value between 0 and 1.
*/
public override fun nextFloat(): Float = generator.nextDouble().toFloat()
public override fun nextBytes(bytes: ByteArray) {
generator.fillBytes(bytes)
}
/**
* Generates [Byte] values and places them into a user-supplied array.
*
* The number of random bytes produced is equal to the length of the the byte array.
*
* @param bytes byte array in which to put the random bytes.
*/
public override fun nextBytes(bytes: ByteArray): Unit = generator.fillBytes(bytes)
/**
* Generates [Byte] values and places them into a user-supplied array.
*
* The array is filled with bytes extracted from random integers. This implies that the number of random bytes
* generated may be larger than the length of the byte array.
*
* @param bytes the array in which to put the generated bytes.
* @param start the index at which to start inserting the generated bytes.
* @param len the number of bytes to insert.
*/
public override fun nextBytes(bytes: ByteArray, start: Int, len: Int) {
generator.fillBytes(bytes, start, start + len)
}
/**
* Generates an [Int] value.
*
* @return the next random value.
*/
public override fun nextInt(): Int = generator.nextInt()
/**
* Generates an [Int] value between 0 (inclusive) and the specified value (exclusive).
*
* @param n the bound on the random number to be returned. Must be positive.
* @return a random integer between 0 (inclusive) and [n] (exclusive).
*/
public override fun nextInt(n: Int): Int = generator.nextInt(n)
/**
* Generates a [Double] value between 0 and 1.
*
* @return the next random value between 0 and 1.
*/
public override fun nextDouble(): Double = generator.nextDouble()
/**
* Generates a [Long] value.
*
* @return the next random value.
*/
public override fun nextLong(): Long = generator.nextLong()
/**
* Generates a [Long] value between 0 (inclusive) and the specified value (exclusive).
*
* @param n Bound on the random number to be returned. Must be positive.
* @return a random long value between 0 (inclusive) and [n] (exclusive).
*/
public override fun nextLong(n: Long): Long = generator.nextLong(n)
}
@ -51,8 +117,14 @@ public fun RandomGenerator.asUniformRandomProvider(): UniformRandomProvider = if
else
RandomGeneratorProvider(this)
/**
* Returns [RandomSourceGenerator] with given [RandomSource] and [seed].
*/
public fun RandomGenerator.Companion.fromSource(source: RandomSource, seed: Long? = null): RandomSourceGenerator =
RandomSourceGenerator(source, seed)
/**
* Returns [RandomSourceGenerator] with [RandomSource.MT] algorithm and given [seed].
*/
public fun RandomGenerator.Companion.mersenneTwister(seed: Long? = null): RandomSourceGenerator =
fromSource(RandomSource.MT, seed)

View File

@ -1,99 +0,0 @@
package space.kscience.kmath.stat
import org.apache.commons.rng.UniformRandomProvider
import org.apache.commons.rng.sampling.distribution.*
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.chains.BlockingIntChain
import space.kscience.kmath.chains.Chain
import kotlin.math.PI
import kotlin.math.exp
import kotlin.math.pow
import kotlin.math.sqrt
public abstract class ContinuousSamplerDistribution : Distribution<Double> {
private inner class ContinuousSamplerChain(val generator: RandomGenerator) : BlockingDoubleChain() {
private val sampler = buildCMSampler(generator)
override fun nextDouble(): Double = sampler.sample()
override fun fork(): Chain<Double> = ContinuousSamplerChain(generator.fork())
}
protected abstract fun buildCMSampler(generator: RandomGenerator): ContinuousSampler
public override fun sample(generator: RandomGenerator): BlockingDoubleChain = ContinuousSamplerChain(generator)
}
public abstract class DiscreteSamplerDistribution : Distribution<Int> {
private inner class ContinuousSamplerChain(val generator: RandomGenerator) : BlockingIntChain() {
private val sampler = buildSampler(generator)
override fun nextInt(): Int = sampler.sample()
override fun fork(): Chain<Int> = ContinuousSamplerChain(generator.fork())
}
protected abstract fun buildSampler(generator: RandomGenerator): DiscreteSampler
public override fun sample(generator: RandomGenerator): BlockingIntChain = ContinuousSamplerChain(generator)
}
public enum class NormalSamplerMethod {
BoxMuller,
Marsaglia,
Ziggurat
}
private fun normalSampler(method: NormalSamplerMethod, provider: UniformRandomProvider): NormalizedGaussianSampler =
when (method) {
NormalSamplerMethod.BoxMuller -> BoxMullerNormalizedGaussianSampler(provider)
NormalSamplerMethod.Marsaglia -> MarsagliaNormalizedGaussianSampler(provider)
NormalSamplerMethod.Ziggurat -> ZigguratNormalizedGaussianSampler(provider)
}
public fun Distribution.Companion.normal(
method: NormalSamplerMethod = NormalSamplerMethod.Ziggurat,
): ContinuousSamplerDistribution = object : ContinuousSamplerDistribution() {
override fun buildCMSampler(generator: RandomGenerator): ContinuousSampler {
val provider = generator.asUniformRandomProvider()
return normalSampler(method, provider)
}
override fun probability(arg: Double): Double = exp(-arg.pow(2) / 2) / sqrt(PI * 2)
}
/**
* A univariate normal distribution with given [mean] and [sigma]. [method] defines commons-rng generation method
*/
public fun Distribution.Companion.normal(
mean: Double,
sigma: Double,
method: NormalSamplerMethod = NormalSamplerMethod.Ziggurat,
): ContinuousSamplerDistribution = object : ContinuousSamplerDistribution() {
private val sigma2 = sigma.pow(2)
private val norm = sigma * sqrt(PI * 2)
override fun buildCMSampler(generator: RandomGenerator): ContinuousSampler {
val provider = generator.asUniformRandomProvider()
val normalizedSampler = normalSampler(method, provider)
return GaussianSampler(normalizedSampler, mean, sigma)
}
override fun probability(arg: Double): Double = exp(-(arg - mean).pow(2) / 2 / sigma2) / norm
}
public fun Distribution.Companion.poisson(
lambda: Double,
): DiscreteSamplerDistribution = object : DiscreteSamplerDistribution() {
private val computedProb: HashMap<Int, Double> = hashMapOf(0 to exp(-lambda))
override fun buildSampler(generator: RandomGenerator): DiscreteSampler =
PoissonSampler.of(generator.asUniformRandomProvider(), lambda)
override fun probability(arg: Int): Double {
require(arg >= 0) { "The argument must be >= 0" }
return if (arg > 40)
exp(-(arg - lambda).pow(2) / 2 / lambda) / sqrt(2 * PI * lambda)
else
computedProb.getOrPut(arg) { probability(arg - 1) * lambda / arg }
}
}

View File

@ -5,23 +5,22 @@ import kotlinx.coroutines.flow.toList
import kotlinx.coroutines.runBlocking
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
import space.kscience.kmath.stat.samplers.GaussianSampler
internal class CommonsDistributionsTest {
@Test
fun testNormalDistributionSuspend() {
val distribution = Distribution.normal(7.0, 2.0)
val distribution = GaussianSampler.of(7.0, 2.0)
val generator = RandomGenerator.default(1)
val sample = runBlocking {
distribution.sample(generator).take(1000).toList()
}
val sample = runBlocking { distribution.sample(generator).take(1000).toList() }
Assertions.assertEquals(7.0, sample.average(), 0.1)
}
@Test
fun testNormalDistributionBlocking() {
val distribution = Distribution.normal(7.0, 2.0)
val distribution = GaussianSampler.of(7.0, 2.0)
val generator = RandomGenerator.default(1)
val sample = distribution.sample(generator).nextBlock(1000)
val sample = runBlocking { distribution.sample(generator).blocking().nextBlock(1000) }
Assertions.assertEquals(7.0, sample.average(), 0.1)
}
}

View File

@ -7,11 +7,8 @@ class SamplerTest {
@Test
fun bufferSamplerTest() {
val sampler: Sampler<Double> =
BasicSampler { it.chain { nextDouble() } }
val sampler = Sampler { it.chain { nextDouble() } }
val data = sampler.sampleBuffer(RandomGenerator.default, 100)
runBlocking {
println(data.next())
}
runBlocking { println(data.next()) }
}
}