Merge pull request #164 from mipt-npm/feature/mp-samplers

Implement Commons RNG-like samplers in kmath-prob module for Multiplatform
This commit is contained in:
Alexander Nozik 2021-03-31 09:25:44 +03:00 committed by GitHub
commit b4fc311668
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
41 changed files with 1596 additions and 227 deletions

View File

@ -106,6 +106,7 @@ kotlin.sourceSets.all {
with(languageSettings) { with(languageSettings) {
useExperimentalAnnotation("kotlin.contracts.ExperimentalContracts") useExperimentalAnnotation("kotlin.contracts.ExperimentalContracts")
useExperimentalAnnotation("kotlin.ExperimentalUnsignedTypes") useExperimentalAnnotation("kotlin.ExperimentalUnsignedTypes")
useExperimentalAnnotation("space.kscience.kmath.misc.UnstableKMathAPI")
} }
} }

View File

@ -13,9 +13,8 @@ import space.kscience.kmath.optimization.OptimizationResult
import space.kscience.kmath.real.DoubleVector import space.kscience.kmath.real.DoubleVector
import space.kscience.kmath.real.map import space.kscience.kmath.real.map
import space.kscience.kmath.real.step import space.kscience.kmath.real.step
import space.kscience.kmath.stat.Distribution
import space.kscience.kmath.stat.RandomGenerator import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.normal import space.kscience.kmath.stat.distributions.NormalDistribution
import space.kscience.kmath.structures.asIterable import space.kscience.kmath.structures.asIterable
import space.kscience.kmath.structures.toList import space.kscience.kmath.structures.toList
import kotlin.math.pow import kotlin.math.pow
@ -37,10 +36,9 @@ operator fun TraceValues.invoke(vector: DoubleVector) {
/** /**
* Least squares fie with auto-differentiation. Uses `kmath-commons` and `kmath-for-real` modules. * Least squares fie with auto-differentiation. Uses `kmath-commons` and `kmath-for-real` modules.
*/ */
fun main() { suspend fun main() {
//A generator for a normally distributed values //A generator for a normally distributed values
val generator = Distribution.normal() val generator = NormalDistribution(2.0, 7.0)
//A chain/flow of random values with the given seed //A chain/flow of random values with the given seed
val chain = generator.sample(RandomGenerator.default(112667)) val chain = generator.sample(RandomGenerator.default(112667))
@ -53,7 +51,7 @@ fun main() {
//Perform an operation on each x value (much more effective, than numpy) //Perform an operation on each x value (much more effective, than numpy)
val y = x.map { val y = x.map {
val value = it.pow(2) + it + 1 val value = it.pow(2) + it + 1
value + chain.nextDouble() * sqrt(value) value + chain.next() * sqrt(value)
} }
// this will also work, but less effective: // this will also work, but less effective:
// val y = x.pow(2)+ x + 1 + chain.nextDouble() // val y = x.pow(2)+ x + 1 + chain.nextDouble()

View File

@ -1,23 +1,24 @@
package kscience.kmath.commons.prob package space.kscience.kmath.stat
import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async import kotlinx.coroutines.async
import kotlinx.coroutines.runBlocking import kotlinx.coroutines.runBlocking
import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSampler import space.kscience.kmath.stat.samplers.GaussianSampler
import org.apache.commons.rng.simple.RandomSource import org.apache.commons.rng.simple.RandomSource
import space.kscience.kmath.stat.*
import java.time.Duration import java.time.Duration
import java.time.Instant import java.time.Instant
import org.apache.commons.rng.sampling.distribution.GaussianSampler as CMGaussianSampler
import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSampler as CMZigguratNormalizedGaussianSampler
private fun runChain(): Duration { private suspend fun runKMathChained(): Duration {
val generator = RandomGenerator.fromSource(RandomSource.MT, 123L) val generator = RandomGenerator.fromSource(RandomSource.MT, 123L)
val normal = Distribution.normal(NormalSamplerMethod.Ziggurat) val normal = GaussianSampler.of(7.0, 2.0)
val chain = normal.sample(generator) val chain = normal.sample(generator).blocking()
val startTime = Instant.now() val startTime = Instant.now()
var sum = 0.0 var sum = 0.0
repeat(10000001) { counter -> repeat(10000001) { counter ->
sum += chain.nextDouble() sum += chain.next()
if (counter % 100000 == 0) { if (counter % 100000 == 0) {
val duration = Duration.between(startTime, Instant.now()) val duration = Duration.between(startTime, Instant.now())
@ -29,9 +30,15 @@ private fun runChain(): Duration {
return Duration.between(startTime, Instant.now()) return Duration.between(startTime, Instant.now())
} }
private fun runDirect(): Duration { private fun runApacheDirect(): Duration {
val provider = RandomSource.create(RandomSource.MT, 123L) val rng = RandomSource.create(RandomSource.MT, 123L)
val sampler = ZigguratNormalizedGaussianSampler(provider)
val sampler = CMGaussianSampler.of(
CMZigguratNormalizedGaussianSampler.of(rng),
7.0,
2.0
)
val startTime = Instant.now() val startTime = Instant.now()
var sum = 0.0 var sum = 0.0
@ -51,11 +58,9 @@ private fun runDirect(): Duration {
/** /**
* Comparing chain sampling performance with direct sampling performance * Comparing chain sampling performance with direct sampling performance
*/ */
fun main() { fun main(): Unit = runBlocking(Dispatchers.Default) {
runBlocking(Dispatchers.Default) { val chainJob = async { runKMathChained() }
val chainJob = async { runChain() } val directJob = async { runApacheDirect() }
val directJob = async { runDirect() } println("KMath Chained: ${chainJob.await()}")
println("Chain: ${chainJob.await()}") println("Apache Direct: ${directJob.await()}")
println("Direct: ${directJob.await()}")
}
} }

View File

@ -3,14 +3,15 @@ package space.kscience.kmath.stat
import kotlinx.coroutines.runBlocking import kotlinx.coroutines.runBlocking
import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.collectWithState import space.kscience.kmath.chains.collectWithState
import space.kscience.kmath.stat.distributions.NormalDistribution
/** /**
* The state of distribution averager * The state of distribution averager.
*/ */
private data class AveragingChainState(var num: Int = 0, var value: Double = 0.0) private data class AveragingChainState(var num: Int = 0, var value: Double = 0.0)
/** /**
* Averaging * Averaging.
*/ */
private fun Chain<Double>.mean(): Chain<Double> = collectWithState(AveragingChainState(), { it.copy() }) { chain -> private fun Chain<Double>.mean(): Chain<Double> = collectWithState(AveragingChainState(), { it.copy() }) { chain ->
val next = chain.next() val next = chain.next()
@ -21,7 +22,7 @@ private fun Chain<Double>.mean(): Chain<Double> = collectWithState(AveragingChai
fun main() { fun main() {
val normal = Distribution.normal() val normal = NormalDistribution(0.0, 2.0)
val chain = normal.sample(RandomGenerator.default).mean() val chain = normal.sample(RandomGenerator.default).mean()
runBlocking { runBlocking {

View File

@ -1,13 +1,13 @@
package space.kscience.kmath.commons.optimization package space.kscience.kmath.commons.optimization
import org.junit.jupiter.api.Test import kotlinx.coroutines.runBlocking
import space.kscience.kmath.commons.expressions.DerivativeStructureExpression import space.kscience.kmath.commons.expressions.DerivativeStructureExpression
import space.kscience.kmath.misc.symbol import space.kscience.kmath.misc.symbol
import space.kscience.kmath.optimization.FunctionOptimization import space.kscience.kmath.optimization.FunctionOptimization
import space.kscience.kmath.stat.Distribution
import space.kscience.kmath.stat.RandomGenerator import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.normal import space.kscience.kmath.stat.distributions.NormalDistribution
import kotlin.math.pow import kotlin.math.pow
import kotlin.test.Test
internal class OptimizeTest { internal class OptimizeTest {
val x by symbol val x by symbol
@ -34,23 +34,24 @@ internal class OptimizeTest {
simplexSteps(x to 2.0, y to 0.5) simplexSteps(x to 2.0, y to 0.5)
//this sets simplex optimizer //this sets simplex optimizer
} }
println(result.point) println(result.point)
println(result.value) println(result.value)
} }
@Test @Test
fun testCmFit() { fun testCmFit() = runBlocking {
val a by symbol val a by symbol
val b by symbol val b by symbol
val c by symbol val c by symbol
val sigma = 1.0 val sigma = 1.0
val generator = Distribution.normal(0.0, sigma) val generator = NormalDistribution(0.0, sigma)
val chain = generator.sample(RandomGenerator.default(112667)) val chain = generator.sample(RandomGenerator.default(112667))
val x = (1..100).map(Int::toDouble) val x = (1..100).map(Int::toDouble)
val y = x.map { val y = x.map {
it.pow(2) + it + 1 + chain.nextDouble() it.pow(2) + it + 1 + chain.next()
} }
val yErr = List(x.size) { sigma } val yErr = List(x.size) { sigma }
@ -64,5 +65,4 @@ internal class OptimizeTest {
println(result) println(result)
println("Chi2/dof = ${result.value / (x.size - 3)}") println("Chi2/dof = ${result.value / (x.size - 3)}")
} }
} }

View File

@ -241,18 +241,18 @@ public class BigInt internal constructor(
) )
private fun compareMagnitudes(mag1: Magnitude, mag2: Magnitude): Int { private fun compareMagnitudes(mag1: Magnitude, mag2: Magnitude): Int {
when { return when {
mag1.size > mag2.size -> return 1 mag1.size > mag2.size -> 1
mag1.size < mag2.size -> return -1 mag1.size < mag2.size -> -1
else -> { else -> {
for (i in mag1.size - 1 downTo 0) { for (i in mag1.size - 1 downTo 0) return when {
if (mag1[i] > mag2[i]) { mag1[i] > mag2[i] -> 1
return 1 mag1[i] < mag2[i] -> -1
} else if (mag1[i] < mag2[i]) { else -> continue
return -1
}
} }
return 0
0
} }
} }
} }
@ -302,10 +302,11 @@ public class BigInt internal constructor(
var carry = 0uL var carry = 0uL
for (i in mag.indices) { for (i in mag.indices) {
val cur: ULong = carry + mag[i].toULong() * x.toULong() val cur = carry + mag[i].toULong() * x.toULong()
result[i] = (cur and BASE).toUInt() result[i] = (cur and BASE).toUInt()
carry = cur shr BASE_SIZE carry = cur shr BASE_SIZE
} }
result[resultLength - 1] = (carry and BASE).toUInt() result[resultLength - 1] = (carry and BASE).toUInt()
return stripLeadingZeros(result) return stripLeadingZeros(result)

View File

@ -40,7 +40,6 @@ public interface Buffer<out T> {
public operator fun iterator(): Iterator<T> public operator fun iterator(): Iterator<T>
public companion object { public companion object {
/** /**
* Check the element-by-element match of content of two buffers. * Check the element-by-element match of content of two buffers.
*/ */
@ -110,7 +109,6 @@ public interface MutableBuffer<T> : Buffer<T> {
public fun copy(): MutableBuffer<T> public fun copy(): MutableBuffer<T>
public companion object { public companion object {
/** /**
* Creates a [DoubleBuffer] with the specified [size], where each element is calculated by calling the specified * Creates a [DoubleBuffer] with the specified [size], where each element is calculated by calling the specified
* [initializer] function. * [initializer] function.

View File

@ -38,12 +38,11 @@ class NumberNDFieldTest {
(i * 10 + j).toDouble() (i * 10 + j).toDouble()
} }
for (i in 0..2) { for (i in 0..2)
for (j in 0..2) { for (j in 0..2) {
val expected = (i * 10 + j).toDouble() val expected = (i * 10 + j).toDouble()
assertEquals(expected, array[i, j], "Error at index [$i, $j]") assertEquals(expected, array[i, j], "Error at index [$i, $j]")
} }
}
} }
@Test @Test

View File

@ -1,12 +1,13 @@
package space.kscience.kmath.chains package space.kscience.kmath.chains
/** /**
* Performance optimized chain for real values * Chunked, specialized chain for real values.
*/ */
public abstract class BlockingDoubleChain : Chain<Double> { public interface BlockingDoubleChain : Chain<Double> {
public abstract fun nextDouble(): Double public override suspend fun next(): Double
override suspend fun next(): Double = nextDouble() /**
* Returns an [DoubleArray] chunk of [size] values of [next].
public open fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { nextDouble() } */
public suspend fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { next() }
} }

View File

@ -3,10 +3,7 @@ package space.kscience.kmath.chains
/** /**
* Performance optimized chain for integer values * Performance optimized chain for integer values
*/ */
public abstract class BlockingIntChain : Chain<Int> { public interface BlockingIntChain : Chain<Int> {
public abstract fun nextInt(): Int public override suspend fun next(): Int
public suspend fun nextBlock(size: Int): IntArray = IntArray(size) { next() }
override suspend fun next(): Int = nextInt()
public fun nextBlock(size: Int): IntArray = IntArray(size) { nextInt() }
} }

View File

@ -63,12 +63,10 @@ public class MarkovChain<out R : Any>(private val seed: suspend () -> R, private
public fun value(): R? = value public fun value(): R? = value
public override suspend fun next(): R { public override suspend fun next(): R = mutex.withLock {
mutex.withLock { val newValue = gen(value ?: seed())
val newValue = gen(value ?: seed()) value = newValue
value = newValue newValue
return newValue
}
} }
public override fun fork(): Chain<R> = MarkovChain(seed = { value ?: seed() }, gen = gen) public override fun fork(): Chain<R> = MarkovChain(seed = { value ?: seed() }, gen = gen)
@ -90,12 +88,10 @@ public class StatefulChain<S, out R>(
public fun value(): R? = value public fun value(): R? = value
public override suspend fun next(): R { public override suspend fun next(): R = mutex.withLock {
mutex.withLock { val newValue = state.gen(value ?: state.seed())
val newValue = state.gen(value ?: state.seed()) value = newValue
value = newValue newValue
return newValue
}
} }
public override fun fork(): Chain<R> = StatefulChain(forkState(state), seed, forkState, gen) public override fun fork(): Chain<R> = StatefulChain(forkState(state), seed, forkState, gen)

View File

@ -28,7 +28,7 @@ public fun <T> Flow<T>.chunked(bufferSize: Int, bufferFactory: BufferFactory<T>)
var counter = 0 var counter = 0
this@chunked.collect { element -> this@chunked.collect { element ->
list.add(element) list += element
counter++ counter++
if (counter == bufferSize) { if (counter == bufferSize) {

View File

@ -48,11 +48,9 @@ public class RingBuffer<T>(
/** /**
* A safe snapshot operation * A safe snapshot operation
*/ */
public suspend fun snapshot(): Buffer<T> { public suspend fun snapshot(): Buffer<T> = mutex.withLock {
mutex.withLock { val copy = buffer.copy()
val copy = buffer.copy() VirtualBuffer(size) { i -> copy[startIndex.forward(i)] as T }
return VirtualBuffer(size) { i -> copy[startIndex.forward(i)] as T }
}
} }
public suspend fun push(element: T) { public suspend fun push(element: T) {

View File

@ -1,4 +1,4 @@
package kscience.dimensions package space.kscience.dimensions
import space.kscience.kmath.dimensions.D2 import space.kscience.kmath.dimensions.D2
import space.kscience.kmath.dimensions.D3 import space.kscience.kmath.dimensions.D3

View File

@ -38,8 +38,8 @@ public class OrderedPiecewisePolynomial<T : Comparable<T>>(delimiter: T) :
*/ */
public fun putRight(right: T, piece: Polynomial<T>) { public fun putRight(right: T, piece: Polynomial<T>) {
require(right > delimiters.last()) { "New delimiter should be to the right of old one" } require(right > delimiters.last()) { "New delimiter should be to the right of old one" }
delimiters.add(right) delimiters += right
pieces.add(piece) pieces += piece
} }
/** /**

View File

@ -1,17 +1,29 @@
package space.kscience.kmath.stat package space.kscience.kmath.stat
import kotlinx.coroutines.flow.first
import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.collect import space.kscience.kmath.chains.collect
import space.kscience.kmath.structures.Buffer import space.kscience.kmath.structures.Buffer
import space.kscience.kmath.structures.BufferFactory import space.kscience.kmath.structures.BufferFactory
import space.kscience.kmath.structures.DoubleBuffer import space.kscience.kmath.structures.IntBuffer
import space.kscience.kmath.structures.MutableBuffer
import kotlin.jvm.JvmName
public interface Sampler<T : Any> { /**
* Sampler that generates chains of values of type [T].
*/
public fun interface Sampler<T : Any> {
/**
* Generates a chain of samples.
*
* @param generator the randomness provider.
* @return the new chain.
*/
public fun sample(generator: RandomGenerator): Chain<T> public fun sample(generator: RandomGenerator): Chain<T>
} }
/** /**
* A distribution of typed objects * A distribution of typed objects.
*/ */
public interface Distribution<T : Any> : Sampler<T> { public interface Distribution<T : Any> : Sampler<T> {
/** /**
@ -20,11 +32,7 @@ public interface Distribution<T : Any> : Sampler<T> {
*/ */
public fun probability(arg: T): Double public fun probability(arg: T): Double
/** public override fun sample(generator: RandomGenerator): Chain<T>
* Create a chain of samples from this distribution.
* The chain is not guaranteed to be stateless, but different sample chains should be independent.
*/
override fun sample(generator: RandomGenerator): Chain<T>
/** /**
* An empty companion. Distribution factories should be written as its extensions * An empty companion. Distribution factories should be written as its extensions
@ -63,16 +71,27 @@ public fun <T : Any> Sampler<T>.sampleBuffer(
//clear list from previous run //clear list from previous run
tmp.clear() tmp.clear()
//Fill list //Fill list
repeat(size) { repeat(size) { tmp += chain.next() }
tmp.add(chain.next())
}
//return new buffer with elements from tmp //return new buffer with elements from tmp
bufferFactory(size) { tmp[it] } bufferFactory(size) { tmp[it] }
} }
} }
/** /**
* Generate a bunch of samples from real distributions * Samples one value from this [Sampler].
*/ */
public suspend fun <T : Any> Sampler<T>.next(generator: RandomGenerator): T = sample(generator).first()
/**
* Generates [size] real samples and chunks them into some buffers.
*/
@JvmName("sampleRealBuffer")
public fun Sampler<Double>.sampleBuffer(generator: RandomGenerator, size: Int): Chain<Buffer<Double>> = public fun Sampler<Double>.sampleBuffer(generator: RandomGenerator, size: Int): Chain<Buffer<Double>> =
sampleBuffer(generator, size, ::DoubleBuffer) sampleBuffer(generator, size, MutableBuffer.Companion::double)
/**
* Generates [size] integer samples and chunks them into some buffers.
*/
@JvmName("sampleIntBuffer")
public fun Sampler<Int>.sampleBuffer(generator: RandomGenerator, size: Int): Chain<Buffer<Int>> =
sampleBuffer(generator, size, ::IntBuffer)

View File

@ -14,7 +14,7 @@ public interface NamedDistribution<T> : Distribution<Map<String, T>>
public class FactorizedDistribution<T>(public val distributions: Collection<NamedDistribution<T>>) : public class FactorizedDistribution<T>(public val distributions: Collection<NamedDistribution<T>>) :
NamedDistribution<T> { NamedDistribution<T> {
override fun probability(arg: Map<String, T>): Double = override fun probability(arg: Map<String, T>): Double =
distributions.fold(1.0) { acc, distr -> acc * distr.probability(arg) } distributions.fold(1.0) { acc, dist -> acc * dist.probability(arg) }
override fun sample(generator: RandomGenerator): Chain<Map<String, T>> { override fun sample(generator: RandomGenerator): Chain<Map<String, T>> {
val chains = distributions.map { it.sample(generator) } val chains = distributions.map { it.sample(generator) }
@ -38,6 +38,6 @@ public class DistributionBuilder<T : Any> {
private val distributions = ArrayList<NamedDistribution<T>>() private val distributions = ArrayList<NamedDistribution<T>>()
public infix fun String.to(distribution: Distribution<T>) { public infix fun String.to(distribution: Distribution<T>) {
distributions.add(NamedDistributionWrapper(this, distribution)) distributions += NamedDistributionWrapper(this, distribution)
} }
} }

View File

@ -1,17 +1,22 @@
package space.kscience.kmath.stat package space.kscience.kmath.stat
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.chains.BlockingIntChain
import space.kscience.kmath.chains.Chain import space.kscience.kmath.chains.Chain
/** /**
* A possibly stateful chain producing random values. * A possibly stateful chain producing random values.
*
* @property generator the underlying [RandomGenerator] instance.
*/ */
public class RandomChain<out R>( public class RandomChain<out R>(
public val generator: RandomGenerator, public val generator: RandomGenerator,
private val gen: suspend RandomGenerator.() -> R, private val gen: suspend RandomGenerator.() -> R
) : Chain<R> { ) : Chain<R> {
override suspend fun next(): R = generator.gen() override suspend fun next(): R = generator.gen()
override fun fork(): Chain<R> = RandomChain(generator.fork(), gen) override fun fork(): Chain<R> = RandomChain(generator.fork(), gen)
} }
public fun <R> RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, gen) public fun <R> RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, gen)
public fun Chain<Double>.blocking(): BlockingDoubleChain = object : Chain<Double> by this, BlockingDoubleChain {}
public fun Chain<Int>.blocking(): BlockingIntChain = object : Chain<Int> by this, BlockingIntChain {}

View File

@ -82,6 +82,8 @@ public interface RandomGenerator {
/** /**
* Implements [RandomGenerator] by delegating all operations to [Random]. * Implements [RandomGenerator] by delegating all operations to [Random].
*
* @property random the underlying [Random] object.
*/ */
public class DefaultGenerator(public val random: Random = Random) : RandomGenerator { public class DefaultGenerator(public val random: Random = Random) : RandomGenerator {
public override fun nextBoolean(): Boolean = random.nextBoolean() public override fun nextBoolean(): Boolean = random.nextBoolean()

View File

@ -8,16 +8,28 @@ import space.kscience.kmath.operations.Group
import space.kscience.kmath.operations.ScaleOperations import space.kscience.kmath.operations.ScaleOperations
import space.kscience.kmath.operations.invoke import space.kscience.kmath.operations.invoke
public class BasicSampler<T : Any>(public val chainBuilder: (RandomGenerator) -> Chain<T>) : Sampler<T> { /**
public override fun sample(generator: RandomGenerator): Chain<T> = chainBuilder(generator) * Implements [Sampler] by sampling only certain [value].
} *
* @property value the value to sample.
*/
public class ConstantSampler<T : Any>(public val value: T) : Sampler<T> { public class ConstantSampler<T : Any>(public val value: T) : Sampler<T> {
public override fun sample(generator: RandomGenerator): Chain<T> = ConstantChain(value) public override fun sample(generator: RandomGenerator): Chain<T> = ConstantChain(value)
} }
/** /**
* A space for samplers. Allows to perform simple operations on distributions * Implements [Sampler] by delegating sampling to value of [chainBuilder].
*
* @property chainBuilder the provider of [Chain].
*/
public class BasicSampler<T : Any>(public val chainBuilder: (RandomGenerator) -> Chain<T>) : Sampler<T> {
public override fun sample(generator: RandomGenerator): Chain<T> = chainBuilder(generator)
}
/**
* A space of samplers. Allows to perform simple operations on distributions.
*
* @property algebra the space to provide addition and scalar multiplication for [T].
*/ */
public class SamplerSpace<T : Any, S>(public val algebra: S) : Group<Sampler<T>>, public class SamplerSpace<T : Any, S>(public val algebra: S) : Group<Sampler<T>>,
ScaleOperations<Sampler<T>> where S : Group<T>, S : ScaleOperations<T> { ScaleOperations<Sampler<T>> where S : Group<T>, S : ScaleOperations<T> {
@ -29,8 +41,10 @@ public class SamplerSpace<T : Any, S>(public val algebra: S) : Group<Sampler<T>>
} }
public override fun scale(a: Sampler<T>, value: Double): Sampler<T> = BasicSampler { generator -> public override fun scale(a: Sampler<T>, value: Double): Sampler<T> = BasicSampler { generator ->
a.sample(generator).map { algebra { it * value } } a.sample(generator).map { a ->
algebra { a * value }
}
} }
override fun Sampler<T>.unaryMinus(): Sampler<T> = scale(this, -1.0) public override fun Sampler<T>.unaryMinus(): Sampler<T> = scale(this, -1.0)
} }

View File

@ -0,0 +1,41 @@
package space.kscience.kmath.stat.distributions
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.UnivariateDistribution
import space.kscience.kmath.stat.internal.InternalErf
import space.kscience.kmath.stat.samplers.GaussianSampler
import space.kscience.kmath.stat.samplers.NormalizedGaussianSampler
import space.kscience.kmath.stat.samplers.ZigguratNormalizedGaussianSampler
import kotlin.math.*
/**
* Implements [UnivariateDistribution] for the normal (gaussian) distribution.
*/
public inline class NormalDistribution(public val sampler: GaussianSampler) : UnivariateDistribution<Double> {
public constructor(
mean: Double,
standardDeviation: Double,
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of(),
) : this(GaussianSampler.of(mean, standardDeviation, normalized))
public override fun probability(arg: Double): Double {
val x1 = (arg - sampler.mean) / sampler.standardDeviation
return exp(-0.5 * x1 * x1 - (ln(sampler.standardDeviation) + 0.5 * ln(2 * PI)))
}
public override fun sample(generator: RandomGenerator): Chain<Double> = sampler.sample(generator)
public override fun cumulative(arg: Double): Double {
val dev = arg - sampler.mean
return when {
abs(dev) > 40 * sampler.standardDeviation -> if (dev < 0) 0.0 else 1.0
else -> 0.5 * InternalErf.erfc(-dev / (sampler.standardDeviation * SQRT2))
}
}
private companion object {
private val SQRT2 = sqrt(2.0)
}
}

View File

@ -0,0 +1,15 @@
package space.kscience.kmath.stat.internal
import kotlin.math.abs
/**
* Based on Commons Math implementation.
* See [https://commons.apache.org/proper/commons-math/javadocs/api-3.3/org/apache/commons/math3/special/Erf.html].
*/
internal object InternalErf {
fun erfc(x: Double): Double {
if (abs(x) > 40) return if (x > 0) 0.0 else 2.0
val ret = InternalGamma.regularizedGammaQ(0.5, x * x, 10000)
return if (x < 0) 2 - ret else ret
}
}

View File

@ -0,0 +1,238 @@
package space.kscience.kmath.stat.internal
import kotlin.math.*
private abstract class ContinuedFraction protected constructor() {
protected abstract fun getA(n: Int, x: Double): Double
protected abstract fun getB(n: Int, x: Double): Double
fun evaluate(x: Double, maxIterations: Int): Double {
val small = 1e-50
var hPrev = getA(0, x)
if (hPrev == 0.0 || abs(0.0 - hPrev) <= small) hPrev = small
var n = 1
var dPrev = 0.0
var cPrev = hPrev
var hN = hPrev
while (n < maxIterations) {
val a = getA(n, x)
val b = getB(n, x)
var dN = a + b * dPrev
if (dN == 0.0 || abs(0.0 - dN) <= small) dN = small
var cN = a + b / cPrev
if (cN == 0.0 || abs(0.0 - cN) <= small) cN = small
dN = 1 / dN
val deltaN = cN * dN
hN = hPrev * deltaN
check(!hN.isInfinite()) { "hN is infinite" }
check(!hN.isNaN()) { "hN is NaN" }
if (abs(deltaN - 1.0) < 10e-9) break
dPrev = dN
cPrev = cN
hPrev = hN
n++
}
check(n < maxIterations) { "n is more than maxIterations" }
return hN
}
}
internal object InternalGamma {
const val LANCZOS_G = 607.0 / 128.0
private val LANCZOS = doubleArrayOf(
0.99999999999999709182,
57.156235665862923517,
-59.597960355475491248,
14.136097974741747174,
-0.49191381609762019978,
.33994649984811888699e-4,
.46523628927048575665e-4,
-.98374475304879564677e-4,
.15808870322491248884e-3,
-.21026444172410488319e-3,
.21743961811521264320e-3,
-.16431810653676389022e-3,
.84418223983852743293e-4,
-.26190838401581408670e-4,
.36899182659531622704e-5
)
private val HALF_LOG_2_PI = 0.5 * ln(2.0 * PI)
private const val INV_GAMMA1P_M1_A0 = .611609510448141581788E-08
private const val INV_GAMMA1P_M1_A1 = .624730830116465516210E-08
private const val INV_GAMMA1P_M1_B1 = .203610414066806987300E+00
private const val INV_GAMMA1P_M1_B2 = .266205348428949217746E-01
private const val INV_GAMMA1P_M1_B3 = .493944979382446875238E-03
private const val INV_GAMMA1P_M1_B4 = -.851419432440314906588E-05
private const val INV_GAMMA1P_M1_B5 = -.643045481779353022248E-05
private const val INV_GAMMA1P_M1_B6 = .992641840672773722196E-06
private const val INV_GAMMA1P_M1_B7 = -.607761895722825260739E-07
private const val INV_GAMMA1P_M1_B8 = .195755836614639731882E-09
private const val INV_GAMMA1P_M1_P0 = .6116095104481415817861E-08
private const val INV_GAMMA1P_M1_P1 = .6871674113067198736152E-08
private const val INV_GAMMA1P_M1_P2 = .6820161668496170657918E-09
private const val INV_GAMMA1P_M1_P3 = .4686843322948848031080E-10
private const val INV_GAMMA1P_M1_P4 = .1572833027710446286995E-11
private const val INV_GAMMA1P_M1_P5 = -.1249441572276366213222E-12
private const val INV_GAMMA1P_M1_P6 = .4343529937408594255178E-14
private const val INV_GAMMA1P_M1_Q1 = .3056961078365221025009E+00
private const val INV_GAMMA1P_M1_Q2 = .5464213086042296536016E-01
private const val INV_GAMMA1P_M1_Q3 = .4956830093825887312020E-02
private const val INV_GAMMA1P_M1_Q4 = .2692369466186361192876E-03
private const val INV_GAMMA1P_M1_C = -.422784335098467139393487909917598E+00
private const val INV_GAMMA1P_M1_C0 = .577215664901532860606512090082402E+00
private const val INV_GAMMA1P_M1_C1 = -.655878071520253881077019515145390E+00
private const val INV_GAMMA1P_M1_C2 = -.420026350340952355290039348754298E-01
private const val INV_GAMMA1P_M1_C3 = .166538611382291489501700795102105E+00
private const val INV_GAMMA1P_M1_C4 = -.421977345555443367482083012891874E-01
private const val INV_GAMMA1P_M1_C5 = -.962197152787697356211492167234820E-02
private const val INV_GAMMA1P_M1_C6 = .721894324666309954239501034044657E-02
private const val INV_GAMMA1P_M1_C7 = -.116516759185906511211397108401839E-02
private const val INV_GAMMA1P_M1_C8 = -.215241674114950972815729963053648E-03
private const val INV_GAMMA1P_M1_C9 = .128050282388116186153198626328164E-03
private const val INV_GAMMA1P_M1_C10 = -.201348547807882386556893914210218E-04
private const val INV_GAMMA1P_M1_C11 = -.125049348214267065734535947383309E-05
private const val INV_GAMMA1P_M1_C12 = .113302723198169588237412962033074E-05
private const val INV_GAMMA1P_M1_C13 = -.205633841697760710345015413002057E-06
fun logGamma(x: Double): Double = when {
x.isNaN() || x <= 0.0 -> Double.NaN
x < 0.5 -> logGamma1p(x) - ln(x)
x <= 2.5 -> logGamma1p(x - 0.5 - 0.5)
x <= 8.0 -> {
val n = floor(x - 1.5).toInt()
val prod = (1..n).fold(1.0, { prod, i -> prod * (x - i) })
logGamma1p(x - (n + 1)) + ln(prod)
}
else -> {
val tmp = x + LANCZOS_G + .5
(x + .5) * ln(tmp) - tmp + HALF_LOG_2_PI + ln(lanczos(x) / x)
}
}
private fun regularizedGammaP(
a: Double,
x: Double,
maxIterations: Int = Int.MAX_VALUE
): Double = when {
a.isNaN() || x.isNaN() || a <= 0.0 || x < 0.0 -> Double.NaN
x == 0.0 -> 0.0
x >= a + 1 -> 1.0 - regularizedGammaQ(a, x, maxIterations)
else -> {
// calculate series
var n = 0.0 // current element index
var an = 1.0 / a // n-th element in the series
var sum = an // partial sum
while (abs(an / sum) > 10e-15 && n < maxIterations && sum < Double.POSITIVE_INFINITY) {
// compute next element in the series
n += 1.0
an *= x / (a + n)
// update partial sum
sum += an
}
when {
n >= maxIterations -> throw error("Maximal iterations is exceeded $maxIterations")
sum.isInfinite() -> 1.0
else -> exp(-x + a * ln(x) - logGamma(a)) * sum
}
}
}
fun regularizedGammaQ(
a: Double,
x: Double,
maxIterations: Int = Int.MAX_VALUE
): Double = when {
a.isNaN() || x.isNaN() || a <= 0.0 || x < 0.0 -> Double.NaN
x == 0.0 -> 1.0
x < a + 1.0 -> 1.0 - regularizedGammaP(a, x, maxIterations)
else -> 1.0 / object : ContinuedFraction() {
override fun getA(n: Int, x: Double): Double = 2.0 * n + 1.0 - a + x
override fun getB(n: Int, x: Double): Double = n * (a - n)
}.evaluate(x, maxIterations) * exp(-x + a * ln(x) - logGamma(a))
}
private fun lanczos(x: Double): Double =
(LANCZOS.size - 1 downTo 1).sumByDouble { LANCZOS[it] / (x + it) } + LANCZOS[0]
private fun invGamma1pm1(x: Double): Double {
require(x >= -0.5)
require(x <= 1.5)
val ret: Double
val t = if (x <= 0.5) x else x - 0.5 - 0.5
if (t < 0.0) {
val a = INV_GAMMA1P_M1_A0 + t * INV_GAMMA1P_M1_A1
var b = INV_GAMMA1P_M1_B8
b = INV_GAMMA1P_M1_B7 + t * b
b = INV_GAMMA1P_M1_B6 + t * b
b = INV_GAMMA1P_M1_B5 + t * b
b = INV_GAMMA1P_M1_B4 + t * b
b = INV_GAMMA1P_M1_B3 + t * b
b = INV_GAMMA1P_M1_B2 + t * b
b = INV_GAMMA1P_M1_B1 + t * b
b = 1.0 + t * b
var c = INV_GAMMA1P_M1_C13 + t * (a / b)
c = INV_GAMMA1P_M1_C12 + t * c
c = INV_GAMMA1P_M1_C11 + t * c
c = INV_GAMMA1P_M1_C10 + t * c
c = INV_GAMMA1P_M1_C9 + t * c
c = INV_GAMMA1P_M1_C8 + t * c
c = INV_GAMMA1P_M1_C7 + t * c
c = INV_GAMMA1P_M1_C6 + t * c
c = INV_GAMMA1P_M1_C5 + t * c
c = INV_GAMMA1P_M1_C4 + t * c
c = INV_GAMMA1P_M1_C3 + t * c
c = INV_GAMMA1P_M1_C2 + t * c
c = INV_GAMMA1P_M1_C1 + t * c
c = INV_GAMMA1P_M1_C + t * c
ret = (if (x > 0.5) t * c / x else x * (c + 0.5 + 0.5))
} else {
var p = INV_GAMMA1P_M1_P6
p = INV_GAMMA1P_M1_P5 + t * p
p = INV_GAMMA1P_M1_P4 + t * p
p = INV_GAMMA1P_M1_P3 + t * p
p = INV_GAMMA1P_M1_P2 + t * p
p = INV_GAMMA1P_M1_P1 + t * p
p = INV_GAMMA1P_M1_P0 + t * p
var q = INV_GAMMA1P_M1_Q4
q = INV_GAMMA1P_M1_Q3 + t * q
q = INV_GAMMA1P_M1_Q2 + t * q
q = INV_GAMMA1P_M1_Q1 + t * q
q = 1.0 + t * q
var c = INV_GAMMA1P_M1_C13 + p / q * t
c = INV_GAMMA1P_M1_C12 + t * c
c = INV_GAMMA1P_M1_C11 + t * c
c = INV_GAMMA1P_M1_C10 + t * c
c = INV_GAMMA1P_M1_C9 + t * c
c = INV_GAMMA1P_M1_C8 + t * c
c = INV_GAMMA1P_M1_C7 + t * c
c = INV_GAMMA1P_M1_C6 + t * c
c = INV_GAMMA1P_M1_C5 + t * c
c = INV_GAMMA1P_M1_C4 + t * c
c = INV_GAMMA1P_M1_C3 + t * c
c = INV_GAMMA1P_M1_C2 + t * c
c = INV_GAMMA1P_M1_C1 + t * c
c = INV_GAMMA1P_M1_C0 + t * c
ret = (if (x > 0.5) t / x * (c - 0.5 - 0.5) else x * c)
}
return ret
}
private fun logGamma1p(x: Double): Double {
require(x >= -0.5)
require(x <= 1.5)
return -ln1p(invGamma1pm1(x))
}
}

View File

@ -0,0 +1,70 @@
package space.kscience.kmath.stat.internal
import kotlin.math.ln
import kotlin.math.min
internal object InternalUtils {
private val FACTORIALS = longArrayOf(
1L, 1L, 2L,
6L, 24L, 120L,
720L, 5040L, 40320L,
362880L, 3628800L, 39916800L,
479001600L, 6227020800L, 87178291200L,
1307674368000L, 20922789888000L, 355687428096000L,
6402373705728000L, 121645100408832000L, 2432902008176640000L
)
private const val BEGIN_LOG_FACTORIALS = 2
fun factorial(n: Int): Long = FACTORIALS[n]
fun validateProbabilities(probabilities: DoubleArray?): Double {
require(!(probabilities == null || probabilities.isEmpty())) { "Probabilities must not be empty." }
val sumProb = probabilities.sumByDouble { prob ->
require(!(prob < 0 || prob.isInfinite() || prob.isNaN())) { "Invalid probability: $prob" }
prob
}
require(!(sumProb.isInfinite() || sumProb <= 0)) { "Invalid sum of probabilities: $sumProb" }
return sumProb
}
class FactorialLog private constructor(numValues: Int, cache: DoubleArray?) {
private val logFactorials: DoubleArray = DoubleArray(numValues)
init {
val endCopy: Int
if (cache != null && cache.size > BEGIN_LOG_FACTORIALS) {
// Copy available values.
endCopy = min(cache.size, numValues)
cache.copyInto(
logFactorials,
BEGIN_LOG_FACTORIALS,
BEGIN_LOG_FACTORIALS, endCopy
)
} else
// All values to be computed
endCopy = BEGIN_LOG_FACTORIALS
// Compute remaining values.
(endCopy until numValues).forEach { i ->
if (i < FACTORIALS.size)
logFactorials[i] = ln(FACTORIALS[i].toDouble())
else
logFactorials[i] = logFactorials[i - 1] + ln(i.toDouble())
}
}
fun value(n: Int): Double {
if (n < logFactorials.size) return logFactorials[n]
return if (n < FACTORIALS.size) ln(FACTORIALS[n].toDouble()) else InternalGamma.logGamma(n + 1.0)
}
companion object {
fun create(): FactorialLog = FactorialLog(0, null)
}
}
}

View File

@ -0,0 +1,73 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.internal.InternalUtils
import kotlin.math.ln
import kotlin.math.pow
/**
* Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html).
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html].
*/
public class AhrensDieterExponentialSampler private constructor(public val mean: Double) : Sampler<Double> {
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
// Step 1:
var a = 0.0
var u = nextDouble()
// Step 2 and 3:
while (u < 0.5) {
a += EXPONENTIAL_SA_QI[0]
u *= 2.0
}
// Step 4 (now u >= 0.5):
u += u - 1
// Step 5:
if (u <= EXPONENTIAL_SA_QI[0]) return@chain mean * (a + u)
// Step 6:
var i = 0 // Should be 1, be we iterate before it in while using 0.
var u2 = nextDouble()
var umin = u2
// Step 7 and 8:
do {
++i
u2 = nextDouble()
if (u2 < umin) umin = u2
// Step 8:
} while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1.
mean * (a + umin * EXPONENTIAL_SA_QI[0])
}
override fun toString(): String = "Ahrens-Dieter Exponential deviate"
public companion object {
private val EXPONENTIAL_SA_QI by lazy { DoubleArray(16) }
init {
/**
* Filling EXPONENTIAL_SA_QI table.
* Note that we don't want qi = 0 in the table.
*/
val ln2 = ln(2.0)
var qi = 0.0
EXPONENTIAL_SA_QI.indices.forEach { i ->
qi += ln2.pow(i + 1.0) / InternalUtils.factorial(i + 1)
EXPONENTIAL_SA_QI[i] = qi
}
}
public fun of(mean: Double): AhrensDieterExponentialSampler {
require(mean > 0) { "mean is not strictly positive: $mean" }
return AhrensDieterExponentialSampler(mean)
}
}
}

View File

@ -0,0 +1,120 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.next
import kotlin.math.*
/**
* Sampling from the [gamma distribution](http://mathworld.wolfram.com/GammaDistribution.html).
* - For 0 < alpha < 1:
* Ahrens, J. H. and Dieter, U., Computer methods for sampling from gamma, beta, Poisson and binomial distributions, Computing, 12, 223-246, 1974.
* - For alpha >= 1:
* Marsaglia and Tsang, A Simple Method for Generating Gamma Variables. ACM Transactions on Mathematical Software, Volume 26 Issue 3, September, 2000.
*
* Based on Commons RNG implementation.
*
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterMarsagliaTsangGammaSampler.html].
*/
public class AhrensDieterMarsagliaTsangGammaSampler private constructor(
alpha: Double,
theta: Double
) : Sampler<Double> {
private val delegate: BaseGammaSampler =
if (alpha < 1) AhrensDieterGammaSampler(alpha, theta) else MarsagliaTsangGammaSampler(alpha, theta)
private abstract class BaseGammaSampler internal constructor(
protected val alpha: Double,
protected val theta: Double
) : Sampler<Double> {
init {
require(alpha > 0) { "alpha is not strictly positive: $alpha" }
require(theta > 0) { "theta is not strictly positive: $theta" }
}
override fun toString(): String = "Ahrens-Dieter-Marsaglia-Tsang Gamma deviate"
}
private class AhrensDieterGammaSampler(alpha: Double, theta: Double) :
BaseGammaSampler(alpha, theta) {
private val oneOverAlpha: Double = 1.0 / alpha
private val bGSOptim: Double = 1.0 + alpha / E
override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
var x: Double
// [1]: p. 228, Algorithm GS.
while (true) {
// Step 1:
val u = generator.nextDouble()
val p = bGSOptim * u
if (p <= 1) {
// Step 2:
x = p.pow(oneOverAlpha)
val u2 = generator.nextDouble()
if (u2 > exp(-x)) // Reject.
continue
break
}
// Step 3:
x = -ln((bGSOptim - p) * oneOverAlpha)
val u2: Double = generator.nextDouble()
if (u2 <= x.pow(alpha - 1.0)) break
// Reject and continue.
}
x * theta
}
}
private class MarsagliaTsangGammaSampler(alpha: Double, theta: Double) :
BaseGammaSampler(alpha, theta) {
private val dOptim: Double
private val cOptim: Double
private val gaussian: NormalizedGaussianSampler
init {
gaussian = ZigguratNormalizedGaussianSampler.of()
dOptim = alpha - ONE_THIRD
cOptim = ONE_THIRD / sqrt(dOptim)
}
override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
var v: Double
while (true) {
val x = gaussian.next(generator)
val oPcTx = 1 + cOptim * x
v = oPcTx * oPcTx * oPcTx
if (v <= 0) continue
val x2 = x * x
val u = generator.nextDouble()
// Squeeze.
if (u < 1 - 0.0331 * x2 * x2) break
if (ln(u) < 0.5 * x2 + dOptim * (1 - v + ln(v))) break
}
theta * dOptim * v
}
companion object {
private const val ONE_THIRD = 1.0 / 3.0
}
}
public override fun sample(generator: RandomGenerator): Chain<Double> = delegate.sample(generator)
public override fun toString(): String = delegate.toString()
public companion object {
public fun of(
alpha: Double,
theta: Double
): Sampler<Double> = AhrensDieterMarsagliaTsangGammaSampler(alpha, theta)
}
}

View File

@ -0,0 +1,286 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.internal.InternalUtils
import kotlin.math.ceil
import kotlin.math.max
import kotlin.math.min
/**
* Distribution sampler that uses the Alias method. It can be used to sample from n values each with an associated
* probability. This implementation is based on the detailed explanation of the alias method by Keith Schartz and
* implements Vose's algorithm.
*
* Vose, M.D., A linear algorithm for generating random numbers with a given distribution, IEEE Transactions on
* Software Engineering, 17, 972-975, 1991. he algorithm will sample values in O(1) time after a pre-processing step
* of O(n) time.
*
* The alias tables are constructed using fraction probabilities with an assumed denominator of 253. In the generic
* case sampling uses UniformRandomProvider.nextInt(int) and the upper 53-bits from UniformRandomProvider.nextLong().
*
* Zero padding the input probabilities can be used to make more sampling more efficient. Any zero entry will always be
* aliased removing the requirement to compute a long. Increased sampling speed comes at the cost of increased storage
* space. The algorithm requires approximately 12 bytes of storage per input probability, that is n * 12 for size n.
* Zero-padding only requires 4 bytes of storage per padded value as the probability is known to be zero.
*
* An optimisation is performed for small table sizes that are a power of 2. In this case the sampling uses 1 or 2
* calls from UniformRandomProvider.nextInt() to generate up to 64-bits for creation of an 11-bit index and 53-bits
* for the long. This optimisation requires a generator with a high cycle length for the lower order bits.
*
* Larger table sizes that are a power of 2 will benefit from fast algorithms for UniformRandomProvider.nextInt(int)
* that exploit the power of 2.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AliasMethodDiscreteSampler.html].
*/
public open class AliasMethodDiscreteSampler private constructor(
// Deliberate direct storage of input arrays
protected val probability: LongArray,
protected val alias: IntArray
) : Sampler<Int> {
private class SmallTableAliasMethodDiscreteSampler(
probability: LongArray,
alias: IntArray
) : AliasMethodDiscreteSampler(probability, alias) {
// Assume the table size is a power of 2 and create the mask
private val mask: Int = alias.size - 1
override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
val bits = generator.nextInt()
// Isolate lower bits
val j = bits and mask
// Optimisation for zero-padded input tables
if (j >= probability.size)
// No probability must use the alias
return@chain alias[j]
// Create a uniform random deviate as a long.
// This replicates functionality from the o.a.c.rng.core.utils.NumberFactory.makeLong
val longBits = generator.nextInt().toLong() shl 32 or (bits.toLong() and hex_ffffffff)
// Choose between the two. Use a 53-bit long for the probability.
if (longBits ushr 11 < probability[j]) j else alias[j]
}
private companion object {
private const val hex_ffffffff = 4294967295L
}
}
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
// This implements the algorithm as per Vose (1991):
// v = uniform() in [0, 1)
// j = uniform(n) in [0, n)
// if v < prob[j] then
// return j
// else
// return alias[j]
val j = generator.nextInt(alias.size)
// Optimisation for zero-padded input tables
// No probability must use the alias
if (j >= probability.size) return@chain alias[j]
// Note: We could check the probability before computing a deviate.
// p(j) == 0 => alias[j]
// p(j) == 1 => j
// However it is assumed these edge cases are rare:
//
// The probability table will be 1 for approximately 1/n samples, i.e. only the
// last unpaired probability. This is only worth checking for when the table size (n)
// is small. But in that case the user should zero-pad the table for performance.
//
// The probability table will be 0 when an input probability was zero. We
// will assume this is also rare if modelling a discrete distribution where
// all samples are possible. The edge case for zero-padded tables is handled above.
// Choose between the two. Use a 53-bit long for the probability.
if (generator.nextLong() ushr 11 < probability[j]) j else alias[j]
}
public override fun toString(): String = "Alias method"
public companion object {
private const val DEFAULT_ALPHA = 0
private const val ZERO = 0.0
private const val ONE_AS_NUMERATOR = 1L shl 53
private const val CONVERT_TO_NUMERATOR: Double = ONE_AS_NUMERATOR.toDouble()
private const val MAX_SMALL_POWER_2_SIZE = 1 shl 11
public fun of(
probabilities: DoubleArray,
alpha: Int = DEFAULT_ALPHA
): Sampler<Int> {
// The Alias method balances N categories with counts around the mean into N sections,
// each allocated 'mean' observations.
//
// Consider 4 categories with counts 6,3,2,1. The histogram can be balanced into a
// 2D array as 4 sections with a height of the mean:
//
// 6
// 6
// 6
// 63 => 6366 --
// 632 6326 |-- mean
// 6321 6321 --
//
// section abcd
//
// Each section is divided as:
// a: 6=1/1
// b: 3=1/1
// c: 2=2/3; 6=1/3 (6 is the alias)
// d: 1=1/3; 6=2/3 (6 is the alias)
//
// The sample is obtained by randomly selecting a section, then choosing which category
// from the pair based on a uniform random deviate.
val sumProb = InternalUtils.validateProbabilities(probabilities)
// Allow zero-padding
val n = computeSize(probabilities.size, alpha)
// Partition into small and large by splitting on the average.
val mean = sumProb / n
// The cardinality of smallSize + largeSize = n.
// So fill the same array from either end.
val indices = IntArray(n)
var large = n
var small = 0
probabilities.indices.forEach { i ->
if (probabilities[i] >= mean) indices[--large] = i else indices[small++] = i
}
small = fillRemainingIndices(probabilities.size, indices, small)
// This may be smaller than the input length if the probabilities were already padded.
val nonZeroIndex = findLastNonZeroIndex(probabilities)
// The probabilities are modified so use a copy.
// Note: probabilities are required only up to last nonZeroIndex
val remainingProbabilities = probabilities.copyOf(nonZeroIndex + 1)
// Allocate the final tables.
// Probability table may be truncated (when zero padded).
// The alias table is full length.
val probability = LongArray(remainingProbabilities.size)
val alias = IntArray(n)
// This loop uses each large in turn to fill the alias table for small probabilities that
// do not reach the requirement to fill an entire section alone (i.e. p < mean).
// Since the sum of the small should be less than the sum of the large it should use up
// all the small first. However floating point round-off can result in
// misclassification of items as small or large. The Vose algorithm handles this using
// a while loop conditioned on the size of both sets and a subsequent loop to use
// unpaired items.
while (large != n && small != 0) {
// Index of the small and the large probabilities.
val j = indices[--small]
val k = indices[large++]
// Optimisation for zero-padded input:
// p(j) = 0 above the last nonZeroIndex
if (j > nonZeroIndex)
// The entire amount for the section is taken from the alias.
remainingProbabilities[k] -= mean
else {
val pj = remainingProbabilities[j]
// Item j is a small probability that is below the mean.
// Compute the weight of the section for item j: pj / mean.
// This is scaled by 2^53 and the ceiling function used to round-up
// the probability to a numerator of a fraction in the range [1,2^53].
// Ceiling ensures non-zero values.
probability[j] = ceil(CONVERT_TO_NUMERATOR * (pj / mean)).toLong()
// The remaining amount for the section is taken from the alias.
// Effectively: probabilities[k] -= (mean - pj)
remainingProbabilities[k] += pj - mean
}
// If not j then the alias is k
alias[j] = k
// Add the remaining probability from large to the appropriate list.
if (remainingProbabilities[k] >= mean) indices[--large] = k else indices[small++] = k
}
// Final loop conditions to consume unpaired items.
// Note: The large set should never be non-empty but this can occur due to round-off
// error so consume from both.
fillTable(probability, alias, indices, 0, small)
fillTable(probability, alias, indices, large, n)
// Change the algorithm for small power of 2 sized tables
return if (isSmallPowerOf2(n))
SmallTableAliasMethodDiscreteSampler(probability, alias)
else
AliasMethodDiscreteSampler(probability, alias)
}
private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int {
var updatedSmall = small
(length until indices.size).forEach { i -> indices[updatedSmall++] = i }
return updatedSmall
}
private fun findLastNonZeroIndex(probabilities: DoubleArray): Int {
// No bounds check is performed when decrementing as the array contains at least one
// value above zero.
var nonZeroIndex = probabilities.size - 1
while (probabilities[nonZeroIndex] == ZERO) nonZeroIndex--
return nonZeroIndex
}
private fun computeSize(length: Int, alpha: Int): Int {
// If No padding
if (alpha < 0) return length
// Use the number of leading zeros function to find the next power of 2,
// i.e. ceil(log2(x))
var pow2 = 32 - numberOfLeadingZeros(length - 1)
// Increase by the alpha. Clip this to limit to a positive integer (2^30)
pow2 = min(30, pow2 + alpha)
// Use max to handle a length above the highest possible power of 2
return max(length, 1 shl pow2)
}
private fun fillTable(
probability: LongArray,
alias: IntArray,
indices: IntArray,
start: Int,
end: Int
) = (start until end).forEach { i ->
val index = indices[i]
probability[index] = ONE_AS_NUMERATOR
alias[index] = index
}
private fun isSmallPowerOf2(n: Int): Boolean = n <= MAX_SMALL_POWER_2_SIZE && n and n - 1 == 0
private fun numberOfLeadingZeros(i: Int): Int {
var mutI = i
if (mutI <= 0) return if (mutI == 0) 32 else 0
var n = 31
if (mutI >= 1 shl 16) {
n -= 16
mutI = mutI ushr 16
}
if (mutI >= 1 shl 8) {
n -= 8
mutI = mutI ushr 8
}
if (mutI >= 1 shl 4) {
n -= 4
mutI = mutI ushr 4
}
if (mutI >= 1 shl 2) {
n -= 2
mutI = mutI ushr 2
}
return n - (mutI ushr 1)
}
}
}

View File

@ -0,0 +1,48 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.*
/**
* [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian
* distribution.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html].
*/
public class BoxMullerNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
private var nextGaussian: Double = Double.NaN
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
val random: Double
if (nextGaussian.isNaN()) {
// Generate a pair of Gaussian numbers.
val x = nextDouble()
val y = nextDouble()
val alpha = 2 * PI * x
val r = sqrt(-2 * ln(y))
// Return the first element of the generated pair.
random = r * cos(alpha)
// Keep second element of the pair for next invocation.
nextGaussian = r * sin(alpha)
} else {
// Use the second element of the pair (generated at the
// previous invocation).
random = nextGaussian
// Both elements of the pair have been used.
nextGaussian = Double.NaN
}
random
}
public override fun toString(): String = "Box-Muller normalized Gaussian deviate"
public companion object {
public fun of(): BoxMullerNormalizedGaussianSampler = BoxMullerNormalizedGaussianSampler()
}
}

View File

@ -0,0 +1,43 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.map
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
/**
* Sampling from a Gaussian distribution with given mean and standard deviation.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html].
*
* @property mean the mean of the distribution.
* @property standardDeviation the variance of the distribution.
*/
public class GaussianSampler private constructor(
public val mean: Double,
public val standardDeviation: Double,
private val normalized: NormalizedGaussianSampler
) : Sampler<Double> {
public override fun sample(generator: RandomGenerator): Chain<Double> = normalized
.sample(generator)
.map { standardDeviation * it + mean }
override fun toString(): String = "Gaussian deviate [$normalized]"
public companion object {
public fun of(
mean: Double,
standardDeviation: Double,
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of()
): GaussianSampler {
require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" }
return GaussianSampler(
mean,
standardDeviation,
normalized
)
}
}
}

View File

@ -0,0 +1,63 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.exp
/**
* Sampler for the Poisson distribution.
* - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
* This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead.
*
* Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor.
*
* Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html].
*/
public class KempSmallMeanPoissonSampler private constructor(
private val p0: Double,
private val mean: Double
) : Sampler<Int> {
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
// Note on the algorithm:
// - X is the unknown sample deviate (the output of the algorithm)
// - x is the current value from the distribution
// - p is the probability of the current value x, p(X=x)
// - u is effectively the cumulative probability that the sample X
// is equal or above the current value x, p(X>=x)
// So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
var u = nextDouble()
var x = 0
var p = p0
while (u > p) {
u -= p
// Compute the next probability using a recurrence relation.
// p(x+1) = p(x) * mean / (x+1)
p *= mean / ++x
// The algorithm listed in Kemp (1981) does not check that the rolling probability
// is positive. This check is added to ensure no errors when the limit of the summation
// 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic.
if (p == 0.0) return@chain x
}
x
}
public override fun toString(): String = "Kemp Small Mean Poisson deviate"
public companion object {
public fun of(mean: Double): KempSmallMeanPoissonSampler {
require(mean > 0) { "Mean is not strictly positive: $mean" }
val p0 = exp(-mean)
// Probability must be positive. As mean increases then p(0) decreases.
require(p0 > 0) { "No probability for mean: $mean" }
return KempSmallMeanPoissonSampler(p0, mean)
}
}
}

View File

@ -0,0 +1,130 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.chains.ConstantChain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import space.kscience.kmath.stat.internal.InternalUtils
import space.kscience.kmath.stat.next
import kotlin.math.*
/**
* Sampler for the Poisson distribution.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables
* Computing vol. 26 pp. 197-207.
*
* This sampler is suitable for mean >= 40.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html].
*/
public class LargeMeanPoissonSampler private constructor(public val mean: Double) : Sampler<Int> {
private val exponential: Sampler<Double> = AhrensDieterExponentialSampler.of(1.0)
private val gaussian: Sampler<Double> = ZigguratNormalizedGaussianSampler.of()
private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG
private val lambda: Double = floor(mean)
private val logLambda: Double = ln(lambda)
private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt())
private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1))
private val halfDelta: Double = delta / 2
private val twolpd: Double = 2 * lambda + delta
private val c1: Double = 1 / (8 * lambda)
private val a1: Double = sqrt(PI * twolpd) * exp(c1)
private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd)
private val aSum: Double = a1 + a2 + 1
private val p1: Double = a1 / aSum
private val p2: Double = a2 / aSum
private val smallMeanPoissonSampler: Sampler<Int> = if (mean - lambda < Double.MIN_VALUE)
NO_SMALL_MEAN_POISSON_SAMPLER
else // Not used.
KempSmallMeanPoissonSampler.of(mean - lambda)
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
// This will never be null. It may be a no-op delegate that returns zero.
val y2 = smallMeanPoissonSampler.next(generator)
var x: Double
var y: Double
var v: Double
var a: Int
var t: Double
var qr: Double
var qa: Double
while (true) {
// Step 1:
val u = generator.nextDouble()
if (u <= p1) {
// Step 2:
val n = gaussian.next(generator)
x = n * sqrt(lambda + halfDelta) - 0.5
if (x > delta || x < -lambda) continue
y = if (x < 0) floor(x) else ceil(x)
val e = exponential.next(generator)
v = -e - 0.5 * n * n + c1
} else {
// Step 3:
if (u > p1 + p2) {
y = lambda
break
}
x = delta + twolpd / delta * exponential.next(generator)
y = ceil(x)
v = -exponential.next(generator) - delta * (x + 1) / twolpd
}
// The Squeeze Principle
// Step 4.1:
a = if (x < 0) 1 else 0
t = y * (y + 1) / (2 * lambda)
// Step 4.2
if (v < -t && a == 0) {
y += lambda
break
}
// Step 4.3:
qr = t * ((2 * y + 1) / (6 * lambda) - 1)
qa = qr - t * t / (3 * (lambda + a * (y + 1)))
// Step 4.4:
if (v < qa) {
y += lambda
break
}
// Step 4.5:
if (v > qr) continue
// Step 4.6:
if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) {
y += lambda
break
}
}
min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt()
}
private fun getFactorialLog(n: Int): Double = factorialLog.value(n)
public override fun toString(): String = "Large Mean Poisson deviate"
public companion object {
private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE
private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create()
private val NO_SMALL_MEAN_POISSON_SAMPLER: Sampler<Int> = Sampler { ConstantChain(0) }
public fun of(mean: Double): LargeMeanPoissonSampler {
require(mean >= 1) { "mean is not >= 1: $mean" }
// The algorithm is not valid if Math.floor(mean) is not an integer.
require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" }
return LargeMeanPoissonSampler(mean)
}
}
}

View File

@ -0,0 +1,61 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.ln
import kotlin.math.sqrt
/**
* [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian
* distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in
* [BoxMullerNormalizedGaussianSampler].
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html]
*/
public class MarsagliaNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
private var nextGaussian = Double.NaN
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
if (nextGaussian.isNaN()) {
val alpha: Double
var x: Double
// Rejection scheme for selecting a pair that lies within the unit circle.
while (true) {
// Generate a pair of numbers within [-1 , 1).
x = 2.0 * generator.nextDouble() - 1.0
val y = 2.0 * generator.nextDouble() - 1.0
val r2 = x * x + y * y
if (r2 < 1 && r2 > 0) {
// Pair (x, y) is within unit circle.
alpha = sqrt(-2 * ln(r2) / r2)
// Keep second element of the pair for next invocation.
nextGaussian = alpha * y
// Return the first element of the generated pair.
break
}
// Pair is not within the unit circle: Generate another one.
}
// Return the first element of the generated pair.
alpha * x
} else {
// Use the second element of the pair (generated at the
// previous invocation).
val r = nextGaussian
// Both elements of the pair have been used.
nextGaussian = Double.NaN
r
}
}
public override fun toString(): String = "Box-Muller (with rejection) normalized Gaussian deviate"
public companion object {
public fun of(): MarsagliaNormalizedGaussianSampler = MarsagliaNormalizedGaussianSampler()
}
}

View File

@ -0,0 +1,9 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.stat.Sampler
/**
* Marker interface for a sampler that generates values from an N(0,1)
* [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution).
*/
public interface NormalizedGaussianSampler : Sampler<Double>

View File

@ -0,0 +1,30 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
/**
* Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3
* Important integer-valued distributions: The Poisson distribution. Addison Wesley.
* The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html].
*/
public class PoissonSampler private constructor(mean: Double) : Sampler<Int> {
private val poissonSamplerDelegate: Sampler<Int> = of(mean)
public override fun sample(generator: RandomGenerator): Chain<Int> = poissonSamplerDelegate.sample(generator)
public override fun toString(): String = poissonSamplerDelegate.toString()
public companion object {
private const val PIVOT = 40.0
public fun of(mean: Double): Sampler<Int> =// Each sampler should check the input arguments.
if (mean < PIVOT) SmallMeanPoissonSampler.of(mean) else LargeMeanPoissonSampler.of(mean)
}
}

View File

@ -0,0 +1,50 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.ceil
import kotlin.math.exp
/**
* Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important
* integer-valued distributions: The Poisson distribution. Addison Wesley.
* - The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead.
*
* Based on Commons RNG implementation.
*
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html].
*/
public class SmallMeanPoissonSampler private constructor(mean: Double) : Sampler<Int> {
private val p0: Double = exp(-mean)
private val limit: Int = (if (p0 > 0)
ceil(1000 * mean)
else
throw IllegalArgumentException("No p(x=0) probability for mean: $mean")).toInt()
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
var n = 0
var r = 1.0
while (n < limit) {
r *= nextDouble()
if (r >= p0) n++ else break
}
n
}
public override fun toString(): String = "Small Mean Poisson deviate"
public companion object {
public fun of(mean: Double): SmallMeanPoissonSampler {
require(mean > 0) { "mean is not strictly positive: $mean" }
return SmallMeanPoissonSampler(mean)
}
}
}

View File

@ -0,0 +1,88 @@
package space.kscience.kmath.stat.samplers
import space.kscience.kmath.chains.Chain
import space.kscience.kmath.stat.RandomGenerator
import space.kscience.kmath.stat.Sampler
import space.kscience.kmath.stat.chain
import kotlin.math.*
/**
* [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a
* Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this
* implementation has been adapted from the C code provided therein.
*
* Based on Commons RNG implementation.
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html].
*/
public class ZigguratNormalizedGaussianSampler private constructor() :
NormalizedGaussianSampler, Sampler<Double> {
private fun sampleOne(generator: RandomGenerator): Double {
val j = generator.nextLong()
val i = (j and LAST.toLong()).toInt()
return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i)
}
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain { sampleOne(this) }
public override fun toString(): String = "Ziggurat normalized Gaussian deviate"
private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double {
var x = hz * W[iz]
return when {
iz == 0 -> {
var y: Double
do {
y = -ln(generator.nextDouble())
x = -ln(generator.nextDouble()) * ONE_OVER_R
} while (y + y < x * x)
val out = R + x
if (hz > 0) out else -out
}
F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x
else -> sampleOne(generator)
}
}
public companion object {
private const val R: Double = 3.442619855899
private const val ONE_OVER_R: Double = 1 / R
private const val V: Double = 9.91256303526217e-3
private val MAX: Double = 2.0.pow(63.0)
private val ONE_OVER_MAX: Double = 1.0 / MAX
private const val LEN: Int = 128
private const val LAST: Int = LEN - 1
private val K: LongArray = LongArray(LEN)
private val W: DoubleArray = DoubleArray(LEN)
private val F: DoubleArray = DoubleArray(LEN)
init {
// Filling the tables.
var d = R
var t = d
var fd = gauss(d)
val q = V / fd
K[0] = (d / q * MAX).toLong()
K[1] = 0
W[0] = q * ONE_OVER_MAX
W[LAST] = d * ONE_OVER_MAX
F[0] = 1.0
F[LAST] = fd
(LAST - 1 downTo 1).forEach { i ->
d = sqrt(-2 * ln(V / d + fd))
fd = gauss(d)
K[i + 1] = (d / t * MAX).toLong()
t = d
F[i] = fd
W[i] = d * ONE_OVER_MAX
}
}
public fun of(): ZigguratNormalizedGaussianSampler = ZigguratNormalizedGaussianSampler()
private fun gauss(x: Double): Double = exp(-0.5 * x * x)
}
}

View File

@ -3,10 +3,14 @@ package space.kscience.kmath.stat
import org.apache.commons.rng.UniformRandomProvider import org.apache.commons.rng.UniformRandomProvider
import org.apache.commons.rng.simple.RandomSource import org.apache.commons.rng.simple.RandomSource
public class RandomSourceGenerator(public val source: RandomSource, seed: Long?) : RandomGenerator { /**
internal val random: UniformRandomProvider = seed?.let { * Implements [RandomGenerator] by delegating all operations to [RandomSource].
RandomSource.create(source, seed) *
} ?: RandomSource.create(source) * @property source the underlying [RandomSource] object.
*/
public class RandomSourceGenerator internal constructor(public val source: RandomSource, seed: Long?) : RandomGenerator {
internal val random: UniformRandomProvider = seed?.let { RandomSource.create(source, seed) }
?: RandomSource.create(source)
public override fun nextBoolean(): Boolean = random.nextBoolean() public override fun nextBoolean(): Boolean = random.nextBoolean()
public override fun nextDouble(): Double = random.nextDouble() public override fun nextDouble(): Double = random.nextDouble()
@ -23,22 +27,84 @@ public class RandomSourceGenerator(public val source: RandomSource, seed: Long?)
public override fun fork(): RandomGenerator = RandomSourceGenerator(source, nextLong()) public override fun fork(): RandomGenerator = RandomSourceGenerator(source, nextLong())
} }
/**
* Implements [UniformRandomProvider] by delegating all operations to [RandomGenerator].
*
* @property generator the underlying [RandomGenerator] object.
*/
public inline class RandomGeneratorProvider(public val generator: RandomGenerator) : UniformRandomProvider { public inline class RandomGeneratorProvider(public val generator: RandomGenerator) : UniformRandomProvider {
/**
* Generates a [Boolean] value.
*
* @return the next random value.
*/
public override fun nextBoolean(): Boolean = generator.nextBoolean() public override fun nextBoolean(): Boolean = generator.nextBoolean()
/**
* Generates a [Float] value between 0 and 1.
*
* @return the next random value between 0 and 1.
*/
public override fun nextFloat(): Float = generator.nextDouble().toFloat() public override fun nextFloat(): Float = generator.nextDouble().toFloat()
public override fun nextBytes(bytes: ByteArray) { /**
generator.fillBytes(bytes) * Generates [Byte] values and places them into a user-supplied array.
} *
* The number of random bytes produced is equal to the length of the the byte array.
*
* @param bytes byte array in which to put the random bytes.
*/
public override fun nextBytes(bytes: ByteArray): Unit = generator.fillBytes(bytes)
/**
* Generates [Byte] values and places them into a user-supplied array.
*
* The array is filled with bytes extracted from random integers. This implies that the number of random bytes
* generated may be larger than the length of the byte array.
*
* @param bytes the array in which to put the generated bytes.
* @param start the index at which to start inserting the generated bytes.
* @param len the number of bytes to insert.
*/
public override fun nextBytes(bytes: ByteArray, start: Int, len: Int) { public override fun nextBytes(bytes: ByteArray, start: Int, len: Int) {
generator.fillBytes(bytes, start, start + len) generator.fillBytes(bytes, start, start + len)
} }
/**
* Generates an [Int] value.
*
* @return the next random value.
*/
public override fun nextInt(): Int = generator.nextInt() public override fun nextInt(): Int = generator.nextInt()
/**
* Generates an [Int] value between 0 (inclusive) and the specified value (exclusive).
*
* @param n the bound on the random number to be returned. Must be positive.
* @return a random integer between 0 (inclusive) and [n] (exclusive).
*/
public override fun nextInt(n: Int): Int = generator.nextInt(n) public override fun nextInt(n: Int): Int = generator.nextInt(n)
/**
* Generates a [Double] value between 0 and 1.
*
* @return the next random value between 0 and 1.
*/
public override fun nextDouble(): Double = generator.nextDouble() public override fun nextDouble(): Double = generator.nextDouble()
/**
* Generates a [Long] value.
*
* @return the next random value.
*/
public override fun nextLong(): Long = generator.nextLong() public override fun nextLong(): Long = generator.nextLong()
/**
* Generates a [Long] value between 0 (inclusive) and the specified value (exclusive).
*
* @param n Bound on the random number to be returned. Must be positive.
* @return a random long value between 0 (inclusive) and [n] (exclusive).
*/
public override fun nextLong(n: Long): Long = generator.nextLong(n) public override fun nextLong(n: Long): Long = generator.nextLong(n)
} }
@ -51,8 +117,14 @@ public fun RandomGenerator.asUniformRandomProvider(): UniformRandomProvider = if
else else
RandomGeneratorProvider(this) RandomGeneratorProvider(this)
/**
* Returns [RandomSourceGenerator] with given [RandomSource] and [seed].
*/
public fun RandomGenerator.Companion.fromSource(source: RandomSource, seed: Long? = null): RandomSourceGenerator = public fun RandomGenerator.Companion.fromSource(source: RandomSource, seed: Long? = null): RandomSourceGenerator =
RandomSourceGenerator(source, seed) RandomSourceGenerator(source, seed)
/**
* Returns [RandomSourceGenerator] with [RandomSource.MT] algorithm and given [seed].
*/
public fun RandomGenerator.Companion.mersenneTwister(seed: Long? = null): RandomSourceGenerator = public fun RandomGenerator.Companion.mersenneTwister(seed: Long? = null): RandomSourceGenerator =
fromSource(RandomSource.MT, seed) fromSource(RandomSource.MT, seed)

View File

@ -1,99 +0,0 @@
package space.kscience.kmath.stat
import org.apache.commons.rng.UniformRandomProvider
import org.apache.commons.rng.sampling.distribution.*
import space.kscience.kmath.chains.BlockingDoubleChain
import space.kscience.kmath.chains.BlockingIntChain
import space.kscience.kmath.chains.Chain
import kotlin.math.PI
import kotlin.math.exp
import kotlin.math.pow
import kotlin.math.sqrt
public abstract class ContinuousSamplerDistribution : Distribution<Double> {
private inner class ContinuousSamplerChain(val generator: RandomGenerator) : BlockingDoubleChain() {
private val sampler = buildCMSampler(generator)
override fun nextDouble(): Double = sampler.sample()
override fun fork(): Chain<Double> = ContinuousSamplerChain(generator.fork())
}
protected abstract fun buildCMSampler(generator: RandomGenerator): ContinuousSampler
public override fun sample(generator: RandomGenerator): BlockingDoubleChain = ContinuousSamplerChain(generator)
}
public abstract class DiscreteSamplerDistribution : Distribution<Int> {
private inner class ContinuousSamplerChain(val generator: RandomGenerator) : BlockingIntChain() {
private val sampler = buildSampler(generator)
override fun nextInt(): Int = sampler.sample()
override fun fork(): Chain<Int> = ContinuousSamplerChain(generator.fork())
}
protected abstract fun buildSampler(generator: RandomGenerator): DiscreteSampler
public override fun sample(generator: RandomGenerator): BlockingIntChain = ContinuousSamplerChain(generator)
}
public enum class NormalSamplerMethod {
BoxMuller,
Marsaglia,
Ziggurat
}
private fun normalSampler(method: NormalSamplerMethod, provider: UniformRandomProvider): NormalizedGaussianSampler =
when (method) {
NormalSamplerMethod.BoxMuller -> BoxMullerNormalizedGaussianSampler(provider)
NormalSamplerMethod.Marsaglia -> MarsagliaNormalizedGaussianSampler(provider)
NormalSamplerMethod.Ziggurat -> ZigguratNormalizedGaussianSampler(provider)
}
public fun Distribution.Companion.normal(
method: NormalSamplerMethod = NormalSamplerMethod.Ziggurat,
): ContinuousSamplerDistribution = object : ContinuousSamplerDistribution() {
override fun buildCMSampler(generator: RandomGenerator): ContinuousSampler {
val provider = generator.asUniformRandomProvider()
return normalSampler(method, provider)
}
override fun probability(arg: Double): Double = exp(-arg.pow(2) / 2) / sqrt(PI * 2)
}
/**
* A univariate normal distribution with given [mean] and [sigma]. [method] defines commons-rng generation method
*/
public fun Distribution.Companion.normal(
mean: Double,
sigma: Double,
method: NormalSamplerMethod = NormalSamplerMethod.Ziggurat,
): ContinuousSamplerDistribution = object : ContinuousSamplerDistribution() {
private val sigma2 = sigma.pow(2)
private val norm = sigma * sqrt(PI * 2)
override fun buildCMSampler(generator: RandomGenerator): ContinuousSampler {
val provider = generator.asUniformRandomProvider()
val normalizedSampler = normalSampler(method, provider)
return GaussianSampler(normalizedSampler, mean, sigma)
}
override fun probability(arg: Double): Double = exp(-(arg - mean).pow(2) / 2 / sigma2) / norm
}
public fun Distribution.Companion.poisson(
lambda: Double,
): DiscreteSamplerDistribution = object : DiscreteSamplerDistribution() {
private val computedProb: HashMap<Int, Double> = hashMapOf(0 to exp(-lambda))
override fun buildSampler(generator: RandomGenerator): DiscreteSampler =
PoissonSampler.of(generator.asUniformRandomProvider(), lambda)
override fun probability(arg: Int): Double {
require(arg >= 0) { "The argument must be >= 0" }
return if (arg > 40)
exp(-(arg - lambda).pow(2) / 2 / lambda) / sqrt(2 * PI * lambda)
else
computedProb.getOrPut(arg) { probability(arg - 1) * lambda / arg }
}
}

View File

@ -5,23 +5,22 @@ import kotlinx.coroutines.flow.toList
import kotlinx.coroutines.runBlocking import kotlinx.coroutines.runBlocking
import org.junit.jupiter.api.Assertions import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
import space.kscience.kmath.stat.samplers.GaussianSampler
internal class CommonsDistributionsTest { internal class CommonsDistributionsTest {
@Test @Test
fun testNormalDistributionSuspend() { fun testNormalDistributionSuspend() {
val distribution = Distribution.normal(7.0, 2.0) val distribution = GaussianSampler.of(7.0, 2.0)
val generator = RandomGenerator.default(1) val generator = RandomGenerator.default(1)
val sample = runBlocking { val sample = runBlocking { distribution.sample(generator).take(1000).toList() }
distribution.sample(generator).take(1000).toList()
}
Assertions.assertEquals(7.0, sample.average(), 0.1) Assertions.assertEquals(7.0, sample.average(), 0.1)
} }
@Test @Test
fun testNormalDistributionBlocking() { fun testNormalDistributionBlocking() {
val distribution = Distribution.normal(7.0, 2.0) val distribution = GaussianSampler.of(7.0, 2.0)
val generator = RandomGenerator.default(1) val generator = RandomGenerator.default(1)
val sample = distribution.sample(generator).nextBlock(1000) val sample = runBlocking { distribution.sample(generator).blocking().nextBlock(1000) }
Assertions.assertEquals(7.0, sample.average(), 0.1) Assertions.assertEquals(7.0, sample.average(), 0.1)
} }
} }

View File

@ -7,11 +7,8 @@ class SamplerTest {
@Test @Test
fun bufferSamplerTest() { fun bufferSamplerTest() {
val sampler: Sampler<Double> = val sampler = Sampler { it.chain { nextDouble() } }
BasicSampler { it.chain { nextDouble() } }
val data = sampler.sampleBuffer(RandomGenerator.default, 100) val data = sampler.sampleBuffer(RandomGenerator.default, 100)
runBlocking { runBlocking { println(data.next()) }
println(data.next())
}
} }
} }