forked from kscience/kmath
Merge pull request #164 from mipt-npm/feature/mp-samplers
Implement Commons RNG-like samplers in kmath-prob module for Multiplatform
This commit is contained in:
commit
b4fc311668
@ -106,6 +106,7 @@ kotlin.sourceSets.all {
|
||||
with(languageSettings) {
|
||||
useExperimentalAnnotation("kotlin.contracts.ExperimentalContracts")
|
||||
useExperimentalAnnotation("kotlin.ExperimentalUnsignedTypes")
|
||||
useExperimentalAnnotation("space.kscience.kmath.misc.UnstableKMathAPI")
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,9 +13,8 @@ import space.kscience.kmath.optimization.OptimizationResult
|
||||
import space.kscience.kmath.real.DoubleVector
|
||||
import space.kscience.kmath.real.map
|
||||
import space.kscience.kmath.real.step
|
||||
import space.kscience.kmath.stat.Distribution
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.normal
|
||||
import space.kscience.kmath.stat.distributions.NormalDistribution
|
||||
import space.kscience.kmath.structures.asIterable
|
||||
import space.kscience.kmath.structures.toList
|
||||
import kotlin.math.pow
|
||||
@ -37,10 +36,9 @@ operator fun TraceValues.invoke(vector: DoubleVector) {
|
||||
/**
|
||||
* Least squares fie with auto-differentiation. Uses `kmath-commons` and `kmath-for-real` modules.
|
||||
*/
|
||||
fun main() {
|
||||
|
||||
suspend fun main() {
|
||||
//A generator for a normally distributed values
|
||||
val generator = Distribution.normal()
|
||||
val generator = NormalDistribution(2.0, 7.0)
|
||||
|
||||
//A chain/flow of random values with the given seed
|
||||
val chain = generator.sample(RandomGenerator.default(112667))
|
||||
@ -53,7 +51,7 @@ fun main() {
|
||||
//Perform an operation on each x value (much more effective, than numpy)
|
||||
val y = x.map {
|
||||
val value = it.pow(2) + it + 1
|
||||
value + chain.nextDouble() * sqrt(value)
|
||||
value + chain.next() * sqrt(value)
|
||||
}
|
||||
// this will also work, but less effective:
|
||||
// val y = x.pow(2)+ x + 1 + chain.nextDouble()
|
||||
@ -103,4 +101,4 @@ fun main() {
|
||||
}
|
||||
|
||||
page.makeFile()
|
||||
}
|
||||
}
|
||||
|
@ -1,23 +1,24 @@
|
||||
package kscience.kmath.commons.prob
|
||||
package space.kscience.kmath.stat
|
||||
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.async
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSampler
|
||||
import space.kscience.kmath.stat.samplers.GaussianSampler
|
||||
import org.apache.commons.rng.simple.RandomSource
|
||||
import space.kscience.kmath.stat.*
|
||||
import java.time.Duration
|
||||
import java.time.Instant
|
||||
import org.apache.commons.rng.sampling.distribution.GaussianSampler as CMGaussianSampler
|
||||
import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSampler as CMZigguratNormalizedGaussianSampler
|
||||
|
||||
private fun runChain(): Duration {
|
||||
private suspend fun runKMathChained(): Duration {
|
||||
val generator = RandomGenerator.fromSource(RandomSource.MT, 123L)
|
||||
val normal = Distribution.normal(NormalSamplerMethod.Ziggurat)
|
||||
val chain = normal.sample(generator)
|
||||
val normal = GaussianSampler.of(7.0, 2.0)
|
||||
val chain = normal.sample(generator).blocking()
|
||||
val startTime = Instant.now()
|
||||
var sum = 0.0
|
||||
|
||||
repeat(10000001) { counter ->
|
||||
sum += chain.nextDouble()
|
||||
sum += chain.next()
|
||||
|
||||
if (counter % 100000 == 0) {
|
||||
val duration = Duration.between(startTime, Instant.now())
|
||||
@ -29,9 +30,15 @@ private fun runChain(): Duration {
|
||||
return Duration.between(startTime, Instant.now())
|
||||
}
|
||||
|
||||
private fun runDirect(): Duration {
|
||||
val provider = RandomSource.create(RandomSource.MT, 123L)
|
||||
val sampler = ZigguratNormalizedGaussianSampler(provider)
|
||||
private fun runApacheDirect(): Duration {
|
||||
val rng = RandomSource.create(RandomSource.MT, 123L)
|
||||
|
||||
val sampler = CMGaussianSampler.of(
|
||||
CMZigguratNormalizedGaussianSampler.of(rng),
|
||||
7.0,
|
||||
2.0
|
||||
)
|
||||
|
||||
val startTime = Instant.now()
|
||||
var sum = 0.0
|
||||
|
||||
@ -51,11 +58,9 @@ private fun runDirect(): Duration {
|
||||
/**
|
||||
* Comparing chain sampling performance with direct sampling performance
|
||||
*/
|
||||
fun main() {
|
||||
runBlocking(Dispatchers.Default) {
|
||||
val chainJob = async { runChain() }
|
||||
val directJob = async { runDirect() }
|
||||
println("Chain: ${chainJob.await()}")
|
||||
println("Direct: ${directJob.await()}")
|
||||
}
|
||||
fun main(): Unit = runBlocking(Dispatchers.Default) {
|
||||
val chainJob = async { runKMathChained() }
|
||||
val directJob = async { runApacheDirect() }
|
||||
println("KMath Chained: ${chainJob.await()}")
|
||||
println("Apache Direct: ${directJob.await()}")
|
||||
}
|
||||
|
@ -3,14 +3,15 @@ package space.kscience.kmath.stat
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.chains.collectWithState
|
||||
import space.kscience.kmath.stat.distributions.NormalDistribution
|
||||
|
||||
/**
|
||||
* The state of distribution averager
|
||||
* The state of distribution averager.
|
||||
*/
|
||||
private data class AveragingChainState(var num: Int = 0, var value: Double = 0.0)
|
||||
|
||||
/**
|
||||
* Averaging
|
||||
* Averaging.
|
||||
*/
|
||||
private fun Chain<Double>.mean(): Chain<Double> = collectWithState(AveragingChainState(), { it.copy() }) { chain ->
|
||||
val next = chain.next()
|
||||
@ -21,7 +22,7 @@ private fun Chain<Double>.mean(): Chain<Double> = collectWithState(AveragingChai
|
||||
|
||||
|
||||
fun main() {
|
||||
val normal = Distribution.normal()
|
||||
val normal = NormalDistribution(0.0, 2.0)
|
||||
val chain = normal.sample(RandomGenerator.default).mean()
|
||||
|
||||
runBlocking {
|
||||
@ -32,4 +33,4 @@ fun main() {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -34,4 +34,4 @@ fun main() {
|
||||
strides.indices().forEach { res = array[strides.offset(it)] }
|
||||
}
|
||||
println("Array reading finished in $time3 millis")
|
||||
}
|
||||
}
|
||||
|
@ -1,13 +1,13 @@
|
||||
package space.kscience.kmath.commons.optimization
|
||||
|
||||
import org.junit.jupiter.api.Test
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import space.kscience.kmath.commons.expressions.DerivativeStructureExpression
|
||||
import space.kscience.kmath.misc.symbol
|
||||
import space.kscience.kmath.optimization.FunctionOptimization
|
||||
import space.kscience.kmath.stat.Distribution
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.normal
|
||||
import space.kscience.kmath.stat.distributions.NormalDistribution
|
||||
import kotlin.math.pow
|
||||
import kotlin.test.Test
|
||||
|
||||
internal class OptimizeTest {
|
||||
val x by symbol
|
||||
@ -34,23 +34,24 @@ internal class OptimizeTest {
|
||||
simplexSteps(x to 2.0, y to 0.5)
|
||||
//this sets simplex optimizer
|
||||
}
|
||||
|
||||
println(result.point)
|
||||
println(result.value)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testCmFit() {
|
||||
fun testCmFit() = runBlocking {
|
||||
val a by symbol
|
||||
val b by symbol
|
||||
val c by symbol
|
||||
|
||||
val sigma = 1.0
|
||||
val generator = Distribution.normal(0.0, sigma)
|
||||
val generator = NormalDistribution(0.0, sigma)
|
||||
val chain = generator.sample(RandomGenerator.default(112667))
|
||||
val x = (1..100).map(Int::toDouble)
|
||||
|
||||
val y = x.map {
|
||||
it.pow(2) + it + 1 + chain.nextDouble()
|
||||
it.pow(2) + it + 1 + chain.next()
|
||||
}
|
||||
|
||||
val yErr = List(x.size) { sigma }
|
||||
@ -64,5 +65,4 @@ internal class OptimizeTest {
|
||||
println(result)
|
||||
println("Chi2/dof = ${result.value / (x.size - 3)}")
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -241,18 +241,18 @@ public class BigInt internal constructor(
|
||||
)
|
||||
|
||||
private fun compareMagnitudes(mag1: Magnitude, mag2: Magnitude): Int {
|
||||
when {
|
||||
mag1.size > mag2.size -> return 1
|
||||
mag1.size < mag2.size -> return -1
|
||||
return when {
|
||||
mag1.size > mag2.size -> 1
|
||||
mag1.size < mag2.size -> -1
|
||||
|
||||
else -> {
|
||||
for (i in mag1.size - 1 downTo 0) {
|
||||
if (mag1[i] > mag2[i]) {
|
||||
return 1
|
||||
} else if (mag1[i] < mag2[i]) {
|
||||
return -1
|
||||
}
|
||||
for (i in mag1.size - 1 downTo 0) return when {
|
||||
mag1[i] > mag2[i] -> 1
|
||||
mag1[i] < mag2[i] -> -1
|
||||
else -> continue
|
||||
}
|
||||
return 0
|
||||
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -302,10 +302,11 @@ public class BigInt internal constructor(
|
||||
var carry = 0uL
|
||||
|
||||
for (i in mag.indices) {
|
||||
val cur: ULong = carry + mag[i].toULong() * x.toULong()
|
||||
val cur = carry + mag[i].toULong() * x.toULong()
|
||||
result[i] = (cur and BASE).toUInt()
|
||||
carry = cur shr BASE_SIZE
|
||||
}
|
||||
|
||||
result[resultLength - 1] = (carry and BASE).toUInt()
|
||||
|
||||
return stripLeadingZeros(result)
|
||||
|
@ -40,7 +40,6 @@ public interface Buffer<out T> {
|
||||
public operator fun iterator(): Iterator<T>
|
||||
|
||||
public companion object {
|
||||
|
||||
/**
|
||||
* Check the element-by-element match of content of two buffers.
|
||||
*/
|
||||
@ -110,7 +109,6 @@ public interface MutableBuffer<T> : Buffer<T> {
|
||||
public fun copy(): MutableBuffer<T>
|
||||
|
||||
public companion object {
|
||||
|
||||
/**
|
||||
* Creates a [DoubleBuffer] with the specified [size], where each element is calculated by calling the specified
|
||||
* [initializer] function.
|
||||
|
@ -38,12 +38,11 @@ class NumberNDFieldTest {
|
||||
(i * 10 + j).toDouble()
|
||||
}
|
||||
|
||||
for (i in 0..2) {
|
||||
for (i in 0..2)
|
||||
for (j in 0..2) {
|
||||
val expected = (i * 10 + j).toDouble()
|
||||
assertEquals(expected, array[i, j], "Error at index [$i, $j]")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -1,12 +1,13 @@
|
||||
package space.kscience.kmath.chains
|
||||
|
||||
/**
|
||||
* Performance optimized chain for real values
|
||||
* Chunked, specialized chain for real values.
|
||||
*/
|
||||
public abstract class BlockingDoubleChain : Chain<Double> {
|
||||
public abstract fun nextDouble(): Double
|
||||
public interface BlockingDoubleChain : Chain<Double> {
|
||||
public override suspend fun next(): Double
|
||||
|
||||
override suspend fun next(): Double = nextDouble()
|
||||
|
||||
public open fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { nextDouble() }
|
||||
/**
|
||||
* Returns an [DoubleArray] chunk of [size] values of [next].
|
||||
*/
|
||||
public suspend fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { next() }
|
||||
}
|
||||
|
@ -3,10 +3,7 @@ package space.kscience.kmath.chains
|
||||
/**
|
||||
* Performance optimized chain for integer values
|
||||
*/
|
||||
public abstract class BlockingIntChain : Chain<Int> {
|
||||
public abstract fun nextInt(): Int
|
||||
|
||||
override suspend fun next(): Int = nextInt()
|
||||
|
||||
public fun nextBlock(size: Int): IntArray = IntArray(size) { nextInt() }
|
||||
public interface BlockingIntChain : Chain<Int> {
|
||||
public override suspend fun next(): Int
|
||||
public suspend fun nextBlock(size: Int): IntArray = IntArray(size) { next() }
|
||||
}
|
||||
|
@ -63,12 +63,10 @@ public class MarkovChain<out R : Any>(private val seed: suspend () -> R, private
|
||||
|
||||
public fun value(): R? = value
|
||||
|
||||
public override suspend fun next(): R {
|
||||
mutex.withLock {
|
||||
val newValue = gen(value ?: seed())
|
||||
value = newValue
|
||||
return newValue
|
||||
}
|
||||
public override suspend fun next(): R = mutex.withLock {
|
||||
val newValue = gen(value ?: seed())
|
||||
value = newValue
|
||||
newValue
|
||||
}
|
||||
|
||||
public override fun fork(): Chain<R> = MarkovChain(seed = { value ?: seed() }, gen = gen)
|
||||
@ -90,12 +88,10 @@ public class StatefulChain<S, out R>(
|
||||
|
||||
public fun value(): R? = value
|
||||
|
||||
public override suspend fun next(): R {
|
||||
mutex.withLock {
|
||||
val newValue = state.gen(value ?: state.seed())
|
||||
value = newValue
|
||||
return newValue
|
||||
}
|
||||
public override suspend fun next(): R = mutex.withLock {
|
||||
val newValue = state.gen(value ?: state.seed())
|
||||
value = newValue
|
||||
newValue
|
||||
}
|
||||
|
||||
public override fun fork(): Chain<R> = StatefulChain(forkState(state), seed, forkState, gen)
|
||||
|
@ -28,7 +28,7 @@ public fun <T> Flow<T>.chunked(bufferSize: Int, bufferFactory: BufferFactory<T>)
|
||||
var counter = 0
|
||||
|
||||
this@chunked.collect { element ->
|
||||
list.add(element)
|
||||
list += element
|
||||
counter++
|
||||
|
||||
if (counter == bufferSize) {
|
||||
|
@ -48,11 +48,9 @@ public class RingBuffer<T>(
|
||||
/**
|
||||
* A safe snapshot operation
|
||||
*/
|
||||
public suspend fun snapshot(): Buffer<T> {
|
||||
mutex.withLock {
|
||||
val copy = buffer.copy()
|
||||
return VirtualBuffer(size) { i -> copy[startIndex.forward(i)] as T }
|
||||
}
|
||||
public suspend fun snapshot(): Buffer<T> = mutex.withLock {
|
||||
val copy = buffer.copy()
|
||||
VirtualBuffer(size) { i -> copy[startIndex.forward(i)] as T }
|
||||
}
|
||||
|
||||
public suspend fun push(element: T) {
|
||||
|
@ -1,4 +1,4 @@
|
||||
package kscience.dimensions
|
||||
package space.kscience.dimensions
|
||||
|
||||
import space.kscience.kmath.dimensions.D2
|
||||
import space.kscience.kmath.dimensions.D3
|
||||
|
@ -38,8 +38,8 @@ public class OrderedPiecewisePolynomial<T : Comparable<T>>(delimiter: T) :
|
||||
*/
|
||||
public fun putRight(right: T, piece: Polynomial<T>) {
|
||||
require(right > delimiters.last()) { "New delimiter should be to the right of old one" }
|
||||
delimiters.add(right)
|
||||
pieces.add(piece)
|
||||
delimiters += right
|
||||
pieces += piece
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1,17 +1,29 @@
|
||||
package space.kscience.kmath.stat
|
||||
|
||||
import kotlinx.coroutines.flow.first
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.chains.collect
|
||||
import space.kscience.kmath.structures.Buffer
|
||||
import space.kscience.kmath.structures.BufferFactory
|
||||
import space.kscience.kmath.structures.DoubleBuffer
|
||||
import space.kscience.kmath.structures.IntBuffer
|
||||
import space.kscience.kmath.structures.MutableBuffer
|
||||
import kotlin.jvm.JvmName
|
||||
|
||||
public interface Sampler<T : Any> {
|
||||
/**
|
||||
* Sampler that generates chains of values of type [T].
|
||||
*/
|
||||
public fun interface Sampler<T : Any> {
|
||||
/**
|
||||
* Generates a chain of samples.
|
||||
*
|
||||
* @param generator the randomness provider.
|
||||
* @return the new chain.
|
||||
*/
|
||||
public fun sample(generator: RandomGenerator): Chain<T>
|
||||
}
|
||||
|
||||
/**
|
||||
* A distribution of typed objects
|
||||
* A distribution of typed objects.
|
||||
*/
|
||||
public interface Distribution<T : Any> : Sampler<T> {
|
||||
/**
|
||||
@ -20,11 +32,7 @@ public interface Distribution<T : Any> : Sampler<T> {
|
||||
*/
|
||||
public fun probability(arg: T): Double
|
||||
|
||||
/**
|
||||
* Create a chain of samples from this distribution.
|
||||
* The chain is not guaranteed to be stateless, but different sample chains should be independent.
|
||||
*/
|
||||
override fun sample(generator: RandomGenerator): Chain<T>
|
||||
public override fun sample(generator: RandomGenerator): Chain<T>
|
||||
|
||||
/**
|
||||
* An empty companion. Distribution factories should be written as its extensions
|
||||
@ -63,16 +71,27 @@ public fun <T : Any> Sampler<T>.sampleBuffer(
|
||||
//clear list from previous run
|
||||
tmp.clear()
|
||||
//Fill list
|
||||
repeat(size) {
|
||||
tmp.add(chain.next())
|
||||
}
|
||||
repeat(size) { tmp += chain.next() }
|
||||
//return new buffer with elements from tmp
|
||||
bufferFactory(size) { tmp[it] }
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a bunch of samples from real distributions
|
||||
* Samples one value from this [Sampler].
|
||||
*/
|
||||
public suspend fun <T : Any> Sampler<T>.next(generator: RandomGenerator): T = sample(generator).first()
|
||||
|
||||
/**
|
||||
* Generates [size] real samples and chunks them into some buffers.
|
||||
*/
|
||||
@JvmName("sampleRealBuffer")
|
||||
public fun Sampler<Double>.sampleBuffer(generator: RandomGenerator, size: Int): Chain<Buffer<Double>> =
|
||||
sampleBuffer(generator, size, ::DoubleBuffer)
|
||||
sampleBuffer(generator, size, MutableBuffer.Companion::double)
|
||||
|
||||
/**
|
||||
* Generates [size] integer samples and chunks them into some buffers.
|
||||
*/
|
||||
@JvmName("sampleIntBuffer")
|
||||
public fun Sampler<Int>.sampleBuffer(generator: RandomGenerator, size: Int): Chain<Buffer<Int>> =
|
||||
sampleBuffer(generator, size, ::IntBuffer)
|
||||
|
@ -14,7 +14,7 @@ public interface NamedDistribution<T> : Distribution<Map<String, T>>
|
||||
public class FactorizedDistribution<T>(public val distributions: Collection<NamedDistribution<T>>) :
|
||||
NamedDistribution<T> {
|
||||
override fun probability(arg: Map<String, T>): Double =
|
||||
distributions.fold(1.0) { acc, distr -> acc * distr.probability(arg) }
|
||||
distributions.fold(1.0) { acc, dist -> acc * dist.probability(arg) }
|
||||
|
||||
override fun sample(generator: RandomGenerator): Chain<Map<String, T>> {
|
||||
val chains = distributions.map { it.sample(generator) }
|
||||
@ -38,6 +38,6 @@ public class DistributionBuilder<T : Any> {
|
||||
private val distributions = ArrayList<NamedDistribution<T>>()
|
||||
|
||||
public infix fun String.to(distribution: Distribution<T>) {
|
||||
distributions.add(NamedDistributionWrapper(this, distribution))
|
||||
distributions += NamedDistributionWrapper(this, distribution)
|
||||
}
|
||||
}
|
||||
|
@ -1,17 +1,22 @@
|
||||
package space.kscience.kmath.stat
|
||||
|
||||
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||
import space.kscience.kmath.chains.BlockingIntChain
|
||||
import space.kscience.kmath.chains.Chain
|
||||
|
||||
/**
|
||||
* A possibly stateful chain producing random values.
|
||||
*
|
||||
* @property generator the underlying [RandomGenerator] instance.
|
||||
*/
|
||||
public class RandomChain<out R>(
|
||||
public val generator: RandomGenerator,
|
||||
private val gen: suspend RandomGenerator.() -> R,
|
||||
private val gen: suspend RandomGenerator.() -> R
|
||||
) : Chain<R> {
|
||||
override suspend fun next(): R = generator.gen()
|
||||
|
||||
override fun fork(): Chain<R> = RandomChain(generator.fork(), gen)
|
||||
}
|
||||
|
||||
public fun <R> RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, gen)
|
||||
public fun <R> RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, gen)
|
||||
public fun Chain<Double>.blocking(): BlockingDoubleChain = object : Chain<Double> by this, BlockingDoubleChain {}
|
||||
public fun Chain<Int>.blocking(): BlockingIntChain = object : Chain<Int> by this, BlockingIntChain {}
|
||||
|
@ -82,6 +82,8 @@ public interface RandomGenerator {
|
||||
|
||||
/**
|
||||
* Implements [RandomGenerator] by delegating all operations to [Random].
|
||||
*
|
||||
* @property random the underlying [Random] object.
|
||||
*/
|
||||
public class DefaultGenerator(public val random: Random = Random) : RandomGenerator {
|
||||
public override fun nextBoolean(): Boolean = random.nextBoolean()
|
||||
|
@ -8,16 +8,28 @@ import space.kscience.kmath.operations.Group
|
||||
import space.kscience.kmath.operations.ScaleOperations
|
||||
import space.kscience.kmath.operations.invoke
|
||||
|
||||
public class BasicSampler<T : Any>(public val chainBuilder: (RandomGenerator) -> Chain<T>) : Sampler<T> {
|
||||
public override fun sample(generator: RandomGenerator): Chain<T> = chainBuilder(generator)
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements [Sampler] by sampling only certain [value].
|
||||
*
|
||||
* @property value the value to sample.
|
||||
*/
|
||||
public class ConstantSampler<T : Any>(public val value: T) : Sampler<T> {
|
||||
public override fun sample(generator: RandomGenerator): Chain<T> = ConstantChain(value)
|
||||
}
|
||||
|
||||
/**
|
||||
* A space for samplers. Allows to perform simple operations on distributions
|
||||
* Implements [Sampler] by delegating sampling to value of [chainBuilder].
|
||||
*
|
||||
* @property chainBuilder the provider of [Chain].
|
||||
*/
|
||||
public class BasicSampler<T : Any>(public val chainBuilder: (RandomGenerator) -> Chain<T>) : Sampler<T> {
|
||||
public override fun sample(generator: RandomGenerator): Chain<T> = chainBuilder(generator)
|
||||
}
|
||||
|
||||
/**
|
||||
* A space of samplers. Allows to perform simple operations on distributions.
|
||||
*
|
||||
* @property algebra the space to provide addition and scalar multiplication for [T].
|
||||
*/
|
||||
public class SamplerSpace<T : Any, S>(public val algebra: S) : Group<Sampler<T>>,
|
||||
ScaleOperations<Sampler<T>> where S : Group<T>, S : ScaleOperations<T> {
|
||||
@ -29,8 +41,10 @@ public class SamplerSpace<T : Any, S>(public val algebra: S) : Group<Sampler<T>>
|
||||
}
|
||||
|
||||
public override fun scale(a: Sampler<T>, value: Double): Sampler<T> = BasicSampler { generator ->
|
||||
a.sample(generator).map { algebra { it * value } }
|
||||
a.sample(generator).map { a ->
|
||||
algebra { a * value }
|
||||
}
|
||||
}
|
||||
|
||||
override fun Sampler<T>.unaryMinus(): Sampler<T> = scale(this, -1.0)
|
||||
public override fun Sampler<T>.unaryMinus(): Sampler<T> = scale(this, -1.0)
|
||||
}
|
||||
|
@ -0,0 +1,41 @@
|
||||
package space.kscience.kmath.stat.distributions
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.UnivariateDistribution
|
||||
import space.kscience.kmath.stat.internal.InternalErf
|
||||
import space.kscience.kmath.stat.samplers.GaussianSampler
|
||||
import space.kscience.kmath.stat.samplers.NormalizedGaussianSampler
|
||||
import space.kscience.kmath.stat.samplers.ZigguratNormalizedGaussianSampler
|
||||
import kotlin.math.*
|
||||
|
||||
/**
|
||||
* Implements [UnivariateDistribution] for the normal (gaussian) distribution.
|
||||
*/
|
||||
public inline class NormalDistribution(public val sampler: GaussianSampler) : UnivariateDistribution<Double> {
|
||||
public constructor(
|
||||
mean: Double,
|
||||
standardDeviation: Double,
|
||||
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of(),
|
||||
) : this(GaussianSampler.of(mean, standardDeviation, normalized))
|
||||
|
||||
public override fun probability(arg: Double): Double {
|
||||
val x1 = (arg - sampler.mean) / sampler.standardDeviation
|
||||
return exp(-0.5 * x1 * x1 - (ln(sampler.standardDeviation) + 0.5 * ln(2 * PI)))
|
||||
}
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<Double> = sampler.sample(generator)
|
||||
|
||||
public override fun cumulative(arg: Double): Double {
|
||||
val dev = arg - sampler.mean
|
||||
|
||||
return when {
|
||||
abs(dev) > 40 * sampler.standardDeviation -> if (dev < 0) 0.0 else 1.0
|
||||
else -> 0.5 * InternalErf.erfc(-dev / (sampler.standardDeviation * SQRT2))
|
||||
}
|
||||
}
|
||||
|
||||
private companion object {
|
||||
private val SQRT2 = sqrt(2.0)
|
||||
}
|
||||
}
|
@ -0,0 +1,15 @@
|
||||
package space.kscience.kmath.stat.internal
|
||||
|
||||
import kotlin.math.abs
|
||||
|
||||
/**
|
||||
* Based on Commons Math implementation.
|
||||
* See [https://commons.apache.org/proper/commons-math/javadocs/api-3.3/org/apache/commons/math3/special/Erf.html].
|
||||
*/
|
||||
internal object InternalErf {
|
||||
fun erfc(x: Double): Double {
|
||||
if (abs(x) > 40) return if (x > 0) 0.0 else 2.0
|
||||
val ret = InternalGamma.regularizedGammaQ(0.5, x * x, 10000)
|
||||
return if (x < 0) 2 - ret else ret
|
||||
}
|
||||
}
|
@ -0,0 +1,238 @@
|
||||
package space.kscience.kmath.stat.internal
|
||||
|
||||
import kotlin.math.*
|
||||
|
||||
private abstract class ContinuedFraction protected constructor() {
|
||||
protected abstract fun getA(n: Int, x: Double): Double
|
||||
protected abstract fun getB(n: Int, x: Double): Double
|
||||
|
||||
fun evaluate(x: Double, maxIterations: Int): Double {
|
||||
val small = 1e-50
|
||||
var hPrev = getA(0, x)
|
||||
if (hPrev == 0.0 || abs(0.0 - hPrev) <= small) hPrev = small
|
||||
var n = 1
|
||||
var dPrev = 0.0
|
||||
var cPrev = hPrev
|
||||
var hN = hPrev
|
||||
|
||||
while (n < maxIterations) {
|
||||
val a = getA(n, x)
|
||||
val b = getB(n, x)
|
||||
var dN = a + b * dPrev
|
||||
if (dN == 0.0 || abs(0.0 - dN) <= small) dN = small
|
||||
var cN = a + b / cPrev
|
||||
if (cN == 0.0 || abs(0.0 - cN) <= small) cN = small
|
||||
dN = 1 / dN
|
||||
val deltaN = cN * dN
|
||||
hN = hPrev * deltaN
|
||||
check(!hN.isInfinite()) { "hN is infinite" }
|
||||
check(!hN.isNaN()) { "hN is NaN" }
|
||||
if (abs(deltaN - 1.0) < 10e-9) break
|
||||
dPrev = dN
|
||||
cPrev = cN
|
||||
hPrev = hN
|
||||
n++
|
||||
}
|
||||
|
||||
check(n < maxIterations) { "n is more than maxIterations" }
|
||||
return hN
|
||||
}
|
||||
}
|
||||
|
||||
internal object InternalGamma {
|
||||
const val LANCZOS_G = 607.0 / 128.0
|
||||
|
||||
private val LANCZOS = doubleArrayOf(
|
||||
0.99999999999999709182,
|
||||
57.156235665862923517,
|
||||
-59.597960355475491248,
|
||||
14.136097974741747174,
|
||||
-0.49191381609762019978,
|
||||
.33994649984811888699e-4,
|
||||
.46523628927048575665e-4,
|
||||
-.98374475304879564677e-4,
|
||||
.15808870322491248884e-3,
|
||||
-.21026444172410488319e-3,
|
||||
.21743961811521264320e-3,
|
||||
-.16431810653676389022e-3,
|
||||
.84418223983852743293e-4,
|
||||
-.26190838401581408670e-4,
|
||||
.36899182659531622704e-5
|
||||
)
|
||||
|
||||
private val HALF_LOG_2_PI = 0.5 * ln(2.0 * PI)
|
||||
private const val INV_GAMMA1P_M1_A0 = .611609510448141581788E-08
|
||||
private const val INV_GAMMA1P_M1_A1 = .624730830116465516210E-08
|
||||
private const val INV_GAMMA1P_M1_B1 = .203610414066806987300E+00
|
||||
private const val INV_GAMMA1P_M1_B2 = .266205348428949217746E-01
|
||||
private const val INV_GAMMA1P_M1_B3 = .493944979382446875238E-03
|
||||
private const val INV_GAMMA1P_M1_B4 = -.851419432440314906588E-05
|
||||
private const val INV_GAMMA1P_M1_B5 = -.643045481779353022248E-05
|
||||
private const val INV_GAMMA1P_M1_B6 = .992641840672773722196E-06
|
||||
private const val INV_GAMMA1P_M1_B7 = -.607761895722825260739E-07
|
||||
private const val INV_GAMMA1P_M1_B8 = .195755836614639731882E-09
|
||||
private const val INV_GAMMA1P_M1_P0 = .6116095104481415817861E-08
|
||||
private const val INV_GAMMA1P_M1_P1 = .6871674113067198736152E-08
|
||||
private const val INV_GAMMA1P_M1_P2 = .6820161668496170657918E-09
|
||||
private const val INV_GAMMA1P_M1_P3 = .4686843322948848031080E-10
|
||||
private const val INV_GAMMA1P_M1_P4 = .1572833027710446286995E-11
|
||||
private const val INV_GAMMA1P_M1_P5 = -.1249441572276366213222E-12
|
||||
private const val INV_GAMMA1P_M1_P6 = .4343529937408594255178E-14
|
||||
private const val INV_GAMMA1P_M1_Q1 = .3056961078365221025009E+00
|
||||
private const val INV_GAMMA1P_M1_Q2 = .5464213086042296536016E-01
|
||||
private const val INV_GAMMA1P_M1_Q3 = .4956830093825887312020E-02
|
||||
private const val INV_GAMMA1P_M1_Q4 = .2692369466186361192876E-03
|
||||
private const val INV_GAMMA1P_M1_C = -.422784335098467139393487909917598E+00
|
||||
private const val INV_GAMMA1P_M1_C0 = .577215664901532860606512090082402E+00
|
||||
private const val INV_GAMMA1P_M1_C1 = -.655878071520253881077019515145390E+00
|
||||
private const val INV_GAMMA1P_M1_C2 = -.420026350340952355290039348754298E-01
|
||||
private const val INV_GAMMA1P_M1_C3 = .166538611382291489501700795102105E+00
|
||||
private const val INV_GAMMA1P_M1_C4 = -.421977345555443367482083012891874E-01
|
||||
private const val INV_GAMMA1P_M1_C5 = -.962197152787697356211492167234820E-02
|
||||
private const val INV_GAMMA1P_M1_C6 = .721894324666309954239501034044657E-02
|
||||
private const val INV_GAMMA1P_M1_C7 = -.116516759185906511211397108401839E-02
|
||||
private const val INV_GAMMA1P_M1_C8 = -.215241674114950972815729963053648E-03
|
||||
private const val INV_GAMMA1P_M1_C9 = .128050282388116186153198626328164E-03
|
||||
private const val INV_GAMMA1P_M1_C10 = -.201348547807882386556893914210218E-04
|
||||
private const val INV_GAMMA1P_M1_C11 = -.125049348214267065734535947383309E-05
|
||||
private const val INV_GAMMA1P_M1_C12 = .113302723198169588237412962033074E-05
|
||||
private const val INV_GAMMA1P_M1_C13 = -.205633841697760710345015413002057E-06
|
||||
|
||||
fun logGamma(x: Double): Double = when {
|
||||
x.isNaN() || x <= 0.0 -> Double.NaN
|
||||
x < 0.5 -> logGamma1p(x) - ln(x)
|
||||
x <= 2.5 -> logGamma1p(x - 0.5 - 0.5)
|
||||
|
||||
x <= 8.0 -> {
|
||||
val n = floor(x - 1.5).toInt()
|
||||
val prod = (1..n).fold(1.0, { prod, i -> prod * (x - i) })
|
||||
logGamma1p(x - (n + 1)) + ln(prod)
|
||||
}
|
||||
|
||||
else -> {
|
||||
val tmp = x + LANCZOS_G + .5
|
||||
(x + .5) * ln(tmp) - tmp + HALF_LOG_2_PI + ln(lanczos(x) / x)
|
||||
}
|
||||
}
|
||||
|
||||
private fun regularizedGammaP(
|
||||
a: Double,
|
||||
x: Double,
|
||||
maxIterations: Int = Int.MAX_VALUE
|
||||
): Double = when {
|
||||
a.isNaN() || x.isNaN() || a <= 0.0 || x < 0.0 -> Double.NaN
|
||||
x == 0.0 -> 0.0
|
||||
x >= a + 1 -> 1.0 - regularizedGammaQ(a, x, maxIterations)
|
||||
|
||||
else -> {
|
||||
// calculate series
|
||||
var n = 0.0 // current element index
|
||||
var an = 1.0 / a // n-th element in the series
|
||||
var sum = an // partial sum
|
||||
|
||||
while (abs(an / sum) > 10e-15 && n < maxIterations && sum < Double.POSITIVE_INFINITY) {
|
||||
// compute next element in the series
|
||||
n += 1.0
|
||||
an *= x / (a + n)
|
||||
|
||||
// update partial sum
|
||||
sum += an
|
||||
}
|
||||
|
||||
when {
|
||||
n >= maxIterations -> throw error("Maximal iterations is exceeded $maxIterations")
|
||||
sum.isInfinite() -> 1.0
|
||||
else -> exp(-x + a * ln(x) - logGamma(a)) * sum
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fun regularizedGammaQ(
|
||||
a: Double,
|
||||
x: Double,
|
||||
maxIterations: Int = Int.MAX_VALUE
|
||||
): Double = when {
|
||||
a.isNaN() || x.isNaN() || a <= 0.0 || x < 0.0 -> Double.NaN
|
||||
x == 0.0 -> 1.0
|
||||
x < a + 1.0 -> 1.0 - regularizedGammaP(a, x, maxIterations)
|
||||
|
||||
else -> 1.0 / object : ContinuedFraction() {
|
||||
override fun getA(n: Int, x: Double): Double = 2.0 * n + 1.0 - a + x
|
||||
override fun getB(n: Int, x: Double): Double = n * (a - n)
|
||||
}.evaluate(x, maxIterations) * exp(-x + a * ln(x) - logGamma(a))
|
||||
}
|
||||
|
||||
private fun lanczos(x: Double): Double =
|
||||
(LANCZOS.size - 1 downTo 1).sumByDouble { LANCZOS[it] / (x + it) } + LANCZOS[0]
|
||||
|
||||
private fun invGamma1pm1(x: Double): Double {
|
||||
require(x >= -0.5)
|
||||
require(x <= 1.5)
|
||||
val ret: Double
|
||||
val t = if (x <= 0.5) x else x - 0.5 - 0.5
|
||||
|
||||
if (t < 0.0) {
|
||||
val a = INV_GAMMA1P_M1_A0 + t * INV_GAMMA1P_M1_A1
|
||||
var b = INV_GAMMA1P_M1_B8
|
||||
b = INV_GAMMA1P_M1_B7 + t * b
|
||||
b = INV_GAMMA1P_M1_B6 + t * b
|
||||
b = INV_GAMMA1P_M1_B5 + t * b
|
||||
b = INV_GAMMA1P_M1_B4 + t * b
|
||||
b = INV_GAMMA1P_M1_B3 + t * b
|
||||
b = INV_GAMMA1P_M1_B2 + t * b
|
||||
b = INV_GAMMA1P_M1_B1 + t * b
|
||||
b = 1.0 + t * b
|
||||
var c = INV_GAMMA1P_M1_C13 + t * (a / b)
|
||||
c = INV_GAMMA1P_M1_C12 + t * c
|
||||
c = INV_GAMMA1P_M1_C11 + t * c
|
||||
c = INV_GAMMA1P_M1_C10 + t * c
|
||||
c = INV_GAMMA1P_M1_C9 + t * c
|
||||
c = INV_GAMMA1P_M1_C8 + t * c
|
||||
c = INV_GAMMA1P_M1_C7 + t * c
|
||||
c = INV_GAMMA1P_M1_C6 + t * c
|
||||
c = INV_GAMMA1P_M1_C5 + t * c
|
||||
c = INV_GAMMA1P_M1_C4 + t * c
|
||||
c = INV_GAMMA1P_M1_C3 + t * c
|
||||
c = INV_GAMMA1P_M1_C2 + t * c
|
||||
c = INV_GAMMA1P_M1_C1 + t * c
|
||||
c = INV_GAMMA1P_M1_C + t * c
|
||||
ret = (if (x > 0.5) t * c / x else x * (c + 0.5 + 0.5))
|
||||
} else {
|
||||
var p = INV_GAMMA1P_M1_P6
|
||||
p = INV_GAMMA1P_M1_P5 + t * p
|
||||
p = INV_GAMMA1P_M1_P4 + t * p
|
||||
p = INV_GAMMA1P_M1_P3 + t * p
|
||||
p = INV_GAMMA1P_M1_P2 + t * p
|
||||
p = INV_GAMMA1P_M1_P1 + t * p
|
||||
p = INV_GAMMA1P_M1_P0 + t * p
|
||||
var q = INV_GAMMA1P_M1_Q4
|
||||
q = INV_GAMMA1P_M1_Q3 + t * q
|
||||
q = INV_GAMMA1P_M1_Q2 + t * q
|
||||
q = INV_GAMMA1P_M1_Q1 + t * q
|
||||
q = 1.0 + t * q
|
||||
var c = INV_GAMMA1P_M1_C13 + p / q * t
|
||||
c = INV_GAMMA1P_M1_C12 + t * c
|
||||
c = INV_GAMMA1P_M1_C11 + t * c
|
||||
c = INV_GAMMA1P_M1_C10 + t * c
|
||||
c = INV_GAMMA1P_M1_C9 + t * c
|
||||
c = INV_GAMMA1P_M1_C8 + t * c
|
||||
c = INV_GAMMA1P_M1_C7 + t * c
|
||||
c = INV_GAMMA1P_M1_C6 + t * c
|
||||
c = INV_GAMMA1P_M1_C5 + t * c
|
||||
c = INV_GAMMA1P_M1_C4 + t * c
|
||||
c = INV_GAMMA1P_M1_C3 + t * c
|
||||
c = INV_GAMMA1P_M1_C2 + t * c
|
||||
c = INV_GAMMA1P_M1_C1 + t * c
|
||||
c = INV_GAMMA1P_M1_C0 + t * c
|
||||
ret = (if (x > 0.5) t / x * (c - 0.5 - 0.5) else x * c)
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
private fun logGamma1p(x: Double): Double {
|
||||
require(x >= -0.5)
|
||||
require(x <= 1.5)
|
||||
return -ln1p(invGamma1pm1(x))
|
||||
}
|
||||
}
|
@ -0,0 +1,70 @@
|
||||
package space.kscience.kmath.stat.internal
|
||||
|
||||
import kotlin.math.ln
|
||||
import kotlin.math.min
|
||||
|
||||
internal object InternalUtils {
|
||||
private val FACTORIALS = longArrayOf(
|
||||
1L, 1L, 2L,
|
||||
6L, 24L, 120L,
|
||||
720L, 5040L, 40320L,
|
||||
362880L, 3628800L, 39916800L,
|
||||
479001600L, 6227020800L, 87178291200L,
|
||||
1307674368000L, 20922789888000L, 355687428096000L,
|
||||
6402373705728000L, 121645100408832000L, 2432902008176640000L
|
||||
)
|
||||
|
||||
private const val BEGIN_LOG_FACTORIALS = 2
|
||||
|
||||
fun factorial(n: Int): Long = FACTORIALS[n]
|
||||
|
||||
fun validateProbabilities(probabilities: DoubleArray?): Double {
|
||||
require(!(probabilities == null || probabilities.isEmpty())) { "Probabilities must not be empty." }
|
||||
|
||||
val sumProb = probabilities.sumByDouble { prob ->
|
||||
require(!(prob < 0 || prob.isInfinite() || prob.isNaN())) { "Invalid probability: $prob" }
|
||||
prob
|
||||
}
|
||||
|
||||
require(!(sumProb.isInfinite() || sumProb <= 0)) { "Invalid sum of probabilities: $sumProb" }
|
||||
return sumProb
|
||||
}
|
||||
|
||||
class FactorialLog private constructor(numValues: Int, cache: DoubleArray?) {
|
||||
private val logFactorials: DoubleArray = DoubleArray(numValues)
|
||||
|
||||
init {
|
||||
val endCopy: Int
|
||||
|
||||
if (cache != null && cache.size > BEGIN_LOG_FACTORIALS) {
|
||||
// Copy available values.
|
||||
endCopy = min(cache.size, numValues)
|
||||
|
||||
cache.copyInto(
|
||||
logFactorials,
|
||||
BEGIN_LOG_FACTORIALS,
|
||||
BEGIN_LOG_FACTORIALS, endCopy
|
||||
)
|
||||
} else
|
||||
// All values to be computed
|
||||
endCopy = BEGIN_LOG_FACTORIALS
|
||||
|
||||
// Compute remaining values.
|
||||
(endCopy until numValues).forEach { i ->
|
||||
if (i < FACTORIALS.size)
|
||||
logFactorials[i] = ln(FACTORIALS[i].toDouble())
|
||||
else
|
||||
logFactorials[i] = logFactorials[i - 1] + ln(i.toDouble())
|
||||
}
|
||||
}
|
||||
|
||||
fun value(n: Int): Double {
|
||||
if (n < logFactorials.size) return logFactorials[n]
|
||||
return if (n < FACTORIALS.size) ln(FACTORIALS[n].toDouble()) else InternalGamma.logGamma(n + 1.0)
|
||||
}
|
||||
|
||||
companion object {
|
||||
fun create(): FactorialLog = FactorialLog(0, null)
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,73 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import space.kscience.kmath.stat.internal.InternalUtils
|
||||
import kotlin.math.ln
|
||||
import kotlin.math.pow
|
||||
|
||||
/**
|
||||
* Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html).
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html].
|
||||
*/
|
||||
public class AhrensDieterExponentialSampler private constructor(public val mean: Double) : Sampler<Double> {
|
||||
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
|
||||
// Step 1:
|
||||
var a = 0.0
|
||||
var u = nextDouble()
|
||||
|
||||
// Step 2 and 3:
|
||||
while (u < 0.5) {
|
||||
a += EXPONENTIAL_SA_QI[0]
|
||||
u *= 2.0
|
||||
}
|
||||
|
||||
// Step 4 (now u >= 0.5):
|
||||
u += u - 1
|
||||
// Step 5:
|
||||
if (u <= EXPONENTIAL_SA_QI[0]) return@chain mean * (a + u)
|
||||
// Step 6:
|
||||
var i = 0 // Should be 1, be we iterate before it in while using 0.
|
||||
var u2 = nextDouble()
|
||||
var umin = u2
|
||||
|
||||
// Step 7 and 8:
|
||||
do {
|
||||
++i
|
||||
u2 = nextDouble()
|
||||
if (u2 < umin) umin = u2
|
||||
// Step 8:
|
||||
} while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1.
|
||||
|
||||
mean * (a + umin * EXPONENTIAL_SA_QI[0])
|
||||
}
|
||||
|
||||
override fun toString(): String = "Ahrens-Dieter Exponential deviate"
|
||||
|
||||
public companion object {
|
||||
private val EXPONENTIAL_SA_QI by lazy { DoubleArray(16) }
|
||||
|
||||
init {
|
||||
/**
|
||||
* Filling EXPONENTIAL_SA_QI table.
|
||||
* Note that we don't want qi = 0 in the table.
|
||||
*/
|
||||
val ln2 = ln(2.0)
|
||||
var qi = 0.0
|
||||
|
||||
EXPONENTIAL_SA_QI.indices.forEach { i ->
|
||||
qi += ln2.pow(i + 1.0) / InternalUtils.factorial(i + 1)
|
||||
EXPONENTIAL_SA_QI[i] = qi
|
||||
}
|
||||
}
|
||||
|
||||
public fun of(mean: Double): AhrensDieterExponentialSampler {
|
||||
require(mean > 0) { "mean is not strictly positive: $mean" }
|
||||
return AhrensDieterExponentialSampler(mean)
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,120 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import space.kscience.kmath.stat.next
|
||||
import kotlin.math.*
|
||||
|
||||
/**
|
||||
* Sampling from the [gamma distribution](http://mathworld.wolfram.com/GammaDistribution.html).
|
||||
* - For 0 < alpha < 1:
|
||||
* Ahrens, J. H. and Dieter, U., Computer methods for sampling from gamma, beta, Poisson and binomial distributions, Computing, 12, 223-246, 1974.
|
||||
* - For alpha >= 1:
|
||||
* Marsaglia and Tsang, A Simple Method for Generating Gamma Variables. ACM Transactions on Mathematical Software, Volume 26 Issue 3, September, 2000.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
*
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterMarsagliaTsangGammaSampler.html].
|
||||
*/
|
||||
public class AhrensDieterMarsagliaTsangGammaSampler private constructor(
|
||||
alpha: Double,
|
||||
theta: Double
|
||||
) : Sampler<Double> {
|
||||
private val delegate: BaseGammaSampler =
|
||||
if (alpha < 1) AhrensDieterGammaSampler(alpha, theta) else MarsagliaTsangGammaSampler(alpha, theta)
|
||||
|
||||
private abstract class BaseGammaSampler internal constructor(
|
||||
protected val alpha: Double,
|
||||
protected val theta: Double
|
||||
) : Sampler<Double> {
|
||||
init {
|
||||
require(alpha > 0) { "alpha is not strictly positive: $alpha" }
|
||||
require(theta > 0) { "theta is not strictly positive: $theta" }
|
||||
}
|
||||
|
||||
override fun toString(): String = "Ahrens-Dieter-Marsaglia-Tsang Gamma deviate"
|
||||
}
|
||||
|
||||
private class AhrensDieterGammaSampler(alpha: Double, theta: Double) :
|
||||
BaseGammaSampler(alpha, theta) {
|
||||
private val oneOverAlpha: Double = 1.0 / alpha
|
||||
private val bGSOptim: Double = 1.0 + alpha / E
|
||||
|
||||
override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
|
||||
var x: Double
|
||||
|
||||
// [1]: p. 228, Algorithm GS.
|
||||
while (true) {
|
||||
// Step 1:
|
||||
val u = generator.nextDouble()
|
||||
val p = bGSOptim * u
|
||||
|
||||
if (p <= 1) {
|
||||
// Step 2:
|
||||
x = p.pow(oneOverAlpha)
|
||||
val u2 = generator.nextDouble()
|
||||
|
||||
if (u2 > exp(-x)) // Reject.
|
||||
continue
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
// Step 3:
|
||||
x = -ln((bGSOptim - p) * oneOverAlpha)
|
||||
val u2: Double = generator.nextDouble()
|
||||
if (u2 <= x.pow(alpha - 1.0)) break
|
||||
// Reject and continue.
|
||||
}
|
||||
|
||||
x * theta
|
||||
}
|
||||
}
|
||||
|
||||
private class MarsagliaTsangGammaSampler(alpha: Double, theta: Double) :
|
||||
BaseGammaSampler(alpha, theta) {
|
||||
private val dOptim: Double
|
||||
private val cOptim: Double
|
||||
private val gaussian: NormalizedGaussianSampler
|
||||
|
||||
init {
|
||||
gaussian = ZigguratNormalizedGaussianSampler.of()
|
||||
dOptim = alpha - ONE_THIRD
|
||||
cOptim = ONE_THIRD / sqrt(dOptim)
|
||||
}
|
||||
|
||||
override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
|
||||
var v: Double
|
||||
|
||||
while (true) {
|
||||
val x = gaussian.next(generator)
|
||||
val oPcTx = 1 + cOptim * x
|
||||
v = oPcTx * oPcTx * oPcTx
|
||||
if (v <= 0) continue
|
||||
val x2 = x * x
|
||||
val u = generator.nextDouble()
|
||||
// Squeeze.
|
||||
if (u < 1 - 0.0331 * x2 * x2) break
|
||||
if (ln(u) < 0.5 * x2 + dOptim * (1 - v + ln(v))) break
|
||||
}
|
||||
|
||||
theta * dOptim * v
|
||||
}
|
||||
|
||||
companion object {
|
||||
private const val ONE_THIRD = 1.0 / 3.0
|
||||
}
|
||||
}
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<Double> = delegate.sample(generator)
|
||||
public override fun toString(): String = delegate.toString()
|
||||
|
||||
public companion object {
|
||||
public fun of(
|
||||
alpha: Double,
|
||||
theta: Double
|
||||
): Sampler<Double> = AhrensDieterMarsagliaTsangGammaSampler(alpha, theta)
|
||||
}
|
||||
}
|
@ -0,0 +1,286 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import space.kscience.kmath.stat.internal.InternalUtils
|
||||
import kotlin.math.ceil
|
||||
import kotlin.math.max
|
||||
import kotlin.math.min
|
||||
|
||||
/**
|
||||
* Distribution sampler that uses the Alias method. It can be used to sample from n values each with an associated
|
||||
* probability. This implementation is based on the detailed explanation of the alias method by Keith Schartz and
|
||||
* implements Vose's algorithm.
|
||||
*
|
||||
* Vose, M.D., A linear algorithm for generating random numbers with a given distribution, IEEE Transactions on
|
||||
* Software Engineering, 17, 972-975, 1991. he algorithm will sample values in O(1) time after a pre-processing step
|
||||
* of O(n) time.
|
||||
*
|
||||
* The alias tables are constructed using fraction probabilities with an assumed denominator of 253. In the generic
|
||||
* case sampling uses UniformRandomProvider.nextInt(int) and the upper 53-bits from UniformRandomProvider.nextLong().
|
||||
*
|
||||
* Zero padding the input probabilities can be used to make more sampling more efficient. Any zero entry will always be
|
||||
* aliased removing the requirement to compute a long. Increased sampling speed comes at the cost of increased storage
|
||||
* space. The algorithm requires approximately 12 bytes of storage per input probability, that is n * 12 for size n.
|
||||
* Zero-padding only requires 4 bytes of storage per padded value as the probability is known to be zero.
|
||||
*
|
||||
* An optimisation is performed for small table sizes that are a power of 2. In this case the sampling uses 1 or 2
|
||||
* calls from UniformRandomProvider.nextInt() to generate up to 64-bits for creation of an 11-bit index and 53-bits
|
||||
* for the long. This optimisation requires a generator with a high cycle length for the lower order bits.
|
||||
*
|
||||
* Larger table sizes that are a power of 2 will benefit from fast algorithms for UniformRandomProvider.nextInt(int)
|
||||
* that exploit the power of 2.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AliasMethodDiscreteSampler.html].
|
||||
*/
|
||||
public open class AliasMethodDiscreteSampler private constructor(
|
||||
// Deliberate direct storage of input arrays
|
||||
protected val probability: LongArray,
|
||||
protected val alias: IntArray
|
||||
) : Sampler<Int> {
|
||||
|
||||
private class SmallTableAliasMethodDiscreteSampler(
|
||||
probability: LongArray,
|
||||
alias: IntArray
|
||||
) : AliasMethodDiscreteSampler(probability, alias) {
|
||||
// Assume the table size is a power of 2 and create the mask
|
||||
private val mask: Int = alias.size - 1
|
||||
|
||||
override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
|
||||
val bits = generator.nextInt()
|
||||
// Isolate lower bits
|
||||
val j = bits and mask
|
||||
|
||||
// Optimisation for zero-padded input tables
|
||||
if (j >= probability.size)
|
||||
// No probability must use the alias
|
||||
return@chain alias[j]
|
||||
|
||||
// Create a uniform random deviate as a long.
|
||||
// This replicates functionality from the o.a.c.rng.core.utils.NumberFactory.makeLong
|
||||
val longBits = generator.nextInt().toLong() shl 32 or (bits.toLong() and hex_ffffffff)
|
||||
// Choose between the two. Use a 53-bit long for the probability.
|
||||
if (longBits ushr 11 < probability[j]) j else alias[j]
|
||||
}
|
||||
|
||||
private companion object {
|
||||
private const val hex_ffffffff = 4294967295L
|
||||
}
|
||||
}
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
|
||||
// This implements the algorithm as per Vose (1991):
|
||||
// v = uniform() in [0, 1)
|
||||
// j = uniform(n) in [0, n)
|
||||
// if v < prob[j] then
|
||||
// return j
|
||||
// else
|
||||
// return alias[j]
|
||||
val j = generator.nextInt(alias.size)
|
||||
|
||||
// Optimisation for zero-padded input tables
|
||||
// No probability must use the alias
|
||||
if (j >= probability.size) return@chain alias[j]
|
||||
|
||||
// Note: We could check the probability before computing a deviate.
|
||||
// p(j) == 0 => alias[j]
|
||||
// p(j) == 1 => j
|
||||
// However it is assumed these edge cases are rare:
|
||||
//
|
||||
// The probability table will be 1 for approximately 1/n samples, i.e. only the
|
||||
// last unpaired probability. This is only worth checking for when the table size (n)
|
||||
// is small. But in that case the user should zero-pad the table for performance.
|
||||
//
|
||||
// The probability table will be 0 when an input probability was zero. We
|
||||
// will assume this is also rare if modelling a discrete distribution where
|
||||
// all samples are possible. The edge case for zero-padded tables is handled above.
|
||||
|
||||
// Choose between the two. Use a 53-bit long for the probability.
|
||||
if (generator.nextLong() ushr 11 < probability[j]) j else alias[j]
|
||||
}
|
||||
|
||||
public override fun toString(): String = "Alias method"
|
||||
|
||||
public companion object {
|
||||
private const val DEFAULT_ALPHA = 0
|
||||
private const val ZERO = 0.0
|
||||
private const val ONE_AS_NUMERATOR = 1L shl 53
|
||||
private const val CONVERT_TO_NUMERATOR: Double = ONE_AS_NUMERATOR.toDouble()
|
||||
private const val MAX_SMALL_POWER_2_SIZE = 1 shl 11
|
||||
|
||||
public fun of(
|
||||
probabilities: DoubleArray,
|
||||
alpha: Int = DEFAULT_ALPHA
|
||||
): Sampler<Int> {
|
||||
// The Alias method balances N categories with counts around the mean into N sections,
|
||||
// each allocated 'mean' observations.
|
||||
//
|
||||
// Consider 4 categories with counts 6,3,2,1. The histogram can be balanced into a
|
||||
// 2D array as 4 sections with a height of the mean:
|
||||
//
|
||||
// 6
|
||||
// 6
|
||||
// 6
|
||||
// 63 => 6366 --
|
||||
// 632 6326 |-- mean
|
||||
// 6321 6321 --
|
||||
//
|
||||
// section abcd
|
||||
//
|
||||
// Each section is divided as:
|
||||
// a: 6=1/1
|
||||
// b: 3=1/1
|
||||
// c: 2=2/3; 6=1/3 (6 is the alias)
|
||||
// d: 1=1/3; 6=2/3 (6 is the alias)
|
||||
//
|
||||
// The sample is obtained by randomly selecting a section, then choosing which category
|
||||
// from the pair based on a uniform random deviate.
|
||||
val sumProb = InternalUtils.validateProbabilities(probabilities)
|
||||
// Allow zero-padding
|
||||
val n = computeSize(probabilities.size, alpha)
|
||||
// Partition into small and large by splitting on the average.
|
||||
val mean = sumProb / n
|
||||
// The cardinality of smallSize + largeSize = n.
|
||||
// So fill the same array from either end.
|
||||
val indices = IntArray(n)
|
||||
var large = n
|
||||
var small = 0
|
||||
|
||||
probabilities.indices.forEach { i ->
|
||||
if (probabilities[i] >= mean) indices[--large] = i else indices[small++] = i
|
||||
}
|
||||
|
||||
small = fillRemainingIndices(probabilities.size, indices, small)
|
||||
// This may be smaller than the input length if the probabilities were already padded.
|
||||
val nonZeroIndex = findLastNonZeroIndex(probabilities)
|
||||
// The probabilities are modified so use a copy.
|
||||
// Note: probabilities are required only up to last nonZeroIndex
|
||||
val remainingProbabilities = probabilities.copyOf(nonZeroIndex + 1)
|
||||
// Allocate the final tables.
|
||||
// Probability table may be truncated (when zero padded).
|
||||
// The alias table is full length.
|
||||
val probability = LongArray(remainingProbabilities.size)
|
||||
val alias = IntArray(n)
|
||||
|
||||
// This loop uses each large in turn to fill the alias table for small probabilities that
|
||||
// do not reach the requirement to fill an entire section alone (i.e. p < mean).
|
||||
// Since the sum of the small should be less than the sum of the large it should use up
|
||||
// all the small first. However floating point round-off can result in
|
||||
// misclassification of items as small or large. The Vose algorithm handles this using
|
||||
// a while loop conditioned on the size of both sets and a subsequent loop to use
|
||||
// unpaired items.
|
||||
while (large != n && small != 0) {
|
||||
// Index of the small and the large probabilities.
|
||||
val j = indices[--small]
|
||||
val k = indices[large++]
|
||||
|
||||
// Optimisation for zero-padded input:
|
||||
// p(j) = 0 above the last nonZeroIndex
|
||||
if (j > nonZeroIndex)
|
||||
// The entire amount for the section is taken from the alias.
|
||||
remainingProbabilities[k] -= mean
|
||||
else {
|
||||
val pj = remainingProbabilities[j]
|
||||
// Item j is a small probability that is below the mean.
|
||||
// Compute the weight of the section for item j: pj / mean.
|
||||
// This is scaled by 2^53 and the ceiling function used to round-up
|
||||
// the probability to a numerator of a fraction in the range [1,2^53].
|
||||
// Ceiling ensures non-zero values.
|
||||
probability[j] = ceil(CONVERT_TO_NUMERATOR * (pj / mean)).toLong()
|
||||
// The remaining amount for the section is taken from the alias.
|
||||
// Effectively: probabilities[k] -= (mean - pj)
|
||||
remainingProbabilities[k] += pj - mean
|
||||
}
|
||||
|
||||
// If not j then the alias is k
|
||||
alias[j] = k
|
||||
|
||||
// Add the remaining probability from large to the appropriate list.
|
||||
if (remainingProbabilities[k] >= mean) indices[--large] = k else indices[small++] = k
|
||||
}
|
||||
|
||||
// Final loop conditions to consume unpaired items.
|
||||
// Note: The large set should never be non-empty but this can occur due to round-off
|
||||
// error so consume from both.
|
||||
fillTable(probability, alias, indices, 0, small)
|
||||
fillTable(probability, alias, indices, large, n)
|
||||
|
||||
// Change the algorithm for small power of 2 sized tables
|
||||
return if (isSmallPowerOf2(n))
|
||||
SmallTableAliasMethodDiscreteSampler(probability, alias)
|
||||
else
|
||||
AliasMethodDiscreteSampler(probability, alias)
|
||||
}
|
||||
|
||||
private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int {
|
||||
var updatedSmall = small
|
||||
(length until indices.size).forEach { i -> indices[updatedSmall++] = i }
|
||||
return updatedSmall
|
||||
}
|
||||
|
||||
private fun findLastNonZeroIndex(probabilities: DoubleArray): Int {
|
||||
// No bounds check is performed when decrementing as the array contains at least one
|
||||
// value above zero.
|
||||
var nonZeroIndex = probabilities.size - 1
|
||||
while (probabilities[nonZeroIndex] == ZERO) nonZeroIndex--
|
||||
return nonZeroIndex
|
||||
}
|
||||
|
||||
private fun computeSize(length: Int, alpha: Int): Int {
|
||||
// If No padding
|
||||
if (alpha < 0) return length
|
||||
// Use the number of leading zeros function to find the next power of 2,
|
||||
// i.e. ceil(log2(x))
|
||||
var pow2 = 32 - numberOfLeadingZeros(length - 1)
|
||||
// Increase by the alpha. Clip this to limit to a positive integer (2^30)
|
||||
pow2 = min(30, pow2 + alpha)
|
||||
// Use max to handle a length above the highest possible power of 2
|
||||
return max(length, 1 shl pow2)
|
||||
}
|
||||
|
||||
private fun fillTable(
|
||||
probability: LongArray,
|
||||
alias: IntArray,
|
||||
indices: IntArray,
|
||||
start: Int,
|
||||
end: Int
|
||||
) = (start until end).forEach { i ->
|
||||
val index = indices[i]
|
||||
probability[index] = ONE_AS_NUMERATOR
|
||||
alias[index] = index
|
||||
}
|
||||
|
||||
private fun isSmallPowerOf2(n: Int): Boolean = n <= MAX_SMALL_POWER_2_SIZE && n and n - 1 == 0
|
||||
|
||||
private fun numberOfLeadingZeros(i: Int): Int {
|
||||
var mutI = i
|
||||
if (mutI <= 0) return if (mutI == 0) 32 else 0
|
||||
var n = 31
|
||||
|
||||
if (mutI >= 1 shl 16) {
|
||||
n -= 16
|
||||
mutI = mutI ushr 16
|
||||
}
|
||||
|
||||
if (mutI >= 1 shl 8) {
|
||||
n -= 8
|
||||
mutI = mutI ushr 8
|
||||
}
|
||||
|
||||
if (mutI >= 1 shl 4) {
|
||||
n -= 4
|
||||
mutI = mutI ushr 4
|
||||
}
|
||||
|
||||
if (mutI >= 1 shl 2) {
|
||||
n -= 2
|
||||
mutI = mutI ushr 2
|
||||
}
|
||||
|
||||
return n - (mutI ushr 1)
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,48 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import kotlin.math.*
|
||||
|
||||
/**
|
||||
* [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian
|
||||
* distribution.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html].
|
||||
*/
|
||||
public class BoxMullerNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
|
||||
private var nextGaussian: Double = Double.NaN
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
|
||||
val random: Double
|
||||
|
||||
if (nextGaussian.isNaN()) {
|
||||
// Generate a pair of Gaussian numbers.
|
||||
val x = nextDouble()
|
||||
val y = nextDouble()
|
||||
val alpha = 2 * PI * x
|
||||
val r = sqrt(-2 * ln(y))
|
||||
// Return the first element of the generated pair.
|
||||
random = r * cos(alpha)
|
||||
// Keep second element of the pair for next invocation.
|
||||
nextGaussian = r * sin(alpha)
|
||||
} else {
|
||||
// Use the second element of the pair (generated at the
|
||||
// previous invocation).
|
||||
random = nextGaussian
|
||||
// Both elements of the pair have been used.
|
||||
nextGaussian = Double.NaN
|
||||
}
|
||||
|
||||
random
|
||||
}
|
||||
|
||||
public override fun toString(): String = "Box-Muller normalized Gaussian deviate"
|
||||
|
||||
public companion object {
|
||||
public fun of(): BoxMullerNormalizedGaussianSampler = BoxMullerNormalizedGaussianSampler()
|
||||
}
|
||||
}
|
@ -0,0 +1,43 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.chains.map
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
|
||||
/**
|
||||
* Sampling from a Gaussian distribution with given mean and standard deviation.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html].
|
||||
*
|
||||
* @property mean the mean of the distribution.
|
||||
* @property standardDeviation the variance of the distribution.
|
||||
*/
|
||||
public class GaussianSampler private constructor(
|
||||
public val mean: Double,
|
||||
public val standardDeviation: Double,
|
||||
private val normalized: NormalizedGaussianSampler
|
||||
) : Sampler<Double> {
|
||||
public override fun sample(generator: RandomGenerator): Chain<Double> = normalized
|
||||
.sample(generator)
|
||||
.map { standardDeviation * it + mean }
|
||||
|
||||
override fun toString(): String = "Gaussian deviate [$normalized]"
|
||||
|
||||
public companion object {
|
||||
public fun of(
|
||||
mean: Double,
|
||||
standardDeviation: Double,
|
||||
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of()
|
||||
): GaussianSampler {
|
||||
require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" }
|
||||
|
||||
return GaussianSampler(
|
||||
mean,
|
||||
standardDeviation,
|
||||
normalized
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,63 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import kotlin.math.exp
|
||||
|
||||
/**
|
||||
* Sampler for the Poisson distribution.
|
||||
* - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
|
||||
* This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead.
|
||||
*
|
||||
* Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor.
|
||||
*
|
||||
* Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html].
|
||||
*/
|
||||
public class KempSmallMeanPoissonSampler private constructor(
|
||||
private val p0: Double,
|
||||
private val mean: Double
|
||||
) : Sampler<Int> {
|
||||
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
|
||||
// Note on the algorithm:
|
||||
// - X is the unknown sample deviate (the output of the algorithm)
|
||||
// - x is the current value from the distribution
|
||||
// - p is the probability of the current value x, p(X=x)
|
||||
// - u is effectively the cumulative probability that the sample X
|
||||
// is equal or above the current value x, p(X>=x)
|
||||
// So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
|
||||
var u = nextDouble()
|
||||
var x = 0
|
||||
var p = p0
|
||||
|
||||
while (u > p) {
|
||||
u -= p
|
||||
// Compute the next probability using a recurrence relation.
|
||||
// p(x+1) = p(x) * mean / (x+1)
|
||||
p *= mean / ++x
|
||||
// The algorithm listed in Kemp (1981) does not check that the rolling probability
|
||||
// is positive. This check is added to ensure no errors when the limit of the summation
|
||||
// 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic.
|
||||
if (p == 0.0) return@chain x
|
||||
}
|
||||
|
||||
x
|
||||
}
|
||||
|
||||
public override fun toString(): String = "Kemp Small Mean Poisson deviate"
|
||||
|
||||
public companion object {
|
||||
public fun of(mean: Double): KempSmallMeanPoissonSampler {
|
||||
require(mean > 0) { "Mean is not strictly positive: $mean" }
|
||||
val p0 = exp(-mean)
|
||||
// Probability must be positive. As mean increases then p(0) decreases.
|
||||
require(p0 > 0) { "No probability for mean: $mean" }
|
||||
return KempSmallMeanPoissonSampler(p0, mean)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,130 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.chains.ConstantChain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import space.kscience.kmath.stat.internal.InternalUtils
|
||||
import space.kscience.kmath.stat.next
|
||||
import kotlin.math.*
|
||||
|
||||
/**
|
||||
* Sampler for the Poisson distribution.
|
||||
* - For large means, we use the rejection algorithm described in
|
||||
* Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables
|
||||
* Computing vol. 26 pp. 197-207.
|
||||
*
|
||||
* This sampler is suitable for mean >= 40.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html].
|
||||
*/
|
||||
public class LargeMeanPoissonSampler private constructor(public val mean: Double) : Sampler<Int> {
|
||||
private val exponential: Sampler<Double> = AhrensDieterExponentialSampler.of(1.0)
|
||||
private val gaussian: Sampler<Double> = ZigguratNormalizedGaussianSampler.of()
|
||||
private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG
|
||||
private val lambda: Double = floor(mean)
|
||||
private val logLambda: Double = ln(lambda)
|
||||
private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt())
|
||||
private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1))
|
||||
private val halfDelta: Double = delta / 2
|
||||
private val twolpd: Double = 2 * lambda + delta
|
||||
private val c1: Double = 1 / (8 * lambda)
|
||||
private val a1: Double = sqrt(PI * twolpd) * exp(c1)
|
||||
private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd)
|
||||
private val aSum: Double = a1 + a2 + 1
|
||||
private val p1: Double = a1 / aSum
|
||||
private val p2: Double = a2 / aSum
|
||||
|
||||
private val smallMeanPoissonSampler: Sampler<Int> = if (mean - lambda < Double.MIN_VALUE)
|
||||
NO_SMALL_MEAN_POISSON_SAMPLER
|
||||
else // Not used.
|
||||
KempSmallMeanPoissonSampler.of(mean - lambda)
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
|
||||
// This will never be null. It may be a no-op delegate that returns zero.
|
||||
val y2 = smallMeanPoissonSampler.next(generator)
|
||||
var x: Double
|
||||
var y: Double
|
||||
var v: Double
|
||||
var a: Int
|
||||
var t: Double
|
||||
var qr: Double
|
||||
var qa: Double
|
||||
|
||||
while (true) {
|
||||
// Step 1:
|
||||
val u = generator.nextDouble()
|
||||
|
||||
if (u <= p1) {
|
||||
// Step 2:
|
||||
val n = gaussian.next(generator)
|
||||
x = n * sqrt(lambda + halfDelta) - 0.5
|
||||
if (x > delta || x < -lambda) continue
|
||||
y = if (x < 0) floor(x) else ceil(x)
|
||||
val e = exponential.next(generator)
|
||||
v = -e - 0.5 * n * n + c1
|
||||
} else {
|
||||
// Step 3:
|
||||
if (u > p1 + p2) {
|
||||
y = lambda
|
||||
break
|
||||
}
|
||||
|
||||
x = delta + twolpd / delta * exponential.next(generator)
|
||||
y = ceil(x)
|
||||
v = -exponential.next(generator) - delta * (x + 1) / twolpd
|
||||
}
|
||||
|
||||
// The Squeeze Principle
|
||||
// Step 4.1:
|
||||
a = if (x < 0) 1 else 0
|
||||
t = y * (y + 1) / (2 * lambda)
|
||||
|
||||
// Step 4.2
|
||||
if (v < -t && a == 0) {
|
||||
y += lambda
|
||||
break
|
||||
}
|
||||
|
||||
// Step 4.3:
|
||||
qr = t * ((2 * y + 1) / (6 * lambda) - 1)
|
||||
qa = qr - t * t / (3 * (lambda + a * (y + 1)))
|
||||
|
||||
// Step 4.4:
|
||||
if (v < qa) {
|
||||
y += lambda
|
||||
break
|
||||
}
|
||||
|
||||
// Step 4.5:
|
||||
if (v > qr) continue
|
||||
|
||||
// Step 4.6:
|
||||
if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) {
|
||||
y += lambda
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt()
|
||||
}
|
||||
|
||||
private fun getFactorialLog(n: Int): Double = factorialLog.value(n)
|
||||
public override fun toString(): String = "Large Mean Poisson deviate"
|
||||
|
||||
public companion object {
|
||||
private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE
|
||||
private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create()
|
||||
|
||||
private val NO_SMALL_MEAN_POISSON_SAMPLER: Sampler<Int> = Sampler { ConstantChain(0) }
|
||||
|
||||
public fun of(mean: Double): LargeMeanPoissonSampler {
|
||||
require(mean >= 1) { "mean is not >= 1: $mean" }
|
||||
// The algorithm is not valid if Math.floor(mean) is not an integer.
|
||||
require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" }
|
||||
return LargeMeanPoissonSampler(mean)
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,61 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import kotlin.math.ln
|
||||
import kotlin.math.sqrt
|
||||
|
||||
/**
|
||||
* [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian
|
||||
* distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in
|
||||
* [BoxMullerNormalizedGaussianSampler].
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html]
|
||||
*/
|
||||
public class MarsagliaNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
|
||||
private var nextGaussian = Double.NaN
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
|
||||
if (nextGaussian.isNaN()) {
|
||||
val alpha: Double
|
||||
var x: Double
|
||||
|
||||
// Rejection scheme for selecting a pair that lies within the unit circle.
|
||||
while (true) {
|
||||
// Generate a pair of numbers within [-1 , 1).
|
||||
x = 2.0 * generator.nextDouble() - 1.0
|
||||
val y = 2.0 * generator.nextDouble() - 1.0
|
||||
val r2 = x * x + y * y
|
||||
|
||||
if (r2 < 1 && r2 > 0) {
|
||||
// Pair (x, y) is within unit circle.
|
||||
alpha = sqrt(-2 * ln(r2) / r2)
|
||||
// Keep second element of the pair for next invocation.
|
||||
nextGaussian = alpha * y
|
||||
// Return the first element of the generated pair.
|
||||
break
|
||||
}
|
||||
// Pair is not within the unit circle: Generate another one.
|
||||
}
|
||||
|
||||
// Return the first element of the generated pair.
|
||||
alpha * x
|
||||
} else {
|
||||
// Use the second element of the pair (generated at the
|
||||
// previous invocation).
|
||||
val r = nextGaussian
|
||||
// Both elements of the pair have been used.
|
||||
nextGaussian = Double.NaN
|
||||
r
|
||||
}
|
||||
}
|
||||
|
||||
public override fun toString(): String = "Box-Muller (with rejection) normalized Gaussian deviate"
|
||||
|
||||
public companion object {
|
||||
public fun of(): MarsagliaNormalizedGaussianSampler = MarsagliaNormalizedGaussianSampler()
|
||||
}
|
||||
}
|
@ -0,0 +1,9 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
|
||||
/**
|
||||
* Marker interface for a sampler that generates values from an N(0,1)
|
||||
* [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution).
|
||||
*/
|
||||
public interface NormalizedGaussianSampler : Sampler<Double>
|
@ -0,0 +1,30 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
|
||||
/**
|
||||
* Sampler for the Poisson distribution.
|
||||
* - For small means, a Poisson process is simulated using uniform deviates, as described in
|
||||
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3
|
||||
* Important integer-valued distributions: The Poisson distribution. Addison Wesley.
|
||||
* The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
|
||||
* - For large means, we use the rejection algorithm described in
|
||||
* Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html].
|
||||
*/
|
||||
public class PoissonSampler private constructor(mean: Double) : Sampler<Int> {
|
||||
private val poissonSamplerDelegate: Sampler<Int> = of(mean)
|
||||
public override fun sample(generator: RandomGenerator): Chain<Int> = poissonSamplerDelegate.sample(generator)
|
||||
public override fun toString(): String = poissonSamplerDelegate.toString()
|
||||
|
||||
public companion object {
|
||||
private const val PIVOT = 40.0
|
||||
|
||||
public fun of(mean: Double): Sampler<Int> =// Each sampler should check the input arguments.
|
||||
if (mean < PIVOT) SmallMeanPoissonSampler.of(mean) else LargeMeanPoissonSampler.of(mean)
|
||||
}
|
||||
}
|
@ -0,0 +1,50 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import kotlin.math.ceil
|
||||
import kotlin.math.exp
|
||||
|
||||
/**
|
||||
* Sampler for the Poisson distribution.
|
||||
* - For small means, a Poisson process is simulated using uniform deviates, as described in
|
||||
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important
|
||||
* integer-valued distributions: The Poisson distribution. Addison Wesley.
|
||||
* - The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
|
||||
* This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
*
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html].
|
||||
*/
|
||||
public class SmallMeanPoissonSampler private constructor(mean: Double) : Sampler<Int> {
|
||||
private val p0: Double = exp(-mean)
|
||||
|
||||
private val limit: Int = (if (p0 > 0)
|
||||
ceil(1000 * mean)
|
||||
else
|
||||
throw IllegalArgumentException("No p(x=0) probability for mean: $mean")).toInt()
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
|
||||
var n = 0
|
||||
var r = 1.0
|
||||
|
||||
while (n < limit) {
|
||||
r *= nextDouble()
|
||||
if (r >= p0) n++ else break
|
||||
}
|
||||
|
||||
n
|
||||
}
|
||||
|
||||
public override fun toString(): String = "Small Mean Poisson deviate"
|
||||
|
||||
public companion object {
|
||||
public fun of(mean: Double): SmallMeanPoissonSampler {
|
||||
require(mean > 0) { "mean is not strictly positive: $mean" }
|
||||
return SmallMeanPoissonSampler(mean)
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,88 @@
|
||||
package space.kscience.kmath.stat.samplers
|
||||
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import space.kscience.kmath.stat.RandomGenerator
|
||||
import space.kscience.kmath.stat.Sampler
|
||||
import space.kscience.kmath.stat.chain
|
||||
import kotlin.math.*
|
||||
|
||||
/**
|
||||
* [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a
|
||||
* Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this
|
||||
* implementation has been adapted from the C code provided therein.
|
||||
*
|
||||
* Based on Commons RNG implementation.
|
||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html].
|
||||
*/
|
||||
public class ZigguratNormalizedGaussianSampler private constructor() :
|
||||
NormalizedGaussianSampler, Sampler<Double> {
|
||||
|
||||
private fun sampleOne(generator: RandomGenerator): Double {
|
||||
val j = generator.nextLong()
|
||||
val i = (j and LAST.toLong()).toInt()
|
||||
return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i)
|
||||
}
|
||||
|
||||
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain { sampleOne(this) }
|
||||
public override fun toString(): String = "Ziggurat normalized Gaussian deviate"
|
||||
|
||||
private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double {
|
||||
var x = hz * W[iz]
|
||||
|
||||
return when {
|
||||
iz == 0 -> {
|
||||
var y: Double
|
||||
|
||||
do {
|
||||
y = -ln(generator.nextDouble())
|
||||
x = -ln(generator.nextDouble()) * ONE_OVER_R
|
||||
} while (y + y < x * x)
|
||||
|
||||
val out = R + x
|
||||
if (hz > 0) out else -out
|
||||
}
|
||||
|
||||
F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x
|
||||
else -> sampleOne(generator)
|
||||
}
|
||||
}
|
||||
|
||||
public companion object {
|
||||
private const val R: Double = 3.442619855899
|
||||
private const val ONE_OVER_R: Double = 1 / R
|
||||
private const val V: Double = 9.91256303526217e-3
|
||||
private val MAX: Double = 2.0.pow(63.0)
|
||||
private val ONE_OVER_MAX: Double = 1.0 / MAX
|
||||
private const val LEN: Int = 128
|
||||
private const val LAST: Int = LEN - 1
|
||||
private val K: LongArray = LongArray(LEN)
|
||||
private val W: DoubleArray = DoubleArray(LEN)
|
||||
private val F: DoubleArray = DoubleArray(LEN)
|
||||
|
||||
init {
|
||||
// Filling the tables.
|
||||
var d = R
|
||||
var t = d
|
||||
var fd = gauss(d)
|
||||
val q = V / fd
|
||||
K[0] = (d / q * MAX).toLong()
|
||||
K[1] = 0
|
||||
W[0] = q * ONE_OVER_MAX
|
||||
W[LAST] = d * ONE_OVER_MAX
|
||||
F[0] = 1.0
|
||||
F[LAST] = fd
|
||||
|
||||
(LAST - 1 downTo 1).forEach { i ->
|
||||
d = sqrt(-2 * ln(V / d + fd))
|
||||
fd = gauss(d)
|
||||
K[i + 1] = (d / t * MAX).toLong()
|
||||
t = d
|
||||
F[i] = fd
|
||||
W[i] = d * ONE_OVER_MAX
|
||||
}
|
||||
}
|
||||
|
||||
public fun of(): ZigguratNormalizedGaussianSampler = ZigguratNormalizedGaussianSampler()
|
||||
private fun gauss(x: Double): Double = exp(-0.5 * x * x)
|
||||
}
|
||||
}
|
@ -3,10 +3,14 @@ package space.kscience.kmath.stat
|
||||
import org.apache.commons.rng.UniformRandomProvider
|
||||
import org.apache.commons.rng.simple.RandomSource
|
||||
|
||||
public class RandomSourceGenerator(public val source: RandomSource, seed: Long?) : RandomGenerator {
|
||||
internal val random: UniformRandomProvider = seed?.let {
|
||||
RandomSource.create(source, seed)
|
||||
} ?: RandomSource.create(source)
|
||||
/**
|
||||
* Implements [RandomGenerator] by delegating all operations to [RandomSource].
|
||||
*
|
||||
* @property source the underlying [RandomSource] object.
|
||||
*/
|
||||
public class RandomSourceGenerator internal constructor(public val source: RandomSource, seed: Long?) : RandomGenerator {
|
||||
internal val random: UniformRandomProvider = seed?.let { RandomSource.create(source, seed) }
|
||||
?: RandomSource.create(source)
|
||||
|
||||
public override fun nextBoolean(): Boolean = random.nextBoolean()
|
||||
public override fun nextDouble(): Double = random.nextDouble()
|
||||
@ -23,22 +27,84 @@ public class RandomSourceGenerator(public val source: RandomSource, seed: Long?)
|
||||
public override fun fork(): RandomGenerator = RandomSourceGenerator(source, nextLong())
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements [UniformRandomProvider] by delegating all operations to [RandomGenerator].
|
||||
*
|
||||
* @property generator the underlying [RandomGenerator] object.
|
||||
*/
|
||||
public inline class RandomGeneratorProvider(public val generator: RandomGenerator) : UniformRandomProvider {
|
||||
/**
|
||||
* Generates a [Boolean] value.
|
||||
*
|
||||
* @return the next random value.
|
||||
*/
|
||||
public override fun nextBoolean(): Boolean = generator.nextBoolean()
|
||||
|
||||
/**
|
||||
* Generates a [Float] value between 0 and 1.
|
||||
*
|
||||
* @return the next random value between 0 and 1.
|
||||
*/
|
||||
public override fun nextFloat(): Float = generator.nextDouble().toFloat()
|
||||
|
||||
public override fun nextBytes(bytes: ByteArray) {
|
||||
generator.fillBytes(bytes)
|
||||
}
|
||||
/**
|
||||
* Generates [Byte] values and places them into a user-supplied array.
|
||||
*
|
||||
* The number of random bytes produced is equal to the length of the the byte array.
|
||||
*
|
||||
* @param bytes byte array in which to put the random bytes.
|
||||
*/
|
||||
public override fun nextBytes(bytes: ByteArray): Unit = generator.fillBytes(bytes)
|
||||
|
||||
/**
|
||||
* Generates [Byte] values and places them into a user-supplied array.
|
||||
*
|
||||
* The array is filled with bytes extracted from random integers. This implies that the number of random bytes
|
||||
* generated may be larger than the length of the byte array.
|
||||
*
|
||||
* @param bytes the array in which to put the generated bytes.
|
||||
* @param start the index at which to start inserting the generated bytes.
|
||||
* @param len the number of bytes to insert.
|
||||
*/
|
||||
public override fun nextBytes(bytes: ByteArray, start: Int, len: Int) {
|
||||
generator.fillBytes(bytes, start, start + len)
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates an [Int] value.
|
||||
*
|
||||
* @return the next random value.
|
||||
*/
|
||||
public override fun nextInt(): Int = generator.nextInt()
|
||||
|
||||
/**
|
||||
* Generates an [Int] value between 0 (inclusive) and the specified value (exclusive).
|
||||
*
|
||||
* @param n the bound on the random number to be returned. Must be positive.
|
||||
* @return a random integer between 0 (inclusive) and [n] (exclusive).
|
||||
*/
|
||||
public override fun nextInt(n: Int): Int = generator.nextInt(n)
|
||||
|
||||
/**
|
||||
* Generates a [Double] value between 0 and 1.
|
||||
*
|
||||
* @return the next random value between 0 and 1.
|
||||
*/
|
||||
public override fun nextDouble(): Double = generator.nextDouble()
|
||||
|
||||
/**
|
||||
* Generates a [Long] value.
|
||||
*
|
||||
* @return the next random value.
|
||||
*/
|
||||
public override fun nextLong(): Long = generator.nextLong()
|
||||
|
||||
/**
|
||||
* Generates a [Long] value between 0 (inclusive) and the specified value (exclusive).
|
||||
*
|
||||
* @param n Bound on the random number to be returned. Must be positive.
|
||||
* @return a random long value between 0 (inclusive) and [n] (exclusive).
|
||||
*/
|
||||
public override fun nextLong(n: Long): Long = generator.nextLong(n)
|
||||
}
|
||||
|
||||
@ -51,8 +117,14 @@ public fun RandomGenerator.asUniformRandomProvider(): UniformRandomProvider = if
|
||||
else
|
||||
RandomGeneratorProvider(this)
|
||||
|
||||
/**
|
||||
* Returns [RandomSourceGenerator] with given [RandomSource] and [seed].
|
||||
*/
|
||||
public fun RandomGenerator.Companion.fromSource(source: RandomSource, seed: Long? = null): RandomSourceGenerator =
|
||||
RandomSourceGenerator(source, seed)
|
||||
|
||||
/**
|
||||
* Returns [RandomSourceGenerator] with [RandomSource.MT] algorithm and given [seed].
|
||||
*/
|
||||
public fun RandomGenerator.Companion.mersenneTwister(seed: Long? = null): RandomSourceGenerator =
|
||||
fromSource(RandomSource.MT, seed)
|
||||
|
@ -1,99 +0,0 @@
|
||||
package space.kscience.kmath.stat
|
||||
|
||||
import org.apache.commons.rng.UniformRandomProvider
|
||||
import org.apache.commons.rng.sampling.distribution.*
|
||||
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||
import space.kscience.kmath.chains.BlockingIntChain
|
||||
import space.kscience.kmath.chains.Chain
|
||||
import kotlin.math.PI
|
||||
import kotlin.math.exp
|
||||
import kotlin.math.pow
|
||||
import kotlin.math.sqrt
|
||||
|
||||
public abstract class ContinuousSamplerDistribution : Distribution<Double> {
|
||||
private inner class ContinuousSamplerChain(val generator: RandomGenerator) : BlockingDoubleChain() {
|
||||
private val sampler = buildCMSampler(generator)
|
||||
|
||||
override fun nextDouble(): Double = sampler.sample()
|
||||
override fun fork(): Chain<Double> = ContinuousSamplerChain(generator.fork())
|
||||
}
|
||||
|
||||
protected abstract fun buildCMSampler(generator: RandomGenerator): ContinuousSampler
|
||||
|
||||
public override fun sample(generator: RandomGenerator): BlockingDoubleChain = ContinuousSamplerChain(generator)
|
||||
}
|
||||
|
||||
public abstract class DiscreteSamplerDistribution : Distribution<Int> {
|
||||
private inner class ContinuousSamplerChain(val generator: RandomGenerator) : BlockingIntChain() {
|
||||
private val sampler = buildSampler(generator)
|
||||
|
||||
override fun nextInt(): Int = sampler.sample()
|
||||
override fun fork(): Chain<Int> = ContinuousSamplerChain(generator.fork())
|
||||
}
|
||||
|
||||
protected abstract fun buildSampler(generator: RandomGenerator): DiscreteSampler
|
||||
|
||||
public override fun sample(generator: RandomGenerator): BlockingIntChain = ContinuousSamplerChain(generator)
|
||||
}
|
||||
|
||||
public enum class NormalSamplerMethod {
|
||||
BoxMuller,
|
||||
Marsaglia,
|
||||
Ziggurat
|
||||
}
|
||||
|
||||
private fun normalSampler(method: NormalSamplerMethod, provider: UniformRandomProvider): NormalizedGaussianSampler =
|
||||
when (method) {
|
||||
NormalSamplerMethod.BoxMuller -> BoxMullerNormalizedGaussianSampler(provider)
|
||||
NormalSamplerMethod.Marsaglia -> MarsagliaNormalizedGaussianSampler(provider)
|
||||
NormalSamplerMethod.Ziggurat -> ZigguratNormalizedGaussianSampler(provider)
|
||||
}
|
||||
|
||||
public fun Distribution.Companion.normal(
|
||||
method: NormalSamplerMethod = NormalSamplerMethod.Ziggurat,
|
||||
): ContinuousSamplerDistribution = object : ContinuousSamplerDistribution() {
|
||||
override fun buildCMSampler(generator: RandomGenerator): ContinuousSampler {
|
||||
val provider = generator.asUniformRandomProvider()
|
||||
return normalSampler(method, provider)
|
||||
}
|
||||
|
||||
override fun probability(arg: Double): Double = exp(-arg.pow(2) / 2) / sqrt(PI * 2)
|
||||
}
|
||||
|
||||
/**
|
||||
* A univariate normal distribution with given [mean] and [sigma]. [method] defines commons-rng generation method
|
||||
*/
|
||||
public fun Distribution.Companion.normal(
|
||||
mean: Double,
|
||||
sigma: Double,
|
||||
method: NormalSamplerMethod = NormalSamplerMethod.Ziggurat,
|
||||
): ContinuousSamplerDistribution = object : ContinuousSamplerDistribution() {
|
||||
private val sigma2 = sigma.pow(2)
|
||||
private val norm = sigma * sqrt(PI * 2)
|
||||
|
||||
override fun buildCMSampler(generator: RandomGenerator): ContinuousSampler {
|
||||
val provider = generator.asUniformRandomProvider()
|
||||
val normalizedSampler = normalSampler(method, provider)
|
||||
return GaussianSampler(normalizedSampler, mean, sigma)
|
||||
}
|
||||
|
||||
override fun probability(arg: Double): Double = exp(-(arg - mean).pow(2) / 2 / sigma2) / norm
|
||||
}
|
||||
|
||||
public fun Distribution.Companion.poisson(
|
||||
lambda: Double,
|
||||
): DiscreteSamplerDistribution = object : DiscreteSamplerDistribution() {
|
||||
private val computedProb: HashMap<Int, Double> = hashMapOf(0 to exp(-lambda))
|
||||
|
||||
override fun buildSampler(generator: RandomGenerator): DiscreteSampler =
|
||||
PoissonSampler.of(generator.asUniformRandomProvider(), lambda)
|
||||
|
||||
override fun probability(arg: Int): Double {
|
||||
require(arg >= 0) { "The argument must be >= 0" }
|
||||
|
||||
return if (arg > 40)
|
||||
exp(-(arg - lambda).pow(2) / 2 / lambda) / sqrt(2 * PI * lambda)
|
||||
else
|
||||
computedProb.getOrPut(arg) { probability(arg - 1) * lambda / arg }
|
||||
}
|
||||
}
|
@ -5,23 +5,22 @@ import kotlinx.coroutines.flow.toList
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import org.junit.jupiter.api.Assertions
|
||||
import org.junit.jupiter.api.Test
|
||||
import space.kscience.kmath.stat.samplers.GaussianSampler
|
||||
|
||||
internal class CommonsDistributionsTest {
|
||||
@Test
|
||||
fun testNormalDistributionSuspend() {
|
||||
val distribution = Distribution.normal(7.0, 2.0)
|
||||
val distribution = GaussianSampler.of(7.0, 2.0)
|
||||
val generator = RandomGenerator.default(1)
|
||||
val sample = runBlocking {
|
||||
distribution.sample(generator).take(1000).toList()
|
||||
}
|
||||
val sample = runBlocking { distribution.sample(generator).take(1000).toList() }
|
||||
Assertions.assertEquals(7.0, sample.average(), 0.1)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testNormalDistributionBlocking() {
|
||||
val distribution = Distribution.normal(7.0, 2.0)
|
||||
val distribution = GaussianSampler.of(7.0, 2.0)
|
||||
val generator = RandomGenerator.default(1)
|
||||
val sample = distribution.sample(generator).nextBlock(1000)
|
||||
val sample = runBlocking { distribution.sample(generator).blocking().nextBlock(1000) }
|
||||
Assertions.assertEquals(7.0, sample.average(), 0.1)
|
||||
}
|
||||
}
|
||||
|
@ -7,11 +7,8 @@ class SamplerTest {
|
||||
|
||||
@Test
|
||||
fun bufferSamplerTest() {
|
||||
val sampler: Sampler<Double> =
|
||||
BasicSampler { it.chain { nextDouble() } }
|
||||
val sampler = Sampler { it.chain { nextDouble() } }
|
||||
val data = sampler.sampleBuffer(RandomGenerator.default, 100)
|
||||
runBlocking {
|
||||
println(data.next())
|
||||
}
|
||||
runBlocking { println(data.next()) }
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user