Fix Samplers and distribution API
This commit is contained in:
parent
ae911fcc2f
commit
c2bab5d138
@ -7,6 +7,7 @@ import kscience.plotly.models.ScatterMode
|
|||||||
import kscience.plotly.models.TraceValues
|
import kscience.plotly.models.TraceValues
|
||||||
import space.kscience.kmath.commons.optimization.chiSquared
|
import space.kscience.kmath.commons.optimization.chiSquared
|
||||||
import space.kscience.kmath.commons.optimization.minimize
|
import space.kscience.kmath.commons.optimization.minimize
|
||||||
|
import space.kscience.kmath.distributions.NormalDistribution
|
||||||
import space.kscience.kmath.misc.symbol
|
import space.kscience.kmath.misc.symbol
|
||||||
import space.kscience.kmath.optimization.FunctionOptimization
|
import space.kscience.kmath.optimization.FunctionOptimization
|
||||||
import space.kscience.kmath.optimization.OptimizationResult
|
import space.kscience.kmath.optimization.OptimizationResult
|
||||||
@ -14,7 +15,6 @@ import space.kscience.kmath.real.DoubleVector
|
|||||||
import space.kscience.kmath.real.map
|
import space.kscience.kmath.real.map
|
||||||
import space.kscience.kmath.real.step
|
import space.kscience.kmath.real.step
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
import space.kscience.kmath.stat.distributions.NormalDistribution
|
|
||||||
import space.kscience.kmath.structures.asIterable
|
import space.kscience.kmath.structures.asIterable
|
||||||
import space.kscience.kmath.structures.toList
|
import space.kscience.kmath.structures.toList
|
||||||
import kotlin.math.pow
|
import kotlin.math.pow
|
||||||
|
@ -3,8 +3,8 @@ package space.kscience.kmath.stat
|
|||||||
import kotlinx.coroutines.Dispatchers
|
import kotlinx.coroutines.Dispatchers
|
||||||
import kotlinx.coroutines.async
|
import kotlinx.coroutines.async
|
||||||
import kotlinx.coroutines.runBlocking
|
import kotlinx.coroutines.runBlocking
|
||||||
import space.kscience.kmath.stat.samplers.GaussianSampler
|
|
||||||
import org.apache.commons.rng.simple.RandomSource
|
import org.apache.commons.rng.simple.RandomSource
|
||||||
|
import space.kscience.kmath.samplers.GaussianSampler
|
||||||
import java.time.Duration
|
import java.time.Duration
|
||||||
import java.time.Instant
|
import java.time.Instant
|
||||||
import org.apache.commons.rng.sampling.distribution.GaussianSampler as CMGaussianSampler
|
import org.apache.commons.rng.sampling.distribution.GaussianSampler as CMGaussianSampler
|
||||||
@ -12,8 +12,8 @@ import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSa
|
|||||||
|
|
||||||
private suspend fun runKMathChained(): Duration {
|
private suspend fun runKMathChained(): Duration {
|
||||||
val generator = RandomGenerator.fromSource(RandomSource.MT, 123L)
|
val generator = RandomGenerator.fromSource(RandomSource.MT, 123L)
|
||||||
val normal = GaussianSampler.of(7.0, 2.0)
|
val normal = GaussianSampler(7.0, 2.0)
|
||||||
val chain = normal.sample(generator).blocking()
|
val chain = normal.sample(generator)
|
||||||
val startTime = Instant.now()
|
val startTime = Instant.now()
|
||||||
var sum = 0.0
|
var sum = 0.0
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ package space.kscience.kmath.stat
|
|||||||
import kotlinx.coroutines.runBlocking
|
import kotlinx.coroutines.runBlocking
|
||||||
import space.kscience.kmath.chains.Chain
|
import space.kscience.kmath.chains.Chain
|
||||||
import space.kscience.kmath.chains.collectWithState
|
import space.kscience.kmath.chains.collectWithState
|
||||||
import space.kscience.kmath.stat.distributions.NormalDistribution
|
import space.kscience.kmath.distributions.NormalDistribution
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The state of distribution averager.
|
* The state of distribution averager.
|
||||||
|
@ -2,10 +2,10 @@ package space.kscience.kmath.commons.optimization
|
|||||||
|
|
||||||
import kotlinx.coroutines.runBlocking
|
import kotlinx.coroutines.runBlocking
|
||||||
import space.kscience.kmath.commons.expressions.DerivativeStructureExpression
|
import space.kscience.kmath.commons.expressions.DerivativeStructureExpression
|
||||||
|
import space.kscience.kmath.distributions.NormalDistribution
|
||||||
import space.kscience.kmath.misc.symbol
|
import space.kscience.kmath.misc.symbol
|
||||||
import space.kscience.kmath.optimization.FunctionOptimization
|
import space.kscience.kmath.optimization.FunctionOptimization
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
import space.kscience.kmath.stat.distributions.NormalDistribution
|
|
||||||
import kotlin.math.pow
|
import kotlin.math.pow
|
||||||
import kotlin.test.Test
|
import kotlin.test.Test
|
||||||
|
|
||||||
|
@ -0,0 +1,50 @@
|
|||||||
|
package space.kscience.kmath.chains
|
||||||
|
|
||||||
|
import space.kscience.kmath.structures.Buffer
|
||||||
|
|
||||||
|
|
||||||
|
public interface BufferChain<out T> : Chain<T> {
|
||||||
|
public suspend fun nextBuffer(size: Int): Buffer<T>
|
||||||
|
override suspend fun fork(): BufferChain<T>
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A chain with blocking generator that could be used without suspension
|
||||||
|
*/
|
||||||
|
public interface BlockingChain<out T> : Chain<T> {
|
||||||
|
/**
|
||||||
|
* Get the next value without concurrency support. Not guaranteed to be thread safe.
|
||||||
|
*/
|
||||||
|
public fun nextBlocking(): T
|
||||||
|
|
||||||
|
override suspend fun next(): T = nextBlocking()
|
||||||
|
|
||||||
|
override suspend fun fork(): BlockingChain<T>
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public interface BlockingBufferChain<out T> : BlockingChain<T>, BufferChain<T> {
|
||||||
|
|
||||||
|
public fun nextBufferBlocking(size: Int): Buffer<T>
|
||||||
|
|
||||||
|
public override fun nextBlocking(): T = nextBufferBlocking(1)[0]
|
||||||
|
|
||||||
|
public override suspend fun nextBuffer(size: Int): Buffer<T> = nextBufferBlocking(size)
|
||||||
|
|
||||||
|
override suspend fun fork(): BlockingBufferChain<T>
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public suspend inline fun <reified T : Any> Chain<T>.nextBuffer(size: Int): Buffer<T> = if (this is BufferChain) {
|
||||||
|
nextBuffer(size)
|
||||||
|
} else {
|
||||||
|
Buffer.auto(size) { next() }
|
||||||
|
}
|
||||||
|
|
||||||
|
public inline fun <reified T : Any> BlockingChain<T>.nextBufferBlocking(
|
||||||
|
size: Int,
|
||||||
|
): Buffer<T> = if (this is BlockingBufferChain) {
|
||||||
|
nextBufferBlocking(size)
|
||||||
|
} else {
|
||||||
|
Buffer.auto(size) { nextBlocking() }
|
||||||
|
}
|
@ -1,13 +1,27 @@
|
|||||||
package space.kscience.kmath.chains
|
package space.kscience.kmath.chains
|
||||||
|
|
||||||
|
import space.kscience.kmath.structures.DoubleBuffer
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Chunked, specialized chain for real values.
|
* Chunked, specialized chain for double values, which supports blocking [nextBlocking] operation
|
||||||
*/
|
*/
|
||||||
public interface BlockingDoubleChain : Chain<Double> {
|
public interface BlockingDoubleChain : BlockingBufferChain<Double> {
|
||||||
public override suspend fun next(): Double
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an [DoubleArray] chunk of [size] values of [next].
|
* Returns an [DoubleArray] chunk of [size] values of [next].
|
||||||
*/
|
*/
|
||||||
public suspend fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { next() }
|
public override fun nextBufferBlocking(size: Int): DoubleBuffer
|
||||||
|
|
||||||
|
override suspend fun fork(): BlockingDoubleChain
|
||||||
|
|
||||||
|
public companion object
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public fun BlockingDoubleChain.map(transform: (Double) -> Double): BlockingDoubleChain = object : BlockingDoubleChain {
|
||||||
|
override fun nextBufferBlocking(size: Int): DoubleBuffer {
|
||||||
|
val block = this@map.nextBufferBlocking(size)
|
||||||
|
return DoubleBuffer(size) { transform(block[it]) }
|
||||||
|
}
|
||||||
|
|
||||||
|
override suspend fun fork(): BlockingDoubleChain = this@map.fork().map(transform)
|
||||||
|
}
|
@ -1,9 +1,12 @@
|
|||||||
package space.kscience.kmath.chains
|
package space.kscience.kmath.chains
|
||||||
|
|
||||||
|
import space.kscience.kmath.structures.IntBuffer
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performance optimized chain for integer values
|
* Performance optimized chain for integer values
|
||||||
*/
|
*/
|
||||||
public interface BlockingIntChain : Chain<Int> {
|
public interface BlockingIntChain : BlockingBufferChain<Int> {
|
||||||
public override suspend fun next(): Int
|
override fun nextBufferBlocking(size: Int): IntBuffer
|
||||||
public suspend fun nextBlock(size: Int): IntArray = IntArray(size) { next() }
|
|
||||||
}
|
override suspend fun fork(): BlockingIntChain
|
||||||
|
}
|
@ -24,20 +24,20 @@ import kotlinx.coroutines.sync.withLock
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* A not-necessary-Markov chain of some type
|
* A not-necessary-Markov chain of some type
|
||||||
* @param R - the chain element type
|
* @param T - the chain element type
|
||||||
*/
|
*/
|
||||||
public interface Chain<out R> : Flow<R> {
|
public interface Chain<out T> : Flow<T> {
|
||||||
/**
|
/**
|
||||||
* Generate next value, changing state if needed
|
* Generate next value, changing state if needed
|
||||||
*/
|
*/
|
||||||
public suspend fun next(): R
|
public suspend fun next(): T
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a copy of current chain state. Consuming resulting chain does not affect initial chain
|
* Create a copy of current chain state. Consuming resulting chain does not affect initial chain
|
||||||
*/
|
*/
|
||||||
public fun fork(): Chain<R>
|
public suspend fun fork(): Chain<T>
|
||||||
|
|
||||||
override suspend fun collect(collector: FlowCollector<R>): Unit =
|
override suspend fun collect(collector: FlowCollector<T>): Unit =
|
||||||
flow { while (true) emit(next()) }.collect(collector)
|
flow { while (true) emit(next()) }.collect(collector)
|
||||||
|
|
||||||
public companion object
|
public companion object
|
||||||
@ -51,7 +51,7 @@ public fun <T> Sequence<T>.asChain(): Chain<T> = iterator().asChain()
|
|||||||
*/
|
*/
|
||||||
public class SimpleChain<out R>(private val gen: suspend () -> R) : Chain<R> {
|
public class SimpleChain<out R>(private val gen: suspend () -> R) : Chain<R> {
|
||||||
public override suspend fun next(): R = gen()
|
public override suspend fun next(): R = gen()
|
||||||
public override fun fork(): Chain<R> = this
|
public override suspend fun fork(): Chain<R> = this
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -69,7 +69,7 @@ public class MarkovChain<out R : Any>(private val seed: suspend () -> R, private
|
|||||||
newValue
|
newValue
|
||||||
}
|
}
|
||||||
|
|
||||||
public override fun fork(): Chain<R> = MarkovChain(seed = { value ?: seed() }, gen = gen)
|
public override suspend fun fork(): Chain<R> = MarkovChain(seed = { value ?: seed() }, gen = gen)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -94,7 +94,7 @@ public class StatefulChain<S, out R>(
|
|||||||
newValue
|
newValue
|
||||||
}
|
}
|
||||||
|
|
||||||
public override fun fork(): Chain<R> = StatefulChain(forkState(state), seed, forkState, gen)
|
public override suspend fun fork(): Chain<R> = StatefulChain(forkState(state), seed, forkState, gen)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -102,7 +102,7 @@ public class StatefulChain<S, out R>(
|
|||||||
*/
|
*/
|
||||||
public class ConstantChain<out T>(public val value: T) : Chain<T> {
|
public class ConstantChain<out T>(public val value: T) : Chain<T> {
|
||||||
public override suspend fun next(): T = value
|
public override suspend fun next(): T = value
|
||||||
public override fun fork(): Chain<T> = this
|
public override suspend fun fork(): Chain<T> = this
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -111,7 +111,7 @@ public class ConstantChain<out T>(public val value: T) : Chain<T> {
|
|||||||
*/
|
*/
|
||||||
public fun <T, R> Chain<T>.map(func: suspend (T) -> R): Chain<R> = object : Chain<R> {
|
public fun <T, R> Chain<T>.map(func: suspend (T) -> R): Chain<R> = object : Chain<R> {
|
||||||
override suspend fun next(): R = func(this@map.next())
|
override suspend fun next(): R = func(this@map.next())
|
||||||
override fun fork(): Chain<R> = this@map.fork().map(func)
|
override suspend fun fork(): Chain<R> = this@map.fork().map(func)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -127,7 +127,7 @@ public fun <T> Chain<T>.filter(block: (T) -> Boolean): Chain<T> = object : Chain
|
|||||||
return next
|
return next
|
||||||
}
|
}
|
||||||
|
|
||||||
override fun fork(): Chain<T> = this@filter.fork().filter(block)
|
override suspend fun fork(): Chain<T> = this@filter.fork().filter(block)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -135,7 +135,7 @@ public fun <T> Chain<T>.filter(block: (T) -> Boolean): Chain<T> = object : Chain
|
|||||||
*/
|
*/
|
||||||
public fun <T, R> Chain<T>.collect(mapper: suspend (Chain<T>) -> R): Chain<R> = object : Chain<R> {
|
public fun <T, R> Chain<T>.collect(mapper: suspend (Chain<T>) -> R): Chain<R> = object : Chain<R> {
|
||||||
override suspend fun next(): R = mapper(this@collect)
|
override suspend fun next(): R = mapper(this@collect)
|
||||||
override fun fork(): Chain<R> = this@collect.fork().collect(mapper)
|
override suspend fun fork(): Chain<R> = this@collect.fork().collect(mapper)
|
||||||
}
|
}
|
||||||
|
|
||||||
public fun <T, S, R> Chain<T>.collectWithState(
|
public fun <T, S, R> Chain<T>.collectWithState(
|
||||||
@ -145,7 +145,7 @@ public fun <T, S, R> Chain<T>.collectWithState(
|
|||||||
): Chain<R> = object : Chain<R> {
|
): Chain<R> = object : Chain<R> {
|
||||||
override suspend fun next(): R = state.mapper(this@collectWithState)
|
override suspend fun next(): R = state.mapper(this@collectWithState)
|
||||||
|
|
||||||
override fun fork(): Chain<R> =
|
override suspend fun fork(): Chain<R> =
|
||||||
this@collectWithState.fork().collectWithState(stateFork(state), stateFork, mapper)
|
this@collectWithState.fork().collectWithState(stateFork(state), stateFork, mapper)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -154,5 +154,5 @@ public fun <T, S, R> Chain<T>.collectWithState(
|
|||||||
*/
|
*/
|
||||||
public fun <T, U, R> Chain<T>.zip(other: Chain<U>, block: suspend (T, U) -> R): Chain<R> = object : Chain<R> {
|
public fun <T, U, R> Chain<T>.zip(other: Chain<U>, block: suspend (T, U) -> R): Chain<R> = object : Chain<R> {
|
||||||
override suspend fun next(): R = block(this@zip.next(), other.next())
|
override suspend fun next(): R = block(this@zip.next(), other.next())
|
||||||
override fun fork(): Chain<R> = this@zip.fork().zip(other.fork(), block)
|
override suspend fun fork(): Chain<R> = this@zip.fork().zip(other.fork(), block)
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,6 @@ import space.kscience.kmath.chains.BlockingDoubleChain
|
|||||||
import space.kscience.kmath.structures.Buffer
|
import space.kscience.kmath.structures.Buffer
|
||||||
import space.kscience.kmath.structures.BufferFactory
|
import space.kscience.kmath.structures.BufferFactory
|
||||||
import space.kscience.kmath.structures.DoubleBuffer
|
import space.kscience.kmath.structures.DoubleBuffer
|
||||||
import space.kscience.kmath.structures.asBuffer
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a [Flow] from buffer
|
* Create a [Flow] from buffer
|
||||||
@ -50,7 +49,7 @@ public fun Flow<Double>.chunked(bufferSize: Int): Flow<DoubleBuffer> = flow {
|
|||||||
|
|
||||||
if (this@chunked is BlockingDoubleChain) {
|
if (this@chunked is BlockingDoubleChain) {
|
||||||
// performance optimization for blocking primitive chain
|
// performance optimization for blocking primitive chain
|
||||||
while (true) emit(nextBlock(bufferSize).asBuffer())
|
while (true) emit(nextBufferBlocking(bufferSize))
|
||||||
} else {
|
} else {
|
||||||
val array = DoubleArray(bufferSize)
|
val array = DoubleArray(bufferSize)
|
||||||
var counter = 0
|
var counter = 0
|
||||||
|
@ -2,6 +2,10 @@ plugins {
|
|||||||
id("ru.mipt.npm.gradle.mpp")
|
id("ru.mipt.npm.gradle.mpp")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
kscience{
|
||||||
|
useAtomic()
|
||||||
|
}
|
||||||
|
|
||||||
kotlin.sourceSets {
|
kotlin.sourceSets {
|
||||||
commonMain {
|
commonMain {
|
||||||
dependencies {
|
dependencies {
|
||||||
|
@ -0,0 +1,38 @@
|
|||||||
|
package space.kscience.kmath.distributions
|
||||||
|
|
||||||
|
import space.kscience.kmath.chains.Chain
|
||||||
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
|
import space.kscience.kmath.stat.Sampler
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A distribution of typed objects.
|
||||||
|
*/
|
||||||
|
public interface Distribution<T : Any> : Sampler<T> {
|
||||||
|
/**
|
||||||
|
* A probability value for given argument [arg].
|
||||||
|
* For continuous distributions returns PDF
|
||||||
|
*/
|
||||||
|
public fun probability(arg: T): Double
|
||||||
|
|
||||||
|
public override fun sample(generator: RandomGenerator): Chain<T>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An empty companion. Distribution factories should be written as its extensions
|
||||||
|
*/
|
||||||
|
public companion object
|
||||||
|
}
|
||||||
|
|
||||||
|
public interface UnivariateDistribution<T : Comparable<T>> : Distribution<T> {
|
||||||
|
/**
|
||||||
|
* Cumulative distribution for ordered parameter (CDF)
|
||||||
|
*/
|
||||||
|
public fun cumulative(arg: T): Double
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute probability integral in an interval
|
||||||
|
*/
|
||||||
|
public fun <T : Comparable<T>> UnivariateDistribution<T>.integral(from: T, to: T): Double {
|
||||||
|
require(to > from)
|
||||||
|
return cumulative(to) - cumulative(from)
|
||||||
|
}
|
@ -1,7 +1,8 @@
|
|||||||
package space.kscience.kmath.stat
|
package space.kscience.kmath.distributions
|
||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
import space.kscience.kmath.chains.Chain
|
||||||
import space.kscience.kmath.chains.SimpleChain
|
import space.kscience.kmath.chains.SimpleChain
|
||||||
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A multivariate distribution which takes a map of parameters
|
* A multivariate distribution which takes a map of parameters
|
@ -1,12 +1,11 @@
|
|||||||
package space.kscience.kmath.stat.distributions
|
package space.kscience.kmath.distributions
|
||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
import space.kscience.kmath.chains.Chain
|
||||||
|
import space.kscience.kmath.internal.InternalErf
|
||||||
|
import space.kscience.kmath.samplers.GaussianSampler
|
||||||
|
import space.kscience.kmath.samplers.NormalizedGaussianSampler
|
||||||
|
import space.kscience.kmath.samplers.ZigguratNormalizedGaussianSampler
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
import space.kscience.kmath.stat.UnivariateDistribution
|
|
||||||
import space.kscience.kmath.stat.internal.InternalErf
|
|
||||||
import space.kscience.kmath.stat.samplers.GaussianSampler
|
|
||||||
import space.kscience.kmath.stat.samplers.NormalizedGaussianSampler
|
|
||||||
import space.kscience.kmath.stat.samplers.ZigguratNormalizedGaussianSampler
|
|
||||||
import kotlin.math.*
|
import kotlin.math.*
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -16,8 +15,8 @@ public inline class NormalDistribution(public val sampler: GaussianSampler) : Un
|
|||||||
public constructor(
|
public constructor(
|
||||||
mean: Double,
|
mean: Double,
|
||||||
standardDeviation: Double,
|
standardDeviation: Double,
|
||||||
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of(),
|
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler,
|
||||||
) : this(GaussianSampler.of(mean, standardDeviation, normalized))
|
) : this(GaussianSampler(mean, standardDeviation, normalized))
|
||||||
|
|
||||||
public override fun probability(arg: Double): Double {
|
public override fun probability(arg: Double): Double {
|
||||||
val x1 = (arg - sampler.mean) / sampler.standardDeviation
|
val x1 = (arg - sampler.mean) / sampler.standardDeviation
|
@ -1,4 +1,4 @@
|
|||||||
package space.kscience.kmath.stat.internal
|
package space.kscience.kmath.internal
|
||||||
|
|
||||||
import kotlin.math.abs
|
import kotlin.math.abs
|
||||||
|
|
@ -1,4 +1,4 @@
|
|||||||
package space.kscience.kmath.stat.internal
|
package space.kscience.kmath.internal
|
||||||
|
|
||||||
import kotlin.math.*
|
import kotlin.math.*
|
||||||
|
|
@ -1,4 +1,4 @@
|
|||||||
package space.kscience.kmath.stat.internal
|
package space.kscience.kmath.internal
|
||||||
|
|
||||||
import kotlin.math.ln
|
import kotlin.math.ln
|
||||||
import kotlin.math.min
|
import kotlin.math.min
|
@ -0,0 +1,72 @@
|
|||||||
|
package space.kscience.kmath.samplers
|
||||||
|
|
||||||
|
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||||
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
|
import space.kscience.kmath.stat.Sampler
|
||||||
|
import space.kscience.kmath.structures.DoubleBuffer
|
||||||
|
import kotlin.math.ln
|
||||||
|
import kotlin.math.pow
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html).
|
||||||
|
*
|
||||||
|
* Based on Commons RNG implementation.
|
||||||
|
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html].
|
||||||
|
*/
|
||||||
|
public class AhrensDieterExponentialSampler(public val mean: Double) : Sampler<Double> {
|
||||||
|
|
||||||
|
init {
|
||||||
|
require(mean > 0) { "mean is not strictly positive: $mean" }
|
||||||
|
}
|
||||||
|
|
||||||
|
public override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
|
||||||
|
override fun nextBlocking(): Double {
|
||||||
|
// Step 1:
|
||||||
|
var a = 0.0
|
||||||
|
var u = generator.nextDouble()
|
||||||
|
|
||||||
|
// Step 2 and 3:
|
||||||
|
while (u < 0.5) {
|
||||||
|
a += EXPONENTIAL_SA_QI[0]
|
||||||
|
u *= 2.0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 4 (now u >= 0.5):
|
||||||
|
u += u - 1
|
||||||
|
// Step 5:
|
||||||
|
if (u <= EXPONENTIAL_SA_QI[0]) return mean * (a + u)
|
||||||
|
// Step 6:
|
||||||
|
var i = 0 // Should be 1, be we iterate before it in while using 0.
|
||||||
|
var u2 = generator.nextDouble()
|
||||||
|
var umin = u2
|
||||||
|
|
||||||
|
// Step 7 and 8:
|
||||||
|
do {
|
||||||
|
++i
|
||||||
|
u2 = generator.nextDouble()
|
||||||
|
if (u2 < umin) umin = u2
|
||||||
|
// Step 8:
|
||||||
|
} while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1.
|
||||||
|
|
||||||
|
return mean * (a + umin * EXPONENTIAL_SA_QI[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { nextBlocking() }
|
||||||
|
|
||||||
|
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
|
||||||
|
}
|
||||||
|
|
||||||
|
public companion object {
|
||||||
|
private val EXPONENTIAL_SA_QI by lazy {
|
||||||
|
val ln2 = ln(2.0)
|
||||||
|
var qi = 0.0
|
||||||
|
|
||||||
|
DoubleArray(16) { i ->
|
||||||
|
qi += ln2.pow(i + 1.0) / space.kscience.kmath.internal.InternalUtils.factorial(i + 1)
|
||||||
|
qi
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -1,4 +1,4 @@
|
|||||||
package space.kscience.kmath.stat.samplers
|
package space.kscience.kmath.samplers
|
||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
import space.kscience.kmath.chains.Chain
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
@ -80,7 +80,7 @@ public class AhrensDieterMarsagliaTsangGammaSampler private constructor(
|
|||||||
private val gaussian: NormalizedGaussianSampler
|
private val gaussian: NormalizedGaussianSampler
|
||||||
|
|
||||||
init {
|
init {
|
||||||
gaussian = ZigguratNormalizedGaussianSampler.of()
|
gaussian = ZigguratNormalizedGaussianSampler
|
||||||
dOptim = alpha - ONE_THIRD
|
dOptim = alpha - ONE_THIRD
|
||||||
cOptim = ONE_THIRD / sqrt(dOptim)
|
cOptim = ONE_THIRD / sqrt(dOptim)
|
||||||
}
|
}
|
@ -1,10 +1,10 @@
|
|||||||
package space.kscience.kmath.stat.samplers
|
package space.kscience.kmath.samplers
|
||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
import space.kscience.kmath.chains.Chain
|
||||||
|
import space.kscience.kmath.internal.InternalUtils
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
import space.kscience.kmath.stat.Sampler
|
import space.kscience.kmath.stat.Sampler
|
||||||
import space.kscience.kmath.stat.chain
|
import space.kscience.kmath.stat.chain
|
||||||
import space.kscience.kmath.stat.internal.InternalUtils
|
|
||||||
import kotlin.math.ceil
|
import kotlin.math.ceil
|
||||||
import kotlin.math.max
|
import kotlin.math.max
|
||||||
import kotlin.math.min
|
import kotlin.math.min
|
||||||
@ -39,12 +39,12 @@ import kotlin.math.min
|
|||||||
public open class AliasMethodDiscreteSampler private constructor(
|
public open class AliasMethodDiscreteSampler private constructor(
|
||||||
// Deliberate direct storage of input arrays
|
// Deliberate direct storage of input arrays
|
||||||
protected val probability: LongArray,
|
protected val probability: LongArray,
|
||||||
protected val alias: IntArray
|
protected val alias: IntArray,
|
||||||
) : Sampler<Int> {
|
) : Sampler<Int> {
|
||||||
|
|
||||||
private class SmallTableAliasMethodDiscreteSampler(
|
private class SmallTableAliasMethodDiscreteSampler(
|
||||||
probability: LongArray,
|
probability: LongArray,
|
||||||
alias: IntArray
|
alias: IntArray,
|
||||||
) : AliasMethodDiscreteSampler(probability, alias) {
|
) : AliasMethodDiscreteSampler(probability, alias) {
|
||||||
// Assume the table size is a power of 2 and create the mask
|
// Assume the table size is a power of 2 and create the mask
|
||||||
private val mask: Int = alias.size - 1
|
private val mask: Int = alias.size - 1
|
||||||
@ -111,110 +111,6 @@ public open class AliasMethodDiscreteSampler private constructor(
|
|||||||
private const val CONVERT_TO_NUMERATOR: Double = ONE_AS_NUMERATOR.toDouble()
|
private const val CONVERT_TO_NUMERATOR: Double = ONE_AS_NUMERATOR.toDouble()
|
||||||
private const val MAX_SMALL_POWER_2_SIZE = 1 shl 11
|
private const val MAX_SMALL_POWER_2_SIZE = 1 shl 11
|
||||||
|
|
||||||
public fun of(
|
|
||||||
probabilities: DoubleArray,
|
|
||||||
alpha: Int = DEFAULT_ALPHA
|
|
||||||
): Sampler<Int> {
|
|
||||||
// The Alias method balances N categories with counts around the mean into N sections,
|
|
||||||
// each allocated 'mean' observations.
|
|
||||||
//
|
|
||||||
// Consider 4 categories with counts 6,3,2,1. The histogram can be balanced into a
|
|
||||||
// 2D array as 4 sections with a height of the mean:
|
|
||||||
//
|
|
||||||
// 6
|
|
||||||
// 6
|
|
||||||
// 6
|
|
||||||
// 63 => 6366 --
|
|
||||||
// 632 6326 |-- mean
|
|
||||||
// 6321 6321 --
|
|
||||||
//
|
|
||||||
// section abcd
|
|
||||||
//
|
|
||||||
// Each section is divided as:
|
|
||||||
// a: 6=1/1
|
|
||||||
// b: 3=1/1
|
|
||||||
// c: 2=2/3; 6=1/3 (6 is the alias)
|
|
||||||
// d: 1=1/3; 6=2/3 (6 is the alias)
|
|
||||||
//
|
|
||||||
// The sample is obtained by randomly selecting a section, then choosing which category
|
|
||||||
// from the pair based on a uniform random deviate.
|
|
||||||
val sumProb = InternalUtils.validateProbabilities(probabilities)
|
|
||||||
// Allow zero-padding
|
|
||||||
val n = computeSize(probabilities.size, alpha)
|
|
||||||
// Partition into small and large by splitting on the average.
|
|
||||||
val mean = sumProb / n
|
|
||||||
// The cardinality of smallSize + largeSize = n.
|
|
||||||
// So fill the same array from either end.
|
|
||||||
val indices = IntArray(n)
|
|
||||||
var large = n
|
|
||||||
var small = 0
|
|
||||||
|
|
||||||
probabilities.indices.forEach { i ->
|
|
||||||
if (probabilities[i] >= mean) indices[--large] = i else indices[small++] = i
|
|
||||||
}
|
|
||||||
|
|
||||||
small = fillRemainingIndices(probabilities.size, indices, small)
|
|
||||||
// This may be smaller than the input length if the probabilities were already padded.
|
|
||||||
val nonZeroIndex = findLastNonZeroIndex(probabilities)
|
|
||||||
// The probabilities are modified so use a copy.
|
|
||||||
// Note: probabilities are required only up to last nonZeroIndex
|
|
||||||
val remainingProbabilities = probabilities.copyOf(nonZeroIndex + 1)
|
|
||||||
// Allocate the final tables.
|
|
||||||
// Probability table may be truncated (when zero padded).
|
|
||||||
// The alias table is full length.
|
|
||||||
val probability = LongArray(remainingProbabilities.size)
|
|
||||||
val alias = IntArray(n)
|
|
||||||
|
|
||||||
// This loop uses each large in turn to fill the alias table for small probabilities that
|
|
||||||
// do not reach the requirement to fill an entire section alone (i.e. p < mean).
|
|
||||||
// Since the sum of the small should be less than the sum of the large it should use up
|
|
||||||
// all the small first. However floating point round-off can result in
|
|
||||||
// misclassification of items as small or large. The Vose algorithm handles this using
|
|
||||||
// a while loop conditioned on the size of both sets and a subsequent loop to use
|
|
||||||
// unpaired items.
|
|
||||||
while (large != n && small != 0) {
|
|
||||||
// Index of the small and the large probabilities.
|
|
||||||
val j = indices[--small]
|
|
||||||
val k = indices[large++]
|
|
||||||
|
|
||||||
// Optimisation for zero-padded input:
|
|
||||||
// p(j) = 0 above the last nonZeroIndex
|
|
||||||
if (j > nonZeroIndex)
|
|
||||||
// The entire amount for the section is taken from the alias.
|
|
||||||
remainingProbabilities[k] -= mean
|
|
||||||
else {
|
|
||||||
val pj = remainingProbabilities[j]
|
|
||||||
// Item j is a small probability that is below the mean.
|
|
||||||
// Compute the weight of the section for item j: pj / mean.
|
|
||||||
// This is scaled by 2^53 and the ceiling function used to round-up
|
|
||||||
// the probability to a numerator of a fraction in the range [1,2^53].
|
|
||||||
// Ceiling ensures non-zero values.
|
|
||||||
probability[j] = ceil(CONVERT_TO_NUMERATOR * (pj / mean)).toLong()
|
|
||||||
// The remaining amount for the section is taken from the alias.
|
|
||||||
// Effectively: probabilities[k] -= (mean - pj)
|
|
||||||
remainingProbabilities[k] += pj - mean
|
|
||||||
}
|
|
||||||
|
|
||||||
// If not j then the alias is k
|
|
||||||
alias[j] = k
|
|
||||||
|
|
||||||
// Add the remaining probability from large to the appropriate list.
|
|
||||||
if (remainingProbabilities[k] >= mean) indices[--large] = k else indices[small++] = k
|
|
||||||
}
|
|
||||||
|
|
||||||
// Final loop conditions to consume unpaired items.
|
|
||||||
// Note: The large set should never be non-empty but this can occur due to round-off
|
|
||||||
// error so consume from both.
|
|
||||||
fillTable(probability, alias, indices, 0, small)
|
|
||||||
fillTable(probability, alias, indices, large, n)
|
|
||||||
|
|
||||||
// Change the algorithm for small power of 2 sized tables
|
|
||||||
return if (isSmallPowerOf2(n))
|
|
||||||
SmallTableAliasMethodDiscreteSampler(probability, alias)
|
|
||||||
else
|
|
||||||
AliasMethodDiscreteSampler(probability, alias)
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int {
|
private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int {
|
||||||
var updatedSmall = small
|
var updatedSmall = small
|
||||||
(length until indices.size).forEach { i -> indices[updatedSmall++] = i }
|
(length until indices.size).forEach { i -> indices[updatedSmall++] = i }
|
||||||
@ -246,7 +142,7 @@ public open class AliasMethodDiscreteSampler private constructor(
|
|||||||
alias: IntArray,
|
alias: IntArray,
|
||||||
indices: IntArray,
|
indices: IntArray,
|
||||||
start: Int,
|
start: Int,
|
||||||
end: Int
|
end: Int,
|
||||||
) = (start until end).forEach { i ->
|
) = (start until end).forEach { i ->
|
||||||
val index = indices[i]
|
val index = indices[i]
|
||||||
probability[index] = ONE_AS_NUMERATOR
|
probability[index] = ONE_AS_NUMERATOR
|
||||||
@ -283,4 +179,110 @@ public open class AliasMethodDiscreteSampler private constructor(
|
|||||||
return n - (mutI ushr 1)
|
return n - (mutI ushr 1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Suppress("FunctionName")
|
||||||
|
public fun AliasMethodDiscreteSampler(
|
||||||
|
probabilities: DoubleArray,
|
||||||
|
alpha: Int = DEFAULT_ALPHA,
|
||||||
|
): Sampler<Int> {
|
||||||
|
// The Alias method balances N categories with counts around the mean into N sections,
|
||||||
|
// each allocated 'mean' observations.
|
||||||
|
//
|
||||||
|
// Consider 4 categories with counts 6,3,2,1. The histogram can be balanced into a
|
||||||
|
// 2D array as 4 sections with a height of the mean:
|
||||||
|
//
|
||||||
|
// 6
|
||||||
|
// 6
|
||||||
|
// 6
|
||||||
|
// 63 => 6366 --
|
||||||
|
// 632 6326 |-- mean
|
||||||
|
// 6321 6321 --
|
||||||
|
//
|
||||||
|
// section abcd
|
||||||
|
//
|
||||||
|
// Each section is divided as:
|
||||||
|
// a: 6=1/1
|
||||||
|
// b: 3=1/1
|
||||||
|
// c: 2=2/3; 6=1/3 (6 is the alias)
|
||||||
|
// d: 1=1/3; 6=2/3 (6 is the alias)
|
||||||
|
//
|
||||||
|
// The sample is obtained by randomly selecting a section, then choosing which category
|
||||||
|
// from the pair based on a uniform random deviate.
|
||||||
|
val sumProb = InternalUtils.validateProbabilities(probabilities)
|
||||||
|
// Allow zero-padding
|
||||||
|
val n = computeSize(probabilities.size, alpha)
|
||||||
|
// Partition into small and large by splitting on the average.
|
||||||
|
val mean = sumProb / n
|
||||||
|
// The cardinality of smallSize + largeSize = n.
|
||||||
|
// So fill the same array from either end.
|
||||||
|
val indices = IntArray(n)
|
||||||
|
var large = n
|
||||||
|
var small = 0
|
||||||
|
|
||||||
|
probabilities.indices.forEach { i ->
|
||||||
|
if (probabilities[i] >= mean) indices[--large] = i else indices[small++] = i
|
||||||
|
}
|
||||||
|
|
||||||
|
small = fillRemainingIndices(probabilities.size, indices, small)
|
||||||
|
// This may be smaller than the input length if the probabilities were already padded.
|
||||||
|
val nonZeroIndex = findLastNonZeroIndex(probabilities)
|
||||||
|
// The probabilities are modified so use a copy.
|
||||||
|
// Note: probabilities are required only up to last nonZeroIndex
|
||||||
|
val remainingProbabilities = probabilities.copyOf(nonZeroIndex + 1)
|
||||||
|
// Allocate the final tables.
|
||||||
|
// Probability table may be truncated (when zero padded).
|
||||||
|
// The alias table is full length.
|
||||||
|
val probability = LongArray(remainingProbabilities.size)
|
||||||
|
val alias = IntArray(n)
|
||||||
|
|
||||||
|
// This loop uses each large in turn to fill the alias table for small probabilities that
|
||||||
|
// do not reach the requirement to fill an entire section alone (i.e. p < mean).
|
||||||
|
// Since the sum of the small should be less than the sum of the large it should use up
|
||||||
|
// all the small first. However floating point round-off can result in
|
||||||
|
// misclassification of items as small or large. The Vose algorithm handles this using
|
||||||
|
// a while loop conditioned on the size of both sets and a subsequent loop to use
|
||||||
|
// unpaired items.
|
||||||
|
while (large != n && small != 0) {
|
||||||
|
// Index of the small and the large probabilities.
|
||||||
|
val j = indices[--small]
|
||||||
|
val k = indices[large++]
|
||||||
|
|
||||||
|
// Optimisation for zero-padded input:
|
||||||
|
// p(j) = 0 above the last nonZeroIndex
|
||||||
|
if (j > nonZeroIndex)
|
||||||
|
// The entire amount for the section is taken from the alias.
|
||||||
|
remainingProbabilities[k] -= mean
|
||||||
|
else {
|
||||||
|
val pj = remainingProbabilities[j]
|
||||||
|
// Item j is a small probability that is below the mean.
|
||||||
|
// Compute the weight of the section for item j: pj / mean.
|
||||||
|
// This is scaled by 2^53 and the ceiling function used to round-up
|
||||||
|
// the probability to a numerator of a fraction in the range [1,2^53].
|
||||||
|
// Ceiling ensures non-zero values.
|
||||||
|
probability[j] = ceil(CONVERT_TO_NUMERATOR * (pj / mean)).toLong()
|
||||||
|
// The remaining amount for the section is taken from the alias.
|
||||||
|
// Effectively: probabilities[k] -= (mean - pj)
|
||||||
|
remainingProbabilities[k] += pj - mean
|
||||||
|
}
|
||||||
|
|
||||||
|
// If not j then the alias is k
|
||||||
|
alias[j] = k
|
||||||
|
|
||||||
|
// Add the remaining probability from large to the appropriate list.
|
||||||
|
if (remainingProbabilities[k] >= mean) indices[--large] = k else indices[small++] = k
|
||||||
|
}
|
||||||
|
|
||||||
|
// Final loop conditions to consume unpaired items.
|
||||||
|
// Note: The large set should never be non-empty but this can occur due to round-off
|
||||||
|
// error so consume from both.
|
||||||
|
fillTable(probability, alias, indices, 0, small)
|
||||||
|
fillTable(probability, alias, indices, large, n)
|
||||||
|
|
||||||
|
// Change the algorithm for small power of 2 sized tables
|
||||||
|
return if (isSmallPowerOf2(n)) {
|
||||||
|
SmallTableAliasMethodDiscreteSampler(probability, alias)
|
||||||
|
} else {
|
||||||
|
AliasMethodDiscreteSampler(probability, alias)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
@ -0,0 +1,52 @@
|
|||||||
|
package space.kscience.kmath.samplers
|
||||||
|
|
||||||
|
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||||
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
|
import space.kscience.kmath.structures.DoubleBuffer
|
||||||
|
import kotlin.math.*
|
||||||
|
|
||||||
|
/**
|
||||||
|
* [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* Based on Commons RNG implementation.
|
||||||
|
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html].
|
||||||
|
*/
|
||||||
|
|
||||||
|
public object BoxMullerSampler : NormalizedGaussianSampler {
|
||||||
|
override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
|
||||||
|
var state = Double.NaN
|
||||||
|
|
||||||
|
override fun nextBufferBlocking(size: Int): DoubleBuffer {
|
||||||
|
val xs = generator.nextDoubleBuffer(size)
|
||||||
|
val ys = generator.nextDoubleBuffer(size)
|
||||||
|
|
||||||
|
return DoubleBuffer(size) { index ->
|
||||||
|
if (state.isNaN()) {
|
||||||
|
// Generate a pair of Gaussian numbers.
|
||||||
|
val x = xs[index]
|
||||||
|
val y = ys[index]
|
||||||
|
val alpha = 2 * PI * x
|
||||||
|
val r = sqrt(-2 * ln(y))
|
||||||
|
|
||||||
|
// Keep second element of the pair for next invocation.
|
||||||
|
state = r * sin(alpha)
|
||||||
|
|
||||||
|
// Return the first element of the generated pair.
|
||||||
|
r * cos(alpha)
|
||||||
|
} else {
|
||||||
|
// Use the second element of the pair (generated at the
|
||||||
|
// previous invocation).
|
||||||
|
state.also {
|
||||||
|
// Both elements of the pair have been used.
|
||||||
|
state = Double.NaN
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,13 @@
|
|||||||
|
package space.kscience.kmath.samplers
|
||||||
|
|
||||||
|
import space.kscience.kmath.chains.BlockingBufferChain
|
||||||
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
|
import space.kscience.kmath.stat.Sampler
|
||||||
|
import space.kscience.kmath.structures.Buffer
|
||||||
|
|
||||||
|
public class ConstantSampler<T : Any>(public val const: T) : Sampler<T> {
|
||||||
|
override fun sample(generator: RandomGenerator): BlockingBufferChain<T> = object : BlockingBufferChain<T> {
|
||||||
|
override fun nextBufferBlocking(size: Int): Buffer<T> = Buffer.boxing(size) { const }
|
||||||
|
override suspend fun fork(): BlockingBufferChain<T> = this
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,34 @@
|
|||||||
|
package space.kscience.kmath.samplers
|
||||||
|
|
||||||
|
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||||
|
import space.kscience.kmath.chains.map
|
||||||
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
|
import space.kscience.kmath.stat.Sampler
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sampling from a Gaussian distribution with given mean and standard deviation.
|
||||||
|
*
|
||||||
|
* Based on Commons RNG implementation.
|
||||||
|
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html].
|
||||||
|
*
|
||||||
|
* @property mean the mean of the distribution.
|
||||||
|
* @property standardDeviation the variance of the distribution.
|
||||||
|
*/
|
||||||
|
public class GaussianSampler(
|
||||||
|
public val mean: Double,
|
||||||
|
public val standardDeviation: Double,
|
||||||
|
private val normalized: NormalizedGaussianSampler = BoxMullerSampler
|
||||||
|
) : Sampler<Double> {
|
||||||
|
|
||||||
|
init {
|
||||||
|
require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" }
|
||||||
|
}
|
||||||
|
|
||||||
|
public override fun sample(generator: RandomGenerator): BlockingDoubleChain = normalized
|
||||||
|
.sample(generator)
|
||||||
|
.map { standardDeviation * it + mean }
|
||||||
|
|
||||||
|
override fun toString(): String = "N($mean, $standardDeviation)"
|
||||||
|
|
||||||
|
public companion object
|
||||||
|
}
|
@ -0,0 +1,68 @@
|
|||||||
|
package space.kscience.kmath.samplers
|
||||||
|
|
||||||
|
import space.kscience.kmath.chains.BlockingIntChain
|
||||||
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
|
import space.kscience.kmath.stat.Sampler
|
||||||
|
import space.kscience.kmath.structures.IntBuffer
|
||||||
|
import kotlin.math.exp
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sampler for the Poisson distribution.
|
||||||
|
* - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
|
||||||
|
* This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead.
|
||||||
|
*
|
||||||
|
* Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor.
|
||||||
|
*
|
||||||
|
* Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double.
|
||||||
|
*
|
||||||
|
* Based on Commons RNG implementation.
|
||||||
|
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html].
|
||||||
|
*/
|
||||||
|
public class KempSmallMeanPoissonSampler internal constructor(
|
||||||
|
private val p0: Double,
|
||||||
|
private val mean: Double,
|
||||||
|
) : Sampler<Int> {
|
||||||
|
public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain {
|
||||||
|
override fun nextBlocking(): Int {
|
||||||
|
//TODO move to nextBufferBlocking
|
||||||
|
// Note on the algorithm:
|
||||||
|
// - X is the unknown sample deviate (the output of the algorithm)
|
||||||
|
// - x is the current value from the distribution
|
||||||
|
// - p is the probability of the current value x, p(X=x)
|
||||||
|
// - u is effectively the cumulative probability that the sample X
|
||||||
|
// is equal or above the current value x, p(X>=x)
|
||||||
|
// So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
|
||||||
|
var u = generator.nextDouble()
|
||||||
|
var x = 0
|
||||||
|
var p = p0
|
||||||
|
|
||||||
|
while (u > p) {
|
||||||
|
u -= p
|
||||||
|
// Compute the next probability using a recurrence relation.
|
||||||
|
// p(x+1) = p(x) * mean / (x+1)
|
||||||
|
p *= mean / ++x
|
||||||
|
// The algorithm listed in Kemp (1981) does not check that the rolling probability
|
||||||
|
// is positive. This check is added to ensure no errors when the limit of the summation
|
||||||
|
// 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic.
|
||||||
|
if (p == 0.0) return x
|
||||||
|
}
|
||||||
|
|
||||||
|
return x
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() }
|
||||||
|
|
||||||
|
override suspend fun fork(): BlockingIntChain = sample(generator.fork())
|
||||||
|
}
|
||||||
|
|
||||||
|
public override fun toString(): String = "Kemp Small Mean Poisson deviate"
|
||||||
|
}
|
||||||
|
|
||||||
|
public fun KempSmallMeanPoissonSampler(mean: Double): KempSmallMeanPoissonSampler {
|
||||||
|
require(mean > 0) { "Mean is not strictly positive: $mean" }
|
||||||
|
val p0 = exp(-mean)
|
||||||
|
// Probability must be positive. As mean increases then p(0) decreases.
|
||||||
|
require(p0 > 0) { "No probability for mean: $mean" }
|
||||||
|
return KempSmallMeanPoissonSampler(p0, mean)
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,61 @@
|
|||||||
|
package space.kscience.kmath.samplers
|
||||||
|
|
||||||
|
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||||
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
|
import space.kscience.kmath.structures.DoubleBuffer
|
||||||
|
import kotlin.math.ln
|
||||||
|
import kotlin.math.sqrt
|
||||||
|
|
||||||
|
/**
|
||||||
|
* [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian
|
||||||
|
* distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in
|
||||||
|
* [BoxMullerNormalizedGaussianSampler].
|
||||||
|
*
|
||||||
|
* Based on Commons RNG implementation.
|
||||||
|
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html]
|
||||||
|
*/
|
||||||
|
public object MarsagliaNormalizedGaussianSampler : NormalizedGaussianSampler {
|
||||||
|
|
||||||
|
override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
|
||||||
|
var nextGaussian = Double.NaN
|
||||||
|
|
||||||
|
override fun nextBlocking(): Double {
|
||||||
|
return if (nextGaussian.isNaN()) {
|
||||||
|
val alpha: Double
|
||||||
|
var x: Double
|
||||||
|
|
||||||
|
// Rejection scheme for selecting a pair that lies within the unit circle.
|
||||||
|
while (true) {
|
||||||
|
// Generate a pair of numbers within [-1 , 1).
|
||||||
|
x = 2.0 * generator.nextDouble() - 1.0
|
||||||
|
val y = 2.0 * generator.nextDouble() - 1.0
|
||||||
|
val r2 = x * x + y * y
|
||||||
|
|
||||||
|
if (r2 < 1 && r2 > 0) {
|
||||||
|
// Pair (x, y) is within unit circle.
|
||||||
|
alpha = sqrt(-2 * ln(r2) / r2)
|
||||||
|
// Keep second element of the pair for next invocation.
|
||||||
|
nextGaussian = alpha * y
|
||||||
|
// Return the first element of the generated pair.
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// Pair is not within the unit circle: Generate another one.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the first element of the generated pair.
|
||||||
|
alpha * x
|
||||||
|
} else {
|
||||||
|
// Use the second element of the pair (generated at the
|
||||||
|
// previous invocation).
|
||||||
|
val r = nextGaussian
|
||||||
|
// Both elements of the pair have been used.
|
||||||
|
nextGaussian = Double.NaN
|
||||||
|
r
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { nextBlocking() }
|
||||||
|
|
||||||
|
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,18 @@
|
|||||||
|
package space.kscience.kmath.samplers
|
||||||
|
|
||||||
|
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||||
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
|
import space.kscience.kmath.stat.Sampler
|
||||||
|
|
||||||
|
public interface BlockingDoubleSampler: Sampler<Double>{
|
||||||
|
override fun sample(generator: RandomGenerator): BlockingDoubleChain
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Marker interface for a sampler that generates values from an N(0,1)
|
||||||
|
* [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution).
|
||||||
|
*/
|
||||||
|
public fun interface NormalizedGaussianSampler : BlockingDoubleSampler{
|
||||||
|
public companion object
|
||||||
|
}
|
@ -0,0 +1,203 @@
|
|||||||
|
package space.kscience.kmath.samplers
|
||||||
|
|
||||||
|
import space.kscience.kmath.chains.BlockingIntChain
|
||||||
|
import space.kscience.kmath.internal.InternalUtils
|
||||||
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
|
import space.kscience.kmath.stat.Sampler
|
||||||
|
import space.kscience.kmath.structures.IntBuffer
|
||||||
|
import kotlin.math.*
|
||||||
|
|
||||||
|
|
||||||
|
private const val PIVOT = 40.0
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sampler for the Poisson distribution.
|
||||||
|
* - For small means, a Poisson process is simulated using uniform deviates, as described in
|
||||||
|
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3
|
||||||
|
* Important integer-valued distributions: The Poisson distribution. Addison Wesley.
|
||||||
|
* The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
|
||||||
|
* - For large means, we use the rejection algorithm described in
|
||||||
|
* Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207.
|
||||||
|
*
|
||||||
|
* Based on Commons RNG implementation.
|
||||||
|
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html].
|
||||||
|
*/
|
||||||
|
@Suppress("FunctionName")
|
||||||
|
public fun PoissonSampler(mean: Double): Sampler<Int> {
|
||||||
|
return if (mean < PIVOT) SmallMeanPoissonSampler(mean) else LargeMeanPoissonSampler(mean)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sampler for the Poisson distribution.
|
||||||
|
* - For small means, a Poisson process is simulated using uniform deviates, as described in
|
||||||
|
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important
|
||||||
|
* integer-valued distributions: The Poisson distribution. Addison Wesley.
|
||||||
|
* - The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
|
||||||
|
* This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead.
|
||||||
|
*
|
||||||
|
* Based on Commons RNG implementation.
|
||||||
|
*
|
||||||
|
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html].
|
||||||
|
*/
|
||||||
|
public class SmallMeanPoissonSampler(public val mean: Double) : Sampler<Int> {
|
||||||
|
|
||||||
|
init {
|
||||||
|
require(mean > 0) { "mean is not strictly positive: $mean" }
|
||||||
|
}
|
||||||
|
|
||||||
|
private val p0: Double = exp(-mean)
|
||||||
|
|
||||||
|
private val limit: Int = if (p0 > 0) {
|
||||||
|
ceil(1000 * mean)
|
||||||
|
} else {
|
||||||
|
throw IllegalArgumentException("No p(x=0) probability for mean: $mean")
|
||||||
|
}.toInt()
|
||||||
|
|
||||||
|
public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain {
|
||||||
|
override fun nextBlocking(): Int {
|
||||||
|
var n = 0
|
||||||
|
var r = 1.0
|
||||||
|
|
||||||
|
while (n < limit) {
|
||||||
|
r *= generator.nextDouble()
|
||||||
|
if (r >= p0) n++ else break
|
||||||
|
}
|
||||||
|
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() }
|
||||||
|
|
||||||
|
override suspend fun fork(): BlockingIntChain = sample(generator.fork())
|
||||||
|
}
|
||||||
|
|
||||||
|
public override fun toString(): String = "Small Mean Poisson deviate"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sampler for the Poisson distribution.
|
||||||
|
* - For large means, we use the rejection algorithm described in
|
||||||
|
* Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables
|
||||||
|
* Computing vol. 26 pp. 197-207.
|
||||||
|
*
|
||||||
|
* This sampler is suitable for mean >= 40.
|
||||||
|
*
|
||||||
|
* Based on Commons RNG implementation.
|
||||||
|
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html].
|
||||||
|
*/
|
||||||
|
public class LargeMeanPoissonSampler(public val mean: Double) : Sampler<Int> {
|
||||||
|
|
||||||
|
init {
|
||||||
|
require(mean >= 1) { "mean is not >= 1: $mean" }
|
||||||
|
// The algorithm is not valid if Math.floor(mean) is not an integer.
|
||||||
|
require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" }
|
||||||
|
}
|
||||||
|
|
||||||
|
private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG
|
||||||
|
private val lambda: Double = floor(mean)
|
||||||
|
private val logLambda: Double = ln(lambda)
|
||||||
|
private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt())
|
||||||
|
private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1))
|
||||||
|
private val halfDelta: Double = delta / 2
|
||||||
|
private val twolpd: Double = 2 * lambda + delta
|
||||||
|
private val c1: Double = 1 / (8 * lambda)
|
||||||
|
private val a1: Double = sqrt(PI * twolpd) * exp(c1)
|
||||||
|
private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd)
|
||||||
|
private val aSum: Double = a1 + a2 + 1
|
||||||
|
private val p1: Double = a1 / aSum
|
||||||
|
private val p2: Double = a2 / aSum
|
||||||
|
|
||||||
|
public override fun sample(generator: RandomGenerator): BlockingIntChain = object : BlockingIntChain {
|
||||||
|
override fun nextBlocking(): Int {
|
||||||
|
val exponential = AhrensDieterExponentialSampler(1.0).sample(generator)
|
||||||
|
val gaussian = ZigguratNormalizedGaussianSampler.sample(generator)
|
||||||
|
|
||||||
|
val smallMeanPoissonSampler = if (mean - lambda < Double.MIN_VALUE) {
|
||||||
|
null
|
||||||
|
} else {
|
||||||
|
KempSmallMeanPoissonSampler(mean - lambda).sample(generator)
|
||||||
|
}
|
||||||
|
|
||||||
|
val y2 = smallMeanPoissonSampler?.nextBlocking() ?: 0
|
||||||
|
var x: Double
|
||||||
|
var y: Double
|
||||||
|
var v: Double
|
||||||
|
var a: Int
|
||||||
|
var t: Double
|
||||||
|
var qr: Double
|
||||||
|
var qa: Double
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
// Step 1:
|
||||||
|
val u = generator.nextDouble()
|
||||||
|
|
||||||
|
if (u <= p1) {
|
||||||
|
// Step 2:
|
||||||
|
val n = gaussian.nextBlocking()
|
||||||
|
x = n * sqrt(lambda + halfDelta) - 0.5
|
||||||
|
if (x > delta || x < -lambda) continue
|
||||||
|
y = if (x < 0) floor(x) else ceil(x)
|
||||||
|
val e = exponential.nextBlocking()
|
||||||
|
v = -e - 0.5 * n * n + c1
|
||||||
|
} else {
|
||||||
|
// Step 3:
|
||||||
|
if (u > p1 + p2) {
|
||||||
|
y = lambda
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
x = delta + twolpd / delta * exponential.nextBlocking()
|
||||||
|
y = ceil(x)
|
||||||
|
v = -exponential.nextBlocking() - delta * (x + 1) / twolpd
|
||||||
|
}
|
||||||
|
|
||||||
|
// The Squeeze Principle
|
||||||
|
// Step 4.1:
|
||||||
|
a = if (x < 0) 1 else 0
|
||||||
|
t = y * (y + 1) / (2 * lambda)
|
||||||
|
|
||||||
|
// Step 4.2
|
||||||
|
if (v < -t && a == 0) {
|
||||||
|
y += lambda
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 4.3:
|
||||||
|
qr = t * ((2 * y + 1) / (6 * lambda) - 1)
|
||||||
|
qa = qr - t * t / (3 * (lambda + a * (y + 1)))
|
||||||
|
|
||||||
|
// Step 4.4:
|
||||||
|
if (v < qa) {
|
||||||
|
y += lambda
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 4.5:
|
||||||
|
if (v > qr) continue
|
||||||
|
|
||||||
|
// Step 4.6:
|
||||||
|
if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) {
|
||||||
|
y += lambda
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt()
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun nextBufferBlocking(size: Int): IntBuffer = IntBuffer(size) { nextBlocking() }
|
||||||
|
|
||||||
|
override suspend fun fork(): BlockingIntChain = sample(generator.fork())
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun getFactorialLog(n: Int): Double = factorialLog.value(n)
|
||||||
|
public override fun toString(): String = "Large Mean Poisson deviate"
|
||||||
|
|
||||||
|
public companion object {
|
||||||
|
private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE
|
||||||
|
private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,88 @@
|
|||||||
|
package space.kscience.kmath.samplers
|
||||||
|
|
||||||
|
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||||
|
import space.kscience.kmath.stat.RandomGenerator
|
||||||
|
import space.kscience.kmath.structures.DoubleBuffer
|
||||||
|
import kotlin.math.*
|
||||||
|
|
||||||
|
/**
|
||||||
|
* [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a
|
||||||
|
* Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this
|
||||||
|
* implementation has been adapted from the C code provided therein.
|
||||||
|
*
|
||||||
|
* Based on Commons RNG implementation.
|
||||||
|
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html].
|
||||||
|
*/
|
||||||
|
public object ZigguratNormalizedGaussianSampler : NormalizedGaussianSampler {
|
||||||
|
|
||||||
|
private const val R: Double = 3.442619855899
|
||||||
|
private const val ONE_OVER_R: Double = 1 / R
|
||||||
|
private const val V: Double = 9.91256303526217e-3
|
||||||
|
private val MAX: Double = 2.0.pow(63.0)
|
||||||
|
private val ONE_OVER_MAX: Double = 1.0 / MAX
|
||||||
|
private const val LEN: Int = 128
|
||||||
|
private const val LAST: Int = LEN - 1
|
||||||
|
private val K: LongArray = LongArray(LEN)
|
||||||
|
private val W: DoubleArray = DoubleArray(LEN)
|
||||||
|
private val F: DoubleArray = DoubleArray(LEN)
|
||||||
|
|
||||||
|
init {
|
||||||
|
// Filling the tables.
|
||||||
|
var d = R
|
||||||
|
var t = d
|
||||||
|
var fd = gauss(d)
|
||||||
|
val q = V / fd
|
||||||
|
K[0] = (d / q * MAX).toLong()
|
||||||
|
K[1] = 0
|
||||||
|
W[0] = q * ONE_OVER_MAX
|
||||||
|
W[LAST] = d * ONE_OVER_MAX
|
||||||
|
F[0] = 1.0
|
||||||
|
F[LAST] = fd
|
||||||
|
|
||||||
|
(LAST - 1 downTo 1).forEach { i ->
|
||||||
|
d = sqrt(-2 * ln(V / d + fd))
|
||||||
|
fd = gauss(d)
|
||||||
|
K[i + 1] = (d / t * MAX).toLong()
|
||||||
|
t = d
|
||||||
|
F[i] = fd
|
||||||
|
W[i] = d * ONE_OVER_MAX
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun gauss(x: Double): Double = exp(-0.5 * x * x)
|
||||||
|
|
||||||
|
private fun sampleOne(generator: RandomGenerator): Double {
|
||||||
|
val j = generator.nextLong()
|
||||||
|
val i = (j and LAST.toLong()).toInt()
|
||||||
|
return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i)
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun sample(generator: RandomGenerator): BlockingDoubleChain = object : BlockingDoubleChain {
|
||||||
|
override fun nextBufferBlocking(size: Int): DoubleBuffer = DoubleBuffer(size) { sampleOne(generator) }
|
||||||
|
|
||||||
|
override suspend fun fork(): BlockingDoubleChain = sample(generator.fork())
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double {
|
||||||
|
var x = hz * W[iz]
|
||||||
|
|
||||||
|
return when {
|
||||||
|
iz == 0 -> {
|
||||||
|
var y: Double
|
||||||
|
|
||||||
|
do {
|
||||||
|
y = -ln(generator.nextDouble())
|
||||||
|
x = -ln(generator.nextDouble()) * ONE_OVER_R
|
||||||
|
} while (y + y < x * x)
|
||||||
|
|
||||||
|
val out = R + x
|
||||||
|
if (hz > 0) out else -out
|
||||||
|
}
|
||||||
|
|
||||||
|
F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x
|
||||||
|
else -> sampleOne(generator)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,8 +1,8 @@
|
|||||||
package space.kscience.kmath.stat
|
package space.kscience.kmath.stat
|
||||||
|
|
||||||
import space.kscience.kmath.chains.BlockingDoubleChain
|
import space.kscience.kmath.chains.BlockingDoubleChain
|
||||||
import space.kscience.kmath.chains.BlockingIntChain
|
|
||||||
import space.kscience.kmath.chains.Chain
|
import space.kscience.kmath.chains.Chain
|
||||||
|
import space.kscience.kmath.structures.DoubleBuffer
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A possibly stateful chain producing random values.
|
* A possibly stateful chain producing random values.
|
||||||
@ -11,12 +11,24 @@ import space.kscience.kmath.chains.Chain
|
|||||||
*/
|
*/
|
||||||
public class RandomChain<out R>(
|
public class RandomChain<out R>(
|
||||||
public val generator: RandomGenerator,
|
public val generator: RandomGenerator,
|
||||||
private val gen: suspend RandomGenerator.() -> R
|
private val gen: suspend RandomGenerator.() -> R,
|
||||||
) : Chain<R> {
|
) : Chain<R> {
|
||||||
override suspend fun next(): R = generator.gen()
|
override suspend fun next(): R = generator.gen()
|
||||||
override fun fork(): Chain<R> = RandomChain(generator.fork(), gen)
|
override suspend fun fork(): Chain<R> = RandomChain(generator.fork(), gen)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a generic random chain with provided [generator]
|
||||||
|
*/
|
||||||
|
public fun <R> RandomGenerator.chain(generator: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, generator)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A type-specific double chunk random chain
|
||||||
|
*/
|
||||||
|
public class UniformDoubleChain(public val generator: RandomGenerator) : BlockingDoubleChain {
|
||||||
|
public override fun nextBufferBlocking(size: Int): DoubleBuffer = generator.nextDoubleBuffer(size)
|
||||||
|
override suspend fun nextBuffer(size: Int): DoubleBuffer = nextBufferBlocking(size)
|
||||||
|
|
||||||
|
override suspend fun fork(): UniformDoubleChain = UniformDoubleChain(generator.fork())
|
||||||
}
|
}
|
||||||
|
|
||||||
public fun <R> RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, gen)
|
|
||||||
public fun Chain<Double>.blocking(): BlockingDoubleChain = object : Chain<Double> by this, BlockingDoubleChain {}
|
|
||||||
public fun Chain<Int>.blocking(): BlockingIntChain = object : Chain<Int> by this, BlockingIntChain {}
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
package space.kscience.kmath.stat
|
package space.kscience.kmath.stat
|
||||||
|
|
||||||
|
import space.kscience.kmath.structures.DoubleBuffer
|
||||||
import kotlin.random.Random
|
import kotlin.random.Random
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -16,6 +17,11 @@ public interface RandomGenerator {
|
|||||||
*/
|
*/
|
||||||
public fun nextDouble(): Double
|
public fun nextDouble(): Double
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A chunk of doubles of given [size]
|
||||||
|
*/
|
||||||
|
public fun nextDoubleBuffer(size: Int): DoubleBuffer = DoubleBuffer(size) { nextDouble() }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the next random `Int` from the random number generator.
|
* Gets the next random `Int` from the random number generator.
|
||||||
*
|
*
|
||||||
|
@ -3,16 +3,13 @@ package space.kscience.kmath.stat
|
|||||||
import kotlinx.coroutines.flow.first
|
import kotlinx.coroutines.flow.first
|
||||||
import space.kscience.kmath.chains.Chain
|
import space.kscience.kmath.chains.Chain
|
||||||
import space.kscience.kmath.chains.collect
|
import space.kscience.kmath.chains.collect
|
||||||
import space.kscience.kmath.structures.Buffer
|
import space.kscience.kmath.structures.*
|
||||||
import space.kscience.kmath.structures.BufferFactory
|
|
||||||
import space.kscience.kmath.structures.IntBuffer
|
|
||||||
import space.kscience.kmath.structures.MutableBuffer
|
|
||||||
import kotlin.jvm.JvmName
|
import kotlin.jvm.JvmName
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sampler that generates chains of values of type [T].
|
* Sampler that generates chains of values of type [T] in a chain of type [C].
|
||||||
*/
|
*/
|
||||||
public fun interface Sampler<T : Any> {
|
public fun interface Sampler<out T : Any> {
|
||||||
/**
|
/**
|
||||||
* Generates a chain of samples.
|
* Generates a chain of samples.
|
||||||
*
|
*
|
||||||
@ -22,39 +19,6 @@ public fun interface Sampler<T : Any> {
|
|||||||
public fun sample(generator: RandomGenerator): Chain<T>
|
public fun sample(generator: RandomGenerator): Chain<T>
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* A distribution of typed objects.
|
|
||||||
*/
|
|
||||||
public interface Distribution<T : Any> : Sampler<T> {
|
|
||||||
/**
|
|
||||||
* A probability value for given argument [arg].
|
|
||||||
* For continuous distributions returns PDF
|
|
||||||
*/
|
|
||||||
public fun probability(arg: T): Double
|
|
||||||
|
|
||||||
public override fun sample(generator: RandomGenerator): Chain<T>
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An empty companion. Distribution factories should be written as its extensions
|
|
||||||
*/
|
|
||||||
public companion object
|
|
||||||
}
|
|
||||||
|
|
||||||
public interface UnivariateDistribution<T : Comparable<T>> : Distribution<T> {
|
|
||||||
/**
|
|
||||||
* Cumulative distribution for ordered parameter (CDF)
|
|
||||||
*/
|
|
||||||
public fun cumulative(arg: T): Double
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute probability integral in an interval
|
|
||||||
*/
|
|
||||||
public fun <T : Comparable<T>> UnivariateDistribution<T>.integral(from: T, to: T): Double {
|
|
||||||
require(to > from)
|
|
||||||
return cumulative(to) - cumulative(from)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sample a bunch of values
|
* Sample a bunch of values
|
||||||
*/
|
*/
|
||||||
@ -71,7 +35,7 @@ public fun <T : Any> Sampler<T>.sampleBuffer(
|
|||||||
//clear list from previous run
|
//clear list from previous run
|
||||||
tmp.clear()
|
tmp.clear()
|
||||||
//Fill list
|
//Fill list
|
||||||
repeat(size) { tmp += chain.next() }
|
repeat(size) { tmp.add(chain.next()) }
|
||||||
//return new buffer with elements from tmp
|
//return new buffer with elements from tmp
|
||||||
bufferFactory(size) { tmp[it] }
|
bufferFactory(size) { tmp[it] }
|
||||||
}
|
}
|
||||||
@ -87,7 +51,7 @@ public suspend fun <T : Any> Sampler<T>.next(generator: RandomGenerator): T = sa
|
|||||||
*/
|
*/
|
||||||
@JvmName("sampleRealBuffer")
|
@JvmName("sampleRealBuffer")
|
||||||
public fun Sampler<Double>.sampleBuffer(generator: RandomGenerator, size: Int): Chain<Buffer<Double>> =
|
public fun Sampler<Double>.sampleBuffer(generator: RandomGenerator, size: Int): Chain<Buffer<Double>> =
|
||||||
sampleBuffer(generator, size, MutableBuffer.Companion::double)
|
sampleBuffer(generator, size, ::DoubleBuffer)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates [size] integer samples and chunks them into some buffers.
|
* Generates [size] integer samples and chunks them into some buffers.
|
@ -81,7 +81,7 @@ public class Mean<T>(
|
|||||||
|
|
||||||
public companion object {
|
public companion object {
|
||||||
//TODO replace with optimized version which respects overflow
|
//TODO replace with optimized version which respects overflow
|
||||||
public val real: Mean<Double> = Mean(DoubleField) { sum, count -> sum / count }
|
public val double: Mean<Double> = Mean(DoubleField) { sum, count -> sum / count }
|
||||||
public val int: Mean<Int> = Mean(IntRing) { sum, count -> sum / count }
|
public val int: Mean<Int> = Mean(IntRing) { sum, count -> sum / count }
|
||||||
public val long: Mean<Long> = Mean(LongRing) { sum, count -> sum / count }
|
public val long: Mean<Long> = Mean(LongRing) { sum, count -> sum / count }
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,8 @@ package space.kscience.kmath.stat
|
|||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
import space.kscience.kmath.chains.Chain
|
||||||
import space.kscience.kmath.chains.SimpleChain
|
import space.kscience.kmath.chains.SimpleChain
|
||||||
|
import space.kscience.kmath.distributions.Distribution
|
||||||
|
import space.kscience.kmath.distributions.UnivariateDistribution
|
||||||
|
|
||||||
public class UniformDistribution(public val range: ClosedFloatingPointRange<Double>) : UnivariateDistribution<Double> {
|
public class UniformDistribution(public val range: ClosedFloatingPointRange<Double>) : UnivariateDistribution<Double> {
|
||||||
private val length: Double = range.endInclusive - range.start
|
private val length: Double = range.endInclusive - range.start
|
||||||
|
@ -1,73 +0,0 @@
|
|||||||
package space.kscience.kmath.stat.samplers
|
|
||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
|
||||||
import space.kscience.kmath.stat.Sampler
|
|
||||||
import space.kscience.kmath.stat.chain
|
|
||||||
import space.kscience.kmath.stat.internal.InternalUtils
|
|
||||||
import kotlin.math.ln
|
|
||||||
import kotlin.math.pow
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html).
|
|
||||||
*
|
|
||||||
* Based on Commons RNG implementation.
|
|
||||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html].
|
|
||||||
*/
|
|
||||||
public class AhrensDieterExponentialSampler private constructor(public val mean: Double) : Sampler<Double> {
|
|
||||||
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
|
|
||||||
// Step 1:
|
|
||||||
var a = 0.0
|
|
||||||
var u = nextDouble()
|
|
||||||
|
|
||||||
// Step 2 and 3:
|
|
||||||
while (u < 0.5) {
|
|
||||||
a += EXPONENTIAL_SA_QI[0]
|
|
||||||
u *= 2.0
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 4 (now u >= 0.5):
|
|
||||||
u += u - 1
|
|
||||||
// Step 5:
|
|
||||||
if (u <= EXPONENTIAL_SA_QI[0]) return@chain mean * (a + u)
|
|
||||||
// Step 6:
|
|
||||||
var i = 0 // Should be 1, be we iterate before it in while using 0.
|
|
||||||
var u2 = nextDouble()
|
|
||||||
var umin = u2
|
|
||||||
|
|
||||||
// Step 7 and 8:
|
|
||||||
do {
|
|
||||||
++i
|
|
||||||
u2 = nextDouble()
|
|
||||||
if (u2 < umin) umin = u2
|
|
||||||
// Step 8:
|
|
||||||
} while (u > EXPONENTIAL_SA_QI[i]) // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1.
|
|
||||||
|
|
||||||
mean * (a + umin * EXPONENTIAL_SA_QI[0])
|
|
||||||
}
|
|
||||||
|
|
||||||
override fun toString(): String = "Ahrens-Dieter Exponential deviate"
|
|
||||||
|
|
||||||
public companion object {
|
|
||||||
private val EXPONENTIAL_SA_QI by lazy { DoubleArray(16) }
|
|
||||||
|
|
||||||
init {
|
|
||||||
/**
|
|
||||||
* Filling EXPONENTIAL_SA_QI table.
|
|
||||||
* Note that we don't want qi = 0 in the table.
|
|
||||||
*/
|
|
||||||
val ln2 = ln(2.0)
|
|
||||||
var qi = 0.0
|
|
||||||
|
|
||||||
EXPONENTIAL_SA_QI.indices.forEach { i ->
|
|
||||||
qi += ln2.pow(i + 1.0) / InternalUtils.factorial(i + 1)
|
|
||||||
EXPONENTIAL_SA_QI[i] = qi
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public fun of(mean: Double): AhrensDieterExponentialSampler {
|
|
||||||
require(mean > 0) { "mean is not strictly positive: $mean" }
|
|
||||||
return AhrensDieterExponentialSampler(mean)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,48 +0,0 @@
|
|||||||
package space.kscience.kmath.stat.samplers
|
|
||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
|
||||||
import space.kscience.kmath.stat.Sampler
|
|
||||||
import space.kscience.kmath.stat.chain
|
|
||||||
import kotlin.math.*
|
|
||||||
|
|
||||||
/**
|
|
||||||
* [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian
|
|
||||||
* distribution.
|
|
||||||
*
|
|
||||||
* Based on Commons RNG implementation.
|
|
||||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html].
|
|
||||||
*/
|
|
||||||
public class BoxMullerNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
|
|
||||||
private var nextGaussian: Double = Double.NaN
|
|
||||||
|
|
||||||
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
|
|
||||||
val random: Double
|
|
||||||
|
|
||||||
if (nextGaussian.isNaN()) {
|
|
||||||
// Generate a pair of Gaussian numbers.
|
|
||||||
val x = nextDouble()
|
|
||||||
val y = nextDouble()
|
|
||||||
val alpha = 2 * PI * x
|
|
||||||
val r = sqrt(-2 * ln(y))
|
|
||||||
// Return the first element of the generated pair.
|
|
||||||
random = r * cos(alpha)
|
|
||||||
// Keep second element of the pair for next invocation.
|
|
||||||
nextGaussian = r * sin(alpha)
|
|
||||||
} else {
|
|
||||||
// Use the second element of the pair (generated at the
|
|
||||||
// previous invocation).
|
|
||||||
random = nextGaussian
|
|
||||||
// Both elements of the pair have been used.
|
|
||||||
nextGaussian = Double.NaN
|
|
||||||
}
|
|
||||||
|
|
||||||
random
|
|
||||||
}
|
|
||||||
|
|
||||||
public override fun toString(): String = "Box-Muller normalized Gaussian deviate"
|
|
||||||
|
|
||||||
public companion object {
|
|
||||||
public fun of(): BoxMullerNormalizedGaussianSampler = BoxMullerNormalizedGaussianSampler()
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,43 +0,0 @@
|
|||||||
package space.kscience.kmath.stat.samplers
|
|
||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
|
||||||
import space.kscience.kmath.chains.map
|
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
|
||||||
import space.kscience.kmath.stat.Sampler
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sampling from a Gaussian distribution with given mean and standard deviation.
|
|
||||||
*
|
|
||||||
* Based on Commons RNG implementation.
|
|
||||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html].
|
|
||||||
*
|
|
||||||
* @property mean the mean of the distribution.
|
|
||||||
* @property standardDeviation the variance of the distribution.
|
|
||||||
*/
|
|
||||||
public class GaussianSampler private constructor(
|
|
||||||
public val mean: Double,
|
|
||||||
public val standardDeviation: Double,
|
|
||||||
private val normalized: NormalizedGaussianSampler
|
|
||||||
) : Sampler<Double> {
|
|
||||||
public override fun sample(generator: RandomGenerator): Chain<Double> = normalized
|
|
||||||
.sample(generator)
|
|
||||||
.map { standardDeviation * it + mean }
|
|
||||||
|
|
||||||
override fun toString(): String = "Gaussian deviate [$normalized]"
|
|
||||||
|
|
||||||
public companion object {
|
|
||||||
public fun of(
|
|
||||||
mean: Double,
|
|
||||||
standardDeviation: Double,
|
|
||||||
normalized: NormalizedGaussianSampler = ZigguratNormalizedGaussianSampler.of()
|
|
||||||
): GaussianSampler {
|
|
||||||
require(standardDeviation > 0.0) { "standard deviation is not strictly positive: $standardDeviation" }
|
|
||||||
|
|
||||||
return GaussianSampler(
|
|
||||||
mean,
|
|
||||||
standardDeviation,
|
|
||||||
normalized
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,63 +0,0 @@
|
|||||||
package space.kscience.kmath.stat.samplers
|
|
||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
|
||||||
import space.kscience.kmath.stat.Sampler
|
|
||||||
import space.kscience.kmath.stat.chain
|
|
||||||
import kotlin.math.exp
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sampler for the Poisson distribution.
|
|
||||||
* - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
|
|
||||||
* This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead.
|
|
||||||
*
|
|
||||||
* Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor.
|
|
||||||
*
|
|
||||||
* Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double.
|
|
||||||
*
|
|
||||||
* Based on Commons RNG implementation.
|
|
||||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html].
|
|
||||||
*/
|
|
||||||
public class KempSmallMeanPoissonSampler private constructor(
|
|
||||||
private val p0: Double,
|
|
||||||
private val mean: Double
|
|
||||||
) : Sampler<Int> {
|
|
||||||
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
|
|
||||||
// Note on the algorithm:
|
|
||||||
// - X is the unknown sample deviate (the output of the algorithm)
|
|
||||||
// - x is the current value from the distribution
|
|
||||||
// - p is the probability of the current value x, p(X=x)
|
|
||||||
// - u is effectively the cumulative probability that the sample X
|
|
||||||
// is equal or above the current value x, p(X>=x)
|
|
||||||
// So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
|
|
||||||
var u = nextDouble()
|
|
||||||
var x = 0
|
|
||||||
var p = p0
|
|
||||||
|
|
||||||
while (u > p) {
|
|
||||||
u -= p
|
|
||||||
// Compute the next probability using a recurrence relation.
|
|
||||||
// p(x+1) = p(x) * mean / (x+1)
|
|
||||||
p *= mean / ++x
|
|
||||||
// The algorithm listed in Kemp (1981) does not check that the rolling probability
|
|
||||||
// is positive. This check is added to ensure no errors when the limit of the summation
|
|
||||||
// 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic.
|
|
||||||
if (p == 0.0) return@chain x
|
|
||||||
}
|
|
||||||
|
|
||||||
x
|
|
||||||
}
|
|
||||||
|
|
||||||
public override fun toString(): String = "Kemp Small Mean Poisson deviate"
|
|
||||||
|
|
||||||
public companion object {
|
|
||||||
public fun of(mean: Double): KempSmallMeanPoissonSampler {
|
|
||||||
require(mean > 0) { "Mean is not strictly positive: $mean" }
|
|
||||||
val p0 = exp(-mean)
|
|
||||||
// Probability must be positive. As mean increases then p(0) decreases.
|
|
||||||
require(p0 > 0) { "No probability for mean: $mean" }
|
|
||||||
return KempSmallMeanPoissonSampler(p0, mean)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,130 +0,0 @@
|
|||||||
package space.kscience.kmath.stat.samplers
|
|
||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
|
||||||
import space.kscience.kmath.chains.ConstantChain
|
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
|
||||||
import space.kscience.kmath.stat.Sampler
|
|
||||||
import space.kscience.kmath.stat.chain
|
|
||||||
import space.kscience.kmath.stat.internal.InternalUtils
|
|
||||||
import space.kscience.kmath.stat.next
|
|
||||||
import kotlin.math.*
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sampler for the Poisson distribution.
|
|
||||||
* - For large means, we use the rejection algorithm described in
|
|
||||||
* Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables
|
|
||||||
* Computing vol. 26 pp. 197-207.
|
|
||||||
*
|
|
||||||
* This sampler is suitable for mean >= 40.
|
|
||||||
*
|
|
||||||
* Based on Commons RNG implementation.
|
|
||||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html].
|
|
||||||
*/
|
|
||||||
public class LargeMeanPoissonSampler private constructor(public val mean: Double) : Sampler<Int> {
|
|
||||||
private val exponential: Sampler<Double> = AhrensDieterExponentialSampler.of(1.0)
|
|
||||||
private val gaussian: Sampler<Double> = ZigguratNormalizedGaussianSampler.of()
|
|
||||||
private val factorialLog: InternalUtils.FactorialLog = NO_CACHE_FACTORIAL_LOG
|
|
||||||
private val lambda: Double = floor(mean)
|
|
||||||
private val logLambda: Double = ln(lambda)
|
|
||||||
private val logLambdaFactorial: Double = getFactorialLog(lambda.toInt())
|
|
||||||
private val delta: Double = sqrt(lambda * ln(32 * lambda / PI + 1))
|
|
||||||
private val halfDelta: Double = delta / 2
|
|
||||||
private val twolpd: Double = 2 * lambda + delta
|
|
||||||
private val c1: Double = 1 / (8 * lambda)
|
|
||||||
private val a1: Double = sqrt(PI * twolpd) * exp(c1)
|
|
||||||
private val a2: Double = twolpd / delta * exp(-delta * (1 + delta) / twolpd)
|
|
||||||
private val aSum: Double = a1 + a2 + 1
|
|
||||||
private val p1: Double = a1 / aSum
|
|
||||||
private val p2: Double = a2 / aSum
|
|
||||||
|
|
||||||
private val smallMeanPoissonSampler: Sampler<Int> = if (mean - lambda < Double.MIN_VALUE)
|
|
||||||
NO_SMALL_MEAN_POISSON_SAMPLER
|
|
||||||
else // Not used.
|
|
||||||
KempSmallMeanPoissonSampler.of(mean - lambda)
|
|
||||||
|
|
||||||
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
|
|
||||||
// This will never be null. It may be a no-op delegate that returns zero.
|
|
||||||
val y2 = smallMeanPoissonSampler.next(generator)
|
|
||||||
var x: Double
|
|
||||||
var y: Double
|
|
||||||
var v: Double
|
|
||||||
var a: Int
|
|
||||||
var t: Double
|
|
||||||
var qr: Double
|
|
||||||
var qa: Double
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
// Step 1:
|
|
||||||
val u = generator.nextDouble()
|
|
||||||
|
|
||||||
if (u <= p1) {
|
|
||||||
// Step 2:
|
|
||||||
val n = gaussian.next(generator)
|
|
||||||
x = n * sqrt(lambda + halfDelta) - 0.5
|
|
||||||
if (x > delta || x < -lambda) continue
|
|
||||||
y = if (x < 0) floor(x) else ceil(x)
|
|
||||||
val e = exponential.next(generator)
|
|
||||||
v = -e - 0.5 * n * n + c1
|
|
||||||
} else {
|
|
||||||
// Step 3:
|
|
||||||
if (u > p1 + p2) {
|
|
||||||
y = lambda
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
x = delta + twolpd / delta * exponential.next(generator)
|
|
||||||
y = ceil(x)
|
|
||||||
v = -exponential.next(generator) - delta * (x + 1) / twolpd
|
|
||||||
}
|
|
||||||
|
|
||||||
// The Squeeze Principle
|
|
||||||
// Step 4.1:
|
|
||||||
a = if (x < 0) 1 else 0
|
|
||||||
t = y * (y + 1) / (2 * lambda)
|
|
||||||
|
|
||||||
// Step 4.2
|
|
||||||
if (v < -t && a == 0) {
|
|
||||||
y += lambda
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 4.3:
|
|
||||||
qr = t * ((2 * y + 1) / (6 * lambda) - 1)
|
|
||||||
qa = qr - t * t / (3 * (lambda + a * (y + 1)))
|
|
||||||
|
|
||||||
// Step 4.4:
|
|
||||||
if (v < qa) {
|
|
||||||
y += lambda
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 4.5:
|
|
||||||
if (v > qr) continue
|
|
||||||
|
|
||||||
// Step 4.6:
|
|
||||||
if (v < y * logLambda - getFactorialLog((y + lambda).toInt()) + logLambdaFactorial) {
|
|
||||||
y += lambda
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
min(y2 + y.toLong(), Int.MAX_VALUE.toLong()).toInt()
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun getFactorialLog(n: Int): Double = factorialLog.value(n)
|
|
||||||
public override fun toString(): String = "Large Mean Poisson deviate"
|
|
||||||
|
|
||||||
public companion object {
|
|
||||||
private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE
|
|
||||||
private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create()
|
|
||||||
|
|
||||||
private val NO_SMALL_MEAN_POISSON_SAMPLER: Sampler<Int> = Sampler { ConstantChain(0) }
|
|
||||||
|
|
||||||
public fun of(mean: Double): LargeMeanPoissonSampler {
|
|
||||||
require(mean >= 1) { "mean is not >= 1: $mean" }
|
|
||||||
// The algorithm is not valid if Math.floor(mean) is not an integer.
|
|
||||||
require(mean <= MAX_MEAN) { "mean $mean > $MAX_MEAN" }
|
|
||||||
return LargeMeanPoissonSampler(mean)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,61 +0,0 @@
|
|||||||
package space.kscience.kmath.stat.samplers
|
|
||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
|
||||||
import space.kscience.kmath.stat.Sampler
|
|
||||||
import space.kscience.kmath.stat.chain
|
|
||||||
import kotlin.math.ln
|
|
||||||
import kotlin.math.sqrt
|
|
||||||
|
|
||||||
/**
|
|
||||||
* [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian
|
|
||||||
* distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in
|
|
||||||
* [BoxMullerNormalizedGaussianSampler].
|
|
||||||
*
|
|
||||||
* Based on Commons RNG implementation.
|
|
||||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html]
|
|
||||||
*/
|
|
||||||
public class MarsagliaNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
|
|
||||||
private var nextGaussian = Double.NaN
|
|
||||||
|
|
||||||
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
|
|
||||||
if (nextGaussian.isNaN()) {
|
|
||||||
val alpha: Double
|
|
||||||
var x: Double
|
|
||||||
|
|
||||||
// Rejection scheme for selecting a pair that lies within the unit circle.
|
|
||||||
while (true) {
|
|
||||||
// Generate a pair of numbers within [-1 , 1).
|
|
||||||
x = 2.0 * generator.nextDouble() - 1.0
|
|
||||||
val y = 2.0 * generator.nextDouble() - 1.0
|
|
||||||
val r2 = x * x + y * y
|
|
||||||
|
|
||||||
if (r2 < 1 && r2 > 0) {
|
|
||||||
// Pair (x, y) is within unit circle.
|
|
||||||
alpha = sqrt(-2 * ln(r2) / r2)
|
|
||||||
// Keep second element of the pair for next invocation.
|
|
||||||
nextGaussian = alpha * y
|
|
||||||
// Return the first element of the generated pair.
|
|
||||||
break
|
|
||||||
}
|
|
||||||
// Pair is not within the unit circle: Generate another one.
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return the first element of the generated pair.
|
|
||||||
alpha * x
|
|
||||||
} else {
|
|
||||||
// Use the second element of the pair (generated at the
|
|
||||||
// previous invocation).
|
|
||||||
val r = nextGaussian
|
|
||||||
// Both elements of the pair have been used.
|
|
||||||
nextGaussian = Double.NaN
|
|
||||||
r
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public override fun toString(): String = "Box-Muller (with rejection) normalized Gaussian deviate"
|
|
||||||
|
|
||||||
public companion object {
|
|
||||||
public fun of(): MarsagliaNormalizedGaussianSampler = MarsagliaNormalizedGaussianSampler()
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,9 +0,0 @@
|
|||||||
package space.kscience.kmath.stat.samplers
|
|
||||||
|
|
||||||
import space.kscience.kmath.stat.Sampler
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Marker interface for a sampler that generates values from an N(0,1)
|
|
||||||
* [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution).
|
|
||||||
*/
|
|
||||||
public interface NormalizedGaussianSampler : Sampler<Double>
|
|
@ -1,30 +0,0 @@
|
|||||||
package space.kscience.kmath.stat.samplers
|
|
||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
|
||||||
import space.kscience.kmath.stat.Sampler
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sampler for the Poisson distribution.
|
|
||||||
* - For small means, a Poisson process is simulated using uniform deviates, as described in
|
|
||||||
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3
|
|
||||||
* Important integer-valued distributions: The Poisson distribution. Addison Wesley.
|
|
||||||
* The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
|
|
||||||
* - For large means, we use the rejection algorithm described in
|
|
||||||
* Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207.
|
|
||||||
*
|
|
||||||
* Based on Commons RNG implementation.
|
|
||||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html].
|
|
||||||
*/
|
|
||||||
public class PoissonSampler private constructor(mean: Double) : Sampler<Int> {
|
|
||||||
private val poissonSamplerDelegate: Sampler<Int> = of(mean)
|
|
||||||
public override fun sample(generator: RandomGenerator): Chain<Int> = poissonSamplerDelegate.sample(generator)
|
|
||||||
public override fun toString(): String = poissonSamplerDelegate.toString()
|
|
||||||
|
|
||||||
public companion object {
|
|
||||||
private const val PIVOT = 40.0
|
|
||||||
|
|
||||||
public fun of(mean: Double): Sampler<Int> =// Each sampler should check the input arguments.
|
|
||||||
if (mean < PIVOT) SmallMeanPoissonSampler.of(mean) else LargeMeanPoissonSampler.of(mean)
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,50 +0,0 @@
|
|||||||
package space.kscience.kmath.stat.samplers
|
|
||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
|
||||||
import space.kscience.kmath.stat.Sampler
|
|
||||||
import space.kscience.kmath.stat.chain
|
|
||||||
import kotlin.math.ceil
|
|
||||||
import kotlin.math.exp
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sampler for the Poisson distribution.
|
|
||||||
* - For small means, a Poisson process is simulated using uniform deviates, as described in
|
|
||||||
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important
|
|
||||||
* integer-valued distributions: The Poisson distribution. Addison Wesley.
|
|
||||||
* - The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
|
|
||||||
* This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead.
|
|
||||||
*
|
|
||||||
* Based on Commons RNG implementation.
|
|
||||||
*
|
|
||||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html].
|
|
||||||
*/
|
|
||||||
public class SmallMeanPoissonSampler private constructor(mean: Double) : Sampler<Int> {
|
|
||||||
private val p0: Double = exp(-mean)
|
|
||||||
|
|
||||||
private val limit: Int = (if (p0 > 0)
|
|
||||||
ceil(1000 * mean)
|
|
||||||
else
|
|
||||||
throw IllegalArgumentException("No p(x=0) probability for mean: $mean")).toInt()
|
|
||||||
|
|
||||||
public override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
|
|
||||||
var n = 0
|
|
||||||
var r = 1.0
|
|
||||||
|
|
||||||
while (n < limit) {
|
|
||||||
r *= nextDouble()
|
|
||||||
if (r >= p0) n++ else break
|
|
||||||
}
|
|
||||||
|
|
||||||
n
|
|
||||||
}
|
|
||||||
|
|
||||||
public override fun toString(): String = "Small Mean Poisson deviate"
|
|
||||||
|
|
||||||
public companion object {
|
|
||||||
public fun of(mean: Double): SmallMeanPoissonSampler {
|
|
||||||
require(mean > 0) { "mean is not strictly positive: $mean" }
|
|
||||||
return SmallMeanPoissonSampler(mean)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,88 +0,0 @@
|
|||||||
package space.kscience.kmath.stat.samplers
|
|
||||||
|
|
||||||
import space.kscience.kmath.chains.Chain
|
|
||||||
import space.kscience.kmath.stat.RandomGenerator
|
|
||||||
import space.kscience.kmath.stat.Sampler
|
|
||||||
import space.kscience.kmath.stat.chain
|
|
||||||
import kotlin.math.*
|
|
||||||
|
|
||||||
/**
|
|
||||||
* [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a
|
|
||||||
* Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this
|
|
||||||
* implementation has been adapted from the C code provided therein.
|
|
||||||
*
|
|
||||||
* Based on Commons RNG implementation.
|
|
||||||
* See [https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html].
|
|
||||||
*/
|
|
||||||
public class ZigguratNormalizedGaussianSampler private constructor() :
|
|
||||||
NormalizedGaussianSampler, Sampler<Double> {
|
|
||||||
|
|
||||||
private fun sampleOne(generator: RandomGenerator): Double {
|
|
||||||
val j = generator.nextLong()
|
|
||||||
val i = (j and LAST.toLong()).toInt()
|
|
||||||
return if (abs(j) < K[i]) j * W[i] else fix(generator, j, i)
|
|
||||||
}
|
|
||||||
|
|
||||||
public override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain { sampleOne(this) }
|
|
||||||
public override fun toString(): String = "Ziggurat normalized Gaussian deviate"
|
|
||||||
|
|
||||||
private fun fix(generator: RandomGenerator, hz: Long, iz: Int): Double {
|
|
||||||
var x = hz * W[iz]
|
|
||||||
|
|
||||||
return when {
|
|
||||||
iz == 0 -> {
|
|
||||||
var y: Double
|
|
||||||
|
|
||||||
do {
|
|
||||||
y = -ln(generator.nextDouble())
|
|
||||||
x = -ln(generator.nextDouble()) * ONE_OVER_R
|
|
||||||
} while (y + y < x * x)
|
|
||||||
|
|
||||||
val out = R + x
|
|
||||||
if (hz > 0) out else -out
|
|
||||||
}
|
|
||||||
|
|
||||||
F[iz] + generator.nextDouble() * (F[iz - 1] - F[iz]) < gauss(x) -> x
|
|
||||||
else -> sampleOne(generator)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public companion object {
|
|
||||||
private const val R: Double = 3.442619855899
|
|
||||||
private const val ONE_OVER_R: Double = 1 / R
|
|
||||||
private const val V: Double = 9.91256303526217e-3
|
|
||||||
private val MAX: Double = 2.0.pow(63.0)
|
|
||||||
private val ONE_OVER_MAX: Double = 1.0 / MAX
|
|
||||||
private const val LEN: Int = 128
|
|
||||||
private const val LAST: Int = LEN - 1
|
|
||||||
private val K: LongArray = LongArray(LEN)
|
|
||||||
private val W: DoubleArray = DoubleArray(LEN)
|
|
||||||
private val F: DoubleArray = DoubleArray(LEN)
|
|
||||||
|
|
||||||
init {
|
|
||||||
// Filling the tables.
|
|
||||||
var d = R
|
|
||||||
var t = d
|
|
||||||
var fd = gauss(d)
|
|
||||||
val q = V / fd
|
|
||||||
K[0] = (d / q * MAX).toLong()
|
|
||||||
K[1] = 0
|
|
||||||
W[0] = q * ONE_OVER_MAX
|
|
||||||
W[LAST] = d * ONE_OVER_MAX
|
|
||||||
F[0] = 1.0
|
|
||||||
F[LAST] = fd
|
|
||||||
|
|
||||||
(LAST - 1 downTo 1).forEach { i ->
|
|
||||||
d = sqrt(-2 * ln(V / d + fd))
|
|
||||||
fd = gauss(d)
|
|
||||||
K[i + 1] = (d / t * MAX).toLong()
|
|
||||||
t = d
|
|
||||||
F[i] = fd
|
|
||||||
W[i] = d * ONE_OVER_MAX
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public fun of(): ZigguratNormalizedGaussianSampler = ZigguratNormalizedGaussianSampler()
|
|
||||||
private fun gauss(x: Double): Double = exp(-0.5 * x * x)
|
|
||||||
}
|
|
||||||
}
|
|
@ -5,22 +5,23 @@ import kotlinx.coroutines.flow.toList
|
|||||||
import kotlinx.coroutines.runBlocking
|
import kotlinx.coroutines.runBlocking
|
||||||
import org.junit.jupiter.api.Assertions
|
import org.junit.jupiter.api.Assertions
|
||||||
import org.junit.jupiter.api.Test
|
import org.junit.jupiter.api.Test
|
||||||
import space.kscience.kmath.stat.samplers.GaussianSampler
|
import space.kscience.kmath.samplers.GaussianSampler
|
||||||
|
import space.kscience.kmath.structures.asBuffer
|
||||||
|
|
||||||
internal class CommonsDistributionsTest {
|
internal class CommonsDistributionsTest {
|
||||||
@Test
|
@Test
|
||||||
fun testNormalDistributionSuspend() {
|
fun testNormalDistributionSuspend() = runBlocking {
|
||||||
val distribution = GaussianSampler.of(7.0, 2.0)
|
val distribution = GaussianSampler(7.0, 2.0)
|
||||||
val generator = RandomGenerator.default(1)
|
val generator = RandomGenerator.default(1)
|
||||||
val sample = runBlocking { distribution.sample(generator).take(1000).toList() }
|
val sample = distribution.sample(generator).take(1000).toList().asBuffer()
|
||||||
Assertions.assertEquals(7.0, sample.average(), 0.1)
|
Assertions.assertEquals(7.0, Mean.double(sample), 0.2)
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun testNormalDistributionBlocking() {
|
fun testNormalDistributionBlocking() = runBlocking {
|
||||||
val distribution = GaussianSampler.of(7.0, 2.0)
|
val distribution = GaussianSampler(7.0, 2.0)
|
||||||
val generator = RandomGenerator.default(1)
|
val generator = RandomGenerator.default(1)
|
||||||
val sample = runBlocking { distribution.sample(generator).blocking().nextBlock(1000) }
|
val sample = distribution.sample(generator).nextBufferBlocking(1000)
|
||||||
Assertions.assertEquals(7.0, sample.average(), 0.1)
|
Assertions.assertEquals(7.0, Mean.double(sample), 0.2)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -20,7 +20,7 @@ internal class StatisticTest {
|
|||||||
@Test
|
@Test
|
||||||
fun testParallelMean() {
|
fun testParallelMean() {
|
||||||
runBlocking {
|
runBlocking {
|
||||||
val average = Mean.real
|
val average = Mean.double
|
||||||
.flow(chunked) //create a flow with results
|
.flow(chunked) //create a flow with results
|
||||||
.drop(99) // Skip first 99 values and use one with total data
|
.drop(99) // Skip first 99 values and use one with total data
|
||||||
.first() //get 1e5 data samples average
|
.first() //get 1e5 data samples average
|
||||||
|
Loading…
Reference in New Issue
Block a user