Implement Commons RNG-like samplers in kmath-prob module for Multiplatform #164

Merged
CommanderTvis merged 44 commits from feature/mp-samplers into dev 2021-03-31 09:25:44 +03:00
15 changed files with 110 additions and 24 deletions
Showing only changes of commit a03c82f758 - Show all commits

View File

@ -4,9 +4,5 @@ package scientifik.kmath.chains
* Performance optimized chain for integer values * Performance optimized chain for integer values
*/ */
abstract class BlockingIntChain : Chain<Int> { abstract class BlockingIntChain : Chain<Int> {
abstract fun nextInt(): Int suspend fun nextBlock(size: Int): IntArray = IntArray(size) { next() }
override suspend fun next(): Int = nextInt()
fun nextBlock(size: Int): IntArray = IntArray(size) { nextInt() }
} }

View File

@ -4,9 +4,5 @@ package scientifik.kmath.chains
* Performance optimized chain for real values * Performance optimized chain for real values
*/ */
abstract class BlockingRealChain : Chain<Double> { abstract class BlockingRealChain : Chain<Double> {
abstract fun nextDouble(): Double suspend fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { next() }
override suspend fun next(): Double = nextDouble()
fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { nextDouble() }
} }

View File

@ -1,5 +1,7 @@
package scientifik.kmath.prob package scientifik.kmath.prob
import scientifik.kmath.chains.BlockingIntChain
import scientifik.kmath.chains.BlockingRealChain
import scientifik.kmath.chains.Chain import scientifik.kmath.chains.Chain
/** /**
@ -12,3 +14,17 @@ class RandomChain<out R>(val generator: RandomGenerator, private val gen: suspen
} }
fun <R> RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, gen) fun <R> RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, gen)
fun RandomChain<Double>.blocking(): BlockingRealChain = let {
object : BlockingRealChain() {
override suspend fun next(): Double = it.next()
override fun fork(): Chain<Double> = it.fork()
}
}
fun RandomChain<Int>.blocking(): BlockingIntChain = let {
object : BlockingIntChain() {
override suspend fun next(): Int = it.next()
override fun fork(): Chain<Int> = it.fork()
}
}

View File

@ -8,8 +8,9 @@ import kotlin.math.ln
import kotlin.math.pow import kotlin.math.pow
/** /**
* Based on commons-rng implementation. * Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html).
* *
* Based on Commons RNG implementation.
* See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html
*/ */
class AhrensDieterExponentialSampler private constructor(val mean: Double) : Sampler<Double> { class AhrensDieterExponentialSampler private constructor(val mean: Double) : Sampler<Double> {

View File

@ -7,6 +7,16 @@ import scientifik.kmath.prob.chain
import scientifik.kmath.prob.next import scientifik.kmath.prob.next
import kotlin.math.* import kotlin.math.*
/**
* Sampling from the [gamma distribution](http://mathworld.wolfram.com/GammaDistribution.html).
* - For 0 < alpha < 1:
* Ahrens, J. H. and Dieter, U., Computer methods for sampling from gamma, beta, Poisson and binomial distributions, Computing, 12, 223-246, 1974.
* - For alpha >= 1:
* Marsaglia and Tsang, A Simple Method for Generating Gamma Variables. ACM Transactions on Mathematical Software, Volume 26 Issue 3, September, 2000.
*
* Based on Commons RNG implementation.
* See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterMarsagliaTsangGammaSampler.html
*/
class AhrensDieterMarsagliaTsangGammaSampler private constructor( class AhrensDieterMarsagliaTsangGammaSampler private constructor(
alpha: Double, alpha: Double,
theta: Double theta: Double

View File

@ -9,6 +9,33 @@ import kotlin.math.ceil
import kotlin.math.max import kotlin.math.max
import kotlin.math.min import kotlin.math.min
/**
* Distribution sampler that uses the Alias method. It can be used to sample from n values each with an associated
* probability. This implementation is based on the detailed explanation of the alias method by Keith Schartz and
* implements Vose's algorithm.
*
* Vose, M.D., A linear algorithm for generating random numbers with a given distribution, IEEE Transactions on
* Software Engineering, 17, 972-975, 1991. he algorithm will sample values in O(1) time after a pre-processing step
* of O(n) time.
*
* The alias tables are constructed using fraction probabilities with an assumed denominator of 253. In the generic
* case sampling uses UniformRandomProvider.nextInt(int) and the upper 53-bits from UniformRandomProvider.nextLong().
*
* Zero padding the input probabilities can be used to make more sampling more efficient. Any zero entry will always be
* aliased removing the requirement to compute a long. Increased sampling speed comes at the cost of increased storage
* space. The algorithm requires approximately 12 bytes of storage per input probability, that is n * 12 for size n.
* Zero-padding only requires 4 bytes of storage per padded value as the probability is known to be zero.
*
* An optimisation is performed for small table sizes that are a power of 2. In this case the sampling uses 1 or 2
* calls from UniformRandomProvider.nextInt() to generate up to 64-bits for creation of an 11-bit index and 53-bits
* for the long. This optimisation requires a generator with a high cycle length for the lower order bits.
*
* Larger table sizes that are a power of 2 will benefit from fast algorithms for UniformRandomProvider.nextInt(int)
* that exploit the power of 2.
*
* Based on Commons RNG implementation.
* See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AliasMethodDiscreteSampler.html
*/
open class AliasMethodDiscreteSampler private constructor( open class AliasMethodDiscreteSampler private constructor(
// Deliberate direct storage of input arrays // Deliberate direct storage of input arrays
protected val probability: LongArray, protected val probability: LongArray,

View File

@ -7,8 +7,10 @@ import scientifik.kmath.prob.chain
import kotlin.math.* import kotlin.math.*
/** /**
* Based on commons-rng implementation. * [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian
* distribution.
* *
* Based on Commons RNG implementation.
* See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html
*/ */
class BoxMullerNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> { class BoxMullerNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {

View File

@ -6,8 +6,9 @@ import scientifik.kmath.prob.RandomGenerator
import scientifik.kmath.prob.Sampler import scientifik.kmath.prob.Sampler
/** /**
* Based on commons-rng implementation. * Sampling from a Gaussian distribution with given mean and standard deviation.
* *
* Based on Commons RNG implementation.
* See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html
*/ */
class GaussianSampler private constructor( class GaussianSampler private constructor(

View File

@ -7,8 +7,15 @@ import scientifik.kmath.prob.chain
import kotlin.math.exp import kotlin.math.exp
/** /**
* Based on commons-rng implementation. * Sampler for the Poisson distribution.
* - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
* This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead.
* *
* Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor.
*
* Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double.
*
* Based on Commons RNG implementation.
* See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html
*/ */
class KempSmallMeanPoissonSampler private constructor( class KempSmallMeanPoissonSampler private constructor(

View File

@ -9,8 +9,14 @@ import scientifik.kmath.prob.next
import kotlin.math.* import kotlin.math.*
/** /**
* Based on commons-rng implementation. * Sampler for the Poisson distribution.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables
* Computing vol. 26 pp. 197-207.
* *
* This sampler is suitable for mean >= 40.
*
* Based on Commons RNG implementation.
* See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html
*/ */
class LargeMeanPoissonSampler private constructor(val mean: Double) : Sampler<Int> { class LargeMeanPoissonSampler private constructor(val mean: Double) : Sampler<Int> {
@ -112,7 +118,7 @@ class LargeMeanPoissonSampler private constructor(val mean: Double) : Sampler<In
private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE
private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create() private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create()
private val NO_SMALL_MEAN_POISSON_SAMPLER = object : Sampler<Int> { private val NO_SMALL_MEAN_POISSON_SAMPLER: Sampler<Int> = object : Sampler<Int> {
override fun sample(generator: RandomGenerator): Chain<Int> = ConstantChain(0) override fun sample(generator: RandomGenerator): Chain<Int> = ConstantChain(0)
} }

View File

@ -8,8 +8,11 @@ import kotlin.math.ln
import kotlin.math.sqrt import kotlin.math.sqrt
/** /**
* Based on commons-rng implementation. * [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian
* distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in
* [BoxMullerNormalizedGaussianSampler].
* *
* Based on Commons RNG implementation.
* See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html
*/ */
class MarsagliaNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> { class MarsagliaNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {

View File

@ -2,4 +2,8 @@ package scientifik.kmath.prob.samplers
import scientifik.kmath.prob.Sampler import scientifik.kmath.prob.Sampler
/**
* Marker interface for a sampler that generates values from an N(0,1)
* [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution).
*/
interface NormalizedGaussianSampler : Sampler<Double> interface NormalizedGaussianSampler : Sampler<Double>

View File

@ -5,9 +5,16 @@ import scientifik.kmath.prob.RandomGenerator
import scientifik.kmath.prob.Sampler import scientifik.kmath.prob.Sampler
/** /**
* Based on commons-rng implementation. * Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3
* Important integer-valued distributions: The Poisson distribution. Addison Wesley.
* The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* - For large means, we use the rejection algorithm described in
* Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207.
* *
* https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html * Based on Commons RNG implementation.
* See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html
*/ */
class PoissonSampler private constructor( class PoissonSampler private constructor(
mean: Double mean: Double

View File

@ -8,7 +8,14 @@ import kotlin.math.ceil
import kotlin.math.exp import kotlin.math.exp
/** /**
* Based on commons-rng implementation. * Sampler for the Poisson distribution.
* - For small means, a Poisson process is simulated using uniform deviates, as described in
* Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important
* integer-valued distributions: The Poisson distribution. Addison Wesley.
* - The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
* This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead.
*
* Based on Commons RNG implementation.
* *
* See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html
*/ */

View File

@ -7,8 +7,11 @@ import scientifik.kmath.prob.chain
import kotlin.math.* import kotlin.math.*
/** /**
* Based on commons-rng implementation. * [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a
* Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this
* implementation has been adapted from the C code provided therein.
* *
* Based on Commons RNG implementation.
* See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html
*/ */
class ZigguratNormalizedGaussianSampler private constructor() : class ZigguratNormalizedGaussianSampler private constructor() :