Simplify BlockingIntChain and BlockingRealChain; add blocking extension function for RandomChain; copy general documentation to samplers created with Apache Commons RNG

2020-06-12 02:49:06 +07:00 · 2020-06-12 02:49:06 +07:00 · a03c82f758
commit a03c82f758
parent 46f6d57fd9
15 changed files with 110 additions and 24 deletions
--- a/kmath-coroutines/src/commonMain/kotlin/scientifik/kmath/chains/BlockingIntChain.kt
+++ b/kmath-coroutines/src/commonMain/kotlin/scientifik/kmath/chains/BlockingIntChain.kt
@ -4,9 +4,5 @@ package scientifik.kmath.chains
 * Performance optimized chain for integer values
 */
 abstract class BlockingIntChain : Chain<Int> {
-    abstract fun nextInt(): Int
-
-    override suspend fun next(): Int = nextInt()
-
-    fun nextBlock(size: Int): IntArray = IntArray(size) { nextInt() }
-}
+    suspend fun nextBlock(size: Int): IntArray = IntArray(size) { next() }
+}
--- a/kmath-coroutines/src/commonMain/kotlin/scientifik/kmath/chains/BlockingRealChain.kt
+++ b/kmath-coroutines/src/commonMain/kotlin/scientifik/kmath/chains/BlockingRealChain.kt
@ -4,9 +4,5 @@ package scientifik.kmath.chains
 * Performance optimized chain for real values
 */
 abstract class BlockingRealChain : Chain<Double> {
-    abstract fun nextDouble(): Double
-
-    override suspend fun next(): Double = nextDouble()
-
-    fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { nextDouble() }
-}
+    suspend fun nextBlock(size: Int): DoubleArray = DoubleArray(size) { next() }
+}
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/RandomChain.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/RandomChain.kt
@ -1,5 +1,7 @@
 package scientifik.kmath.prob

+import scientifik.kmath.chains.BlockingIntChain
+import scientifik.kmath.chains.BlockingRealChain
 import scientifik.kmath.chains.Chain

 /**
@ -12,3 +14,17 @@ class RandomChain<out R>(val generator: RandomGenerator, private val gen: suspen
 }

 fun <R> RandomGenerator.chain(gen: suspend RandomGenerator.() -> R): RandomChain<R> = RandomChain(this, gen)
+
+fun RandomChain<Double>.blocking(): BlockingRealChain = let {
+    object : BlockingRealChain() {
+        override suspend fun next(): Double = it.next()
+        override fun fork(): Chain<Double> = it.fork()
+    }
+}
+
+fun RandomChain<Int>.blocking(): BlockingIntChain = let {
+    object : BlockingIntChain() {
+        override suspend fun next(): Int = it.next()
+        override fun fork(): Chain<Int> = it.fork()
+    }
+}
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AhrensDieterExponentialSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AhrensDieterExponentialSampler.kt
@ -8,8 +8,9 @@ import kotlin.math.ln
 import kotlin.math.pow

 /**
- * Based on commons-rng implementation.
+ * Sampling from an [exponential distribution](http://mathworld.wolfram.com/ExponentialDistribution.html).
 *
+ * Based on Commons RNG implementation.
 * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterExponentialSampler.html
 */
 class AhrensDieterExponentialSampler private constructor(val mean: Double) : Sampler<Double> {
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt
@ -7,6 +7,16 @@ import scientifik.kmath.prob.chain
 import scientifik.kmath.prob.next
 import kotlin.math.*

+/**
+ * Sampling from the [gamma distribution](http://mathworld.wolfram.com/GammaDistribution.html).
+ *  - For 0 < alpha < 1:
+ *    Ahrens, J. H. and Dieter, U., Computer methods for sampling from gamma, beta, Poisson and binomial distributions, Computing, 12, 223-246, 1974.
+ *  - For alpha >= 1:
+ *    Marsaglia and Tsang, A Simple Method for Generating Gamma Variables. ACM Transactions on Mathematical Software, Volume 26 Issue 3, September, 2000.
+ *
+ * Based on Commons RNG implementation.
+ * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AhrensDieterMarsagliaTsangGammaSampler.html
+ */
 class AhrensDieterMarsagliaTsangGammaSampler private constructor(
    alpha: Double,
    theta: Double
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AliasMethodDiscreteSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AliasMethodDiscreteSampler.kt
@ -9,6 +9,33 @@ import kotlin.math.ceil
 import kotlin.math.max
 import kotlin.math.min

+/**
+ * Distribution sampler that uses the Alias method. It can be used to sample from n values each with an associated
+ * probability. This implementation is based on the detailed explanation of the alias method by Keith Schartz and
+ * implements Vose's algorithm.
+ *
+ * Vose, M.D., A linear algorithm for generating random numbers with a given distribution, IEEE Transactions on
+ * Software Engineering, 17, 972-975, 1991. he algorithm will sample values in O(1) time after a pre-processing step
+ * of O(n) time.
+ *
+ * The alias tables are constructed using fraction probabilities with an assumed denominator of 253. In the generic
+ * case sampling uses UniformRandomProvider.nextInt(int) and the upper 53-bits from UniformRandomProvider.nextLong().
+ *
+ * Zero padding the input probabilities can be used to make more sampling more efficient. Any zero entry will always be
+ * aliased removing the requirement to compute a long. Increased sampling speed comes at the cost of increased storage
+ * space. The algorithm requires approximately 12 bytes of storage per input probability, that is n * 12 for size n.
+ * Zero-padding only requires 4 bytes of storage per padded value as the probability is known to be zero.
+ *
+ * An optimisation is performed for small table sizes that are a power of 2. In this case the sampling uses 1 or 2
+ * calls from UniformRandomProvider.nextInt() to generate up to 64-bits for creation of an 11-bit index and 53-bits
+ * for the long. This optimisation requires a generator with a high cycle length for the lower order bits.
+ *
+ * Larger table sizes that are a power of 2 will benefit from fast algorithms for UniformRandomProvider.nextInt(int)
+ * that exploit the power of 2.
+ *
+ * Based on Commons RNG implementation.
+ * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/AliasMethodDiscreteSampler.html
+ */
 open class AliasMethodDiscreteSampler private constructor(
    // Deliberate direct storage of input arrays
    protected val probability: LongArray,
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/BoxMullerNormalizedGaussianSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/BoxMullerNormalizedGaussianSampler.kt
@ -7,8 +7,10 @@ import scientifik.kmath.prob.chain
 import kotlin.math.*

 /**
- * Based on commons-rng implementation.
+ * [Box-Muller algorithm](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) for sampling from a Gaussian
+ * distribution.
 *
+ * Based on Commons RNG implementation.
 * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.html
 */
 class BoxMullerNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/GaussianSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/GaussianSampler.kt
@ -6,8 +6,9 @@ import scientifik.kmath.prob.RandomGenerator
 import scientifik.kmath.prob.Sampler

 /**
- * Based on commons-rng implementation.
+ * Sampling from a Gaussian distribution with given mean and standard deviation.
 *
+ * Based on Commons RNG implementation.
 * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/GaussianSampler.html
 */
 class GaussianSampler private constructor(
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/KempSmallMeanPoissonSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/KempSmallMeanPoissonSampler.kt
@ -7,8 +7,15 @@ import scientifik.kmath.prob.chain
 import kotlin.math.exp

 /**
- * Based on commons-rng implementation.
+ * Sampler for the Poisson distribution.
+ * - Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
+ * This sampler is suitable for mean < 40. For large means, LargeMeanPoissonSampler should be used instead.
 *
+ * Note: The algorithm uses a recurrence relation to compute the Poisson probability and a rolling summation for the cumulative probability. When the mean is large the initial probability (Math.exp(-mean)) is zero and an exception is raised by the constructor.
+ *
+ * Sampling uses 1 call to UniformRandomProvider.nextDouble(). This method provides an alternative to the SmallMeanPoissonSampler for slow generators of double.
+ *
+ * Based on Commons RNG implementation.
 * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/KempSmallMeanPoissonSampler.html
 */
 class KempSmallMeanPoissonSampler private constructor(
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/LargeMeanPoissonSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/LargeMeanPoissonSampler.kt
@ -9,8 +9,14 @@ import scientifik.kmath.prob.next
 import kotlin.math.*

 /**
- * Based on commons-rng implementation.
+ * Sampler for the Poisson distribution.
+ * - For large means, we use the rejection algorithm described in
+ *   Devroye, Luc. (1981).The Computer Generation of Poisson Random Variables
+ *   Computing vol. 26 pp. 197-207.
 *
+ * This sampler is suitable for mean >= 40.
+ *
+ * Based on Commons RNG implementation.
 * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.html
 */
 class LargeMeanPoissonSampler private constructor(val mean: Double) : Sampler<Int> {
@ -112,7 +118,7 @@ class LargeMeanPoissonSampler private constructor(val mean: Double) : Sampler<In
        private const val MAX_MEAN: Double = 0.5 * Int.MAX_VALUE
        private val NO_CACHE_FACTORIAL_LOG: InternalUtils.FactorialLog = InternalUtils.FactorialLog.create()

-        private val NO_SMALL_MEAN_POISSON_SAMPLER = object : Sampler<Int> {
+        private val NO_SMALL_MEAN_POISSON_SAMPLER: Sampler<Int> = object : Sampler<Int> {
            override fun sample(generator: RandomGenerator): Chain<Int> = ConstantChain(0)
        }

--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/MarsagliaNormalizedGaussianSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/MarsagliaNormalizedGaussianSampler.kt
@ -8,11 +8,14 @@ import kotlin.math.ln
 import kotlin.math.sqrt

 /**
- * Based on commons-rng implementation.
+ * [Marsaglia polar method](https://en.wikipedia.org/wiki/Marsaglia_polar_method) for sampling from a Gaussian
+ * distribution with mean 0 and standard deviation 1. This is a variation of the algorithm implemented in
+ * [BoxMullerNormalizedGaussianSampler].
 *
+ * Based on Commons RNG implementation.
 * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/MarsagliaNormalizedGaussianSampler.html
 */
-class MarsagliaNormalizedGaussianSampler private constructor(): NormalizedGaussianSampler, Sampler<Double> {
+class MarsagliaNormalizedGaussianSampler private constructor() : NormalizedGaussianSampler, Sampler<Double> {
    private var nextGaussian = Double.NaN

    override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/NormalizedGaussianSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/NormalizedGaussianSampler.kt
@ -2,4 +2,8 @@ package scientifik.kmath.prob.samplers

 import scientifik.kmath.prob.Sampler

+/**
+ * Marker interface for a sampler that generates values from an N(0,1)
+ * [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution).
+ */
 interface NormalizedGaussianSampler : Sampler<Double>
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/PoissonSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/PoissonSampler.kt
@ -5,9 +5,16 @@ import scientifik.kmath.prob.RandomGenerator
 import scientifik.kmath.prob.Sampler

 /**
- * Based on commons-rng implementation.
+ * Sampler for the Poisson distribution.
+ * - For small means, a Poisson process is simulated using uniform deviates, as described in
+ *   Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3
+ *   Important integer-valued distributions: The Poisson distribution. Addison Wesley.
+ * The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
+ * - For large means, we use the rejection algorithm described in
+ *   Devroye, Luc. (1981). The Computer Generation of Poisson Random Variables Computing vol. 26 pp. 197-207.
 *
- * https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html
+ * Based on Commons RNG implementation.
+ * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/PoissonSampler.html
 */
 class PoissonSampler private constructor(
    mean: Double
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/SmallMeanPoissonSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/SmallMeanPoissonSampler.kt
@ -8,7 +8,14 @@ import kotlin.math.ceil
 import kotlin.math.exp

 /**
- * Based on commons-rng implementation.
+ * Sampler for the Poisson distribution.
+ * - For small means, a Poisson process is simulated using uniform deviates, as described in
+ *   Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, Volume 2. Chapter 3.4.1.F.3 Important
+ *   integer-valued distributions: The Poisson distribution. Addison Wesley.
+ * - The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
+ *   This sampler is suitable for mean < 40. For large means, [LargeMeanPoissonSampler] should be used instead.
+ *
+ * Based on Commons RNG implementation.
 *
 * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.html
 */
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/ZigguratNormalizedGaussianSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/ZigguratNormalizedGaussianSampler.kt
@ -7,8 +7,11 @@ import scientifik.kmath.prob.chain
 import kotlin.math.*

 /**
- * Based on commons-rng implementation.
+ * [Marsaglia and Tsang "Ziggurat"](https://en.wikipedia.org/wiki/Ziggurat_algorithm) method for sampling from a
+ * Gaussian distribution with mean 0 and standard deviation 1. The algorithm is explained in this paper and this
+ * implementation has been adapted from the C code provided therein.
 *
+ * Based on Commons RNG implementation.
 * See https://commons.apache.org/proper/commons-rng/commons-rng-sampling/apidocs/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.html
 */
 class ZigguratNormalizedGaussianSampler private constructor() :