From 46f6d57fd944bc8719c8b99f296f151ca9eb4b85 Mon Sep 17 00:00:00 2001
From: Commander Tvis <postovalovya@gmail.com>
Date: Fri, 12 Jun 2020 01:13:15 +0700
Subject: [PATCH] Add 2 more samplers, replace SimpleChain with generator.chain

---
 .../AhrensDieterMarsagliaTsangGammaSampler.kt | 112 ++++++++
 .../samplers/AliasMethodDiscreteSampler.kt    | 260 ++++++++++++++++++
 .../kmath/prob/samplers/InternalUtils.kt      |  30 +-
 .../prob/samplers/LargeMeanPoissonSampler.kt  |   4 +-
 .../prob/samplers/SmallMeanPoissonSampler.kt  |   1 -
 .../ZigguratNormalizedGaussianSampler.kt      |   1 -
 6 files changed, 396 insertions(+), 12 deletions(-)
 create mode 100644 kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt
 create mode 100644 kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AliasMethodDiscreteSampler.kt
diff --git a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt
new file mode 100644
index 000000000..57bc778ba
--- /dev/null
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AhrensDieterMarsagliaTsangGammaSampler.kt
@@ -0,0 +1,112 @@
+package scientifik.kmath.prob.samplers
+
+import scientifik.kmath.chains.Chain
+import scientifik.kmath.prob.RandomGenerator
+import scientifik.kmath.prob.Sampler
+import scientifik.kmath.prob.chain
+import scientifik.kmath.prob.next
+import kotlin.math.*
+
+class AhrensDieterMarsagliaTsangGammaSampler private constructor(
+    alpha: Double,
+    theta: Double
+) : Sampler<Double> {
+    private val delegate: BaseGammaSampler =
+        if (alpha < 1) AhrensDieterGammaSampler(alpha, theta) else MarsagliaTsangGammaSampler(
+            alpha,
+            theta
+        )
+
+    private abstract class BaseGammaSampler internal constructor(
+        protected val alpha: Double,
+        protected val theta: Double
+    ) : Sampler<Double> {
+        init {
+            require(alpha > 0) { "alpha is not strictly positive: $alpha" }
+            require(theta > 0) { "theta is not strictly positive: $theta" }
+        }
+
+        override fun toString(): String = "Ahrens-Dieter-Marsaglia-Tsang Gamma deviate"
+    }
+
+    private class AhrensDieterGammaSampler internal constructor(alpha: Double, theta: Double) :
+        BaseGammaSampler(alpha, theta) {
+        private val oneOverAlpha: Double = 1.0 / alpha
+        private val bGSOptim: Double = 1.0 + alpha / E
+
+        override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
+            var x: Double
+
+            // [1]: p. 228, Algorithm GS.
+            while (true) {
+                // Step 1:
+                val u = generator.nextDouble()
+                val p = bGSOptim * u
+
+                if (p <= 1) {
+                    // Step 2:
+                    x = p.pow(oneOverAlpha)
+                    val u2 = generator.nextDouble()
+
+                    if (u2 > exp(-x)) // Reject.
+                        continue
+
+                    break
+                }
+
+                // Step 3:
+                x = -ln((bGSOptim - p) * oneOverAlpha)
+                val u2: Double = generator.nextDouble()
+                if (u2 <= x.pow(alpha - 1.0)) break
+                // Reject and continue.
+            }
+
+            x * theta
+        }
+    }
+
+    private class MarsagliaTsangGammaSampler internal constructor(alpha: Double, theta: Double) :
+        BaseGammaSampler(alpha, theta) {
+        private val dOptim: Double
+        private val cOptim: Double
+        private val gaussian: NormalizedGaussianSampler
+
+        init {
+            gaussian = ZigguratNormalizedGaussianSampler.of()
+            dOptim = alpha - ONE_THIRD
+            cOptim = ONE_THIRD / sqrt(dOptim)
+        }
+
+        override fun sample(generator: RandomGenerator): Chain<Double> = generator.chain {
+            var v: Double
+
+            while (true) {
+                val x = gaussian.next(generator)
+                val oPcTx = 1 + cOptim * x
+                v = oPcTx * oPcTx * oPcTx
+                if (v <= 0) continue
+                val x2 = x * x
+                val u = generator.nextDouble()
+                // Squeeze.
+                if (u < 1 - 0.0331 * x2 * x2) break
+                if (ln(u) < 0.5 * x2 + dOptim * (1 - v + ln(v))) break
+            }
+
+            theta * dOptim * v
+        }
+
+        companion object {
+            private const val ONE_THIRD = 1.0 / 3.0
+        }
+    }
+
+    override fun sample(generator: RandomGenerator): Chain<Double> = delegate.sample(generator)
+    override fun toString(): String = delegate.toString()
+
+    companion object {
+        fun of(
+            alpha: Double,
+            theta: Double
+        ): Sampler<Double> = AhrensDieterMarsagliaTsangGammaSampler(alpha, theta)
+    }
+}
\ No newline at end of file
diff --git a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AliasMethodDiscreteSampler.kt b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AliasMethodDiscreteSampler.kt
new file mode 100644
index 000000000..6b514a40b
--- /dev/null
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/AliasMethodDiscreteSampler.kt
@@ -0,0 +1,260 @@
+package scientifik.kmath.prob.samplers
+
+import scientifik.kmath.chains.Chain
+import scientifik.kmath.prob.RandomGenerator
+import scientifik.kmath.prob.Sampler
+import scientifik.kmath.prob.chain
+import kotlin.jvm.JvmOverloads
+import kotlin.math.ceil
+import kotlin.math.max
+import kotlin.math.min
+
+open class AliasMethodDiscreteSampler private constructor(
+    // Deliberate direct storage of input arrays
+    protected val probability: LongArray,
+    protected val alias: IntArray
+) : Sampler<Int> {
+
+    private class SmallTableAliasMethodDiscreteSampler internal constructor(
+        probability: LongArray,
+        alias: IntArray
+    ) : AliasMethodDiscreteSampler(probability, alias) {
+        // Assume the table size is a power of 2 and create the mask
+        private val mask: Int = alias.size - 1
+
+        override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
+            val bits = generator.nextInt()
+            // Isolate lower bits
+            val j = bits and mask
+
+            // Optimisation for zero-padded input tables
+            if (j >= probability.size)
+            // No probability must use the alias
+                return@chain alias[j]
+
+            // Create a uniform random deviate as a long.
+            // This replicates functionality from the o.a.c.rng.core.utils.NumberFactory.makeLong
+            val longBits = generator.nextInt().toLong() shl 32 or (bits.toLong() and hex_ffffffff)
+            // Choose between the two. Use a 53-bit long for the probability.
+            if (longBits ushr 11 < probability[j]) j else alias[j]
+        }
+
+        private companion object {
+            private const val hex_ffffffff = 4294967295L
+        }
+    }
+
+    override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
+        // This implements the algorithm as per Vose (1991):
+        // v = uniform()  in [0, 1)
+        // j = uniform(n) in [0, n)
+        // if v < prob[j] then
+        //   return j
+        // else
+        //   return alias[j]
+        val j = generator.nextInt(alias.size)
+
+        // Optimisation for zero-padded input tables
+        // No probability must use the alias
+        if (j >= probability.size) return@chain alias[j]
+
+        // Note: We could check the probability before computing a deviate.
+        // p(j) == 0  => alias[j]
+        // p(j) == 1  => j
+        // However it is assumed these edge cases are rare:
+        //
+        // The probability table will be 1 for approximately 1/n samples, i.e. only the
+        // last unpaired probability. This is only worth checking for when the table size (n)
+        // is small. But in that case the user should zero-pad the table for performance.
+        //
+        // The probability table will be 0 when an input probability was zero. We
+        // will assume this is also rare if modelling a discrete distribution where
+        // all samples are possible. The edge case for zero-padded tables is handled above.
+
+        // Choose between the two. Use a 53-bit long for the probability.
+        if (generator.nextLong() ushr 11 < probability[j]) j else alias[j]
+    }
+
+    override fun toString(): String = "Alias method"
+
+    companion object {
+        private const val DEFAULT_ALPHA = 0
+        private const val ZERO = 0.0
+        private const val ONE_AS_NUMERATOR = 1L shl 53
+        private const val CONVERT_TO_NUMERATOR: Double = ONE_AS_NUMERATOR.toDouble()
+        private const val MAX_SMALL_POWER_2_SIZE = 1 shl 11
+
+        @JvmOverloads
+        fun of(
+            probabilities: DoubleArray,
+            alpha: Int = DEFAULT_ALPHA
+        ): Sampler<Int> {
+            // The Alias method balances N categories with counts around the mean into N sections,
+            // each allocated 'mean' observations.
+            //
+            // Consider 4 categories with counts 6,3,2,1. The histogram can be balanced into a
+            // 2D array as 4 sections with a height of the mean:
+            //
+            // 6
+            // 6
+            // 6
+            // 63   => 6366   --
+            // 632     6326    |-- mean
+            // 6321    6321   --
+            //
+            // section abcd
+            //
+            // Each section is divided as:
+            // a: 6=1/1
+            // b: 3=1/1
+            // c: 2=2/3; 6=1/3   (6 is the alias)
+            // d: 1=1/3; 6=2/3   (6 is the alias)
+            //
+            // The sample is obtained by randomly selecting a section, then choosing which category
+            // from the pair based on a uniform random deviate.
+            val sumProb = InternalUtils.validateProbabilities(probabilities)
+            // Allow zero-padding
+            val n = computeSize(probabilities.size, alpha)
+            // Partition into small and large by splitting on the average.
+            val mean = sumProb / n
+            // The cardinality of smallSize + largeSize = n.
+            // So fill the same array from either end.
+            val indices = IntArray(n)
+            var large = n
+            var small = 0
+
+            probabilities.indices.forEach { i ->
+                if (probabilities[i] >= mean) indices[--large] = i else indices[small++] = i
+            }
+
+            small = fillRemainingIndices(probabilities.size, indices, small)
+            // This may be smaller than the input length if the probabilities were already padded.
+            val nonZeroIndex = findLastNonZeroIndex(probabilities)
+            // The probabilities are modified so use a copy.
+            // Note: probabilities are required only up to last nonZeroIndex
+            val remainingProbabilities = probabilities.copyOf(nonZeroIndex + 1)
+            // Allocate the final tables.
+            // Probability table may be truncated (when zero padded).
+            // The alias table is full length.
+            val probability = LongArray(remainingProbabilities.size)
+            val alias = IntArray(n)
+
+            // This loop uses each large in turn to fill the alias table for small probabilities that
+            // do not reach the requirement to fill an entire section alone (i.e. p < mean).
+            // Since the sum of the small should be less than the sum of the large it should use up
+            // all the small first. However floating point round-off can result in
+            // misclassification of items as small or large. The Vose algorithm handles this using
+            // a while loop conditioned on the size of both sets and a subsequent loop to use
+            // unpaired items.
+            while (large != n && small != 0) {
+                // Index of the small and the large probabilities.
+                val j = indices[--small]
+                val k = indices[large++]
+
+                // Optimisation for zero-padded input:
+                // p(j) = 0 above the last nonZeroIndex
+                if (j > nonZeroIndex)
+                // The entire amount for the section is taken from the alias.
+                    remainingProbabilities[k] -= mean
+                else {
+                    val pj = remainingProbabilities[j]
+                    // Item j is a small probability that is below the mean.
+                    // Compute the weight of the section for item j: pj / mean.
+                    // This is scaled by 2^53 and the ceiling function used to round-up
+                    // the probability to a numerator of a fraction in the range [1,2^53].
+                    // Ceiling ensures non-zero values.
+                    probability[j] = ceil(CONVERT_TO_NUMERATOR * (pj / mean)).toLong()
+                    // The remaining amount for the section is taken from the alias.
+                    // Effectively: probabilities[k] -= (mean - pj)
+                    remainingProbabilities[k] += pj - mean
+                }
+
+                // If not j then the alias is k
+                alias[j] = k
+
+                // Add the remaining probability from large to the appropriate list.
+                if (remainingProbabilities[k] >= mean) indices[--large] = k else indices[small++] = k
+            }
+
+            // Final loop conditions to consume unpaired items.
+            // Note: The large set should never be non-empty but this can occur due to round-off
+            // error so consume from both.
+            fillTable(probability, alias, indices, 0, small)
+            fillTable(probability, alias, indices, large, n)
+
+            // Change the algorithm for small power of 2 sized tables
+            return if (isSmallPowerOf2(n))
+                SmallTableAliasMethodDiscreteSampler(probability, alias)
+            else
+                AliasMethodDiscreteSampler(probability, alias)
+        }
+
+        private fun fillRemainingIndices(length: Int, indices: IntArray, small: Int): Int {
+            var updatedSmall = small
+            (length until indices.size).forEach { i -> indices[updatedSmall++] = i }
+            return updatedSmall
+        }
+
+        private fun findLastNonZeroIndex(probabilities: DoubleArray): Int {
+            // No bounds check is performed when decrementing as the array contains at least one
+            // value above zero.
+            var nonZeroIndex = probabilities.size - 1
+            while (probabilities[nonZeroIndex] == ZERO) nonZeroIndex--
+            return nonZeroIndex
+        }
+
+        private fun computeSize(length: Int, alpha: Int): Int {
+            // If No padding
+            if (alpha < 0) return length
+            // Use the number of leading zeros function to find the next power of 2,
+            // i.e. ceil(log2(x))
+            var pow2 = 32 - numberOfLeadingZeros(length - 1)
+            // Increase by the alpha. Clip this to limit to a positive integer (2^30)
+            pow2 = min(30, pow2 + alpha)
+            // Use max to handle a length above the highest possible power of 2
+            return max(length, 1 shl pow2)
+        }
+
+        private fun fillTable(
+            probability: LongArray,
+            alias: IntArray,
+            indices: IntArray,
+            start: Int,
+            end: Int
+        ) = (start until end).forEach { i ->
+            val index = indices[i]
+            probability[index] = ONE_AS_NUMERATOR
+            alias[index] = index
+        }
+
+        private fun isSmallPowerOf2(n: Int): Boolean = n <= MAX_SMALL_POWER_2_SIZE && n and n - 1 == 0
+
+        private fun numberOfLeadingZeros(i: Int): Int {
+            var mutI = i
+            if (mutI <= 0) return if (mutI == 0) 32 else 0
+            var n = 31
+
+            if (mutI >= 1 shl 16) {
+                n -= 16
+                mutI = mutI ushr 16
+            }
+
+            if (mutI >= 1 shl 8) {
+                n -= 8
+                mutI = mutI ushr 8
+            }
+
+            if (mutI >= 1 shl 4) {
+                n -= 4
+                mutI = mutI ushr 4
+            }
+
+            if (mutI >= 1 shl 2) {
+                n -= 2
+                mutI = mutI ushr 2
+            }
+
+            return n - (mutI ushr 1)
+        }
+    }
+}
diff --git a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/InternalUtils.kt b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/InternalUtils.kt
index c83f35d32..611c4064d 100644
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/InternalUtils.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/InternalUtils.kt
@@ -18,6 +18,22 @@ internal object InternalUtils {
 
     fun factorial(n: Int): Long = FACTORIALS[n]
 
+    fun validateProbabilities(probabilities: DoubleArray?): Double {
+        require(!(probabilities == null || probabilities.isEmpty())) { "Probabilities must not be empty." }
+        var sumProb = 0.0
+
+        probabilities.forEach { prob ->
+            validateProbability(prob)
+            sumProb += prob
+        }
+
+        require(!(sumProb.isInfinite() || sumProb <= 0)) { "Invalid sum of probabilities: $sumProb" }
+        return sumProb
+    }
+
+    private fun validateProbability(probability: Double): Unit =
+        require(!(probability < 0 || probability.isInfinite() || probability.isNaN())) { "Invalid probability: $probability" }
+
     class FactorialLog private constructor(
         numValues: Int,
         cache: DoubleArray?
@@ -30,9 +46,11 @@ internal object InternalUtils {
             if (cache != null && cache.size > BEGIN_LOG_FACTORIALS) {
                 // Copy available values.
                 endCopy = min(cache.size, numValues)
-                cache.copyInto(logFactorials,
+                cache.copyInto(
+                    logFactorials,
                     BEGIN_LOG_FACTORIALS,
-                    BEGIN_LOG_FACTORIALS, endCopy)
+                    BEGIN_LOG_FACTORIALS, endCopy
+                )
             }
             // All values to be computed
             else endCopy = BEGIN_LOG_FACTORIALS
@@ -50,15 +68,11 @@ internal object InternalUtils {
             if (n < logFactorials.size)
                 return logFactorials[n]
 
-            return if (n < FACTORIALS.size) ln(
-                FACTORIALS[n].toDouble()) else InternalGamma.logGamma(
-                n + 1.0
-            )
+            return if (n < FACTORIALS.size) ln(FACTORIALS[n].toDouble()) else InternalGamma.logGamma(n + 1.0)
         }
 
         companion object {
-            fun create(): FactorialLog =
-                FactorialLog(0, null)
+            fun create(): FactorialLog = FactorialLog(0, null)
         }
     }
 }
diff --git a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/LargeMeanPoissonSampler.kt b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/LargeMeanPoissonSampler.kt
index f14aaa8ab..47e84327a 100644
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/LargeMeanPoissonSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/LargeMeanPoissonSampler.kt
@@ -2,9 +2,9 @@ package scientifik.kmath.prob.samplers
 
 import scientifik.kmath.chains.Chain
 import scientifik.kmath.chains.ConstantChain
-import scientifik.kmath.chains.SimpleChain
 import scientifik.kmath.prob.RandomGenerator
 import scientifik.kmath.prob.Sampler
+import scientifik.kmath.prob.chain
 import scientifik.kmath.prob.next
 import kotlin.math.*
 
@@ -35,7 +35,7 @@ class LargeMeanPoissonSampler private constructor(val mean: Double) : Sampler<In
     else  // Not used.
         KempSmallMeanPoissonSampler.of(mean - lambda)
 
-    override fun sample(generator: RandomGenerator): Chain<Int> = SimpleChain {
+    override fun sample(generator: RandomGenerator): Chain<Int> = generator.chain {
         // This will never be null. It may be a no-op delegate that returns zero.
         val y2 = smallMeanPoissonSampler.next(generator)
         var x: Double
diff --git a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/SmallMeanPoissonSampler.kt b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/SmallMeanPoissonSampler.kt
index 966d7db9a..dd1a419c9 100644
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/SmallMeanPoissonSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/SmallMeanPoissonSampler.kt
@@ -1,7 +1,6 @@
 package scientifik.kmath.prob.samplers
 
 import scientifik.kmath.chains.Chain
-import scientifik.kmath.chains.SimpleChain
 import scientifik.kmath.prob.RandomGenerator
 import scientifik.kmath.prob.Sampler
 import scientifik.kmath.prob.chain
diff --git a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/ZigguratNormalizedGaussianSampler.kt b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/ZigguratNormalizedGaussianSampler.kt
index de593a811..a7956deee 100644
--- a/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/ZigguratNormalizedGaussianSampler.kt
+++ b/kmath-prob/src/commonMain/kotlin/scientifik/kmath/prob/samplers/ZigguratNormalizedGaussianSampler.kt
@@ -1,7 +1,6 @@
 package scientifik.kmath.prob.samplers
 
 import scientifik.kmath.chains.Chain
-import scientifik.kmath.chains.SimpleChain
 import scientifik.kmath.prob.RandomGenerator
 import scientifik.kmath.prob.Sampler
 import scientifik.kmath.prob.chain