stat-methods/notebooks/kotlin/distribution-demo.ipynb
2023-09-23 09:41:04 +03:00

3.6 MiB

In [1]:
@file:Repository("https://repo.kotlin.link")
@file:DependsOn("space.kscience:plotlykt-jupyter:0.5.0")
In [2]:
Plotly.jupyter.notebook()
Plotly notebook integration switch into the legacy mode.
In [3]:
import kotlin.math.PI
import kotlin.math.exp
import kotlin.math.pow
import kotlin.math.sqrt

typealias Parameters = Map<String, Double>

fun interface Distribution {
    fun value(parameters: Parameters): Double
}

fun normal(mean: Double, sigma: Double): Distribution = Distribution { parameters ->
    val x: Double = parameters["x"] ?: error("X not found")
    1.0 / sqrt(2 * PI * sigma) * exp(-(x - mean).pow(2) / 2 / sigma.pow(2))
}

operator fun Distribution.plus(other: Distribution): Distribution = Distribution { parameters ->
    this.value(parameters) + other.value(parameters)
}

data class DistributionMax(val arg: Parameters, val value: Double)

fun Distribution.findMax1D(
    parameter: String,
    range: ClosedFloatingPointRange<Double>,
    tolerance: Double = 0.05,
): DistributionMax {
    require(range.endInclusive > range.start) { "Wrong range: $range" }
    val numPoints = ((range.endInclusive - range.start) / tolerance).toInt()
    val xs: List<Map<String, Double>> = List(numPoints) { i -> mapOf(parameter to range.start + tolerance * i) }
    val pointValues = xs.map { it to value(it) }

    val max = pointValues.maxByOrNull { it.second } ?: error("Maximum not found")
    return DistributionMax(max.first, max.second)
}

data class ConfidenceInterval1D(val cl: Double, val left: Parameters, val right: Parameters)

fun Distribution.findConfidenceInterval1D(
    parameter: String,
    range: ClosedFloatingPointRange<Double>,
    cl: Double = 0.90,
    tolerance: Double = 0.05,
): ConfidenceInterval1D {
    require(range.endInclusive > range.start) { "Wrong range: $range" }
    require(cl > 0)
    val alpha = (1.0 - cl) / 2.0

    val numPoints = ((range.endInclusive - range.start) / tolerance).toInt()
    val xs: List<Map<String, Double>> = List(numPoints) { i -> mapOf(parameter to range.start + tolerance * i) }
    val pointValues = xs.map { it to value(it) }

    val norm: Double = pointValues.sumOf { it.second }

    var left = pointValues.first()
    var leftSum = 0.0
    var leftIndex = 0
    while (leftSum < alpha * norm) {
        left = pointValues[leftIndex]
        leftSum += left.second
        leftIndex++
    }

    var right = pointValues.last()
    var rightSum = 0.0
    var rightIndex = pointValues.size - 1
    while (rightSum < alpha * norm) {
        right = pointValues[rightIndex]
        rightSum += right.second
        rightIndex--
    }

    return ConfidenceInterval1D(cl, left.first, right.first)
}

val Parameters.x get() = get("x") ?: error("Value 'x' not found")

fun Distribution.filter(block: (Parameters) -> Boolean): Distribution = Distribution {
    if (block(it)) this.value(it) else 0.0
}
In [4]:
val distribution = (normal(0.0, 1.0) + normal(1.0, 0.2)).filter { it.x > 0.0 }

val max = distribution.findMax1D("x", -2.0..2.0)
val interval = distribution.findConfidenceInterval1D("x", -4.0..4.0)

val xs: List<Double> = (-500..500).map { it.toDouble() / 250 }

val ys: List<Double> = xs.map { x ->
    val arg: Parameters = mapOf("x" to x)
    distribution.value(arg)
}

Plotly.plot {
    scatter {
        x.numbers = xs
        y.numbers = ys
    }
    scatter {
        name = "maximum"
        x.numbers = listOf(max.arg.x)
        y.numbers = listOf(max.value)
    }
    scatter {
        name = "interval"
        x.numbers = listOf(interval.left.x, interval.right.x)
        y.numbers = listOf(0.0, 0.0)
    }
}
<html> <head> </head>
</html>