statistic algebra

2021-05-03 18:45:18 +03:00 · 2021-05-03 18:45:18 +03:00 · 8898f908ef
commit 8898f908ef
parent 5aaba0dae4
2 changed files with 242 additions and 0 deletions
--- a/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/StatisticTensorAlgebra.kt
+++ b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/StatisticTensorAlgebra.kt
@ -0,0 +1,137 @@
+/*
+ * Copyright 2018-2021 KMath contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */
+
+package space.kscience.kmath.tensors.api
+
+import space.kscience.kmath.tensors.core.DoubleTensor
+
+/**
+ * Common algebra with statistics methods. Operates on [Tensor].
+ *
+ * @param T the type of items closed under division in the tensors.
+ */
+
+public interface StatisticTensorAlgebra<T>: TensorAlgebra<T> {
+
+    /**
+     * Returns the minimum value of all elements in the input tensor.
+     *
+     * @return the minimum value of all elements in the input tensor.
+     */
+    public fun Tensor<T>.min(): Double
+
+    /**
+     * Returns the minimum value of each row of the input tensor in the given dimension [dim].
+     *
+     * If [keepDim] is true, the output tensor is of the same size as
+     * input except in the dimension [dim] where it is of size 1.
+     * Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension.
+     *
+     * @param dim the dimension to reduce.
+     * @param keepDim whether the output tensor has [dim] retained or not.
+     * @return the minimum value of each row of the input tensor in the given dimension [dim].
+     */
+    public fun Tensor<T>.min(dim: Int, keepDim: Boolean): DoubleTensor
+
+    /**
+     * Returns the maximum value of all elements in the input tensor.
+     *
+     * @return the maximum value of all elements in the input tensor.
+     */
+    public fun Tensor<T>.max(): Double
+
+    /**
+     * Returns the maximum value of each row of the input tensor in the given dimension [dim].
+     *
+     * If [keepDim] is true, the output tensor is of the same size as
+     * input except in the dimension [dim] where it is of size 1.
+     * Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension.
+     *
+     * @param dim the dimension to reduce.
+     * @param keepDim whether the output tensor has [dim] retained or not.
+     * @return the maximum value of each row of the input tensor in the given dimension [dim].
+     */
+    public fun Tensor<T>.max(dim: Int, keepDim: Boolean): DoubleTensor
+
+    /**
+     * Returns the sum of all elements in the input tensor.
+     *
+     * @return the sum of all elements in the input tensor.
+     */
+    public fun Tensor<T>.sum(): Double
+
+    /**
+     * Returns the sum of each row of the input tensor in the given dimension [dim].
+     *
+     * If [keepDim] is true, the output tensor is of the same size as
+     * input except in the dimension [dim] where it is of size 1.
+     * Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension.
+     *
+     * @param dim the dimension to reduce.
+     * @param keepDim whether the output tensor has [dim] retained or not.
+     * @return the sum of each row of the input tensor in the given dimension [dim].
+     */
+    public fun Tensor<T>.sum(dim: Int, keepDim: Boolean): DoubleTensor
+
+    /**
+     * Returns the mean of all elements in the input tensor.
+     *
+     * @return the mean of all elements in the input tensor.
+     */
+    public fun Tensor<T>.mean(): Double
+
+    /**
+     * Returns the mean of each row of the input tensor in the given dimension [dim].
+     *
+     * If [keepDim] is true, the output tensor is of the same size as
+     * input except in the dimension [dim] where it is of size 1.
+     * Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension.
+     *
+     * @param dim the dimension to reduce.
+     * @param keepDim whether the output tensor has [dim] retained or not.
+     * @return the mean of each row of the input tensor in the given dimension [dim].
+     */
+    public fun Tensor<T>.mean(dim: Int, keepDim: Boolean): DoubleTensor
+
+    /**
+     * Returns the standard deviation of all elements in the input tensor.
+     *
+     * @return the standard deviation of all elements in the input tensor.
+     */
+    public fun Tensor<T>.std(): Double
+
+    /**
+     * Returns the standard deviation of each row of the input tensor in the given dimension [dim].
+     *
+     * If [keepDim] is true, the output tensor is of the same size as
+     * input except in the dimension [dim] where it is of size 1.
+     * Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension.
+     *
+     * @param dim the dimension to reduce.
+     * @param keepDim whether the output tensor has [dim] retained or not.
+     * @return the standard deviation of each row of the input tensor in the given dimension [dim].
+     */
+    public fun Tensor<T>.std(dim: Int, keepDim: Boolean): DoubleTensor
+
+    /**
+     * Returns the variance of all elements in the input tensor.
+     *
+     * @return the variance of all elements in the input tensor.
+     */
+    public fun Tensor<T>.variance(): Double
+
+    /**
+     * Returns the variance of each row of the input tensor in the given dimension [dim].
+     *
+     * If [keepDim] is true, the output tensor is of the same size as
+     * input except in the dimension [dim] where it is of size 1.
+     * Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension.
+     *
+     * @param dim the dimension to reduce.
+     * @param keepDim whether the output tensor has [dim] retained or not.
+     * @return the variance of each row of the input tensor in the given dimension [dim].
+     */
+    public fun Tensor<T>.variance(dim: Int, keepDim: Boolean): DoubleTensor
+}
--- a/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/algebras/DoubleStatisticTensorAlgebra.kt
+++ b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/algebras/DoubleStatisticTensorAlgebra.kt
@ -0,0 +1,105 @@
+/*
+ * Copyright 2018-2021 KMath contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */
+
+package space.kscience.kmath.tensors.core.algebras
+
+import kotlin.math.sqrt
+
+import space.kscience.kmath.tensors.api.*
+import space.kscience.kmath.tensors.core.*
+import space.kscience.kmath.tensors.core.algebras.DoubleStatisticTensorAlgebra.max
+import space.kscience.kmath.tensors.core.algebras.DoubleStatisticTensorAlgebra.mean
+import space.kscience.kmath.tensors.core.algebras.DoubleStatisticTensorAlgebra.min
+import space.kscience.kmath.tensors.core.algebras.DoubleStatisticTensorAlgebra.sum
+import space.kscience.kmath.tensors.core.algebras.DoubleStatisticTensorAlgebra.variance
+
+public object DoubleStatisticTensorAlgebra : StatisticTensorAlgebra<Double>, DoubleTensorAlgebra() {
+
+    private fun Tensor<Double>.fold(foldFunction: (DoubleArray) -> Double): Double {
+        return foldFunction(this.tensor.toDoubleArray())
+    }
+
+    private fun Tensor<Double>.foldDim(
+        foldFunction: (DoubleArray) -> Double,
+        dim: Int,
+        keepDim: Boolean
+    ): DoubleTensor {
+        check(dim < dimension) { "Dimension $dim out of range $dimension" }
+        val resShape = if (keepDim) {
+            shape.take(dim).toIntArray() + intArrayOf(1) + shape.takeLast(dimension - dim - 1).toIntArray()
+        } else {
+            shape.take(dim).toIntArray() + shape.takeLast(dimension - dim - 1).toIntArray()
+        }
+        val resNumElements = resShape.reduce(Int::times)
+        val resTensor = DoubleTensor(resShape, DoubleArray(resNumElements) { 0.0 }, 0)
+        for (index in resTensor.linearStructure.indices()) {
+            val prefix = index.take(dim).toIntArray()
+            val suffix = index.takeLast(dimension - dim - 1).toIntArray()
+            resTensor[index] = foldFunction(DoubleArray(shape[dim]) { i ->
+                this[prefix + intArrayOf(i) + suffix]
+            })
+        }
+
+        return resTensor
+    }
+
+    override fun Tensor<Double>.min(): Double = this.fold { it.minOrNull()!! }
+
+    override fun Tensor<Double>.min(dim: Int, keepDim: Boolean): DoubleTensor =
+        foldDim({ x -> x.minOrNull()!! }, dim, keepDim)
+
+    override fun Tensor<Double>.max(): Double = this.fold { it.maxOrNull()!! }
+
+    override fun Tensor<Double>.max(dim: Int, keepDim: Boolean): DoubleTensor =
+        foldDim({ x -> x.maxOrNull()!! }, dim, keepDim)
+
+    override fun Tensor<Double>.sum(): Double = this.fold { it.sum() }
+
+    override fun Tensor<Double>.sum(dim: Int, keepDim: Boolean): DoubleTensor =
+        foldDim({ x -> x.sum() }, dim, keepDim)
+
+    override fun Tensor<Double>.mean(): Double = this.fold { it.sum() / tensor.numElements }
+
+    override fun Tensor<Double>.mean(dim: Int, keepDim: Boolean): DoubleTensor =
+        foldDim(
+            { arr ->
+                check(dim < dimension) { "Dimension $dim out of range $dimension" }
+                arr.sum() / shape[dim]
+            },
+            dim,
+            keepDim
+        )
+
+    override fun Tensor<Double>.std(): Double = this.fold { arr ->
+        val mean = arr.sum() / tensor.numElements
+        sqrt(arr.sumOf { (it - mean) * (it - mean) } / (tensor.numElements - 1))
+    }
+
+    override fun Tensor<Double>.std(dim: Int, keepDim: Boolean): DoubleTensor = foldDim(
+        { arr ->
+            check(dim < dimension) { "Dimension $dim out of range $dimension" }
+            val mean = arr.sum() / shape[dim]
+            sqrt(arr.sumOf { (it - mean) * (it - mean) } / (shape[dim] - 1))
+        },
+        dim,
+        keepDim
+    )
+
+    override fun Tensor<Double>.variance(): Double = this.fold { arr ->
+        val mean = arr.sum() / tensor.numElements
+        arr.sumOf { (it - mean) * (it - mean) } / (tensor.numElements - 1)
+    }
+
+    override fun Tensor<Double>.variance(dim: Int, keepDim: Boolean): DoubleTensor = foldDim(
+        { arr ->
+            check(dim < dimension) { "Dimension $dim out of range $dimension" }
+            val mean = arr.sum() / shape[dim]
+            arr.sumOf { (it - mean) * (it - mean) } / (shape[dim] - 1)
+        },
+        dim,
+        keepDim
+    )
+
+}