From 8898f908ef6cd6a3ce9458af13cc9abe160e8649 Mon Sep 17 00:00:00 2001
From: Andrei Kislitsyn
Date: Mon, 3 May 2021 18:45:18 +0300
Subject: [PATCH] statistic algebra

---
 .../tensors/api/StatisticTensorAlgebra.kt     | 137 ++++++++++++++++++
 .../algebras/DoubleStatisticTensorAlgebra.kt  | 115 +++++++++++++++
 2 files changed, 252 insertions(+)
 create mode 100644 kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/StatisticTensorAlgebra.kt
 create mode 100644 kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/algebras/DoubleStatisticTensorAlgebra.kt

diff --git a/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/StatisticTensorAlgebra.kt b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/StatisticTensorAlgebra.kt
new file mode 100644
index 000000000..e3a2a1124
--- /dev/null
+++ b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/StatisticTensorAlgebra.kt
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2018-2021 KMath contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */
+
+package space.kscience.kmath.tensors.api
+
+import space.kscience.kmath.tensors.core.DoubleTensor
+
+/**
+ * Common algebra with statistics methods. Operates on [Tensor].
+ *
+ * @param T the type of items closed under division in the tensors.
+ */
+
+public interface StatisticTensorAlgebra<T>: TensorAlgebra<T> {
+
+    /**
+     * Returns the minimum value of all elements in the input tensor.
+     *
+     * @return the minimum value of all elements in the input tensor.
+     */
+    public fun Tensor<T>.min(): Double
+
+    /**
+     * Returns the minimum value of each row of the input tensor in the given dimension [dim].
+     *
+     * If [keepDim] is true, the output tensor is of the same size as
+     * input except in the dimension [dim] where it is of size 1.
+     * Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension.
+     *
+     * @param dim the dimension to reduce.
+     * @param keepDim whether the output tensor has [dim] retained or not.
+     * @return the minimum value of each row of the input tensor in the given dimension [dim].
+     */
+    public fun Tensor<T>.min(dim: Int, keepDim: Boolean): DoubleTensor
+
+    /**
+     * Returns the maximum value of all elements in the input tensor.
+     *
+     * @return the maximum value of all elements in the input tensor.
+     */
+    public fun Tensor<T>.max(): Double
+
+    /**
+     * Returns the maximum value of each row of the input tensor in the given dimension [dim].
+     *
+     * If [keepDim] is true, the output tensor is of the same size as
+     * input except in the dimension [dim] where it is of size 1.
+     * Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension.
+     *
+     * @param dim the dimension to reduce.
+     * @param keepDim whether the output tensor has [dim] retained or not.
+     * @return the maximum value of each row of the input tensor in the given dimension [dim].
+     */
+    public fun Tensor<T>.max(dim: Int, keepDim: Boolean): DoubleTensor
+
+    /**
+     * Returns the sum of all elements in the input tensor.
+     *
+     * @return the sum of all elements in the input tensor.
+     */
+    public fun Tensor<T>.sum(): Double
+
+    /**
+     * Returns the sum of each row of the input tensor in the given dimension [dim].
+     *
+     * If [keepDim] is true, the output tensor is of the same size as
+     * input except in the dimension [dim] where it is of size 1.
+     * Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension.
+     *
+     * @param dim the dimension to reduce.
+     * @param keepDim whether the output tensor has [dim] retained or not.
+     * @return the sum of each row of the input tensor in the given dimension [dim].
+     */
+    public fun Tensor<T>.sum(dim: Int, keepDim: Boolean): DoubleTensor
+
+    /**
+     * Returns the mean of all elements in the input tensor.
+     *
+     * @return the mean of all elements in the input tensor.
+     */
+    public fun Tensor<T>.mean(): Double
+
+    /**
+     * Returns the mean of each row of the input tensor in the given dimension [dim].
+     *
+     * If [keepDim] is true, the output tensor is of the same size as
+     * input except in the dimension [dim] where it is of size 1.
+     * Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension.
+     *
+     * @param dim the dimension to reduce.
+     * @param keepDim whether the output tensor has [dim] retained or not.
+     * @return the mean of each row of the input tensor in the given dimension [dim].
+     */
+    public fun Tensor<T>.mean(dim: Int, keepDim: Boolean): DoubleTensor
+
+    /**
+     * Returns the standard deviation of all elements in the input tensor.
+     *
+     * @return the standard deviation of all elements in the input tensor.
+     */
+    public fun Tensor<T>.std(): Double
+
+    /**
+     * Returns the standard deviation of each row of the input tensor in the given dimension [dim].
+     *
+     * If [keepDim] is true, the output tensor is of the same size as
+     * input except in the dimension [dim] where it is of size 1.
+     * Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension.
+     *
+     * @param dim the dimension to reduce.
+     * @param keepDim whether the output tensor has [dim] retained or not.
+     * @return the standard deviation of each row of the input tensor in the given dimension [dim].
+     */
+    public fun Tensor<T>.std(dim: Int, keepDim: Boolean): DoubleTensor
+
+    /**
+     * Returns the variance of all elements in the input tensor.
+     *
+     * @return the variance of all elements in the input tensor.
+     */
+    public fun Tensor<T>.variance(): Double
+
+    /**
+     * Returns the variance of each row of the input tensor in the given dimension [dim].
+     *
+     * If [keepDim] is true, the output tensor is of the same size as
+     * input except in the dimension [dim] where it is of size 1.
+     * Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension.
+     *
+     * @param dim the dimension to reduce.
+     * @param keepDim whether the output tensor has [dim] retained or not.
+     * @return the variance of each row of the input tensor in the given dimension [dim].
+     */
+    public fun Tensor<T>.variance(dim: Int, keepDim: Boolean): DoubleTensor
+}
\ No newline at end of file
diff --git a/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/algebras/DoubleStatisticTensorAlgebra.kt b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/algebras/DoubleStatisticTensorAlgebra.kt
new file mode 100644
index 000000000..3914a0dfb
--- /dev/null
+++ b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/algebras/DoubleStatisticTensorAlgebra.kt
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2018-2021 KMath contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */
+
+package space.kscience.kmath.tensors.core.algebras
+
+import kotlin.math.sqrt
+
+import space.kscience.kmath.tensors.api.*
+import space.kscience.kmath.tensors.core.*
+import space.kscience.kmath.tensors.core.algebras.DoubleStatisticTensorAlgebra.max
+import space.kscience.kmath.tensors.core.algebras.DoubleStatisticTensorAlgebra.mean
+import space.kscience.kmath.tensors.core.algebras.DoubleStatisticTensorAlgebra.min
+import space.kscience.kmath.tensors.core.algebras.DoubleStatisticTensorAlgebra.sum
+import space.kscience.kmath.tensors.core.algebras.DoubleStatisticTensorAlgebra.variance
+
+/**
+ * Statistical reductions over tensors of doubles.
+ *
+ * Each statistic can be computed over all elements, giving a [Double], or along a single
+ * dimension, giving a [DoubleTensor]. Variance and standard deviation divide by `n - 1`,
+ * so reducing a single element yields `NaN`.
+ */
+public object DoubleStatisticTensorAlgebra : StatisticTensorAlgebra<Double>, DoubleTensorAlgebra() {
+
+    /**
+     * Applies [foldFunction] to the array returned by `tensor.toDoubleArray()`.
+     *
+     * @param foldFunction the reduction to apply.
+     * @return the value produced by [foldFunction].
+     */
+    private fun Tensor<Double>.fold(foldFunction: (DoubleArray) -> Double): Double =
+        foldFunction(tensor.toDoubleArray())
+
+    /**
+     * Reduces the receiver along [dim] by applying [foldFunction] to each slice taken along [dim].
+     *
+     * Reducing a 1-D receiver with [keepDim] set to `false` yields a tensor of shape `[1]`,
+     * because squeezing its only dimension would leave an empty shape.
+     *
+     * @param foldFunction the reduction applied to the `shape[dim]` elements of each slice.
+     * @param dim the dimension to reduce, in `0 until dimension`.
+     * @param keepDim whether [dim] is retained with size 1 in the result.
+     * @return a tensor holding one reduced value per slice.
+     * @throws IllegalStateException if [dim] is out of range.
+     */
+    private fun Tensor<Double>.foldDim(
+        foldFunction: (DoubleArray) -> Double,
+        dim: Int,
+        keepDim: Boolean
+    ): DoubleTensor {
+        check(dim in 0 until dimension) { "Dimension $dim out of range $dimension" }
+        val leading = shape.take(dim).toIntArray()
+        val trailing = shape.takeLast(dimension - dim - 1).toIntArray()
+        // Squeeze only when another dimension remains; otherwise the result shape would be empty.
+        val resShape = if (keepDim || dimension == 1) leading + intArrayOf(1) + trailing else leading + trailing
+        val resNumElements = resShape.reduce(Int::times)
+        val resTensor = DoubleTensor(resShape, DoubleArray(resNumElements), 0)
+        for (index in resTensor.linearStructure.indices()) {
+            val prefix = index.take(dim).toIntArray()
+            val suffix = index.takeLast(dimension - dim - 1).toIntArray()
+            resTensor[index] = foldFunction(DoubleArray(shape[dim]) { i ->
+                this[prefix + intArrayOf(i) + suffix]
+            })
+        }
+
+        return resTensor
+    }
+
+    /** Returns the smallest element of the receiver, failing with a message when it is empty. */
+    private fun DoubleArray.minOrFail(): Double =
+        checkNotNull(minOrNull()) { "Cannot compute the minimum of zero elements" }
+
+    /** Returns the largest element of the receiver, failing with a message when it is empty. */
+    private fun DoubleArray.maxOrFail(): Double =
+        checkNotNull(maxOrNull()) { "Cannot compute the maximum of zero elements" }
+
+    /** Computes the variance of [values] about `sum / count`, normalised by `count - 1`. */
+    private fun sampleVariance(values: DoubleArray, count: Int): Double {
+        val mean = values.sum() / count
+        return values.sumOf { (it - mean) * (it - mean) } / (count - 1)
+    }
+
+    override fun Tensor<Double>.min(): Double = fold { it.minOrFail() }
+
+    override fun Tensor<Double>.min(dim: Int, keepDim: Boolean): DoubleTensor =
+        foldDim({ slice -> slice.minOrFail() }, dim, keepDim)
+
+    override fun Tensor<Double>.max(): Double = fold { it.maxOrFail() }
+
+    override fun Tensor<Double>.max(dim: Int, keepDim: Boolean): DoubleTensor =
+        foldDim({ slice -> slice.maxOrFail() }, dim, keepDim)
+
+    override fun Tensor<Double>.sum(): Double = fold { it.sum() }
+
+    override fun Tensor<Double>.sum(dim: Int, keepDim: Boolean): DoubleTensor =
+        foldDim({ slice -> slice.sum() }, dim, keepDim)
+
+    override fun Tensor<Double>.mean(): Double = fold { it.sum() / tensor.numElements }
+
+    override fun Tensor<Double>.mean(dim: Int, keepDim: Boolean): DoubleTensor =
+        foldDim({ slice -> slice.sum() / slice.size }, dim, keepDim)
+
+    override fun Tensor<Double>.std(): Double = fold { sqrt(sampleVariance(it, tensor.numElements)) }
+
+    override fun Tensor<Double>.std(dim: Int, keepDim: Boolean): DoubleTensor =
+        foldDim({ slice -> sqrt(sampleVariance(slice, slice.size)) }, dim, keepDim)
+
+    override fun Tensor<Double>.variance(): Double = fold { sampleVariance(it, tensor.numElements) }
+
+    override fun Tensor<Double>.variance(dim: Int, keepDim: Boolean): DoubleTensor =
+        foldDim({ slice -> sampleVariance(slice, slice.size) }, dim, keepDim)
+
+}
\ No newline at end of file