diff --git a/examples/src/main/kotlin/space/kscience/kmath/tensors/NeuralNetwork.kt b/examples/src/main/kotlin/space/kscience/kmath/tensors/NeuralNetwork.kt
new file mode 100644
index 000000000..ea863988c
--- /dev/null
+++ b/examples/src/main/kotlin/space/kscience/kmath/tensors/NeuralNetwork.kt
@@ -0,0 +1,245 @@
+/*
+ * Copyright 2018-2021 KMath contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */
+
+package space.kscience.kmath.tensors
+
+import space.kscience.kmath.operations.invoke
+import space.kscience.kmath.tensors.core.DoubleTensor
+import space.kscience.kmath.tensors.core.algebras.BroadcastDoubleTensorAlgebra
+import space.kscience.kmath.tensors.core.algebras.DoubleAnalyticTensorAlgebra
+import space.kscience.kmath.tensors.core.algebras.DoubleTensorAlgebra
+import space.kscience.kmath.tensors.core.toDoubleArray
+import kotlin.math.sqrt
+
+const val seed = 100500L
+
+// Simple feedforward neural network with backpropagation training
+
+// interface of a network layer
+interface Layer {
+    fun forward(input: DoubleTensor): DoubleTensor
+    fun backward(input: DoubleTensor, outputError: DoubleTensor): DoubleTensor
+}
+
+// activation layer
+open class Activation(
+    val activation: (DoubleTensor) -> DoubleTensor,
+    val activationDer: (DoubleTensor) -> DoubleTensor
+) : Layer {
+    override fun forward(input: DoubleTensor): DoubleTensor {
+        return activation(input)
+    }
+
+    override fun backward(input: DoubleTensor, outputError: DoubleTensor): DoubleTensor {
+        return DoubleTensorAlgebra { outputError * activationDer(input) }
+    }
+}
+
+fun relu(x: DoubleTensor): DoubleTensor = DoubleTensorAlgebra {
+    x.map { if (it > 0) it else 0.0 }
+}
+
+fun reluDer(x: DoubleTensor): DoubleTensor = DoubleTensorAlgebra {
+    x.map { if (it > 0) 1.0 else 0.0 }
+}
+
+// activation layer with ReLU activator
+class ReLU : Activation(::relu, ::reluDer)
+
+fun sigmoid(x: DoubleTensor): DoubleTensor = DoubleAnalyticTensorAlgebra {
+    1.0 / (1.0 + (-x).exp())
+}
+
+fun sigmoidDer(x: DoubleTensor): DoubleTensor = DoubleTensorAlgebra {
+    sigmoid(x) * (1.0 - sigmoid(x))
+}
+
+// activation layer with sigmoid activator
+class Sigmoid : Activation(::sigmoid, ::sigmoidDer)
+
+// dense layer
+class Dense(
+    private val inputUnits: Int,
+    private val outputUnits: Int,
+    private val learningRate: Double = 0.1
+) : Layer {
+
+    private val weights: DoubleTensor = DoubleTensorAlgebra {
+        randomNormal(
+            intArrayOf(inputUnits, outputUnits),
+            seed
+        ) * sqrt(2.0 / (inputUnits + outputUnits))
+    }
+
+    private val bias: DoubleTensor = DoubleTensorAlgebra { zeros(intArrayOf(outputUnits)) }
+
+    override fun forward(input: DoubleTensor): DoubleTensor {
+        return BroadcastDoubleTensorAlgebra { (input dot weights) + bias }
+    }
+
+    override fun backward(input: DoubleTensor, outputError: DoubleTensor): DoubleTensor = DoubleTensorAlgebra {
+        val gradInput = outputError dot weights.transpose()
+
+        val gradW = input.transpose() dot outputError
+        val gradBias = DoubleAnalyticTensorAlgebra {
+            outputError.mean(dim = 0, keepDim = false) * input.shape[0].toDouble()
+        }
+
+        weights -= learningRate * gradW
+        bias -= learningRate * gradBias
+
+        gradInput
+    }
+
+}
+
+// simple accuracy equal to the proportion of correct answers
+fun accuracy(yPred: DoubleTensor, yTrue: DoubleTensor): Double {
+    check(yPred.shape contentEquals yTrue.shape)
+    val n = yPred.shape[0]
+    var correctCnt = 0
+    for (i in 0 until n) {
+        if (yPred[intArrayOf(i, 0)] == yTrue[intArrayOf(i, 0)]) {
+            correctCnt += 1
+        }
+    }
+    return correctCnt.toDouble() / n.toDouble()
+}
+
+// neural network class
+class NeuralNetwork(private val layers: List<Layer>) {
+    private fun softMaxLoss(yPred: DoubleTensor, yTrue: DoubleTensor): DoubleTensor = DoubleAnalyticTensorAlgebra {
+
+        val onesForAnswers = yPred.zeroesLike()
+        yTrue.toDoubleArray().forEachIndexed { index, labelDouble ->
+            val label = labelDouble.toInt()
+            onesForAnswers[intArrayOf(index, label)] = 1.0
+        }
+
+        val softmaxValue = BroadcastDoubleTensorAlgebra { yPred.exp() / yPred.exp().sum(dim = 1, keepDim = true) }
+
+        (-onesForAnswers + softmaxValue) / (yPred.shape[0].toDouble())
+    }
+
+    @OptIn(ExperimentalStdlibApi::class)
+    private fun forward(x: DoubleTensor): List<DoubleTensor> {
+        var input = x
+
+        return buildList {
+            layers.forEach { layer ->
+                val output = layer.forward(input)
+                add(output)
+                input = output
+            }
+        }
+    }
+
+    @OptIn(ExperimentalStdlibApi::class)
+    private fun train(xTrain: DoubleTensor, yTrain: DoubleTensor) {
+        val layerInputs = buildList {
+            add(xTrain)
+            addAll(forward(xTrain))
+        }
+
+        var lossGrad = softMaxLoss(layerInputs.last(), yTrain)
+
+        layers.zip(layerInputs).reversed().forEach { (layer, input) ->
+            lossGrad = layer.backward(input, lossGrad)
+        }
+    }
+
+    fun fit(xTrain: DoubleTensor, yTrain: DoubleTensor, batchSize: Int, epochs: Int) = DoubleTensorAlgebra {
+        fun iterBatch(x: DoubleTensor, y: DoubleTensor): Sequence<Pair<DoubleTensor, DoubleTensor>> = sequence {
+            val n = x.shape[0]
+            val shuffledIndices = (0 until n).shuffled()
+            for (i in 0 until n step batchSize) {
+                val excerptIndices = shuffledIndices.drop(i).take(batchSize).toIntArray()
+                val batch = x.rowsByIndices(excerptIndices) to y.rowsByIndices(excerptIndices)
+                yield(batch)
+            }
+        }
+
+        for (epoch in 0 until epochs) {
+            println("Epoch ${epoch + 1}/$epochs")
+            for ((xBatch, yBatch) in iterBatch(xTrain, yTrain)) {
+                train(xBatch, yBatch)
+            }
+            println("Accuracy:${accuracy(yTrain, predict(xTrain).argMax(1, true))}")
+        }
+    }
+
+    fun predict(x: DoubleTensor): DoubleTensor {
+        return forward(x).last()
+    }
+
+}
+
+
+@OptIn(ExperimentalStdlibApi::class)
+fun main() {
+    DoubleTensorAlgebra {
+        val features = 5
+        val sampleSize = 250
+        val trainSize = 180
+        val testSize = sampleSize - trainSize
+
+        // take a sample of features from a normal distribution
+        val x = randomNormal(intArrayOf(sampleSize, features), seed) * 2.5
+        BroadcastDoubleTensorAlgebra {
+            x += fromArray(
+                intArrayOf(5),
+                doubleArrayOf(0.0, -1.0, -2.5, -3.0, 5.5) // row means
+            )
+        }
+
+        // define the class as '1' if the sum of features > 0 and '0' otherwise
+        val y = fromArray(
+            intArrayOf(sampleSize, 1),
+            DoubleArray(sampleSize) { i ->
+                if (x[i].sum() > 0.0) {
+                    1.0
+                } else {
+                    0.0
+                }
+            }
+        )
+
+        // split into train and test
+        val trainIndices = (0 until trainSize).toList().toIntArray()
+        val testIndices = (trainSize until sampleSize).toList().toIntArray()
+
+        val xTrain = x.rowsByIndices(trainIndices)
+        val yTrain = y.rowsByIndices(trainIndices)
+
+        val xTest = x.rowsByIndices(testIndices)
+        val yTest = y.rowsByIndices(testIndices)
+
+        // build the model
+        val layers = buildList {
+            add(Dense(features, 64))
+            add(ReLU())
+            add(Dense(64, 16))
+            add(ReLU())
+            add(Dense(16, 2))
+            add(Sigmoid())
+        }
+        val model = NeuralNetwork(layers)
+
+        // fit it to the train data
+        model.fit(xTrain, yTrain, batchSize = 20, epochs = 10)
+
+        // make a prediction
+        val prediction = model.predict(xTest)
+
+        // turn the raw prediction into labels via argMax
+        val predictionLabels = prediction.argMax(1, true)
+
+        // find out the accuracy
+        val acc = accuracy(yTest, predictionLabels)
println("Test accuracy:$acc") + + } +} diff --git a/examples/src/main/kotlin/space/kscience/kmath/tensors/PCA.kt b/examples/src/main/kotlin/space/kscience/kmath/tensors/PCA.kt index 1d72b2326..ee25b63a3 100644 --- a/examples/src/main/kotlin/space/kscience/kmath/tensors/PCA.kt +++ b/examples/src/main/kotlin/space/kscience/kmath/tensors/PCA.kt @@ -10,11 +10,14 @@ import space.kscience.kmath.tensors.core.algebras.BroadcastDoubleTensorAlgebra import space.kscience.kmath.tensors.core.algebras.DoubleAnalyticTensorAlgebra import space.kscience.kmath.tensors.core.algebras.DoubleLinearOpsTensorAlgebra -const val seed = 100500L + // simple PCA fun main(){ + val seed = 100500L + + // work in context with analytic methods DoubleAnalyticTensorAlgebra { // assume x is range from 0 until 10 diff --git a/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/TensorAlgebra.kt b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/TensorAlgebra.kt index b0a0f9618..417cff87f 100644 --- a/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/TensorAlgebra.kt +++ b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/TensorAlgebra.kt @@ -305,5 +305,16 @@ public interface TensorAlgebra: Algebra> { */ public fun Tensor.max(dim: Int, keepDim: Boolean): Tensor - + /** + * Returns the index of maximum value of each row of the input tensor in the given dimension [dim]. + * + * If [keepDim] is true, the output tensor is of the same size as + * input except in the dimension [dim] where it is of size 1. + * Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension. + * + * @param dim the dimension to reduce. + * @param keepDim whether the output tensor has [dim] retained or not. + * @return the the index of maximum value of each row of the input tensor in the given dimension [dim]. + */ + public fun Tensor.argMax(dim: Int, keepDim: Boolean): Tensor } diff --git a/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/algebras/DoubleTensorAlgebra.kt b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/algebras/DoubleTensorAlgebra.kt index 9500fbdec..6fac0a0ec 100644 --- a/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/algebras/DoubleTensorAlgebra.kt +++ b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/algebras/DoubleTensorAlgebra.kt @@ -561,4 +561,9 @@ public open class DoubleTensorAlgebra : TensorPartialDivisionAlgebra { override fun Tensor.max(dim: Int, keepDim: Boolean): DoubleTensor = foldDim({ x -> x.maxOrNull()!! }, dim, keepDim) + override fun Tensor.argMax(dim: Int, keepDim: Boolean): DoubleTensor = + foldDim({ x -> + x.withIndex().maxByOrNull { it.value }?.index!!.toDouble() + }, dim, keepDim) + }