forked from kscience/kmath
neural network!
This commit is contained in:
parent
febe526325
commit
1b1a078dea
@ -0,0 +1,245 @@
|
||||
/*
|
||||
* Copyright 2018-2021 KMath contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package space.kscience.kmath.tensors
|
||||
|
||||
import space.kscience.kmath.operations.invoke
|
||||
import space.kscience.kmath.tensors.core.DoubleTensor
|
||||
import space.kscience.kmath.tensors.core.algebras.BroadcastDoubleTensorAlgebra
|
||||
import space.kscience.kmath.tensors.core.algebras.DoubleAnalyticTensorAlgebra
|
||||
import space.kscience.kmath.tensors.core.algebras.DoubleTensorAlgebra
|
||||
import space.kscience.kmath.tensors.core.toDoubleArray
|
||||
import kotlin.math.sqrt
|
||||
|
||||
/** Seed passed to every `randomNormal` call in this file. */
const val seed: Long = 100500L
|
||||
|
||||
// Simple feedforward neural network with backpropagation training
|
||||
|
||||
/**
 * A single stage of the network.
 *
 * [NeuralNetwork] calls [forward] on every layer in order, then calls [backward] on every layer
 * in reverse order, handing each layer the same input it received during the forward pass.
 */
interface Layer {
    /**
     * Computes this layer's output for [input].
     */
    fun forward(input: DoubleTensor): DoubleTensor

    /**
     * Takes the [input] this layer saw during the forward pass and the error [outputError] coming
     * from the following layer; returns the error to hand to the preceding layer.
     */
    fun backward(input: DoubleTensor, outputError: DoubleTensor): DoubleTensor
}
|
||||
|
||||
/**
 * Layer without trainable state that is fully described by a pair of functions.
 *
 * @property activation function applied to the input in [forward].
 * @property activationDer function applied to the input in [backward]; its result is multiplied
 * by the incoming error.
 */
open class Activation(
    val activation: (DoubleTensor) -> DoubleTensor,
    val activationDer: (DoubleTensor) -> DoubleTensor
) : Layer {

    /** Returns `activation(input)`. */
    override fun forward(input: DoubleTensor): DoubleTensor = activation(input)

    /** Returns `outputError * activationDer(input)`, evaluated in [DoubleTensorAlgebra]. */
    override fun backward(input: DoubleTensor, outputError: DoubleTensor): DoubleTensor =
        DoubleTensorAlgebra {
            val localDerivative = activationDer(input)
            outputError * localDerivative
        }
}
|
||||
|
||||
/**
 * Rectified linear unit: maps every element of [x], keeping values greater than zero
 * and replacing everything else with `0.0`.
 */
fun relu(x: DoubleTensor): DoubleTensor {
    return DoubleTensorAlgebra {
        x.map { value ->
            if (value > 0) value else 0.0
        }
    }
}
|
||||
|
||||
/**
 * Derivative of [relu]: maps every element of [x] to `1.0` when it is greater than zero
 * and to `0.0` otherwise.
 */
fun reluDer(x: DoubleTensor): DoubleTensor {
    return DoubleTensorAlgebra {
        x.map { value ->
            if (value > 0) 1.0 else 0.0
        }
    }
}
|
||||
|
||||
/** [Activation] layer wired to [relu] and its derivative [reluDer]. */
class ReLU : Activation(activation = ::relu, activationDer = ::reluDer)
|
||||
|
||||
/**
 * Logistic function `1 / (1 + exp(-x))`, evaluated on the tensor [x]
 * inside [DoubleAnalyticTensorAlgebra].
 */
fun sigmoid(x: DoubleTensor): DoubleTensor = DoubleAnalyticTensorAlgebra {
    val denominator = 1.0 + (-x).exp()
    1.0 / denominator
}
|
||||
|
||||
/**
 * Derivative of [sigmoid], computed as `s * (1 - s)` where `s = sigmoid(x)`.
 *
 * The sigmoid is evaluated once and reused for both factors instead of being recomputed.
 */
fun sigmoidDer(x: DoubleTensor): DoubleTensor = DoubleTensorAlgebra {
    val s = sigmoid(x)
    s * (1.0 - s)
}
|
||||
|
||||
/** [Activation] layer wired to [sigmoid] and its derivative [sigmoidDer]. */
class Sigmoid : Activation(activation = ::sigmoid, activationDer = ::sigmoidDer)
|
||||
|
||||
/**
 * Dense layer: the forward pass computes `(input dot weights) + bias`, and the backward pass
 * updates both parameters in place with a plain gradient step.
 *
 * @param inputUnits first dimension of the weight tensor.
 * @param outputUnits second dimension of the weight tensor and length of the bias.
 * @param learningRate factor applied to the gradients before they are subtracted from the parameters.
 */
class Dense(
    private val inputUnits: Int,
    private val outputUnits: Int,
    private val learningRate: Double = 0.1
) : Layer {

    // Normal samples of shape (inputUnits, outputUnits), drawn with the file-level seed
    // and scaled by sqrt(2 / (inputUnits + outputUnits)).
    private val weights: DoubleTensor = DoubleTensorAlgebra {
        val scale = sqrt(2.0 / (inputUnits + outputUnits))
        randomNormal(intArrayOf(inputUnits, outputUnits), seed) * scale
    }

    // Bias starts as zeros of length outputUnits.
    private val bias: DoubleTensor = DoubleTensorAlgebra { zeros(intArrayOf(outputUnits)) }

    /** Returns `(input dot weights) + bias`; the sum is evaluated in [BroadcastDoubleTensorAlgebra]. */
    override fun forward(input: DoubleTensor): DoubleTensor =
        BroadcastDoubleTensorAlgebra { (input dot weights) + bias }

    /**
     * Computes the error for the preceding layer and then applies the parameter update.
     *
     * The returned error is calculated from the weights as they were before the update.
     */
    override fun backward(input: DoubleTensor, outputError: DoubleTensor): DoubleTensor {
        return DoubleTensorAlgebra {
            // Must be taken before the weights are modified below.
            val errorForPreviousLayer = outputError dot weights.transpose()

            val weightsGradient = input.transpose() dot outputError
            // mean over dim 0 multiplied by the number of rows in the input
            val biasGradient = DoubleAnalyticTensorAlgebra {
                outputError.mean(dim = 0, keepDim = false) * input.shape[0].toDouble()
            }

            weights -= learningRate * weightsGradient
            bias -= learningRate * biasGradient

            errorForPreviousLayer
        }
    }
}
|
||||
|
||||
/**
 * Proportion of rows for which [yPred] and [yTrue] hold exactly equal values in column 0.
 *
 * Only the element at `[i, 0]` of each row `i` is compared. When the first dimension is 0
 * the result is `NaN` (0.0 / 0.0).
 *
 * @param yPred predicted labels.
 * @param yTrue reference labels; must have the same shape as [yPred].
 * @return number of matching rows divided by the size of the first dimension.
 * @throws IllegalStateException if the shapes differ.
 */
fun accuracy(yPred: DoubleTensor, yTrue: DoubleTensor): Double {
    check(yPred.shape contentEquals yTrue.shape) {
        "Shape mismatch: yPred is ${yPred.shape.contentToString()}, yTrue is ${yTrue.shape.contentToString()}"
    }
    val n = yPred.shape[0]
    val correctCnt = (0 until n).count { i ->
        yPred[intArrayOf(i, 0)] == yTrue[intArrayOf(i, 0)]
    }
    return correctCnt.toDouble() / n.toDouble()
}
|
||||
|
||||
/**
 * Sequential network: [layers] are applied in list order on the forward pass and in reverse
 * order on the backward pass.
 *
 * @param layers the stages of the network, first to last.
 */
class NeuralNetwork(private val layers: List<Layer>) {

    /**
     * Returns `(softmax(yPred) - oneHot(yTrue)) / yPred.shape[0]`, the value [train] passes to the
     * last layer's `backward` as the initial error.
     *
     * Labels are read from the flattened [yTrue] and truncated to `Int`; label `k` of sample `i`
     * sets element `[i, k]` of the one-hot tensor. The softmax is taken along dimension 1.
     */
    private fun softMaxLoss(yPred: DoubleTensor, yTrue: DoubleTensor): DoubleTensor = DoubleAnalyticTensorAlgebra {
        val onesForAnswers = yPred.zeroesLike()
        yTrue.toDoubleArray().forEachIndexed { index, labelDouble ->
            val label = labelDouble.toInt()
            onesForAnswers[intArrayOf(index, label)] = 1.0
        }

        val softmaxValue = BroadcastDoubleTensorAlgebra {
            // exp is evaluated once and shared by numerator and denominator
            val exponents = yPred.exp()
            exponents / exponents.sum(dim = 1, keepDim = true)
        }

        (-onesForAnswers + softmaxValue) / (yPred.shape[0].toDouble())
    }

    /**
     * Runs [x] through every layer and returns the output of each layer, in layer order.
     * The result is empty when there are no layers.
     */
    @OptIn(ExperimentalStdlibApi::class)
    private fun forward(x: DoubleTensor): List<DoubleTensor> {
        var input = x

        return buildList {
            layers.forEach { layer ->
                val output = layer.forward(input)
                add(output)
                input = output
            }
        }
    }

    /**
     * Performs one forward pass and one backward pass on a single batch.
     * Any parameter update happens inside the layers' `backward` implementations.
     */
    @OptIn(ExperimentalStdlibApi::class)
    private fun train(xTrain: DoubleTensor, yTrain: DoubleTensor) {
        // layerInputs[i] is what layers[i] received; the extra last element is the network output
        val layerInputs = buildList {
            add(xTrain)
            addAll(forward(xTrain))
        }

        var lossGrad = softMaxLoss(layerInputs.last(), yTrain)

        // zip stops at the shorter list, so every layer is paired with its own input
        layers.zip(layerInputs).reversed().forEach { (layer, input) ->
            lossGrad = layer.backward(input, lossGrad)
        }
    }

    /**
     * Trains the network for [epochs] passes over the data.
     *
     * In every epoch the row indices are shuffled (unseeded), split into consecutive groups of at
     * most [batchSize] rows, and [train] is called once per group. After each epoch the accuracy
     * on the whole training set is printed.
     *
     * @param xTrain training inputs; rows are selected by index along the first dimension.
     * @param yTrain training labels, selected by the same row indices as [xTrain].
     * @param batchSize maximum number of rows per batch; a non-positive value fails with
     * [IllegalArgumentException] once batching starts.
     * @param epochs number of passes; nothing happens when it is not positive.
     */
    fun fit(xTrain: DoubleTensor, yTrain: DoubleTensor, batchSize: Int, epochs: Int) = DoubleTensorAlgebra {
        fun iterBatch(x: DoubleTensor, y: DoubleTensor): Sequence<Pair<DoubleTensor, DoubleTensor>> =
            (0 until x.shape[0]).shuffled()
                // one pass over the shuffled indices instead of drop/take from the start per batch
                .chunked(batchSize)
                .asSequence()
                .map { rows ->
                    val excerptIndices = rows.toIntArray()
                    x.rowsByIndices(excerptIndices) to y.rowsByIndices(excerptIndices)
                }

        for (epoch in 0 until epochs) {
            println("Epoch ${epoch + 1}/$epochs")
            for ((xBatch, yBatch) in iterBatch(xTrain, yTrain)) {
                train(xBatch, yBatch)
            }
            println("Accuracy:${accuracy(yPred = predict(xTrain).argMax(1, true), yTrue = yTrain)}")
        }
    }

    /**
     * Returns the output of the last layer for [x].
     *
     * @throws NoSuchElementException if the network has no layers.
     */
    fun predict(x: DoubleTensor): DoubleTensor {
        return forward(x).last()
    }
}
|
||||
|
||||
|
||||
|
||||
/**
 * Example run: generates a synthetic two-class data set, trains a small network on the first
 * part of it and prints the accuracy on the remainder.
 */
@OptIn(ExperimentalStdlibApi::class)
fun main() {
    DoubleTensorAlgebra {
        val features = 5
        val sampleSize = 250
        val trainSize = 180

        // take sample of features from normal distribution
        val x = randomNormal(intArrayOf(sampleSize, features), seed) * 2.5
        BroadcastDoubleTensorAlgebra {
            x += fromArray(
                intArrayOf(features),
                doubleArrayOf(0.0, -1.0, -2.5, -3.0, 5.5) // one offset per feature
            )
        }

        // define class like '1' if the sum of features > 0 and '0' otherwise
        val y = fromArray(
            intArrayOf(sampleSize, 1),
            DoubleArray(sampleSize) { i ->
                if (x[i].sum() > 0.0) 1.0 else 0.0
            }
        )

        // split train and test: the first trainSize rows train, the remaining rows test
        val trainIndices = IntArray(trainSize) { it }
        val testIndices = IntArray(sampleSize - trainSize) { trainSize + it }

        val xTrain = x.rowsByIndices(trainIndices)
        val yTrain = y.rowsByIndices(trainIndices)

        val xTest = x.rowsByIndices(testIndices)
        val yTest = y.rowsByIndices(testIndices)

        // build model
        val layers = buildList {
            add(Dense(features, 64))
            add(ReLU())
            add(Dense(64, 16))
            add(ReLU())
            add(Dense(16, 2))
            add(Sigmoid())
        }
        val model = NeuralNetwork(layers)

        // fit it with train data
        model.fit(xTrain, yTrain, batchSize = 20, epochs = 10)

        // make prediction
        val prediction = model.predict(xTest)

        // process raw prediction via argMax
        val predictionLabels = prediction.argMax(1, true)

        // find out accuracy
        val acc = accuracy(yPred = predictionLabels, yTrue = yTest)
        println("Test accuracy:$acc")
    }
}
|
@ -10,11 +10,14 @@ import space.kscience.kmath.tensors.core.algebras.BroadcastDoubleTensorAlgebra
|
||||
import space.kscience.kmath.tensors.core.algebras.DoubleAnalyticTensorAlgebra
|
||||
import space.kscience.kmath.tensors.core.algebras.DoubleLinearOpsTensorAlgebra
|
||||
|
||||
const val seed = 100500L
|
||||
|
||||
|
||||
// simple PCA
|
||||
|
||||
fun main(){
|
||||
val seed = 100500L
|
||||
|
||||
// work in context with analytic methods
|
||||
DoubleAnalyticTensorAlgebra {
|
||||
|
||||
// assume x is range from 0 until 10
|
||||
|
@ -305,5 +305,16 @@ public interface TensorAlgebra<T>: Algebra<Tensor<T>> {
|
||||
*/
|
||||
public fun Tensor<T>.max(dim: Int, keepDim: Boolean): Tensor<T>
|
||||
|
||||
|
||||
/**
|
||||
* Returns the index of maximum value of each row of the input tensor in the given dimension [dim].
|
||||
*
|
||||
* If [keepDim] is true, the output tensor is of the same size as
|
||||
* input except in the dimension [dim] where it is of size 1.
|
||||
* Otherwise, [dim] is squeezed, resulting in the output tensor having 1 fewer dimension.
|
||||
*
|
||||
* @param dim the dimension to reduce.
|
||||
* @param keepDim whether the output tensor has [dim] retained or not.
|
||||
* @return the index of the maximum value of each row of the input tensor in the given dimension [dim].
|
||||
*/
|
||||
public fun Tensor<T>.argMax(dim: Int, keepDim: Boolean): Tensor<T>
|
||||
}
|
||||
|
@ -561,4 +561,9 @@ public open class DoubleTensorAlgebra : TensorPartialDivisionAlgebra<Double> {
|
||||
// Delegates to foldDim with a reducer that returns the largest of the values it is given;
// `!!` fails if maxOrNull yields null.
override fun Tensor<Double>.max(dim: Int, keepDim: Boolean): DoubleTensor =
    foldDim({ values -> values.maxOrNull()!! }, dim, keepDim)
|
||||
|
||||
// Delegates to foldDim with a reducer that returns, as a Double, the position of the largest
// of the values it is given; `!!` fails if maxByOrNull yields null.
override fun Tensor<Double>.argMax(dim: Int, keepDim: Boolean): DoubleTensor =
    foldDim({ values ->
        val largest = values.withIndex().maxByOrNull { entry -> entry.value }
        largest!!.index.toDouble()
    }, dim, keepDim)
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user