Feature/booleans #341

Merged
altavir merged 13 commits from feature/booleans into dev 2021-05-19 03:36:37 +03:00
7 changed files with 191 additions and 202 deletions
Showing only changes of commit 42d130f69c - Show all commits

View File

@ -15,7 +15,7 @@ allprojects {
} }
group = "space.kscience" group = "space.kscience"
version = "0.3.0-dev-8" version = "0.3.0-dev-9"
} }
subprojects { subprojects {

View File

@ -11,36 +11,32 @@ import space.kscience.kmath.tensors.core.BroadcastDoubleTensorAlgebra
// Dataset normalization // Dataset normalization
fun main() { fun main() = BroadcastDoubleTensorAlgebra { // work in context with broadcast methods
// take dataset of 5-element vectors from normal distribution
val dataset = randomNormal(intArrayOf(100, 5)) * 1.5 // all elements from N(0, 1.5)
// work in context with broadcast methods dataset += fromArray(
BroadcastDoubleTensorAlgebra { intArrayOf(5),
// take dataset of 5-element vectors from normal distribution doubleArrayOf(0.0, 1.0, 1.5, 3.0, 5.0) // rows means
val dataset = randomNormal(intArrayOf(100, 5)) * 1.5 // all elements from N(0, 1.5) )
dataset += fromArray(
intArrayOf(5),
doubleArrayOf(0.0, 1.0, 1.5, 3.0, 5.0) // rows means
)
// find out mean and standard deviation of each column // find out mean and standard deviation of each column
val mean = dataset.mean(0, false) val mean = dataset.mean(0, false)
val std = dataset.std(0, false) val std = dataset.std(0, false)
println("Mean:\n$mean") println("Mean:\n$mean")
println("Standard deviation:\n$std") println("Standard deviation:\n$std")
// also we can calculate other statistic as minimum and maximum of rows // also we can calculate other statistic as minimum and maximum of rows
println("Minimum:\n${dataset.min(0, false)}") println("Minimum:\n${dataset.min(0, false)}")
println("Maximum:\n${dataset.max(0, false)}") println("Maximum:\n${dataset.max(0, false)}")
// now we can scale dataset with mean normalization // now we can scale dataset with mean normalization
val datasetScaled = (dataset - mean) / std val datasetScaled = (dataset - mean) / std
// find out mean and std of scaled dataset // find out mean and std of scaled dataset
println("Mean of scaled:\n${datasetScaled.mean(0, false)}") println("Mean of scaled:\n${datasetScaled.mean(0, false)}")
println("Mean of scaled:\n${datasetScaled.std(0, false)}") println("Mean of scaled:\n${datasetScaled.std(0, false)}")
}
} }

View File

@ -6,92 +6,88 @@
package space.kscience.kmath.tensors package space.kscience.kmath.tensors
import space.kscience.kmath.operations.invoke import space.kscience.kmath.operations.invoke
import space.kscience.kmath.tensors.core.DoubleTensor
import space.kscience.kmath.tensors.core.BroadcastDoubleTensorAlgebra import space.kscience.kmath.tensors.core.BroadcastDoubleTensorAlgebra
import space.kscience.kmath.tensors.core.DoubleTensor
// solving linear system with LUP decomposition // solving linear system with LUP decomposition
fun main () { fun main() = BroadcastDoubleTensorAlgebra {// work in context with linear operations
// work in context with linear operations // set true value of x
BroadcastDoubleTensorAlgebra { val trueX = fromArray(
intArrayOf(4),
doubleArrayOf(-2.0, 1.5, 6.8, -2.4)
)
// set true value of x // and A matrix
val trueX = fromArray( val a = fromArray(
intArrayOf(4), intArrayOf(4, 4),
doubleArrayOf(-2.0, 1.5, 6.8, -2.4) doubleArrayOf(
0.5, 10.5, 4.5, 1.0,
8.5, 0.9, 12.8, 0.1,
5.56, 9.19, 7.62, 5.45,
1.0, 2.0, -3.0, -2.5
) )
)
// and A matrix // calculate y value
val a = fromArray( val b = a dot trueX
intArrayOf(4, 4),
doubleArrayOf(
0.5, 10.5, 4.5, 1.0,
8.5, 0.9, 12.8, 0.1,
5.56, 9.19, 7.62, 5.45,
1.0, 2.0, -3.0, -2.5
)
)
// calculate y value // check out A and b
val b = a dot trueX println("A:\n$a")
println("b:\n$b")
// check out A and b // solve `Ax = b` system using LUP decomposition
println("A:\n$a")
println("b:\n$b")
// solve `Ax = b` system using LUP decomposition // get P, L, U such that PA = LU
val (p, l, u) = a.lu()
// get P, L, U such that PA = LU // check that P is permutation matrix
val (p, l, u) = a.lu() println("P:\n$p")
// L is lower triangular matrix and U is upper triangular matrix
println("L:\n$l")
println("U:\n$u")
// and PA = LU
println("PA:\n${p dot a}")
println("LU:\n${l dot u}")
// check that P is permutation matrix /* Ax = b;
println("P:\n$p") PAx = Pb;
// L is lower triangular matrix and U is upper triangular matrix LUx = Pb;
println("L:\n$l") let y = Ux, then
println("U:\n$u") Ly = Pb -- this system can be easily solved, since the matrix L is lower triangular;
// and PA = LU Ux = y can be solved the same way, since the matrix U is upper triangular
println("PA:\n${p dot a}") */
println("LU:\n${l dot u}")
/* Ax = b;
PAx = Pb;
LUx = Pb;
let y = Ux, then
Ly = Pb -- this system can be easily solved, since the matrix L is lower triangular;
Ux = y can be solved the same way, since the matrix U is upper triangular
*/
// this function returns solution x of a system lx = b, l should be lower triangular // this function returns solution x of a system lx = b, l should be lower triangular
fun solveLT(l: DoubleTensor, b: DoubleTensor): DoubleTensor { fun solveLT(l: DoubleTensor, b: DoubleTensor): DoubleTensor {
val n = l.shape[0] val n = l.shape[0]
val x = zeros(intArrayOf(n)) val x = zeros(intArrayOf(n))
for (i in 0 until n){
x[intArrayOf(i)] = (b[intArrayOf(i)] - l[i].dot(x).value()) / l[intArrayOf(i, i)]
}
return x
}
val y = solveLT(l, p dot b)
// solveLT(l, b) function can be easily adapted for upper triangular matrix by the permutation matrix revMat
// create it by placing ones on side diagonal
val revMat = u.zeroesLike()
val n = revMat.shape[0]
for (i in 0 until n) { for (i in 0 until n) {
revMat[intArrayOf(i, n - 1 - i)] = 1.0 x[intArrayOf(i)] = (b[intArrayOf(i)] - l[i].dot(x).value()) / l[intArrayOf(i, i)]
} }
return x
// solution of system ux = b, u should be upper triangular
fun solveUT(u: DoubleTensor, b: DoubleTensor): DoubleTensor = revMat dot solveLT(
revMat dot u dot revMat, revMat dot b
)
val x = solveUT(u, y)
println("True x:\n$trueX")
println("x founded with LU method:\n$x")
} }
val y = solveLT(l, p dot b)
// solveLT(l, b) function can be easily adapted for upper triangular matrix by the permutation matrix revMat
// create it by placing ones on side diagonal
val revMat = u.zeroesLike()
val n = revMat.shape[0]
for (i in 0 until n) {
revMat[intArrayOf(i, n - 1 - i)] = 1.0
}
// solution of system ux = b, u should be upper triangular
fun solveUT(u: DoubleTensor, b: DoubleTensor): DoubleTensor = revMat dot solveLT(
revMat dot u dot revMat, revMat dot b
)
val x = solveUT(u, y)
println("True x:\n$trueX")
println("x founded with LU method:\n$x")
} }

View File

@ -25,7 +25,7 @@ interface Layer {
// activation layer // activation layer
open class Activation( open class Activation(
val activation: (DoubleTensor) -> DoubleTensor, val activation: (DoubleTensor) -> DoubleTensor,
val activationDer: (DoubleTensor) -> DoubleTensor val activationDer: (DoubleTensor) -> DoubleTensor,
) : Layer { ) : Layer {
override fun forward(input: DoubleTensor): DoubleTensor { override fun forward(input: DoubleTensor): DoubleTensor {
return activation(input) return activation(input)
@ -62,7 +62,7 @@ class Sigmoid : Activation(::sigmoid, ::sigmoidDer)
class Dense( class Dense(
private val inputUnits: Int, private val inputUnits: Int,
private val outputUnits: Int, private val outputUnits: Int,
private val learningRate: Double = 0.1 private val learningRate: Double = 0.1,
) : Layer { ) : Layer {
private val weights: DoubleTensor = DoubleTensorAlgebra { private val weights: DoubleTensor = DoubleTensorAlgebra {
@ -74,8 +74,8 @@ class Dense(
private val bias: DoubleTensor = DoubleTensorAlgebra { zeros(intArrayOf(outputUnits)) } private val bias: DoubleTensor = DoubleTensorAlgebra { zeros(intArrayOf(outputUnits)) }
override fun forward(input: DoubleTensor): DoubleTensor { override fun forward(input: DoubleTensor): DoubleTensor = BroadcastDoubleTensorAlgebra {
return BroadcastDoubleTensorAlgebra { (input dot weights) + bias } (input dot weights) + bias
} }
override fun backward(input: DoubleTensor, outputError: DoubleTensor): DoubleTensor = DoubleTensorAlgebra { override fun backward(input: DoubleTensor, outputError: DoubleTensor): DoubleTensor = DoubleTensorAlgebra {
@ -116,7 +116,7 @@ class NeuralNetwork(private val layers: List<Layer>) {
onesForAnswers[intArrayOf(index, label)] = 1.0 onesForAnswers[intArrayOf(index, label)] = 1.0
} }
val softmaxValue = yPred.exp() / yPred.exp().sum(dim = 1, keepDim = true) val softmaxValue = yPred.exp() / yPred.exp().sum(dim = 1, keepDim = true)
(-onesForAnswers + softmaxValue) / (yPred.shape[0].toDouble()) (-onesForAnswers + softmaxValue) / (yPred.shape[0].toDouble())
} }
@ -175,67 +175,65 @@ class NeuralNetwork(private val layers: List<Layer>) {
@OptIn(ExperimentalStdlibApi::class) @OptIn(ExperimentalStdlibApi::class)
fun main() { fun main() = BroadcastDoubleTensorAlgebra {
BroadcastDoubleTensorAlgebra { val features = 5
val features = 5 val sampleSize = 250
val sampleSize = 250 val trainSize = 180
val trainSize = 180 //val testSize = sampleSize - trainSize
//val testSize = sampleSize - trainSize
// take sample of features from normal distribution // take sample of features from normal distribution
val x = randomNormal(intArrayOf(sampleSize, features), seed) * 2.5 val x = randomNormal(intArrayOf(sampleSize, features), seed) * 2.5
x += fromArray( x += fromArray(
intArrayOf(5), intArrayOf(5),
doubleArrayOf(0.0, -1.0, -2.5, -3.0, 5.5) // rows means doubleArrayOf(0.0, -1.0, -2.5, -3.0, 5.5) // rows means
) )
// define class like '1' if the sum of features > 0 and '0' otherwise // define class like '1' if the sum of features > 0 and '0' otherwise
val y = fromArray( val y = fromArray(
intArrayOf(sampleSize, 1), intArrayOf(sampleSize, 1),
DoubleArray(sampleSize) { i -> DoubleArray(sampleSize) { i ->
if (x[i].sum() > 0.0) { if (x[i].sum() > 0.0) {
1.0 1.0
} else { } else {
0.0 0.0
}
} }
)
// split train and test
val trainIndices = (0 until trainSize).toList().toIntArray()
val testIndices = (trainSize until sampleSize).toList().toIntArray()
val xTrain = x.rowsByIndices(trainIndices)
val yTrain = y.rowsByIndices(trainIndices)
val xTest = x.rowsByIndices(testIndices)
val yTest = y.rowsByIndices(testIndices)
// build model
val layers = buildList {
add(Dense(features, 64))
add(ReLU())
add(Dense(64, 16))
add(ReLU())
add(Dense(16, 2))
add(Sigmoid())
} }
val model = NeuralNetwork(layers) )
// fit it with train data // split train and test
model.fit(xTrain, yTrain, batchSize = 20, epochs = 10) val trainIndices = (0 until trainSize).toList().toIntArray()
val testIndices = (trainSize until sampleSize).toList().toIntArray()
// make prediction val xTrain = x.rowsByIndices(trainIndices)
val prediction = model.predict(xTest) val yTrain = y.rowsByIndices(trainIndices)
// process raw prediction via argMax val xTest = x.rowsByIndices(testIndices)
val predictionLabels = prediction.argMax(1, true) val yTest = y.rowsByIndices(testIndices)
// find out accuracy
val acc = accuracy(yTest, predictionLabels)
println("Test accuracy:$acc")
// build model
val layers = buildList {
add(Dense(features, 64))
add(ReLU())
add(Dense(64, 16))
add(ReLU())
add(Dense(16, 2))
add(Sigmoid())
} }
val model = NeuralNetwork(layers)
// fit it with train data
model.fit(xTrain, yTrain, batchSize = 20, epochs = 10)
// make prediction
val prediction = model.predict(xTest)
// process raw prediction via argMax
val predictionLabels = prediction.argMax(1, true)
// find out accuracy
val acc = accuracy(yTest, predictionLabels)
println("Test accuracy:$acc")
} }

View File

@ -11,68 +11,64 @@ import space.kscience.kmath.tensors.core.BroadcastDoubleTensorAlgebra
// simple PCA // simple PCA
fun main(){ fun main() = BroadcastDoubleTensorAlgebra { // work in context with broadcast methods
val seed = 100500L val seed = 100500L
// work in context with broadcast methods // assume x is range from 0 until 10
BroadcastDoubleTensorAlgebra { val x = fromArray(
intArrayOf(10),
(0 until 10).toList().map { it.toDouble() }.toDoubleArray()
)
// assume x is range from 0 until 10 // take y dependent on x with noise
val x = fromArray( val y = 2.0 * x + (3.0 + x.randomNormalLike(seed) * 1.5)
intArrayOf(10),
(0 until 10).toList().map { it.toDouble() }.toDoubleArray()
)
// take y dependent on x with noise println("x:\n$x")
val y = 2.0 * x + (3.0 + x.randomNormalLike(seed) * 1.5) println("y:\n$y")
println("x:\n$x") // stack them into single dataset
println("y:\n$y") val dataset = stack(listOf(x, y)).transpose()
// stack them into single dataset // normalize both x and y
val dataset = stack(listOf(x, y)).transpose() val xMean = x.mean()
val yMean = y.mean()
// normalize both x and y val xStd = x.std()
val xMean = x.mean() val yStd = y.std()
val yMean = y.mean()
val xStd = x.std() val xScaled = (x - xMean) / xStd
val yStd = y.std() val yScaled = (y - yMean) / yStd
val xScaled = (x - xMean) / xStd // save means and standard deviations for further recovery
val yScaled = (y - yMean) / yStd val mean = fromArray(
intArrayOf(2),
doubleArrayOf(xMean, yMean)
)
println("Means:\n$mean")
// save means and standard deviations for further recovery val std = fromArray(
val mean = fromArray( intArrayOf(2),
intArrayOf(2), doubleArrayOf(xStd, yStd)
doubleArrayOf(xMean, yMean) )
) println("Standard deviations:\n$std")
println("Means:\n$mean")
val std = fromArray( // calculate the covariance matrix of scaled x and y
intArrayOf(2), val covMatrix = cov(listOf(xScaled, yScaled))
doubleArrayOf(xStd, yStd) println("Covariance matrix:\n$covMatrix")
)
println("Standard deviations:\n$std")
// calculate the covariance matrix of scaled x and y // and find out eigenvector of it
val covMatrix = cov(listOf(xScaled, yScaled)) val (_, evecs) = covMatrix.symEig()
println("Covariance matrix:\n$covMatrix") val v = evecs[0]
println("Eigenvector:\n$v")
// and find out eigenvector of it // reduce dimension of dataset
val (_, evecs) = covMatrix.symEig() val datasetReduced = v dot stack(listOf(xScaled, yScaled))
val v = evecs[0] println("Reduced data:\n$datasetReduced")
println("Eigenvector:\n$v")
// reduce dimension of dataset // we can restore original data from reduced data.
val datasetReduced = v dot stack(listOf(xScaled, yScaled)) // for example, find 7th element of dataset
println("Reduced data:\n$datasetReduced") val n = 7
val restored = (datasetReduced[n] dot v.view(intArrayOf(1, 2))) * std + mean
// we can restore original data from reduced data. println("Original value:\n${dataset[n]}")
// for example, find 7th element of dataset println("Restored value:\n$restored")
val n = 7
val restored = (datasetReduced[n] dot v.view(intArrayOf(1, 2))) * std + mean
println("Original value:\n${dataset[n]}")
println("Restored value:\n$restored")
}
} }

View File

@ -19,6 +19,9 @@ import space.kscience.kmath.structures.Buffer
public interface ColumnarData<out T> { public interface ColumnarData<out T> {
public val size: Int public val size: Int
/**
* Provide a column by symbol or null if column with given symbol is not defined
*/
public operator fun get(symbol: Symbol): Buffer<T>? public operator fun get(symbol: Symbol): Buffer<T>?
} }

View File

@ -5,7 +5,7 @@ pluginManagement {
maven("https://repo.kotlin.link") maven("https://repo.kotlin.link")
} }
val toolsVersion = "0.9.6" val toolsVersion = "0.9.7"
val kotlinVersion = "1.5.0" val kotlinVersion = "1.5.0"
plugins { plugins {