Feature/booleans #341

Merged
altavir merged 13 commits from feature/booleans into dev 2021-05-19 03:36:37 +03:00
7 changed files with 191 additions and 202 deletions
Showing only changes of commit 42d130f69c - Show all commits

View File

@ -15,7 +15,7 @@ allprojects {
}
group = "space.kscience"
version = "0.3.0-dev-8"
version = "0.3.0-dev-9"
}
subprojects {

View File

@ -11,36 +11,32 @@ import space.kscience.kmath.tensors.core.BroadcastDoubleTensorAlgebra
// Dataset normalization
fun main() {
fun main() = BroadcastDoubleTensorAlgebra { // work in context with broadcast methods
// take dataset of 5-element vectors from normal distribution
val dataset = randomNormal(intArrayOf(100, 5)) * 1.5 // all elements from N(0, 1.5)
// work in context with broadcast methods
BroadcastDoubleTensorAlgebra {
// take dataset of 5-element vectors from normal distribution
val dataset = randomNormal(intArrayOf(100, 5)) * 1.5 // all elements from N(0, 1.5)
dataset += fromArray(
intArrayOf(5),
doubleArrayOf(0.0, 1.0, 1.5, 3.0, 5.0) // rows means
)
dataset += fromArray(
intArrayOf(5),
doubleArrayOf(0.0, 1.0, 1.5, 3.0, 5.0) // rows means
)
// find out mean and standard deviation of each column
val mean = dataset.mean(0, false)
val std = dataset.std(0, false)
// find out mean and standard deviation of each column
val mean = dataset.mean(0, false)
val std = dataset.std(0, false)
println("Mean:\n$mean")
println("Standard deviation:\n$std")
println("Mean:\n$mean")
println("Standard deviation:\n$std")
// also we can calculate other statistic as minimum and maximum of rows
println("Minimum:\n${dataset.min(0, false)}")
println("Maximum:\n${dataset.max(0, false)}")
// also we can calculate other statistic as minimum and maximum of rows
println("Minimum:\n${dataset.min(0, false)}")
println("Maximum:\n${dataset.max(0, false)}")
// now we can scale dataset with mean normalization
val datasetScaled = (dataset - mean) / std
// now we can scale dataset with mean normalization
val datasetScaled = (dataset - mean) / std
// find out mean and std of scaled dataset
// find out mean and std of scaled dataset
println("Mean of scaled:\n${datasetScaled.mean(0, false)}")
println("Mean of scaled:\n${datasetScaled.std(0, false)}")
}
println("Mean of scaled:\n${datasetScaled.mean(0, false)}")
println("Mean of scaled:\n${datasetScaled.std(0, false)}")
}

View File

@ -6,92 +6,88 @@
package space.kscience.kmath.tensors
import space.kscience.kmath.operations.invoke
import space.kscience.kmath.tensors.core.DoubleTensor
import space.kscience.kmath.tensors.core.BroadcastDoubleTensorAlgebra
import space.kscience.kmath.tensors.core.DoubleTensor
// solving linear system with LUP decomposition
fun main () {
fun main() = BroadcastDoubleTensorAlgebra {// work in context with linear operations
// work in context with linear operations
BroadcastDoubleTensorAlgebra {
// set true value of x
val trueX = fromArray(
intArrayOf(4),
doubleArrayOf(-2.0, 1.5, 6.8, -2.4)
)
// set true value of x
val trueX = fromArray(
intArrayOf(4),
doubleArrayOf(-2.0, 1.5, 6.8, -2.4)
// and A matrix
val a = fromArray(
intArrayOf(4, 4),
doubleArrayOf(
0.5, 10.5, 4.5, 1.0,
8.5, 0.9, 12.8, 0.1,
5.56, 9.19, 7.62, 5.45,
1.0, 2.0, -3.0, -2.5
)
)
// and A matrix
val a = fromArray(
intArrayOf(4, 4),
doubleArrayOf(
0.5, 10.5, 4.5, 1.0,
8.5, 0.9, 12.8, 0.1,
5.56, 9.19, 7.62, 5.45,
1.0, 2.0, -3.0, -2.5
)
)
// calculate y value
val b = a dot trueX
// calculate y value
val b = a dot trueX
// check out A and b
println("A:\n$a")
println("b:\n$b")
// check out A and b
println("A:\n$a")
println("b:\n$b")
// solve `Ax = b` system using LUP decomposition
// solve `Ax = b` system using LUP decomposition
// get P, L, U such that PA = LU
val (p, l, u) = a.lu()
// get P, L, U such that PA = LU
val (p, l, u) = a.lu()
// check that P is permutation matrix
println("P:\n$p")
// L is lower triangular matrix and U is upper triangular matrix
println("L:\n$l")
println("U:\n$u")
// and PA = LU
println("PA:\n${p dot a}")
println("LU:\n${l dot u}")
// check that P is permutation matrix
println("P:\n$p")
// L is lower triangular matrix and U is upper triangular matrix
println("L:\n$l")
println("U:\n$u")
// and PA = LU
println("PA:\n${p dot a}")
println("LU:\n${l dot u}")
/* Ax = b;
PAx = Pb;
LUx = Pb;
let y = Ux, then
Ly = Pb -- this system can be easily solved, since the matrix L is lower triangular;
Ux = y can be solved the same way, since the matrix L is upper triangular
*/
/* Ax = b;
PAx = Pb;
LUx = Pb;
let y = Ux, then
Ly = Pb -- this system can be easily solved, since the matrix L is lower triangular;
Ux = y can be solved the same way, since the matrix L is upper triangular
*/
// this function returns solution x of a system lx = b, l should be lower triangular
fun solveLT(l: DoubleTensor, b: DoubleTensor): DoubleTensor {
val n = l.shape[0]
val x = zeros(intArrayOf(n))
for (i in 0 until n){
x[intArrayOf(i)] = (b[intArrayOf(i)] - l[i].dot(x).value()) / l[intArrayOf(i, i)]
}
return x
}
val y = solveLT(l, p dot b)
// solveLT(l, b) function can be easily adapted for upper triangular matrix by the permutation matrix revMat
// create it by placing ones on side diagonal
val revMat = u.zeroesLike()
val n = revMat.shape[0]
// this function returns solution x of a system lx = b, l should be lower triangular
fun solveLT(l: DoubleTensor, b: DoubleTensor): DoubleTensor {
val n = l.shape[0]
val x = zeros(intArrayOf(n))
for (i in 0 until n) {
revMat[intArrayOf(i, n - 1 - i)] = 1.0
x[intArrayOf(i)] = (b[intArrayOf(i)] - l[i].dot(x).value()) / l[intArrayOf(i, i)]
}
// solution of system ux = b, u should be upper triangular
fun solveUT(u: DoubleTensor, b: DoubleTensor): DoubleTensor = revMat dot solveLT(
revMat dot u dot revMat, revMat dot b
)
val x = solveUT(u, y)
println("True x:\n$trueX")
println("x founded with LU method:\n$x")
return x
}
val y = solveLT(l, p dot b)
// solveLT(l, b) function can be easily adapted for upper triangular matrix by the permutation matrix revMat
// create it by placing ones on side diagonal
val revMat = u.zeroesLike()
val n = revMat.shape[0]
for (i in 0 until n) {
revMat[intArrayOf(i, n - 1 - i)] = 1.0
}
// solution of system ux = b, u should be upper triangular
fun solveUT(u: DoubleTensor, b: DoubleTensor): DoubleTensor = revMat dot solveLT(
revMat dot u dot revMat, revMat dot b
)
val x = solveUT(u, y)
println("True x:\n$trueX")
println("x founded with LU method:\n$x")
}

View File

@ -25,7 +25,7 @@ interface Layer {
// activation layer
open class Activation(
val activation: (DoubleTensor) -> DoubleTensor,
val activationDer: (DoubleTensor) -> DoubleTensor
val activationDer: (DoubleTensor) -> DoubleTensor,
) : Layer {
override fun forward(input: DoubleTensor): DoubleTensor {
return activation(input)
@ -62,7 +62,7 @@ class Sigmoid : Activation(::sigmoid, ::sigmoidDer)
class Dense(
private val inputUnits: Int,
private val outputUnits: Int,
private val learningRate: Double = 0.1
private val learningRate: Double = 0.1,
) : Layer {
private val weights: DoubleTensor = DoubleTensorAlgebra {
@ -74,8 +74,8 @@ class Dense(
private val bias: DoubleTensor = DoubleTensorAlgebra { zeros(intArrayOf(outputUnits)) }
override fun forward(input: DoubleTensor): DoubleTensor {
return BroadcastDoubleTensorAlgebra { (input dot weights) + bias }
override fun forward(input: DoubleTensor): DoubleTensor = BroadcastDoubleTensorAlgebra {
(input dot weights) + bias
}
override fun backward(input: DoubleTensor, outputError: DoubleTensor): DoubleTensor = DoubleTensorAlgebra {
@ -116,7 +116,7 @@ class NeuralNetwork(private val layers: List<Layer>) {
onesForAnswers[intArrayOf(index, label)] = 1.0
}
val softmaxValue = yPred.exp() / yPred.exp().sum(dim = 1, keepDim = true)
val softmaxValue = yPred.exp() / yPred.exp().sum(dim = 1, keepDim = true)
(-onesForAnswers + softmaxValue) / (yPred.shape[0].toDouble())
}
@ -175,67 +175,65 @@ class NeuralNetwork(private val layers: List<Layer>) {
@OptIn(ExperimentalStdlibApi::class)
fun main() {
BroadcastDoubleTensorAlgebra {
val features = 5
val sampleSize = 250
val trainSize = 180
//val testSize = sampleSize - trainSize
fun main() = BroadcastDoubleTensorAlgebra {
val features = 5
val sampleSize = 250
val trainSize = 180
//val testSize = sampleSize - trainSize
// take sample of features from normal distribution
val x = randomNormal(intArrayOf(sampleSize, features), seed) * 2.5
// take sample of features from normal distribution
val x = randomNormal(intArrayOf(sampleSize, features), seed) * 2.5
x += fromArray(
intArrayOf(5),
doubleArrayOf(0.0, -1.0, -2.5, -3.0, 5.5) // rows means
)
x += fromArray(
intArrayOf(5),
doubleArrayOf(0.0, -1.0, -2.5, -3.0, 5.5) // rows means
)
// define class like '1' if the sum of features > 0 and '0' otherwise
val y = fromArray(
intArrayOf(sampleSize, 1),
DoubleArray(sampleSize) { i ->
if (x[i].sum() > 0.0) {
1.0
} else {
0.0
}
// define class like '1' if the sum of features > 0 and '0' otherwise
val y = fromArray(
intArrayOf(sampleSize, 1),
DoubleArray(sampleSize) { i ->
if (x[i].sum() > 0.0) {
1.0
} else {
0.0
}
)
// split train ans test
val trainIndices = (0 until trainSize).toList().toIntArray()
val testIndices = (trainSize until sampleSize).toList().toIntArray()
val xTrain = x.rowsByIndices(trainIndices)
val yTrain = y.rowsByIndices(trainIndices)
val xTest = x.rowsByIndices(testIndices)
val yTest = y.rowsByIndices(testIndices)
// build model
val layers = buildList {
add(Dense(features, 64))
add(ReLU())
add(Dense(64, 16))
add(ReLU())
add(Dense(16, 2))
add(Sigmoid())
}
val model = NeuralNetwork(layers)
)
// fit it with train data
model.fit(xTrain, yTrain, batchSize = 20, epochs = 10)
// split train ans test
val trainIndices = (0 until trainSize).toList().toIntArray()
val testIndices = (trainSize until sampleSize).toList().toIntArray()
// make prediction
val prediction = model.predict(xTest)
val xTrain = x.rowsByIndices(trainIndices)
val yTrain = y.rowsByIndices(trainIndices)
// process raw prediction via argMax
val predictionLabels = prediction.argMax(1, true)
// find out accuracy
val acc = accuracy(yTest, predictionLabels)
println("Test accuracy:$acc")
val xTest = x.rowsByIndices(testIndices)
val yTest = y.rowsByIndices(testIndices)
// build model
val layers = buildList {
add(Dense(features, 64))
add(ReLU())
add(Dense(64, 16))
add(ReLU())
add(Dense(16, 2))
add(Sigmoid())
}
val model = NeuralNetwork(layers)
// fit it with train data
model.fit(xTrain, yTrain, batchSize = 20, epochs = 10)
// make prediction
val prediction = model.predict(xTest)
// process raw prediction via argMax
val predictionLabels = prediction.argMax(1, true)
// find out accuracy
val acc = accuracy(yTest, predictionLabels)
println("Test accuracy:$acc")
}

View File

@ -11,68 +11,64 @@ import space.kscience.kmath.tensors.core.BroadcastDoubleTensorAlgebra
// simple PCA
fun main(){
fun main() = BroadcastDoubleTensorAlgebra { // work in context with broadcast methods
val seed = 100500L
// work in context with broadcast methods
BroadcastDoubleTensorAlgebra {
// assume x is range from 0 until 10
val x = fromArray(
intArrayOf(10),
(0 until 10).toList().map { it.toDouble() }.toDoubleArray()
)
// assume x is range from 0 until 10
val x = fromArray(
intArrayOf(10),
(0 until 10).toList().map { it.toDouble() }.toDoubleArray()
)
// take y dependent on x with noise
val y = 2.0 * x + (3.0 + x.randomNormalLike(seed) * 1.5)
// take y dependent on x with noise
val y = 2.0 * x + (3.0 + x.randomNormalLike(seed) * 1.5)
println("x:\n$x")
println("y:\n$y")
println("x:\n$x")
println("y:\n$y")
// stack them into single dataset
val dataset = stack(listOf(x, y)).transpose()
// stack them into single dataset
val dataset = stack(listOf(x, y)).transpose()
// normalize both x and y
val xMean = x.mean()
val yMean = y.mean()
// normalize both x and y
val xMean = x.mean()
val yMean = y.mean()
val xStd = x.std()
val yStd = y.std()
val xStd = x.std()
val yStd = y.std()
val xScaled = (x - xMean) / xStd
val yScaled = (y - yMean) / yStd
val xScaled = (x - xMean) / xStd
val yScaled = (y - yMean) / yStd
// save means ans standard deviations for further recovery
val mean = fromArray(
intArrayOf(2),
doubleArrayOf(xMean, yMean)
)
println("Means:\n$mean")
// save means ans standard deviations for further recovery
val mean = fromArray(
intArrayOf(2),
doubleArrayOf(xMean, yMean)
)
println("Means:\n$mean")
val std = fromArray(
intArrayOf(2),
doubleArrayOf(xStd, yStd)
)
println("Standard deviations:\n$std")
val std = fromArray(
intArrayOf(2),
doubleArrayOf(xStd, yStd)
)
println("Standard deviations:\n$std")
// calculate the covariance matrix of scaled x and y
val covMatrix = cov(listOf(xScaled, yScaled))
println("Covariance matrix:\n$covMatrix")
// calculate the covariance matrix of scaled x and y
val covMatrix = cov(listOf(xScaled, yScaled))
println("Covariance matrix:\n$covMatrix")
// and find out eigenvector of it
val (_, evecs) = covMatrix.symEig()
val v = evecs[0]
println("Eigenvector:\n$v")
// and find out eigenvector of it
val (_, evecs) = covMatrix.symEig()
val v = evecs[0]
println("Eigenvector:\n$v")
// reduce dimension of dataset
val datasetReduced = v dot stack(listOf(xScaled, yScaled))
println("Reduced data:\n$datasetReduced")
// reduce dimension of dataset
val datasetReduced = v dot stack(listOf(xScaled, yScaled))
println("Reduced data:\n$datasetReduced")
// we can restore original data from reduced data.
// for example, find 7th element of dataset
val n = 7
val restored = (datasetReduced[n] dot v.view(intArrayOf(1, 2))) * std + mean
println("Original value:\n${dataset[n]}")
println("Restored value:\n$restored")
}
// we can restore original data from reduced data.
// for example, find 7th element of dataset
val n = 7
val restored = (datasetReduced[n] dot v.view(intArrayOf(1, 2))) * std + mean
println("Original value:\n${dataset[n]}")
println("Restored value:\n$restored")
}

View File

@ -19,6 +19,9 @@ import space.kscience.kmath.structures.Buffer
public interface ColumnarData<out T> {
public val size: Int
/**
* Provide a column by symbol or null if column with given symbol is not defined
*/
public operator fun get(symbol: Symbol): Buffer<T>?
}

View File

@ -5,7 +5,7 @@ pluginManagement {
maven("https://repo.kotlin.link")
}
val toolsVersion = "0.9.6"
val toolsVersion = "0.9.7"
val kotlinVersion = "1.5.0"
plugins {