kmath/examples/notebooks/Naive classifier.ipynb


In [ ]:
%use kmath(0.3.1-dev-5)
%use plotly(0.5.0)
@file:DependsOn("space.kscience:kmath-commons:0.3.1-dev-5")
In [ ]:
//Uncomment to work in Jupyter classic or DataLore
//Plotly.jupyter.notebook()

The model

Define the input data format, the statistic abstraction, and a statistic implementation based on a weighted sum of the y values normalized by their total.

In [ ]:
// Paired x/y samples of equal length
class XYValues(val xValues: DoubleArray, val yValues: DoubleArray) {
    init {
        require(xValues.size == yValues.size) { "xValues and yValues must have the same size" }
    }
}

// A statistic maps an XYValues sample to a single number
fun interface XYStatistic {
    operator fun invoke(values: XYValues): Double
}

// Weighted sum of the y values, normalized by their total
class ConvolutionalXYStatistic(val weights: DoubleArray) : XYStatistic {
    override fun invoke(values: XYValues): Double {
        require(weights.size == values.yValues.size) { "The number of weights must match the number of y values" }
        val norm = values.yValues.sum()
        return values.yValues.zip(weights) { value, weight -> value * weight }.sum() / norm
    }
}
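
A quick sanity check on a hand-built two-point sample (a hypothetical example, not part of the original notebook): with weights (0, 1) the statistic should return the share of the total contributed by the second y value.

In [ ]:
// Hypothetical check: with weights (0, 1) the statistic is y[1] / (y[0] + y[1])
val toy = XYValues(doubleArrayOf(1.0, 2.0), doubleArrayOf(2.0, 6.0))
ConvolutionalXYStatistic(doubleArrayOf(0.0, 1.0))(toy) // expected 6.0 / 8.0 = 0.75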

Generator

Generate sample data for parabolas and hyperbolas

In [ ]:
// Parabola y = a * x^2 + b * x + c sampled at the given x values
fun generateParabolas(xValues: DoubleArray, a: Double, b: Double, c: Double): XYValues {
    val yValues = xValues.map { x -> a * x * x + b * x + c }.toDoubleArray()
    return XYValues(xValues, yValues)
}

// Hyperbola y = y0 + gamma / (x - x0) sampled at the given x values
fun generateHyperbols(xValues: DoubleArray, gamma: Double, x0: Double, y0: Double): XYValues {
    val yValues = xValues.map { x -> y0 + gamma / (x - x0) }.toDoubleArray()
    return XYValues(xValues, yValues)
}
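
As a quick check of the generators (a hypothetical example, not part of the original notebook), a unit parabola should reproduce y = x².

In [ ]:
// Hypothetical check: a = 1, b = 0, c = 0 should give y = x^2
val unitParabola = generateParabolas(doubleArrayOf(1.0, 2.0, 3.0), 1.0, 0.0, 0.0)
unitParabola.yValues.toList() // expected [1.0, 4.0, 9.0]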
In [ ]:
val xValues = (1.0..10.0).step(1.0).toDoubleArray()

val xy = generateHyperbols(xValues, 1.0, 0.0, 0.0)

Plotly.plot {
    scatter {
        this.x.doubles = xValues
        this.y.doubles = xy.yValues
    }
}

Create a default statistic with uniform weights

In [ ]:
val statistic = ConvolutionalXYStatistic(DoubleArray(xValues.size){1.0})
statistic(xy)
In [ ]:
import kotlin.random.Random

val random = Random(1288)

val parabolas = buildList {
    repeat(500) {
        add(
            generateParabolas(
                xValues,
                random.nextDouble(),
                random.nextDouble(),
                random.nextDouble()
            )
        )
    }
}

val hyperbolas: List<XYValues> = buildList {
    repeat(500) {
        add(
            generateHyperbols(
                xValues,
                random.nextDouble() * 10,
                random.nextDouble(),
                random.nextDouble()
            )
        )
    }
}
In [ ]:
Plotly.plot {
    scatter {
        x.doubles = xValues
        y.doubles = parabolas[257].yValues
    }
    scatter {
        x.doubles = xValues
        y.doubles = hyperbolas[252].yValues
    }
}
In [ ]:
Plotly.plot { 
    histogram { 
        name = "parabolae"
        x.numbers = parabolas.map { statistic(it) }
    }
    histogram { 
        name = "hyperbolae"
        x.numbers = hyperbolas.map { statistic(it) }
    }
}

Define a loss function: the negative absolute difference between the summed statistic values of the two classes, so minimizing it drives the two distributions apart.

In [ ]:
import kotlin.math.abs

val lossFunction: (XYStatistic) -> Double = { statistic ->
    -abs(parabolas.sumOf { statistic(it) } - hyperbolas.sumOf { statistic(it) })
}
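
As an illustration (not part of the original notebook), the loss can be evaluated for the uniform-weight statistic defined above to get a baseline value before optimization.

In [ ]:
// Baseline loss of the uniform-weight statistic defined earlier
lossFunction(statistic)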

Use the Apache Commons Math simplex optimizer to optimize the weights

In [ ]:
import org.apache.commons.math3.optim.*
import org.apache.commons.math3.optim.nonlinear.scalar.*
import org.apache.commons.math3.optim.nonlinear.scalar.noderiv.*

// Nelder-Mead simplex optimizer; the arguments are the relative and absolute convergence thresholds
val optimizer = SimplexOptimizer(1e-1, Double.MAX_VALUE)

val result = optimizer.optimize(
    ObjectiveFunction { point ->
        lossFunction(ConvolutionalXYStatistic(point))
    },
    NelderMeadSimplex(xValues.size),
    InitialGuess(DoubleArray(xValues.size) { 1.0 }),
    GoalType.MINIMIZE,
    MaxEval(100000)
)

Print the resulting weights of the optimization

In [ ]:
result.point
In [ ]:
Plotly.plot {
    scatter {
        y.doubles = result.point
    }
}

The resulting statistic distribution

In [ ]:
val resultStatistic = ConvolutionalXYStatistic(result.point)
Plotly.plot { 
    histogram { 
        name = "parabolae"
        x.numbers = parabolas.map { resultStatistic(it) }
    }
    histogram { 
        name = "hyperbolae"
        x.numbers = hyperbolas.map { resultStatistic(it) }
    }
}
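
As a sketch of how the optimized statistic could serve as an actual classifier (this step and the helper isParabolaLike are not part of the original notebook), assign each sample to the class whose mean statistic value is closer and count how many of the training samples end up on the correct side.

In [ ]:
import kotlin.math.abs

// Hypothetical evaluation: classify each sample to the class with the closer mean statistic value
val parabolaValues = parabolas.map { resultStatistic(it) }
val hyperbolaValues = hyperbolas.map { resultStatistic(it) }
val parabolaMean = parabolaValues.average()
val hyperbolaMean = hyperbolaValues.average()

fun isParabolaLike(value: Double): Boolean = abs(value - parabolaMean) < abs(value - hyperbolaMean)

val correct = parabolaValues.count { isParabolaLike(it) } + hyperbolaValues.count { !isParabolaLike(it) }
correct.toDouble() / (parabolaValues.size + hyperbolaValues.size) // fraction of training samples separated correctly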
In [ ]: