stat-methods/Теория проверки гипотез/Naive classifier.ipynb

3.6 MiB

In [1]:
%use kmath
%use plotly(0.5.0)
@file:DependsOn("org.apache.commons:commons-math3:3.6.1")
In [2]:
//Plotly.jupyter.notebook()
In [3]:
class XYValues(val xValues: DoubleArray, val yValues: DoubleArray) {
    init {
        require(xValues.size == yValues.size)
    }
}

fun interface XYStatistic {
    operator fun invoke(values: XYValues): Double
}

fun generateParabola(xValues: DoubleArray, a: Double, b: Double, c: Double): XYValues {
    val yValues = xValues.map { x -> a * x * x + b * x + c }.toDoubleArray()
    return XYValues(xValues, yValues)
}

fun generateHyperbole(xValues: DoubleArray, gamma: Double, x0: Double, y0: Double): XYValues {
    val yValues = xValues.map { x -> y0 + gamma / (x - x0) }.toDoubleArray()
    return XYValues(xValues, yValues)
}

class ConvolutionalXYStatistic(val weights: DoubleArray) : XYStatistic {
    override fun invoke(values: XYValues): Double {
        require(weights.size == values.yValues.size)
        val norm = values.yValues.sum()
        return values.yValues.zip(weights) { value, weight -> value * weight }.sum()/norm
    }
}
In [4]:
val xValues = (1.0..10.0).step(1.0).toDoubleArray()

val xy = generateHyperbole(xValues, 1.0, 0.0, 0.0)

Plotly.plot {
    scatter {
        this.x.doubles = xValues
        this.y.doubles = xy.yValues
    }
}
Out[4]:
In [5]:
val statistic = ConvolutionalXYStatistic(DoubleArray(xValues.size){if(it == 5) 1.0 else 0.0})
statistic(xy)
Out[5]:
0.05690285869123425
In [6]:
import kotlin.random.Random

val random = Random(1288)

val parabolae = buildList{
    repeat(500){
        add(
            generateParabola(
                xValues, 
                random.nextDouble(), 
                random.nextDouble(), 
                random.nextDouble()
            )
        )
    }
}

val hyperbolae: List<XYValues> =  buildList{
    repeat(500){
        add(
            generateHyperbole(
                xValues, 
                random.nextDouble()*10, 
                random.nextDouble(), 
                random.nextDouble()
            )
        )
    }
}
In [7]:
Plotly.plot { 
    scatter { 
        x.doubles = xValues
        y.doubles = parabolae[257].yValues
    }
    scatter { 
        x.doubles = xValues
        y.doubles = hyperbolae[252].yValues
    }
 }
Out[7]:
In [8]:
Plotly.plot { 
    histogram { 
        name = "parabolae"
        x.numbers = parabolae.map { statistic(it) }
    }
    histogram { 
        name = "hyperbolae"
        x.numbers = hyperbolae.map { statistic(it) }
    }
}
Out[8]:
In [9]:
val lossFunction: (XYStatistic) -> Double = { statistic ->
    - abs(parabolae.sumOf { statistic(it) } - hyperbolae.sumOf { statistic(it) })
}
In [10]:
import org.apache.commons.math3.optimization.*
import org.apache.commons.math3.analysis.*
import org.apache.commons.math3.optimization.direct.*

val cmFunction = object: MultivariateFunction{
    override fun value(point: DoubleArray) = lossFunction(ConvolutionalXYStatistic(point))
}
In [11]:
val optimizer = BOBYQAOptimizer(20)

val result = optimizer.optimize(1000, cmFunction, GoalType.MINIMIZE, xValues)
In [12]:
result.point
Out[12]:
[-1.357757502856414E8, -2.0157729926862407E7, -1.003627060413469E7, -3516890.1555952034, 2318972.567610743, 8257264.5552848, 1.4601023119437138E7, 2.149227167045051E7, 2.9007620324898317E7, -1.740750223163073E7]
In [13]:
Plotly.plot { 
    scatter { 
        y.doubles = result.point
     }
}
Out[13]:
In [14]:
val resultStatistic = ConvolutionalXYStatistic(result.point)
Plotly.plot { 
    histogram { 
        name = "parabolae"
        x.numbers = parabolae.map { resultStatistic(it) }
    }
    histogram { 
        name = "hyperbolae"
        x.numbers = hyperbolae.map { resultStatistic(it) }
    }
}
Out[14]:
In [ ]: