2019-02-20 13:05:39 +03:00
10 changed files with 256 additions and 71 deletions
--- a/benchmarks/build.gradle
+++ b/benchmarks/build.gradle
@ -8,4 +8,10 @@ dependencies {
    compile project(":kmath-core")
    compile project(":kmath-coroutines")
    //jmh project(':kmath-core')
-}
+}
+
+jmh{
+    warmupIterations = 1
+}
+
+jmhClasses.dependsOn(compileKotlin)
--- a/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/ArrayBenchmark.kt
+++ b/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/ArrayBenchmark.kt
@ -1,49 +1,48 @@
 package scientifik.kmath.structures

-import org.openjdk.jmh.annotations.*
+import org.openjdk.jmh.annotations.Benchmark
+import org.openjdk.jmh.annotations.Scope
+import org.openjdk.jmh.annotations.State
 import java.nio.IntBuffer


-@Warmup(iterations = 1)
-@Measurement(iterations = 5)
@State(Scope.Benchmark)
 open class ArrayBenchmark {

-    lateinit var array: IntArray
-    lateinit var arrayBuffer: IntBuffer
-    lateinit var nativeBuffer: IntBuffer
-
-    @Setup
-    fun setup() {
-        array = IntArray(10000) { it }
-        arrayBuffer = IntBuffer.wrap(array)
-        nativeBuffer = IntBuffer.allocate(10000)
-        for (i in 0 until 10000) {
-            nativeBuffer.put(i, i)
-        }
-    }
-
    @Benchmark
    fun benchmarkArrayRead() {
        var res = 0
-        for (i in 1..10000) {
-            res += array[10000 - i]
+        for (i in 1..size) {
+            res += array[size - i]
        }
    }

    @Benchmark
    fun benchmarkBufferRead() {
        var res = 0
-        for (i in 1..10000) {
-            res += arrayBuffer.get(10000 - i)
+        for (i in 1..size) {
+            res += arrayBuffer.get(size - i)
        }
    }

    @Benchmark
    fun nativeBufferRead() {
        var res = 0
-        for (i in 1..10000) {
-            res += nativeBuffer.get(10000 - i)
+        for (i in 1..size) {
+            res += nativeBuffer.get(size - i)
+        }
+    }
+    
+    companion object {
+        val size = 1000
+        
+        val array = IntArray(size) { it }
+        val arrayBuffer = IntBuffer.wrap(array)
+        val nativeBuffer = IntBuffer.allocate(size).also {
+            for (i in 0 until size) {
+                it.put(i, i)
+            }
+
        }
    }
 }
--- a/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/BufferBenchmark.kt
+++ b/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/BufferBenchmark.kt
@ -1,10 +1,10 @@
 package scientifik.kmath.structures

-import org.openjdk.jmh.annotations.*
+import org.openjdk.jmh.annotations.Benchmark
+import org.openjdk.jmh.annotations.Scope
+import org.openjdk.jmh.annotations.State
 import scientifik.kmath.operations.Complex

-@Warmup(iterations = 1)
-@Measurement(iterations = 5)
@State(Scope.Benchmark)
 open class BufferBenchmark {

@ -22,7 +22,7 @@ open class BufferBenchmark {

    @Benchmark
    fun complexBufferReadWrite() {
-        val buffer = Complex.createBuffer(size / 2)
+        val buffer = MutableBuffer.complex(size / 2)
        (0 until size / 2).forEach {
            buffer[it] = Complex(it.toDouble(), -it.toDouble())
        }
@ -33,6 +33,6 @@ open class BufferBenchmark {
    }

    companion object {
-        const val size = 1000
+        const val size = 100
    }
 }
--- a/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt
+++ b/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt
@ -0,0 +1,69 @@
+package scientifik.kmath.structures
+
+import org.openjdk.jmh.annotations.Benchmark
+import scientifik.kmath.operations.RealField
+
+open class NDFieldBenchmark {
+
+    @Benchmark
+    fun autoFieldAdd() {
+        bufferedField.run {
+            var res: NDBuffer<Double> = one
+            repeat(n) {
+                res += one
+            }
+        }
+    }
+
+    @Benchmark
+    fun autoElementAdd() {
+        var res = bufferedField.run { one.toElement() }
+        repeat(n) {
+            res += 1.0
+        }
+    }
+
+    @Benchmark
+    fun specializedFieldAdd() {
+        specializedField.run {
+            var res: NDBuffer<Double> = one
+            repeat(n) {
+                res += 1.0
+            }
+        }
+    }
+
+
+    @Benchmark
+    fun lazyFieldAdd() {
+        lazyNDField.run {
+            var res = one
+            repeat(n) {
+                res += one
+            }
+
+            res.elements().sumByDouble { it.second }
+        }
+    }
+
+
+    @Benchmark
+    fun boxingFieldAdd() {
+        genericField.run {
+            var res: NDBuffer<Double> = one
+            repeat(n) {
+                res += one
+            }
+        }
+    }
+
+    companion object {
+        val dim = 1000
+        val n = 100
+
+        val bufferedField = NDField.auto(intArrayOf(dim, dim), RealField)
+        val specializedField = NDField.real(intArrayOf(dim, dim))
+        val genericField = NDField.buffered(intArrayOf(dim, dim), RealField)
+        val lazyNDField = NDField.lazy(intArrayOf(dim, dim), RealField)
+    }
+}
--- a/benchmarks/src/main/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt
+++ b/benchmarks/src/main/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt
@ -7,34 +7,29 @@ fun main(args: Array<String>) {
    val dim = 1000
    val n = 1000

-    val bufferedField = NDField.auto(intArrayOf(dim, dim), RealField)
+    // automatically build context most suited for given type.
+    val autoField = NDField.auto(intArrayOf(dim, dim), RealField)
+    // specialized nd-field for Double. It works as generic Double field as well
    val specializedField = NDField.real(intArrayOf(dim, dim))
+    //A field implementing lazy computations. All elements are computed on-demand
+    val lazyField = NDField.lazy(intArrayOf(dim, dim), RealField)
+    //A generic boxing field. It should be used for objects, not primitives.
    val genericField = NDField.buffered(intArrayOf(dim, dim), RealField)
-    val lazyNDField = NDField.lazy(intArrayOf(dim, dim), RealField)
-
-//    val action: NDField<Double, DoubleField, NDStructure<Double>>.() -> Unit = {
-//        var res = one
-//        repeat(n) {
-//            res += 1.0
-//        }
-//    }


-    val doubleTime = measureTimeMillis {
-
-        bufferedField.run {
-            var res: NDBuffer<Double> = one
+    val autoTime = measureTimeMillis {
+        autoField.run {
+            var res = one
            repeat(n) {
-                res += one
+                res += 1.0
            }
        }
    }

-    println("Buffered addition completed in $doubleTime millis")
-
+    println("Buffered addition completed in $autoTime millis")

    val elementTime = measureTimeMillis {
-        var res = bufferedField.run{one.toElement()}
+        var res = genericField.one
        repeat(n) {
            res += 1.0
        }
@ -43,9 +38,8 @@ fun main(args: Array<String>) {
    println("Element addition completed in $elementTime millis")

    val specializedTime = measureTimeMillis {
-        //specializedField.run(action)
        specializedField.run {
-            var res: NDBuffer<Double> = one
+            var res = one
            repeat(n) {
                res += 1.0
            }
@ -56,17 +50,16 @@ fun main(args: Array<String>) {


    val lazyTime = measureTimeMillis {
-        val tr : RealField.(Double)->Double = {arg->
-            var r = arg
-            repeat(n) {
-                r += 1.0
+        lazyField.run {
+            val res = one.map {
+                var c = 0.0
+                repeat(n) {
+                    c += 1.0
+                }
+                c
            }
-            r
-        }
-        lazyNDField.run {
-            val res = one.map(tr)

-            res.elements().sumByDouble { it.second }
+            res.elements().forEach { it.second }
        }
    }

@ -77,10 +70,11 @@ fun main(args: Array<String>) {
        genericField.run {
            var res: NDBuffer<Double> = one
            repeat(n) {
-                res += one
+                res += 1.0
            }
        }
    }

    println("Generic addition completed in $genericTime millis")
+
 }
--- a/build.gradle.kts
+++ b/build.gradle.kts
@ -28,7 +28,7 @@ allprojects {
    apply(plugin = "com.jfrog.artifactory")

    group = "scientifik"
-    version = "0.0.3-dev-1"
+    version = "0.0.3-dev-2"

    repositories {
        maven("https://dl.bintray.com/kotlin/kotlin-eap")
--- a/doc/nd-performance.md
+++ b/doc/nd-performance.md
@ -5,9 +5,123 @@ structures. In `kmath` performance depends on which particular context was used

 Let us consider following contexts:
 ```kotlin
-    // automatically build context
-    val bufferedField = NDField.auto(intArrayOf(dim, dim), RealField)
+    // specialized nd-field for Double. It works as generic Double field as well
    val specializedField = NDField.real(intArrayOf(dim, dim))
+
+    // automatically build context most suited for given type.
+    val autoField = NDField.auto(intArrayOf(dim, dim), RealField)
+
+    //A field implementing lazy computations. All elements are computed on-demand
+    val lazyField = NDField.lazy(intArrayOf(dim, dim), RealField)
+
+    //A generic boxing field. It should be used for objects, not primitives.
    val genericField = NDField.buffered(intArrayOf(dim, dim), RealField)
-    val lazyNDField = NDField.lazy(intArrayOf(dim, dim), RealField)
-```
+```
+Now let us perform several tests and see which implementation is best suited for each case:
+
+## Test case
+
+In order to test performance we will take 2d-structures with `dim = 1000` and add a structure filled with `1.0`
+to it `n = 1000` times.
+
+## Specialized
+The code to run this looks like:
+```kotlin
+    specializedField.run {
+        var res = one
+        repeat(n) {
+            res += 1.0
+        }
+    }
+```
+The performance of this code is the best of all tests since it inlines all operations and is specialized for operation
+with doubles. We will measure everything else relative to this one, so time for this test will be `1x` (real time
+on my computer is about 4.5 seconds). The only problem with this approach is that it requires specifying the type
+from the beginning. Everyone does so anyway, so it is the recommended approach.
+
+## Automatic
+Let's do the same with automatic field inference:
+```kotlin
+    autoField.run {
+        var res = one
+        repeat(n) {
+            res += 1.0
+        }
+    }
+```
+The speed of this operation is approximately the same as for the specialized case since `NDField.auto` just
+returns the same `RealNDField` in this case. Of course it is usually better to use the specialized method to be sure.
+
+## Lazy
+A lazy field does not produce a structure when asked; instead it generates an empty structure and fills it on-demand
+using coroutines to parallelize computations.
+When one calls
+```kotlin
+    lazyField.run {
+        var res = one
+        repeat(n) {
+            res += 1.0
+        }
+    }
+```
+The result will be produced almost immediately, but it will be empty. In order to get the full result
+structure one needs to call all its elements. In this case computation overhead will be huge. So this field should
+never be used if one expects to use the full result structure. Though if one wants only a small fraction, it could
+save a lot of time.
+
+This field could still be used with reasonable performance if the calling code is changed:
+```kotlin
+    lazyField.run {
+        val res = one.map {
+            var c = 0.0
+            repeat(n) {
+                c += 1.0
+            }
+            c
+        }
+
+        res.elements().forEach { it.second }
+    }
+```
+In this case it completes in about `4x-5x` time due to boxing.
+
+## Boxing
+The boxing field produced by
+```kotlin
+    genericField.run {
+        var res = one
+        repeat(n) {
+            res += 1.0
+        }
+    }
+```
+is obviously the slowest one, because it requires boxing and unboxing the `double` on each operation. It takes about
+`15x` time (**TODO: there seems to be a problem here, it should be slow, but not that slow**). This field should
+never be used for primitives.
+
+## Element operation
+Let us also check the speed for direct operations on elements:
+```kotlin
+    var res = genericField.one
+    repeat(n) {
+        res += 1.0
+    }
+```
+One would expect it to be at least as slow as the field operation, but in fact, this one takes only `2x` time to complete.
+This happens because in this particular case it does not use the actual `NDField` but is instead calculated directly
+via an extension function.
+
+## What about python?
+
+Usually it is a bad idea to compare the direct numerical operation performance in different languages, but it is hard to
+work completely without a frame of reference. In this case, simple numpy code:
+```python
+res = np.ones((1000,1000))
+for i in range(1000):
+    res = res + 1.0
+```
+gives the completion time of about `1.1x`, which means that specialized kotlin code in fact is working faster (I think it is
+because of better memory management). Of course if one writes `res += 1.0`, the performance will be different,
+but it would be a different case, because numpy overrides `+=` with in-place operations. In-place operations are
+available in `kmath` with `MutableNDStructure` but there is no field for it (one can still work with mapping
+functions).
--- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/operations/Algebra.kt
+++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/operations/Algebra.kt
@ -69,11 +69,11 @@ interface Ring<T> : Space<T> {

    operator fun T.times(b: T): T = multiply(this, b)

-    operator fun T.plus(b: Number) = this.plus(b * one)
-    operator fun Number.plus(b: T) = b + this
-
-    operator fun T.minus(b: Number) = this.minus(b * one)
-    operator fun Number.minus(b: T) = -b + this
+//    operator fun T.plus(b: Number) = this.plus(b * one)
+//    operator fun Number.plus(b: T) = b + this
+//
+//    operator fun T.minus(b: Number) = this.minus(b * one)
+//    operator fun Number.minus(b: T) = -b + this
 }

 abstract class AbstractRing<T : Any> : AbstractSpace<T>(), Ring<T> {
--- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/RealNDField.kt
+++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/RealNDField.kt
@ -79,7 +79,7 @@ class RealNDField(shape: IntArray) :
 * Fast element production using function inlining
 */
 inline fun StridedNDField<Double, RealField>.produceInline(crossinline initializer: RealField.(Int) -> Double): RealNDElement {
-    val array = DoubleArray(strides.linearSize) { offset -> elementField.initializer(offset) }
+    val array = DoubleArray(strides.linearSize) { offset -> RealField.initializer(offset) }
    return StridedNDElement(this, DoubleBuffer(array))
 }

@ -102,4 +102,4 @@ operator fun RealNDElement.plus(arg: Double) =
 * Subtraction operation between [StridedNDElement] and single element
 */
 operator fun RealNDElement.minus(arg: Double) =
-    context.produceInline { i -> buffer[i] - arg }
+    context.produceInline { i -> buffer[i] - arg }
--- a/kmath-coroutines/src/commonMain/kotlin/scientifik/kmath/structures/LazyNDField.kt
+++ b/kmath-coroutines/src/commonMain/kotlin/scientifik/kmath/structures/LazyNDField.kt
@ -21,11 +21,13 @@ class LazyNDField<T, F : Field<T>>(shape: IntArray, field: F, val scope: Corouti
        check(arg)
        return if (arg is LazyNDStructure<T, *>) {
            LazyNDStructure(this) { index ->
-                this.elementField.transform(index, arg.function(index))
+                //FIXME if value of arg is already calculated, it should be used
+                elementField.transform(index, arg.function(index))
            }
        } else {
            LazyNDStructure(this) { elementField.transform(it, arg.await(it)) }
        }
+//        return LazyNDStructure(this) { elementField.transform(it, arg.await(it)) }
    }

    override fun map(arg: NDStructure<T>, transform: F.(T) -> T) =
@ -43,6 +45,7 @@ class LazyNDField<T, F : Field<T>>(shape: IntArray, field: F, val scope: Corouti
        } else {
            LazyNDStructure(this@LazyNDField) { elementField.transform(a.await(it), b.await(it)) }
        }
+//        return LazyNDStructure(this) { elementField.transform(a.await(it), b.await(it)) }
    }

    fun NDStructure<T>.lazy(): LazyNDStructure<T, F> {