2019-02-20 13:05:39 +03:00
10 changed files with 256 additions and 71 deletions
--- a/benchmarks/build.gradle
+++ b/benchmarks/build.gradle
@ -8,4 +8,10 @@ dependencies {
    compile project(":kmath-core")
    compile project(":kmath-coroutines")
    //jmh project(':kmath-core')
-}
+}
 jmh{
    warmupIterations = 1
 }
 jmhClasses.dependsOn(compileKotlin)
--- a/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/ArrayBenchmark.kt
+++ b/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/ArrayBenchmark.kt
@ -1,49 +1,48 @@
 package scientifik.kmath.structures
-import org.openjdk.jmh.annotations.*
+import org.openjdk.jmh.annotations.Benchmark
 import org.openjdk.jmh.annotations.Scope
 import org.openjdk.jmh.annotations.State
 import java.nio.IntBuffer
@Warmup(iterations = 1)
@Measurement(iterations = 5)
@State(Scope.Benchmark)
 /**
  * JMH benchmark comparing element reads from a plain [IntArray], an [IntBuffer] wrapping that array,
  * and a separately allocated [IntBuffer].
  *
  * Each benchmark walks its storage from the last index down to the first and adds every value to a local sum.
  *
  * NOTE(review): the sum is never returned or otherwise consumed, so the JIT may be free to discard the
  * measured loop; consider returning `res` from each benchmark.
  * NOTE(review): despite its name, `nativeBuffer` is created with [IntBuffer.allocate], which produces a
  * heap buffer rather than a direct (off-heap) one.
  */
 open class ArrayBenchmark {
    lateinit var array: IntArray
    lateinit var arrayBuffer: IntBuffer
    lateinit var nativeBuffer: IntBuffer
    @Setup
    fun setup() {
        array = IntArray(10000) { it }
        arrayBuffer = IntBuffer.wrap(array)
        nativeBuffer = IntBuffer.allocate(10000)
        for (i in 0 until 10000) {
            nativeBuffer.put(i, i)
        }
    }
    @Benchmark
    fun benchmarkArrayRead() {
        var res = 0
-        for (i in 1..10000) {
+        for (i in 1..size) {
-            res += array[10000 - i]
+            res += array[size - i]
        }
    }
    @Benchmark
    fun benchmarkBufferRead() {
        var res = 0
-        for (i in 1..10000) {
+        for (i in 1..size) {
-            res += arrayBuffer.get(10000 - i)
+            res += arrayBuffer.get(size - i)
        }
    }
    @Benchmark
    fun nativeBufferRead() {
        var res = 0
-        for (i in 1..10000) {
+        for (i in 1..size) {
-            res += nativeBuffer.get(10000 - i)
+            res += nativeBuffer.get(size - i)
        }
    }
    companion object {
        val size = 1000
        val array = IntArray(size) { it }
        val arrayBuffer = IntBuffer.wrap(array)
        val nativeBuffer = IntBuffer.allocate(size).also {
            for (i in 0 until size) {
                it.put(i, i)
            }
        }
    }
 }
--- a/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/BufferBenchmark.kt
+++ b/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/BufferBenchmark.kt
@ -1,10 +1,10 @@
 package scientifik.kmath.structures
-import org.openjdk.jmh.annotations.*
+import org.openjdk.jmh.annotations.Benchmark
 import org.openjdk.jmh.annotations.Scope
 import org.openjdk.jmh.annotations.State
 import scientifik.kmath.operations.Complex
@Warmup(iterations = 1)
@Measurement(iterations = 5)
@State(Scope.Benchmark)
 open class BufferBenchmark {
@ -22,7 +22,7 @@ open class BufferBenchmark {
    @Benchmark
    fun complexBufferReadWrite() {
-        val buffer = Complex.createBuffer(size / 2)
+        val buffer = MutableBuffer.complex(size / 2)
        (0 until size / 2).forEach {
            buffer[it] = Complex(it.toDouble(), -it.toDouble())
        }
@ -33,6 +33,6 @@ open class BufferBenchmark {
    }
    companion object {
-        const val size = 1000
+        const val size = 100
    }
 }
--- a/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt
+++ b/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt
@ -0,0 +1,69 @@
 package scientifik.kmath.structures
 import org.openjdk.jmh.annotations.Benchmark
 import scientifik.kmath.operations.RealField
 /**
  * JMH benchmarks that repeat an addition [n] times on a `dim` x `dim` structure, once for each
  * NDField context created in the companion object.
  *
  * Every benchmark returns the value it computed. JMH consumes returned values, which prevents the
  * JIT from treating the measured loop as dead code and removing it.
  */
 open class NDFieldBenchmark {
    /** Adds `one` to an accumulator [n] times inside the context created by `NDField.auto`. */
    @Benchmark
    fun autoFieldAdd(): NDBuffer<Double> = bufferedField.run {
        var res: NDBuffer<Double> = one
        repeat(n) {
            res += one
        }
        res
    }

    /** Converts `one` of the `NDField.auto` context to an element and adds `1.0` to it [n] times. */
    @Benchmark
    fun autoElementAdd(): Any {
        var res = bufferedField.run { one.toElement() }
        repeat(n) {
            res += 1.0
        }
        // Returned as Any: JMH only needs a value to consume, the static type is irrelevant here.
        return res
    }

    /** Adds `1.0` to an accumulator [n] times inside the context created by `NDField.real`. */
    @Benchmark
    fun specializedFieldAdd(): NDBuffer<Double> = specializedField.run {
        var res: NDBuffer<Double> = one
        repeat(n) {
            res += 1.0
        }
        res
    }

    /**
     * Adds `one` to an accumulator [n] times inside the context created by `NDField.lazy`,
     * then sums all elements of the result.
     */
    @Benchmark
    fun lazyFieldAdd(): Double = lazyNDField.run {
        var res = one
        repeat(n) {
            res += one
        }
        res.elements().sumByDouble { it.second }
    }

    /** Adds `one` to an accumulator [n] times inside the context created by `NDField.buffered`. */
    @Benchmark
    fun boxingFieldAdd(): NDBuffer<Double> = genericField.run {
        var res: NDBuffer<Double> = one
        repeat(n) {
            res += one
        }
        res
    }

    companion object {
        /** Size of each of the two dimensions of the benchmarked structures. */
        val dim = 1000

        /** Number of additions performed by every benchmark invocation. */
        val n = 100

        val bufferedField = NDField.auto(intArrayOf(dim, dim), RealField)
        val specializedField = NDField.real(intArrayOf(dim, dim))
        val genericField = NDField.buffered(intArrayOf(dim, dim), RealField)
        val lazyNDField = NDField.lazy(intArrayOf(dim, dim), RealField)
    }
 }
--- a/benchmarks/src/main/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt
+++ b/benchmarks/src/main/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt
@ -7,34 +7,29 @@ fun main(args: Array<String>) {
    val dim = 1000
    val n = 1000
-    val bufferedField = NDField.auto(intArrayOf(dim, dim), RealField)
+    // automatically build context most suited for given type.
    val autoField = NDField.auto(intArrayOf(dim, dim), RealField)
    // specialized nd-field for Double. It works as generic Double field as well
    val specializedField = NDField.real(intArrayOf(dim, dim))
    //A field implementing lazy computations. All elements are computed on-demand
    val lazyField = NDField.lazy(intArrayOf(dim, dim), RealField)
    //A generic boxing field. It should be used for objects, not primitives.
    val genericField = NDField.buffered(intArrayOf(dim, dim), RealField)
    val lazyNDField = NDField.lazy(intArrayOf(dim, dim), RealField)
 //    val action: NDField<Double, DoubleField, NDStructure<Double>>.() -> Unit = {
 //        var res = one
 //        repeat(n) {
 //            res += 1.0
 //        }
 //    }
-    val doubleTime = measureTimeMillis {
+    val autoTime = measureTimeMillis {
-
+        autoField.run {
-        bufferedField.run {
+            var res = one
            var res: NDBuffer<Double> = one
            repeat(n) {
-                res += one
+                res += 1.0
            }
        }
    }
-    println("Buffered addition completed in $doubleTime millis")
+    println("Buffered addition completed in $autoTime millis")
    val elementTime = measureTimeMillis {
-        var res = bufferedField.run{one.toElement()}
+        var res = genericField.one
        repeat(n) {
            res += 1.0
        }
@ -43,9 +38,8 @@ fun main(args: Array<String>) {
    println("Element addition completed in $elementTime millis")
    val specializedTime = measureTimeMillis {
        //specializedField.run(action)
        specializedField.run {
-            var res: NDBuffer<Double> = one
+            var res = one
            repeat(n) {
                res += 1.0
            }
@ -56,17 +50,16 @@ fun main(args: Array<String>) {
    val lazyTime = measureTimeMillis {
-        val tr : RealField.(Double)->Double = {arg->
+        lazyField.run {
-            var r = arg
+            val res = one.map {
-            repeat(n) {
+                var c = 0.0
-                r += 1.0
+                repeat(n) {
                    c += 1.0
                }
                c
            }
            r
        }
        lazyNDField.run {
            val res = one.map(tr)
-            res.elements().sumByDouble { it.second }
+            res.elements().forEach { it.second }
        }
    }
@ -77,10 +70,11 @@ fun main(args: Array<String>) {
        genericField.run {
            var res: NDBuffer<Double> = one
            repeat(n) {
-                res += one
+                res += 1.0
            }
        }
    }
    println("Generic addition completed in $genericTime millis")
 }
--- a/build.gradle.kts
+++ b/build.gradle.kts
@ -28,7 +28,7 @@ allprojects {
    apply(plugin = "com.jfrog.artifactory")
    group = "scientifik"
-    version = "0.0.3-dev-1"
+    version = "0.0.3-dev-2"
    repositories {
        maven("https://dl.bintray.com/kotlin/kotlin-eap")
--- a/doc/nd-performance.md
+++ b/doc/nd-performance.md
@ -5,9 +5,123 @@ structures. In `kmath` performance depends on which particular context was used
 Let us consider following contexts:
 ```kotlin
-    // automatically build context
+    // specialized nd-field for Double. It works as generic Double field as well
    val bufferedField = NDField.auto(intArrayOf(dim, dim), RealField)
    val specializedField = NDField.real(intArrayOf(dim, dim))
    // automatically build context most suited for given type.
    val autoField = NDField.auto(intArrayOf(dim, dim), RealField)
    //A field implementing lazy computations. All elements are computed on-demand
    val lazyField = NDField.lazy(intArrayOf(dim, dim), RealField)
    //A generic boxing field. It should be used for objects, not primitives.
    val genericField = NDField.buffered(intArrayOf(dim, dim), RealField)
-    val lazyNDField = NDField.lazy(intArrayOf(dim, dim), RealField)
+```
-```
+Now let us perform several tests and see which implementation is best suited for each case:
 ## Test case
 In order to test performance we will take 2d-structures with `dim = 1000` and add a structure filled with `1.0`
 to it `n = 1000` times.
 ## Specialized
 The code to run this looks like:
 ```kotlin
    specializedField.run {
        var res = one
        repeat(n) {
            res += 1.0
        }
    }
 ```
 The performance of this code is the best of all tests since it inlines all operations and is specialized for operations
 on doubles. We will measure everything else relative to this one, so the time for this test will be `1x` (real time
 on my computer is about 4.5 seconds). The only problem with this approach is that it requires specifying the type
 from the beginning. Everyone does so anyway, so it is the recommended approach.
 ## Automatic
 Let's do the same with automatic field inference:
 ```kotlin
    autoField.run {
        var res = one
        repeat(n) {
            res += 1.0
        }
    }
 ```
 The speed of this operation is approximately the same as for the specialized case since `NDField.auto` just
 returns the same `RealNDField` in this case. Of course, it is usually better to use the specialized method to be sure.
 ## Lazy
 Lazy field does not produce a structure when asked, instead it generates an empty structure and fills it on-demand
 using coroutines to parallelize computations.
 When one calls
 ```kotlin
    lazyField.run {
        var res = one
        repeat(n) {
            res += 1.0
        }
    }
 ```
 The call will return almost immediately, but the resulting structure will be empty. In order to get the full result
 structure one needs to request all of its elements. In this case the computation overhead will be huge, so this field
 should never be used if one expects to use the full result structure. However, if one needs only a small fraction
 of the elements, it could save a lot of time.
 This field can still be used with reasonable performance if the calling code is changed:
 ```kotlin
    lazyField.run {
        val res = one.map {
            var c = 0.0
            repeat(n) {
                c += 1.0
            }
            c
        }
        res.elements().forEach { it.second }
    }
 ```
 In this case it completes in about `4x-5x` time due to boxing.
 ## Boxing
 The boxing field produced by
 ```kotlin
    genericField.run {
        var res = one
        repeat(n) {
            res += 1.0
        }
    }
 ```
 is obviously the slowest one, because it requires boxing and unboxing the `double` on each operation. It takes about
 `15x` time (**TODO: there seems to be a problem here, it should be slow, but not that slow**). This field should
 never be used for primitives.
 ## Element operation
 Let us also check the speed for direct operations on elements:
 ```kotlin
    var res = genericField.one
    repeat(n) {
        res += 1.0
    }
 ```
 One would expect it to be at least as slow as the field operation, but in fact this one takes only `2x` time to complete.
 This happens because in this particular case it does not use the actual `NDField`; instead, the result is calculated
 directly via an extension function.
 ## What about python?
 Usually it is a bad idea to compare the performance of direct numerical operations across different languages, but it is
 hard to work completely without a frame of reference. In this case, simple numpy code:
 ```python
 res = np.ones((1000,1000))
 for i in range(1000):
    res = res + 1.0
 ```
 gives a completion time of about `1.1x`, which means that the specialized Kotlin code is in fact faster (I think it is
 because of better memory management). Of course, if one writes `res += 1.0`, the performance will be different,
 but it would be a different case, because numpy implements `+=` as an in-place operation. In-place operations are
 available in `kmath` with `MutableNDStructure` but there is no field for it (one can still work with mapping
 functions).
--- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/operations/Algebra.kt
+++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/operations/Algebra.kt
@ -69,11 +69,11 @@ interface Ring<T> : Space<T> {
    operator fun T.times(b: T): T = multiply(this, b)
-    operator fun T.plus(b: Number) = this.plus(b * one)
+//    operator fun T.plus(b: Number) = this.plus(b * one)
-    operator fun Number.plus(b: T) = b + this
+//    operator fun Number.plus(b: T) = b + this
-
+//
-    operator fun T.minus(b: Number) = this.minus(b * one)
+//    operator fun T.minus(b: Number) = this.minus(b * one)
-    operator fun Number.minus(b: T) = -b + this
+//    operator fun Number.minus(b: T) = -b + this
 }
 abstract class AbstractRing<T : Any> : AbstractSpace<T>(), Ring<T> {
--- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/RealNDField.kt
+++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/RealNDField.kt
@ -79,7 +79,7 @@ class RealNDField(shape: IntArray) :
 * Fast element production using function inlining
 */
 inline fun StridedNDField<Double, RealField>.produceInline(crossinline initializer: RealField.(Int) -> Double): RealNDElement {
-    val array = DoubleArray(strides.linearSize) { offset -> elementField.initializer(offset) }
+    val array = DoubleArray(strides.linearSize) { offset -> RealField.initializer(offset) }
    return StridedNDElement(this, DoubleBuffer(array))
 }
@ -102,4 +102,4 @@ operator fun RealNDElement.plus(arg: Double) =
 * Subtraction operation between [StridedNDElement] and single element
 */
 operator fun RealNDElement.minus(arg: Double) =
-    context.produceInline { i -> buffer[i] - arg }
+    context.produceInline { i -> buffer[i] - arg }
--- a/kmath-coroutines/src/commonMain/kotlin/scientifik/kmath/structures/LazyNDField.kt
+++ b/kmath-coroutines/src/commonMain/kotlin/scientifik/kmath/structures/LazyNDField.kt
@ -21,11 +21,13 @@ class LazyNDField<T, F : Field<T>>(shape: IntArray, field: F, val scope: Corouti
        check(arg)
        return if (arg is LazyNDStructure<T, *>) {
            LazyNDStructure(this) { index ->
-                this.elementField.transform(index, arg.function(index))
+                //FIXME if value of arg is already calculated, it should be used
                elementField.transform(index, arg.function(index))
            }
        } else {
            LazyNDStructure(this) { elementField.transform(it, arg.await(it)) }
        }
 //        return LazyNDStructure(this) { elementField.transform(it, arg.await(it)) }
    }
    override fun map(arg: NDStructure<T>, transform: F.(T) -> T) =
@ -43,6 +45,7 @@ class LazyNDField<T, F : Field<T>>(shape: IntArray, field: F, val scope: Corouti
        } else {
            LazyNDStructure(this@LazyNDField) { elementField.transform(a.await(it), b.await(it)) }
        }
 //        return LazyNDStructure(this) { elementField.transform(a.await(it), b.await(it)) }
    }
    fun NDStructure<T>.lazy(): LazyNDStructure<T, F> {