From c161ef0b57f9b5354ebda34ccb563e69f36aea0d Mon Sep 17 00:00:00 2001 From: Alexander Nozik Date: Mon, 7 Jan 2019 17:18:31 +0300 Subject: [PATCH] Documentation for nd-performance --- benchmarks/build.gradle | 8 +- .../kmath/structures/ArrayBenchmark.kt | 45 ++++--- .../kmath/structures/BufferBenchmark.kt | 10 +- .../kmath/structures/NDFieldBenchmark.kt | 69 ++++++++++ .../kmath/structures/NDFieldBenchmark.kt | 52 ++++---- build.gradle.kts | 2 +- doc/nd-performance.md | 122 +++++++++++++++++- .../scientifik/kmath/operations/Algebra.kt | 10 +- .../kmath/structures/RealNDField.kt | 4 +- .../kmath/structures/LazyNDField.kt | 5 +- 10 files changed, 256 insertions(+), 71 deletions(-) create mode 100644 benchmarks/src/jmh/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt diff --git a/benchmarks/build.gradle b/benchmarks/build.gradle index 23f967102..a9450a974 100644 --- a/benchmarks/build.gradle +++ b/benchmarks/build.gradle @@ -8,4 +8,10 @@ dependencies { compile project(":kmath-core") compile project(":kmath-coroutines") //jmh project(':kmath-core') -} \ No newline at end of file +} + +jmh{ + warmupIterations = 1 +} + +jmhClasses.dependsOn(compileKotlin) \ No newline at end of file diff --git a/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/ArrayBenchmark.kt b/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/ArrayBenchmark.kt index 098c61cf6..393d7b06a 100644 --- a/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/ArrayBenchmark.kt +++ b/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/ArrayBenchmark.kt @@ -1,49 +1,48 @@ package scientifik.kmath.structures -import org.openjdk.jmh.annotations.* +import org.openjdk.jmh.annotations.Benchmark +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.State import java.nio.IntBuffer -@Warmup(iterations = 1) -@Measurement(iterations = 5) @State(Scope.Benchmark) open class ArrayBenchmark { - lateinit var array: IntArray - lateinit var arrayBuffer: IntBuffer - lateinit var 
nativeBuffer: IntBuffer - - @Setup - fun setup() { - array = IntArray(10000) { it } - arrayBuffer = IntBuffer.wrap(array) - nativeBuffer = IntBuffer.allocate(10000) - for (i in 0 until 10000) { - nativeBuffer.put(i, i) - } - } - @Benchmark fun benchmarkArrayRead() { var res = 0 - for (i in 1..10000) { - res += array[10000 - i] + for (i in 1..size) { + res += array[size - i] } } @Benchmark fun benchmarkBufferRead() { var res = 0 - for (i in 1..10000) { - res += arrayBuffer.get(10000 - i) + for (i in 1..size) { + res += arrayBuffer.get(size - i) } } @Benchmark fun nativeBufferRead() { var res = 0 - for (i in 1..10000) { - res += nativeBuffer.get(10000 - i) + for (i in 1..size) { + res += nativeBuffer.get(size - i) + } + } + + companion object { + val size = 1000 + + val array = IntArray(size) { it } + val arrayBuffer = IntBuffer.wrap(array) + val nativeBuffer = IntBuffer.allocate(size).also { + for (i in 0 until size) { + it.put(i, i) + } + } } } \ No newline at end of file diff --git a/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/BufferBenchmark.kt b/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/BufferBenchmark.kt index 90f9bc372..5ca05d451 100644 --- a/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/BufferBenchmark.kt +++ b/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/BufferBenchmark.kt @@ -1,10 +1,10 @@ package scientifik.kmath.structures -import org.openjdk.jmh.annotations.* +import org.openjdk.jmh.annotations.Benchmark +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.State import scientifik.kmath.operations.Complex -@Warmup(iterations = 1) -@Measurement(iterations = 5) @State(Scope.Benchmark) open class BufferBenchmark { @@ -22,7 +22,7 @@ open class BufferBenchmark { @Benchmark fun complexBufferReadWrite() { - val buffer = Complex.createBuffer(size / 2) + val buffer = MutableBuffer.complex(size / 2) (0 until size / 2).forEach { buffer[it] = Complex(it.toDouble(), -it.toDouble()) } @@ -33,6 +33,6 @@ 
open class BufferBenchmark { } companion object { - const val size = 1000 + const val size = 100 } } \ No newline at end of file diff --git a/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt b/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt new file mode 100644 index 000000000..421b5fb6c --- /dev/null +++ b/benchmarks/src/jmh/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt @@ -0,0 +1,69 @@ +package scientifik.kmath.structures + +import org.openjdk.jmh.annotations.Benchmark +import scientifik.kmath.operations.RealField + +open class NDFieldBenchmark { + + @Benchmark + fun autoFieldAdd() { + bufferedField.run { + var res: NDBuffer = one + repeat(n) { + res += one + } + } + } + + @Benchmark + fun autoElementAdd() { + var res = bufferedField.run { one.toElement() } + repeat(n) { + res += 1.0 + } + } + + @Benchmark + fun specializedFieldAdd() { + specializedField.run { + var res: NDBuffer = one + repeat(n) { + res += 1.0 + } + } + } + + + @Benchmark + fun lazyFieldAdd() { + lazyNDField.run { + var res = one + repeat(n) { + res += one + } + + res.elements().sumByDouble { it.second } + } + } + + + @Benchmark + fun boxingFieldAdd() { + genericField.run { + var res: NDBuffer = one + repeat(n) { + res += one + } + } + } + + companion object { + val dim = 1000 + val n = 100 + + val bufferedField = NDField.auto(intArrayOf(dim, dim), RealField) + val specializedField = NDField.real(intArrayOf(dim, dim)) + val genericField = NDField.buffered(intArrayOf(dim, dim), RealField) + val lazyNDField = NDField.lazy(intArrayOf(dim, dim), RealField) + } +} \ No newline at end of file diff --git a/benchmarks/src/main/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt b/benchmarks/src/main/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt index d1101d503..bc5db1e2f 100644 --- a/benchmarks/src/main/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt +++ 
b/benchmarks/src/main/kotlin/scientifik/kmath/structures/NDFieldBenchmark.kt @@ -7,34 +7,29 @@ fun main(args: Array) { val dim = 1000 val n = 1000 - val bufferedField = NDField.auto(intArrayOf(dim, dim), RealField) + // automatically build context most suited for given type. + val autoField = NDField.auto(intArrayOf(dim, dim), RealField) + // specialized nd-field for Double. It works as generic Double field as well val specializedField = NDField.real(intArrayOf(dim, dim)) + //A field implementing lazy computations. All elements are computed on-demand + val lazyField = NDField.lazy(intArrayOf(dim, dim), RealField) + //A generic boxing field. It should be used for objects, not primitives. val genericField = NDField.buffered(intArrayOf(dim, dim), RealField) - val lazyNDField = NDField.lazy(intArrayOf(dim, dim), RealField) - -// val action: NDField>.() -> Unit = { -// var res = one -// repeat(n) { -// res += 1.0 -// } -// } - val doubleTime = measureTimeMillis { - - bufferedField.run { - var res: NDBuffer = one + val autoTime = measureTimeMillis { + autoField.run { + var res = one repeat(n) { - res += one + res += 1.0 } } } - println("Buffered addition completed in $doubleTime millis") - + println("Buffered addition completed in $autoTime millis") val elementTime = measureTimeMillis { - var res = bufferedField.run{one.toElement()} + var res = genericField.one repeat(n) { res += 1.0 } @@ -43,9 +38,8 @@ fun main(args: Array) { println("Element addition completed in $elementTime millis") val specializedTime = measureTimeMillis { - //specializedField.run(action) specializedField.run { - var res: NDBuffer = one + var res = one repeat(n) { res += 1.0 } @@ -56,17 +50,16 @@ fun main(args: Array) { val lazyTime = measureTimeMillis { - val tr : RealField.(Double)->Double = {arg-> - var r = arg - repeat(n) { - r += 1.0 + lazyField.run { + val res = one.map { + var c = 0.0 + repeat(n) { + c += 1.0 + } + c } - r - } - lazyNDField.run { - val res = one.map(tr) - 
res.elements().sumByDouble { it.second } + res.elements().forEach { it.second } } } @@ -77,10 +70,11 @@ fun main(args: Array) { genericField.run { var res: NDBuffer = one repeat(n) { - res += one + res += 1.0 } } } println("Generic addition completed in $genericTime millis") + } \ No newline at end of file diff --git a/build.gradle.kts b/build.gradle.kts index 5fa89bc1a..37e61237e 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -28,7 +28,7 @@ allprojects { apply(plugin = "com.jfrog.artifactory") group = "scientifik" - version = "0.0.3-dev-1" + version = "0.0.3-dev-2" repositories { maven("https://dl.bintray.com/kotlin/kotlin-eap") diff --git a/doc/nd-performance.md b/doc/nd-performance.md index 8ddfc355b..47653e3e8 100644 --- a/doc/nd-performance.md +++ b/doc/nd-performance.md @@ -5,9 +5,123 @@ structures. In `kmath` performance depends on which particular context was used Let us consider following contexts: ```kotlin - // automatically build context - val bufferedField = NDField.auto(intArrayOf(dim, dim), RealField) + // specialized nd-field for Double. It works as generic Double field as well val specializedField = NDField.real(intArrayOf(dim, dim)) + + // automatically build context most suited for given type. + val autoField = NDField.auto(intArrayOf(dim, dim), RealField) + + //A field implementing lazy computations. All elements are computed on-demand + val lazyField = NDField.lazy(intArrayOf(dim, dim), RealField) + + //A generic boxing field. It should be used for objects, not primitives. val genericField = NDField.buffered(intArrayOf(dim, dim), RealField) - val lazyNDField = NDField.lazy(intArrayOf(dim, dim), RealField) -``` \ No newline at end of file +``` +Now let us perform several tests and see which implementation is best suited for each case: + +## Test case + +In order to test performance we will take 2d-structures with `dim = 1000` and add a structure filled with `1.0` +to it `n = 1000` times. 
+ +## Specialized +The code to run this looks like: +```kotlin + specializedField.run { + var res = one + repeat(n) { + res += 1.0 + } + } +``` +The performance of this code is the best of all tests since it inlines all operations and is specialized for operation +with doubles. We will measure everything else relative to this one, so time for this test will be `1x` (real time +on my computer is about 4.5 seconds). The only problem with this approach is that it requires specifying the type +from the beginning. Everyone does so anyway, so it is the recommended approach. + +## Automatic +Let's do the same with automatic field inference: +```kotlin + autoField.run { + var res = one + repeat(n) { + res += 1.0 + } + } +``` +The speed of this operation is approximately the same as for the specialized case since `NDField.auto` just +returns the same `RealNDField` in this case. Of course it is usually better to use the specialized method to be sure. + +## Lazy +A lazy field does not produce a structure when asked; instead it generates an empty structure and fills it on-demand +using coroutines to parallelize computations. +When one calls +```kotlin + lazyField.run { + var res = one + repeat(n) { + res += 1.0 + } + } +``` +The result will be calculated almost immediately but the result will be empty. In order to get the full result +structure one needs to call all its elements. In this case computation overhead will be huge. So this field should +never be used if one expects to use the full result structure. Though if one wants only a small fraction, it could +save a lot of time. + +This field still could be used with reasonable performance if the calling code is changed: +```kotlin + lazyField.run { + val res = one.map { + var c = 0.0 + repeat(n) { + c += 1.0 + } + c + } + + res.elements().forEach { it.second } + } +``` +In this case it completes in about `4x-5x` time due to boxing.
+ +## Boxing +The boxing field produced by +```kotlin + genericField.run { + var res = one + repeat(n) { + res += 1.0 + } + } +``` +obviously is the slowest one, because it requires boxing and unboxing the `double` on each operation. It takes about +`15x` time (**TODO: there seems to be a problem here, it should be slow, but not that slow**). This field should +never be used for primitives. + +## Element operation +Let us also check the speed for direct operations on elements: +```kotlin + var res = genericField.one + repeat(n) { + res += 1.0 + } +``` +One would expect it to be at least as slow as the field operation, but in fact, this one takes only `2x` time to complete. +This happens because in this particular case it does not use the actual `NDField` but is instead calculated directly +via an extension function. + +## What about python? + +Usually it is a bad idea to compare the direct numerical operation performance in different languages, but it is hard to +work completely without a frame of reference. In this case, simple numpy code: +```python +res = np.ones((1000,1000)) +for i in range(1000): + res = res + 1.0 +``` +gives the completion time of about `1.1x`, which means that specialized kotlin code in fact is working faster (I think it is +because of better memory management). Of course if one writes `res += 1.0`, the performance will be different, +but it would be a different case, because numpy overrides `+=` with in-place operations. In-place operations are +available in `kmath` with `MutableNDStructure` but there is no field for it (one can still work with mapping +functions).
\ No newline at end of file diff --git a/kmath-core/src/commonMain/kotlin/scientifik/kmath/operations/Algebra.kt b/kmath-core/src/commonMain/kotlin/scientifik/kmath/operations/Algebra.kt index a6bd8d78a..ff647504a 100644 --- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/operations/Algebra.kt +++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/operations/Algebra.kt @@ -69,11 +69,11 @@ interface Ring : Space { operator fun T.times(b: T): T = multiply(this, b) - operator fun T.plus(b: Number) = this.plus(b * one) - operator fun Number.plus(b: T) = b + this - - operator fun T.minus(b: Number) = this.minus(b * one) - operator fun Number.minus(b: T) = -b + this +// operator fun T.plus(b: Number) = this.plus(b * one) +// operator fun Number.plus(b: T) = b + this +// +// operator fun T.minus(b: Number) = this.minus(b * one) +// operator fun Number.minus(b: T) = -b + this } abstract class AbstractRing : AbstractSpace(), Ring { diff --git a/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/RealNDField.kt b/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/RealNDField.kt index b8bab7c07..9b3e306d5 100644 --- a/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/RealNDField.kt +++ b/kmath-core/src/commonMain/kotlin/scientifik/kmath/structures/RealNDField.kt @@ -79,7 +79,7 @@ class RealNDField(shape: IntArray) : * Fast element production using function inlining */ inline fun StridedNDField.produceInline(crossinline initializer: RealField.(Int) -> Double): RealNDElement { - val array = DoubleArray(strides.linearSize) { offset -> elementField.initializer(offset) } + val array = DoubleArray(strides.linearSize) { offset -> RealField.initializer(offset) } return StridedNDElement(this, DoubleBuffer(array)) } @@ -102,4 +102,4 @@ operator fun RealNDElement.plus(arg: Double) = * Subtraction operation between [StridedNDElement] and single element */ operator fun RealNDElement.minus(arg: Double) = - context.produceInline { i -> buffer[i] - arg } \ 
No newline at end of file + context.produceInline { i -> buffer[i] - arg } diff --git a/kmath-coroutines/src/commonMain/kotlin/scientifik/kmath/structures/LazyNDField.kt b/kmath-coroutines/src/commonMain/kotlin/scientifik/kmath/structures/LazyNDField.kt index edda12888..451ef9fdd 100644 --- a/kmath-coroutines/src/commonMain/kotlin/scientifik/kmath/structures/LazyNDField.kt +++ b/kmath-coroutines/src/commonMain/kotlin/scientifik/kmath/structures/LazyNDField.kt @@ -21,11 +21,13 @@ class LazyNDField>(shape: IntArray, field: F, val scope: Corouti check(arg) return if (arg is LazyNDStructure) { LazyNDStructure(this) { index -> - this.elementField.transform(index, arg.function(index)) + //FIXME if value of arg is already calculated, it should be used + elementField.transform(index, arg.function(index)) } } else { LazyNDStructure(this) { elementField.transform(it, arg.await(it)) } } +// return LazyNDStructure(this) { elementField.transform(it, arg.await(it)) } } override fun map(arg: NDStructure, transform: F.(T) -> T) = @@ -43,6 +45,7 @@ class LazyNDField>(shape: IntArray, field: F, val scope: Corouti } else { LazyNDStructure(this@LazyNDField) { elementField.transform(a.await(it), b.await(it)) } } +// return LazyNDStructure(this) { elementField.transform(a.await(it), b.await(it)) } } fun NDStructure.lazy(): LazyNDStructure {