Documentation for nd-performance

This commit is contained in:
Alexander Nozik 2019-01-07 17:18:31 +03:00
parent 9da1a8c3e3
commit c161ef0b57
10 changed files with 256 additions and 71 deletions

View File

@ -9,3 +9,9 @@ dependencies {
compile project(":kmath-coroutines")
//jmh project(':kmath-core')
}
jmh{
warmupIterations = 1
}
jmhClasses.dependsOn(compileKotlin)

View File

@ -1,49 +1,48 @@
package scientifik.kmath.structures
import org.openjdk.jmh.annotations.*
import org.openjdk.jmh.annotations.Benchmark
import org.openjdk.jmh.annotations.Scope
import org.openjdk.jmh.annotations.State
import java.nio.IntBuffer
@Warmup(iterations = 1)
@Measurement(iterations = 5)
@State(Scope.Benchmark)
open class ArrayBenchmark {
lateinit var array: IntArray
lateinit var arrayBuffer: IntBuffer
lateinit var nativeBuffer: IntBuffer
@Setup
fun setup() {
array = IntArray(10000) { it }
arrayBuffer = IntBuffer.wrap(array)
nativeBuffer = IntBuffer.allocate(10000)
for (i in 0 until 10000) {
nativeBuffer.put(i, i)
}
}
@Benchmark
fun benchmarkArrayRead() {
var res = 0
for (i in 1..10000) {
res += array[10000 - i]
for (i in 1..size) {
res += array[size - i]
}
}
@Benchmark
fun benchmarkBufferRead() {
var res = 0
for (i in 1..10000) {
res += arrayBuffer.get(10000 - i)
for (i in 1..size) {
res += arrayBuffer.get(size - i)
}
}
@Benchmark
fun nativeBufferRead() {
var res = 0
for (i in 1..10000) {
res += nativeBuffer.get(10000 - i)
for (i in 1..size) {
res += nativeBuffer.get(size - i)
}
}
companion object {
val size = 1000
val array = IntArray(size) { it }
val arrayBuffer = IntBuffer.wrap(array)
val nativeBuffer = IntBuffer.allocate(size).also {
for (i in 0 until size) {
it.put(i, i)
}
}
}
}

View File

@ -1,10 +1,10 @@
package scientifik.kmath.structures
import org.openjdk.jmh.annotations.*
import org.openjdk.jmh.annotations.Benchmark
import org.openjdk.jmh.annotations.Scope
import org.openjdk.jmh.annotations.State
import scientifik.kmath.operations.Complex
@Warmup(iterations = 1)
@Measurement(iterations = 5)
@State(Scope.Benchmark)
open class BufferBenchmark {
@ -22,7 +22,7 @@ open class BufferBenchmark {
@Benchmark
fun complexBufferReadWrite() {
val buffer = Complex.createBuffer(size / 2)
val buffer = MutableBuffer.complex(size / 2)
(0 until size / 2).forEach {
buffer[it] = Complex(it.toDouble(), -it.toDouble())
}
@ -33,6 +33,6 @@ open class BufferBenchmark {
}
companion object {
const val size = 1000
const val size = 100
}
}

View File

@ -0,0 +1,69 @@
package scientifik.kmath.structures
import org.openjdk.jmh.annotations.Benchmark
import scientifik.kmath.operations.RealField
/**
 * JMH benchmarks comparing repeated addition on the different [NDField] contexts
 * declared in the companion object. Every benchmark starts from the field's `one`
 * structure and performs [n] additions on a `dim x dim` shape.
 *
 * NOTE(review): except for [lazyFieldAdd], the accumulated `res` is neither returned
 * nor otherwise consumed - confirm that JIT dead-code elimination does not distort
 * the measurements (returning the value or using a JMH `Blackhole` is the usual remedy).
 */
open class NDFieldBenchmark {
/** Adds the `one` structure [n] times inside the context created by `NDField.auto`. */
@Benchmark
fun autoFieldAdd() {
bufferedField.run {
var res: NDBuffer<Double> = one
repeat(n) {
res += one
}
}
}
/**
 * Adds the scalar `1.0` [n] times to an element obtained via `one.toElement()`;
 * the additions are performed outside of the field's `run` scope.
 */
@Benchmark
fun autoElementAdd() {
var res = bufferedField.run { one.toElement() }
repeat(n) {
res += 1.0
}
}
/** Adds the scalar `1.0` [n] times inside the context created by `NDField.real`. */
@Benchmark
fun specializedFieldAdd() {
specializedField.run {
var res: NDBuffer<Double> = one
repeat(n) {
res += 1.0
}
}
}
/**
 * Adds the `one` structure [n] times inside the context created by `NDField.lazy`,
 * then iterates over all elements of the result and sums their values.
 */
@Benchmark
fun lazyFieldAdd() {
lazyNDField.run {
var res = one
repeat(n) {
res += one
}
// Walk every element of the result and sum the values.
// presumably this is what forces the lazy structure to be computed - TODO confirm
res.elements().sumByDouble { it.second }
}
}
/** Adds the `one` structure [n] times inside the context created by `NDField.buffered`. */
@Benchmark
fun boxingFieldAdd() {
genericField.run {
var res: NDBuffer<Double> = one
repeat(n) {
res += one
}
}
}
companion object {
// Length of each of the two dimensions of the benchmarked structures.
val dim = 1000
// Number of additions performed by every benchmark.
val n = 100
// Context created by the automatic factory for RealField.
val bufferedField = NDField.auto(intArrayOf(dim, dim), RealField)
// Context created by the factory dedicated to real (Double) values.
val specializedField = NDField.real(intArrayOf(dim, dim))
// Context created by the generic buffered factory (used by boxingFieldAdd).
val genericField = NDField.buffered(intArrayOf(dim, dim), RealField)
// Context created by the lazy factory.
val lazyNDField = NDField.lazy(intArrayOf(dim, dim), RealField)
}
}

View File

@ -7,34 +7,29 @@ fun main(args: Array<String>) {
val dim = 1000
val n = 1000
val bufferedField = NDField.auto(intArrayOf(dim, dim), RealField)
// automatically build context most suited for given type.
val autoField = NDField.auto(intArrayOf(dim, dim), RealField)
// specialized nd-field for Double. It works as generic Double field as well
val specializedField = NDField.real(intArrayOf(dim, dim))
//A field implementing lazy computations. All elements are computed on-demand
val lazyField = NDField.lazy(intArrayOf(dim, dim), RealField)
//A generic boxing field. It should be used for objects, not primitives.
val genericField = NDField.buffered(intArrayOf(dim, dim), RealField)
val lazyNDField = NDField.lazy(intArrayOf(dim, dim), RealField)
// val action: NDField<Double, DoubleField, NDStructure<Double>>.() -> Unit = {
// var res = one
// repeat(n) {
// res += 1.0
// }
// }
val doubleTime = measureTimeMillis {
bufferedField.run {
var res: NDBuffer<Double> = one
val autoTime = measureTimeMillis {
autoField.run {
var res = one
repeat(n) {
res += one
res += 1.0
}
}
}
println("Buffered addition completed in $doubleTime millis")
println("Buffered addition completed in $autoTime millis")
val elementTime = measureTimeMillis {
var res = bufferedField.run{one.toElement()}
var res = genericField.one
repeat(n) {
res += 1.0
}
@ -43,9 +38,8 @@ fun main(args: Array<String>) {
println("Element addition completed in $elementTime millis")
val specializedTime = measureTimeMillis {
//specializedField.run(action)
specializedField.run {
var res: NDBuffer<Double> = one
var res = one
repeat(n) {
res += 1.0
}
@ -56,17 +50,16 @@ fun main(args: Array<String>) {
val lazyTime = measureTimeMillis {
val tr : RealField.(Double)->Double = {arg->
var r = arg
repeat(n) {
r += 1.0
lazyField.run {
val res = one.map {
var c = 0.0
repeat(n) {
c += 1.0
}
c
}
r
}
lazyNDField.run {
val res = one.map(tr)
res.elements().sumByDouble { it.second }
res.elements().forEach { it.second }
}
}
@ -77,10 +70,11 @@ fun main(args: Array<String>) {
genericField.run {
var res: NDBuffer<Double> = one
repeat(n) {
res += one
res += 1.0
}
}
}
println("Generic addition completed in $genericTime millis")
}

View File

@ -28,7 +28,7 @@ allprojects {
apply(plugin = "com.jfrog.artifactory")
group = "scientifik"
version = "0.0.3-dev-1"
version = "0.0.3-dev-2"
repositories {
maven("https://dl.bintray.com/kotlin/kotlin-eap")

View File

@ -5,9 +5,123 @@ structures. In `kmath` performance depends on which particular context was used
Let us consider following contexts:
```kotlin
// automatically build context
val bufferedField = NDField.auto(intArrayOf(dim, dim), RealField)
// specialized nd-field for Double. It works as generic Double field as well
val specializedField = NDField.real(intArrayOf(dim, dim))
// automatically build context most suited for given type.
val autoField = NDField.auto(intArrayOf(dim, dim), RealField)
//A field implementing lazy computations. All elements are computed on-demand
val lazyField = NDField.lazy(intArrayOf(dim, dim), RealField)
//A generic boxing field. It should be used for objects, not primitives.
val genericField = NDField.buffered(intArrayOf(dim, dim), RealField)
val lazyNDField = NDField.lazy(intArrayOf(dim, dim), RealField)
```
Now let us perform several tests and see which implementation is best suited for each case:
## Test case
In order to test performance we will take 2d-structures with `dim = 1000` and add a structure filled with `1.0`
to it `n = 1000` times.
## Specialized
The code to run this looks like:
```kotlin
specializedField.run {
var res = one
repeat(n) {
res += 1.0
}
}
```
The performance of this code is the best of all tests since it inlines all operations and is specialized for operation
with doubles. We will measure everything else relative to this one, so time for this test will be `1x` (real time
on my computer is about 4.5 seconds). The only problem with this approach is that it requires specifying the type
from the beginning. Everyone does so anyway, so it is the recommended approach.
## Automatic
Let's do the same with automatic field inference:
```kotlin
autoField.run {
var res = one
repeat(n) {
res += 1.0
}
}
```
The speed of this operation is approximately the same as for the specialized case, since `NDField.auto` just
returns the same `RealNDField` in this case. Of course, it is usually better to use the specialized method to be sure.
## Lazy
A lazy field does not produce a structure when asked; instead, it generates an empty structure and fills it on demand,
using coroutines to parallelize computations.
When one calls
```kotlin
lazyField.run {
var res = one
repeat(n) {
res += 1.0
}
}
```
the result will be returned almost immediately, but the resulting structure will be empty. In order to get the full
result structure, one needs to access all of its elements. In this case the computation overhead will be huge, so this
field should never be used if one expects to use the full result structure. However, if one needs only a small fraction
of the elements, it could save a lot of time.
This field can still be used with reasonable performance if the calling code is changed:
```kotlin
lazyField.run {
val res = one.map {
var c = 0.0
repeat(n) {
c += 1.0
}
c
}
res.elements().forEach { it.second }
}
```
In this case it completes in about `4x-5x` time due to boxing.
## Boxing
The boxing field produced by
```kotlin
genericField.run {
var res = one
repeat(n) {
res += 1.0
}
}
```
is obviously the slowest one, because it has to box and unbox the `double` on each operation. It takes about
`15x` time (**TODO: there seems to be a problem here, it should be slow, but not that slow**). This field should
never be used for primitives.
## Element operation
Let us also check the speed for direct operations on elements:
```kotlin
var res = genericField.one
repeat(n) {
res += 1.0
}
```
One would expect it to be at least as slow as the field operation, but in fact this one takes only `2x` time to complete.
This happens because in this particular case it does not use the actual `NDField`; instead, the result is calculated
directly via an extension function.
## What about python?
Usually it is a bad idea to compare the performance of direct numerical operations in different languages, but it is
hard to work completely without a frame of reference. In this case, simple numpy code:
```python
res = np.ones((1000,1000))
for i in range(1000):
res = res + 1.0
```
gives a completion time of about `1.1x`, which means that the specialized kotlin code is in fact faster (I think it is
because of better memory management). Of course, if one writes `res += 1.0`, the performance will be different,
but that would be a different case, because numpy overrides `+=` with in-place operations. In-place operations are
available in `kmath` with `MutableNDStructure`, but there is no field for it (one can still work with mapping
functions).

View File

@ -69,11 +69,11 @@ interface Ring<T> : Space<T> {
operator fun T.times(b: T): T = multiply(this, b)
operator fun T.plus(b: Number) = this.plus(b * one)
operator fun Number.plus(b: T) = b + this
operator fun T.minus(b: Number) = this.minus(b * one)
operator fun Number.minus(b: T) = -b + this
// operator fun T.plus(b: Number) = this.plus(b * one)
// operator fun Number.plus(b: T) = b + this
//
// operator fun T.minus(b: Number) = this.minus(b * one)
// operator fun Number.minus(b: T) = -b + this
}
abstract class AbstractRing<T : Any> : AbstractSpace<T>(), Ring<T> {

View File

@ -79,7 +79,7 @@ class RealNDField(shape: IntArray) :
* Fast element production using function inlining
*/
inline fun StridedNDField<Double, RealField>.produceInline(crossinline initializer: RealField.(Int) -> Double): RealNDElement {
val array = DoubleArray(strides.linearSize) { offset -> elementField.initializer(offset) }
val array = DoubleArray(strides.linearSize) { offset -> RealField.initializer(offset) }
return StridedNDElement(this, DoubleBuffer(array))
}

View File

@ -21,11 +21,13 @@ class LazyNDField<T, F : Field<T>>(shape: IntArray, field: F, val scope: Corouti
check(arg)
return if (arg is LazyNDStructure<T, *>) {
LazyNDStructure(this) { index ->
this.elementField.transform(index, arg.function(index))
//FIXME if value of arg is already calculated, it should be used
elementField.transform(index, arg.function(index))
}
} else {
LazyNDStructure(this) { elementField.transform(it, arg.await(it)) }
}
// return LazyNDStructure(this) { elementField.transform(it, arg.await(it)) }
}
override fun map(arg: NDStructure<T>, transform: F.(T) -> T) =
@ -43,6 +45,7 @@ class LazyNDField<T, F : Field<T>>(shape: IntArray, field: F, val scope: Corouti
} else {
LazyNDStructure(this@LazyNDField) { elementField.transform(a.await(it), b.await(it)) }
}
// return LazyNDStructure(this) { elementField.transform(a.await(it), b.await(it)) }
}
fun NDStructure<T>.lazy(): LazyNDStructure<T, F> {