Support for tensors on GPU

2021-01-06 13:20:48 +00:00 · 2021-01-06 13:20:48 +00:00 · 0fc29b40c5
commit 0fc29b40c5
parent 32e4b68061
9 changed files with 230 additions and 43 deletions
--- a/kmath-torch/README.md
+++ b/kmath-torch/README.md
@ -81,15 +81,25 @@ Tensors implement the buffer protocol over `MutableNDStructure`. They can only b
 memScoped {
    val intTensor: TorchTensorInt = TorchTensor.copyFromIntArray(
        scope = this,
-        array = intArrayOf(7,8,9,2,6,5), 
-        shape = intArrayOf(3,2))
+        array = (1..24).toList().toIntArray(),
+        shape = intArrayOf(3, 2, 4)
+    )
    println(intTensor)
-    
+
    val floatTensor: TorchTensorFloat = TorchTensor.copyFromFloatArray(
        scope = this,
-        array = floatArrayOf(7f,8.9f,2.6f,5.6f),
-        shape = intArrayOf(4))
+        array = (1..10).map { it + 50f }.toList().toFloatArray(),
+        shape = intArrayOf(10)
+    )
    println(floatTensor)
+
+    val gpuFloatTensor: TorchTensorFloatGPU = TorchTensor.copyFromFloatArrayToGPU(
+        scope = this,
+        array = (1..8).map { it * 2f }.toList().toFloatArray(),
+        shape = intArrayOf(2, 2, 2),
+        device = 0
+    )
+    println(gpuFloatTensor)
 }
 ```

--- a/kmath-torch/build.gradle.kts
+++ b/kmath-torch/build.gradle.kts
@ -133,6 +133,7 @@ kotlin {

    val test by nativeTarget.compilations.getting

+
    sourceSets {
        val nativeMain by creating {
            dependencies {
@ -142,12 +143,20 @@ kotlin {
        val nativeTest by creating {
            dependsOn(nativeMain)
        }
+        val nativeGPUTest by creating {
+            dependsOn(nativeMain)
+        }
+

        main.defaultSourceSet.dependsOn(nativeMain)
        test.defaultSourceSet.dependsOn(nativeTest)
+        if(cudaFound) {
+            test.defaultSourceSet.dependsOn(nativeGPUTest)
+        }
+
    }
 }

 val torch: KotlinNativeTarget by kotlin.targets
 tasks[torch.compilations["main"].cinterops["libctorch"].interopProcessingTaskName]
-    .dependsOn(buildCpp)
+    .dependsOn(buildCpp)
--- a/kmath-torch/ctorch/include/ctorch.h
+++ b/kmath-torch/ctorch/include/ctorch.h
@ -22,6 +22,10 @@ extern "C"
    TorchTensorHandle copy_from_blob_float(float *data, int *shape, int dim);
    TorchTensorHandle copy_from_blob_long(long *data, int *shape, int dim);
    TorchTensorHandle copy_from_blob_int(int *data, int *shape, int dim);
+    // GPU variants of copy_from_blob_*: copy `data` (with `dim` extents read from `shape`)
+    // into a new tensor on CUDA device index `device`. The returned handle is
+    // heap-allocated and must be released with dispose_tensor.
+    TorchTensorHandle copy_from_blob_to_gpu_double(double *data, int *shape, int dim, int device);
+    TorchTensorHandle copy_from_blob_to_gpu_float(float *data, int *shape, int dim, int device);
+    TorchTensorHandle copy_from_blob_to_gpu_long(long *data, int *shape, int dim, int device);
+    TorchTensorHandle copy_from_blob_to_gpu_int(int *data, int *shape, int dim, int device);

    TorchTensorHandle copy_tensor(TorchTensorHandle tensor_handle);

@ -41,6 +45,19 @@ extern "C"
    void dispose_char(char *ptr);
    void dispose_tensor(TorchTensorHandle tensor_handle);

+    // Workaround for GPU tensors: read or write a single element addressed by its
+    // flat offset through the tensor API, instead of through a raw data pointer.
+    double get_at_offset_double(TorchTensorHandle tensor_handle, int offset);
+    float get_at_offset_float(TorchTensorHandle tensor_handle, int offset);
+    long get_at_offset_long(TorchTensorHandle tensor_handle, int offset);
+    int get_at_offset_int(TorchTensorHandle tensor_handle, int offset);
+    void set_at_offset_double(TorchTensorHandle tensor_handle, int offset, double value);
+    void set_at_offset_float(TorchTensorHandle tensor_handle, int offset, float value);
+    void set_at_offset_long(TorchTensorHandle tensor_handle, int offset, long value);
+    void set_at_offset_int(TorchTensorHandle tensor_handle, int offset, int value);
+
+    // Copy a tensor to the CPU, or to CUDA device index `device`. Each call returns a
+    // new heap-allocated handle that must be released with dispose_tensor.
+    TorchTensorHandle copy_to_cpu(TorchTensorHandle tensor_handle);
+    TorchTensorHandle copy_to_gpu(TorchTensorHandle tensor_handle, int device);
+
 #ifdef __cplusplus
 }
 #endif
--- a/kmath-torch/ctorch/include/utils.hh
+++ b/kmath-torch/ctorch/include/utils.hh
@ -28,21 +28,21 @@ namespace ctorch
        return torch::kInt32;
    }

-    inline torch::Tensor &cast(TorchTensorHandle tensor_handle)
+    // Reinterpret an opaque handle as the torch::Tensor it points to.
+    // No null or type check is performed on the handle.
+    inline torch::Tensor &cast(const TorchTensorHandle &tensor_handle)
    {
        return *static_cast<torch::Tensor *>(tensor_handle);
    }

    template <typename Dtype>
-    inline torch::Tensor copy_from_blob(Dtype *data, int *shape, int dim)
+    inline torch::Tensor copy_from_blob(Dtype *data, int *shape, int dim, torch::Device device)
    {
        auto shape_vec = std::vector<int64_t>(dim);
        shape_vec.assign(shape, shape + dim);
-        return torch::from_blob(data, shape_vec, dtype<Dtype>()).clone();
+        // Wrap the caller's memory, then copy it into a tensor on `device`.
+        // The positional arguments to to() are non_blocking = false and copy = true,
+        // so the result is always a fresh copy, even when `device` is the CPU.
+        return torch::from_blob(data, shape_vec, dtype<Dtype>()).to(
+            torch::TensorOptions().layout(torch::kStrided).device(device), false, true);
    }

-    template <typename IntArray>
-    inline int *to_dynamic_ints(IntArray arr)
+    inline int *to_dynamic_ints(const c10::IntArrayRef &arr)
    {
        size_t n = arr.size();
        int *res = (int *)malloc(sizeof(int) * n);
@ -53,4 +53,29 @@ namespace ctorch
        return res;
    }

+    // Convert a flat element offset into one integer index per dimension:
+    // divide by each stride in turn and carry the remainder to the next dimension.
+    // NOTE(review): this decomposition presumably relies on every stride being
+    // non-zero and on strides being in descending (contiguous, row-major) order.
+    // Confirm before using it with transposed or expanded tensors.
+    inline std::vector<at::indexing::TensorIndex> offset_to_index(int offset, const c10::IntArrayRef &strides)
+    {
+        std::vector<at::indexing::TensorIndex> index;
+        for (const auto &stride : strides)
+        {
+            index.emplace_back(offset / stride);
+            offset %= stride;
+        }
+        return index;
+    }
+
+    // Read the single element at flat offset `offset` and return it as NumType.
+    // The element is selected with Tensor::index and extracted with item<NumType>(),
+    // so no raw data pointer is dereferenced here.
+    template <typename NumType>
+    inline NumType get_at_offset(const TorchTensorHandle &tensor_handle, int offset)
+    {
+        auto ten = ctorch::cast(tensor_handle);
+        return ten.index(ctorch::offset_to_index(offset, ten.strides())).item<NumType>();
+    }
+
+    // Write `value` to the single element at flat offset `offset`.
+    // The handle is taken by const reference, matching get_at_offset and cast:
+    // only the tensor it points to is modified, never the handle itself.
+    template <typename NumType>
+    inline void set_at_offset(const TorchTensorHandle &tensor_handle, int offset, NumType value)
+    {
+        // Bind by reference; an extra copy of the tensor object is not needed.
+        auto &ten = ctorch::cast(tensor_handle);
+        // Scalar assignment to the tensor returned by index() is what stores the value.
+        // NOTE(review): presumably index() with integer indices yields a view, so the
+        // write lands in `ten` itself - confirm against the libtorch indexing docs.
+        ten.index(ctorch::offset_to_index(offset, ten.strides())) = value;
+    }
+
 } // namespace ctorch
--- a/kmath-torch/ctorch/src/ctorch.cc
+++ b/kmath-torch/ctorch/src/ctorch.cc
@ -27,19 +27,36 @@ void set_seed(int seed)

 TorchTensorHandle copy_from_blob_double(double *data, int *shape, int dim)
 {
-  return new torch::Tensor(ctorch::copy_from_blob<double>(data, shape, dim));
+  // CPU variants: same helper as the *_to_gpu_* functions, pinned to torch::kCPU.
+  // The heap-allocated tensor is released by dispose_tensor.
+  return new torch::Tensor(ctorch::copy_from_blob<double>(data, shape, dim, torch::kCPU));
 }
 TorchTensorHandle copy_from_blob_float(float *data, int *shape, int dim)
 {
-  return new torch::Tensor(ctorch::copy_from_blob<float>(data, shape, dim));
+  return new torch::Tensor(ctorch::copy_from_blob<float>(data, shape, dim, torch::kCPU));
 }
 TorchTensorHandle copy_from_blob_long(long *data, int *shape, int dim)
 {
-  return new torch::Tensor(ctorch::copy_from_blob<long>(data, shape, dim));
+  return new torch::Tensor(ctorch::copy_from_blob<long>(data, shape, dim, torch::kCPU));
 }
 TorchTensorHandle copy_from_blob_int(int *data, int *shape, int dim)
 {
-  return new torch::Tensor(ctorch::copy_from_blob<int>(data, shape, dim));
+  return new torch::Tensor(ctorch::copy_from_blob<int>(data, shape, dim, torch::kCPU));
+}
+
+// GPU variants: copy the caller's buffer into a new tensor on CUDA device index
+// `device`. Each returns a heap-allocated tensor that is released by dispose_tensor.
+TorchTensorHandle copy_from_blob_to_gpu_double(double *data, int *shape, int dim, int device)
+{
+  return new torch::Tensor(ctorch::copy_from_blob<double>(data, shape, dim, torch::Device(torch::kCUDA, device)));
+}
+TorchTensorHandle copy_from_blob_to_gpu_float(float *data, int *shape, int dim, int device)
+{
+  return new torch::Tensor(ctorch::copy_from_blob<float>(data, shape, dim, torch::Device(torch::kCUDA, device)));
+}
+TorchTensorHandle copy_from_blob_to_gpu_long(long *data, int *shape, int dim, int device)
+{
+  return new torch::Tensor(ctorch::copy_from_blob<long>(data, shape, dim, torch::Device(torch::kCUDA, device)));
+}
+TorchTensorHandle copy_from_blob_to_gpu_int(int *data, int *shape, int dim, int device)
+{
+  return new torch::Tensor(ctorch::copy_from_blob<int>(data, shape, dim, torch::Device(torch::kCUDA, device)));
 }

 TorchTensorHandle copy_tensor(TorchTensorHandle tensor_handle)
@ -107,4 +124,46 @@ void dispose_char(char *ptr)
 void dispose_tensor(TorchTensorHandle tensor_handle)
 {
  delete static_cast<torch::Tensor *>(tensor_handle);
-}
+}
+
+// C entry points for single-element reads: one per element type, each forwarding
+// to the ctorch::get_at_offset template.
+double get_at_offset_double(TorchTensorHandle tensor_handle, int offset)
+{
+  return ctorch::get_at_offset<double>(tensor_handle, offset);
+}
+float get_at_offset_float(TorchTensorHandle tensor_handle, int offset)
+{
+  return ctorch::get_at_offset<float>(tensor_handle, offset);
+}
+long get_at_offset_long(TorchTensorHandle tensor_handle, int offset)
+{
+  return ctorch::get_at_offset<long>(tensor_handle, offset);
+}
+int get_at_offset_int(TorchTensorHandle tensor_handle, int offset)
+{
+  return ctorch::get_at_offset<int>(tensor_handle, offset);
+}
+// C entry points for single-element writes: one per element type, each forwarding
+// to the ctorch::set_at_offset template.
+void set_at_offset_double(TorchTensorHandle tensor_handle, int offset, double value)
+{
+  ctorch::set_at_offset<double>(tensor_handle, offset, value);
+}
+void set_at_offset_float(TorchTensorHandle tensor_handle, int offset, float value)
+{
+  ctorch::set_at_offset<float>(tensor_handle, offset, value);
+}
+void set_at_offset_long(TorchTensorHandle tensor_handle, int offset, long value)
+{
+  ctorch::set_at_offset<long>(tensor_handle, offset, value);
+}
+void set_at_offset_int(TorchTensorHandle tensor_handle, int offset, int value)
+{
+  ctorch::set_at_offset<int>(tensor_handle, offset, value);
+}
+
+// Return a new heap-allocated tensor holding a copy of the input on the CPU.
+// The positional arguments to to() are non_blocking = false and copy = true.
+// The input handle is left untouched; release the result with dispose_tensor.
+TorchTensorHandle copy_to_cpu(TorchTensorHandle tensor_handle)
+{
+  return new torch::Tensor(ctorch::cast(tensor_handle).to(torch::kCPU,false, true));
+}
+// Same as copy_to_cpu, but the copy is placed on CUDA device index `device`.
+TorchTensorHandle copy_to_gpu(TorchTensorHandle tensor_handle, int device)
+{
+  return new torch::Tensor(ctorch::cast(tensor_handle).to(torch::Device(torch::kCUDA, device),false, true));
+}
--- a/kmath-torch/src/nativeGPUTest/kotlin/kscience/kmath/torch/TestTorchTensorGPU.kt
+++ b/kmath-torch/src/nativeGPUTest/kotlin/kscience/kmath/torch/TestTorchTensorGPU.kt
@ -0,0 +1,25 @@
+package kscience.kmath.torch
+
+import kscience.kmath.structures.asBuffer
+
+import kotlinx.cinterop.memScoped
+import kotlin.test.*
+
+/**
+ * Tests for GPU-resident tensors. They live in the `nativeGPUTest` source set, which the
+ * build only attaches to the test compilation when CUDA was found.
+ */
+class TestTorchTensorGPU {
+
+    /** CUDA must be reported as available. */
+    @Test
+    fun cudaAvailability() {
+        val hasCuda = cudaAvailable()
+        assertTrue(hasCuda)
+    }
+
+    /** Copies eight floats to device 0 and checks per-element access and the buffer contents. */
+    @Test
+    fun floatGPUTensorLayout() = memScoped {
+        val values = FloatArray(8) { (it + 1) * 2f }
+        val dims = intArrayOf(2, 2, 2)
+        val gpuTensor = TorchTensor.copyFromFloatArrayToGPU(scope = this, array = values, shape = dims, device = 0)
+        for (element in gpuTensor.elements()) {
+            assertEquals(gpuTensor[element.first], element.second)
+        }
+        assertTrue(gpuTensor.buffer.contentEquals(values.asBuffer()))
+    }
+}
--- a/kmath-torch/src/nativeMain/kotlin/kscience/kmath/torch/TorchTensor.kt
+++ b/kmath-torch/src/nativeMain/kotlin/kscience/kmath/torch/TorchTensor.kt
@ -5,9 +5,7 @@ import kscience.kmath.structures.*
 import kotlinx.cinterop.*
 import ctorch.*

-public abstract class TorchTensor<T,
-        TVar : CPrimitiveVar,
-        TorchTensorBufferImpl : TorchTensorBuffer<T, TVar>> :
+public abstract class TorchTensor<T, out TorchTensorBufferImpl : TorchTensorBuffer<T>> :
    MutableNDBufferTrait<T, TorchTensorBufferImpl, TorchTensorStrides>() {

    public companion object {
@ -23,6 +21,12 @@ public abstract class TorchTensor<T,
            )!!
            return TorchTensorInt(populateStridesFromNative(tensorHandle, rawShape = shape), scope, tensorHandle)
        }
+        /**
+         * Copies [array] into a new float tensor of the given [shape] on CUDA device [device].
+         *
+         * The native tensor is registered with [scope] and is disposed when the scope's
+         * deferred actions run.
+         */
+        public fun copyFromFloatArrayToGPU(
+            scope: DeferScope,
+            array: FloatArray,
+            shape: IntArray,
+            device: Int
+        ): TorchTensorFloatGPU {
+            val gpuHandle: COpaquePointer = copy_from_blob_to_gpu_float(
+                array.toCValues(), shape.toCValues(), shape.size, device
+            )!!
+            val nativeStrides = populateStridesFromNative(gpuHandle, rawShape = shape)
+            return TorchTensorFloatGPU(nativeStrides, scope, gpuHandle)
+        }
    }

    override fun toString(): String {
@ -38,7 +42,7 @@ public class TorchTensorFloat internal constructor(
    override val strides: TorchTensorStrides,
    scope: DeferScope,
    tensorHandle: COpaquePointer
-): TorchTensor<Float, FloatVar, TorchTensorBufferFloat>() {
+): TorchTensor<Float, TorchTensorBufferFloat>() {
    override val buffer: TorchTensorBufferFloat = TorchTensorBufferFloat(scope, tensorHandle)
 }

@ -46,7 +50,15 @@ public class TorchTensorInt internal constructor(
    override val strides: TorchTensorStrides,
    scope: DeferScope,
    tensorHandle: COpaquePointer
-): TorchTensor<Int, IntVar, TorchTensorBufferInt>() {
+): TorchTensor<Int, TorchTensorBufferInt>() {
    override val buffer: TorchTensorBufferInt = TorchTensorBufferInt(scope, tensorHandle)
 }

+/**
+ * Float tensor backed by a [TorchTensorBufferFloatGPU], so element access goes through
+ * that buffer's per-element native calls. Instances are created by
+ * [TorchTensor.copyFromFloatArrayToGPU].
+ */
+public class TorchTensorFloatGPU internal constructor(
+    override val strides: TorchTensorStrides,
+    scope: DeferScope,
+    tensorHandle: COpaquePointer
+): TorchTensor<Float, TorchTensorBufferFloatGPU>() {
+    override val buffer: TorchTensorBufferFloatGPU = TorchTensorBufferFloatGPU(scope, tensorHandle)
+}
+
--- a/kmath-torch/src/nativeMain/kotlin/kscience/kmath/torch/TorchTensorBuffer.kt
+++ b/kmath-torch/src/nativeMain/kotlin/kscience/kmath/torch/TorchTensorBuffer.kt
@ -5,31 +5,35 @@ import kscience.kmath.structures.MutableBuffer
 import kotlinx.cinterop.*
 import ctorch.*

-public abstract class TorchTensorBuffer<T, TVar : CPrimitiveVar> internal constructor(
+/**
+ * Base class for buffers that wrap a native tensor handle.
+ *
+ * The handle is disposed through [scope]: construction registers a deferred action
+ * that calls `dispose_tensor` on it.
+ */
+public abstract class TorchTensorBuffer<T> internal constructor(
    internal val scope: DeferScope,
    internal val tensorHandle: COpaquePointer
 ) : MutableBuffer<T> {
+
+    // Element count, queried from the native tensor once at construction.
+    override val size: Int = get_numel(tensorHandle)
+
    init {
        scope.defer(::close)
    }

-    internal fun close() {
+    protected fun close() {
        dispose_tensor(tensorHandle)
    }

-    protected abstract val tensorData: CPointer<TVar>
-
-    override val size: Int
-        get() = get_numel(tensorHandle)
+    // Factory hook: each subclass wraps a native handle in a buffer of its own type.
+    internal abstract fun wrap(outScope: DeferScope, outTensorHandle: COpaquePointer): TorchTensorBuffer<T>

+    // The copy wraps a new native tensor and is registered with the same scope as this buffer.
+    override fun copy(): TorchTensorBuffer<T> = wrap(
+        outScope = scope,
+        outTensorHandle = copy_tensor(tensorHandle)!!
+    )
 }

-
 public class TorchTensorBufferFloat internal constructor(
    scope: DeferScope,
    tensorHandle: COpaquePointer
-) : TorchTensorBuffer<Float, FloatVar>(scope, tensorHandle) {
-    override val tensorData: CPointer<FloatVar> = get_data_float(tensorHandle)!!
+) : TorchTensorBuffer<Float>(scope, tensorHandle) {
+
+    private val tensorData: CPointer<FloatVar> = get_data_float(tensorHandle)!!

    override operator fun get(index: Int): Float = tensorData[index]

@ -39,17 +43,19 @@ public class TorchTensorBufferFloat internal constructor(

    override operator fun iterator(): Iterator<Float> = (1..size).map { tensorData[it - 1] }.iterator()

-    override fun copy(): TorchTensorBufferFloat = TorchTensorBufferFloat(
-        scope = scope,
-        tensorHandle = copy_tensor(tensorHandle)!!
+    override fun wrap(outScope: DeferScope, outTensorHandle: COpaquePointer) = TorchTensorBufferFloat(
+        scope = outScope,
+        tensorHandle = outTensorHandle
    )
 }

+
 public class TorchTensorBufferInt internal constructor(
    scope: DeferScope,
    tensorHandle: COpaquePointer
-) : TorchTensorBuffer<Int, IntVar>(scope, tensorHandle) {
-    override val tensorData: CPointer<IntVar> = get_data_int(tensorHandle)!!
+) : TorchTensorBuffer<Int>(scope, tensorHandle) {
+
+    private val tensorData: CPointer<IntVar> = get_data_int(tensorHandle)!!

    override operator fun get(index: Int): Int = tensorData[index]

@ -59,9 +65,33 @@ public class TorchTensorBufferInt internal constructor(

    override operator fun iterator(): Iterator<Int> = (1..size).map { tensorData[it - 1] }.iterator()

-    override fun copy(): TorchTensorBufferInt = TorchTensorBufferInt(
-        scope = scope,
-        tensorHandle = copy_tensor(tensorHandle)!!
+    override fun wrap(outScope: DeferScope, outTensorHandle: COpaquePointer) = TorchTensorBufferInt(
+        scope = outScope,
+        tensorHandle = outTensorHandle
    )
 }

+/**
+ * Buffer over a float tensor created on a CUDA device.
+ *
+ * Unlike the CPU buffers, this class does not read through a raw data pointer:
+ * [get] and [set] make one native call per element, and [iterator] stages the whole
+ * tensor through a temporary CPU copy.
+ */
+public class TorchTensorBufferFloatGPU internal constructor(
+    scope: DeferScope,
+    tensorHandle: COpaquePointer
+) : TorchTensorBuffer<Float>(scope, tensorHandle) {
+
+    /** Reads the element at flat offset [index] with a single native call. */
+    override operator fun get(index: Int): Float = get_at_offset_float(tensorHandle, index)
+
+    /** Writes [value] at flat offset [index] with a single native call. */
+    override operator fun set(index: Int, value: Float) {
+        set_at_offset_float(tensorHandle, index, value)
+    }
+
+    /**
+     * Returns an iterator over a snapshot of the tensor contents.
+     *
+     * The tensor is copied to the CPU once and read into a Kotlin list. The temporary
+     * CPU tensor is always disposed, even if obtaining its data pointer fails.
+     */
+    override operator fun iterator(): Iterator<Float> {
+        val cpuCopy = copy_to_cpu(tensorHandle)!!
+        return try {
+            val tensorCpuData = get_data_float(cpuCopy)!!
+            // Build the list fully before the CPU copy is disposed, so the returned
+            // iterator never touches native memory.
+            List(size) { tensorCpuData[it] }.iterator()
+        } finally {
+            dispose_tensor(cpuCopy)
+        }
+    }
+
+    override fun wrap(outScope: DeferScope, outTensorHandle: COpaquePointer) = TorchTensorBufferFloatGPU(
+        scope = outScope,
+        tensorHandle = outTensorHandle
+    )
+}
--- a/kmath-torch/src/nativeTest/kotlin/kscience/kmath/torch/TestTorchTensor.kt
+++ b/kmath-torch/src/nativeTest/kotlin/kscience/kmath/torch/TestTorchTensor.kt
@ -10,9 +10,9 @@ internal class TestTorchTensor {

    @Test
    fun intTensorLayout() = memScoped {
-        val array = intArrayOf(7,8,9,2,6,5)
-        val shape = intArrayOf(3,2)
-        val tensor = TorchTensor.copyFromIntArray(scope=this, array=array, shape=shape)
+        val array = (1..24).toList().toIntArray()
+        val shape = intArrayOf(3, 2, 4)
+        val tensor = TorchTensor.copyFromIntArray(scope = this, array = array, shape = shape)
        tensor.elements().forEach {
            assertEquals(tensor[it.first], it.second)
        }
@ -21,8 +21,8 @@ internal class TestTorchTensor {

    @Test
    fun floatTensorLayout() = memScoped {
-        val array = floatArrayOf(7.5f,8.2f,9f,2.58f,6.5f,5f)
-        val shape = intArrayOf(2,3)
+        val array = (1..10).map { it + 50f }.toList().toFloatArray()
+        val shape = intArrayOf(10)
        val tensor = TorchTensor.copyFromFloatArray(this, array, shape)
        tensor.elements().forEach {
            assertEquals(tensor[it.first], it.second)