Support for tensors on GPU

This commit is contained in:
rgrit91 2021-01-06 13:20:48 +00:00
parent 32e4b68061
commit 0fc29b40c5
9 changed files with 230 additions and 43 deletions

View File

@ -81,15 +81,25 @@ Tensors implement the buffer protocol over `MutableNDStructure`. They can only b
memScoped {
val intTensor: TorchTensorInt = TorchTensor.copyFromIntArray(
scope = this,
array = intArrayOf(7,8,9,2,6,5),
shape = intArrayOf(3,2))
array = (1..24).toList().toIntArray(),
shape = intArrayOf(3, 2, 4)
)
println(intTensor)
val floatTensor: TorchTensorFloat = TorchTensor.copyFromFloatArray(
scope = this,
array = floatArrayOf(7f,8.9f,2.6f,5.6f),
shape = intArrayOf(4))
array = (1..10).map { it + 50f }.toList().toFloatArray(),
shape = intArrayOf(10)
)
println(floatTensor)
val gpuFloatTensor: TorchTensorFloatGPU = TorchTensor.copyFromFloatArrayToGPU(
scope = this,
array = (1..8).map { it * 2f }.toList().toFloatArray(),
shape = intArrayOf(2, 2, 2),
device = 0
)
println(gpuFloatTensor)
}
```

View File

@ -133,6 +133,7 @@ kotlin {
val test by nativeTarget.compilations.getting
sourceSets {
val nativeMain by creating {
dependencies {
@ -142,9 +143,17 @@ kotlin {
val nativeTest by creating {
dependsOn(nativeMain)
}
val nativeGPUTest by creating {
dependsOn(nativeMain)
}
main.defaultSourceSet.dependsOn(nativeMain)
test.defaultSourceSet.dependsOn(nativeTest)
if(cudaFound) {
test.defaultSourceSet.dependsOn(nativeGPUTest)
}
}
}

View File

@ -22,6 +22,10 @@ extern "C"
TorchTensorHandle copy_from_blob_float(float *data, int *shape, int dim);
TorchTensorHandle copy_from_blob_long(long *data, int *shape, int dim);
TorchTensorHandle copy_from_blob_int(int *data, int *shape, int dim);
// GPU variants of copy_from_blob_*: same data/shape/dim arguments plus `device`,
// the CUDA device index the resulting tensor is created on.
// Each call returns a newly allocated handle; release it with dispose_tensor.
TorchTensorHandle copy_from_blob_to_gpu_double(double *data, int *shape, int dim, int device);
TorchTensorHandle copy_from_blob_to_gpu_float(float *data, int *shape, int dim, int device);
TorchTensorHandle copy_from_blob_to_gpu_long(long *data, int *shape, int dim, int device);
TorchTensorHandle copy_from_blob_to_gpu_int(int *data, int *shape, int dim, int device);
TorchTensorHandle copy_tensor(TorchTensorHandle tensor_handle);
@ -41,6 +45,19 @@ extern "C"
void dispose_char(char *ptr);
void dispose_tensor(TorchTensorHandle tensor_handle);
// Workaround for GPU tensors: single-element access by flat offset, used where
// the caller does not read the tensor through a raw data pointer.
// get_at_offset_* returns the element at `offset` as the named C type.
double get_at_offset_double(TorchTensorHandle tensor_handle, int offset);
float get_at_offset_float(TorchTensorHandle tensor_handle, int offset);
long get_at_offset_long(TorchTensorHandle tensor_handle, int offset);
int get_at_offset_int(TorchTensorHandle tensor_handle, int offset);
// set_at_offset_* writes `value` to the element at `offset`.
void set_at_offset_double(TorchTensorHandle tensor_handle, int offset, double value);
void set_at_offset_float(TorchTensorHandle tensor_handle, int offset, float value);
void set_at_offset_long(TorchTensorHandle tensor_handle, int offset, long value);
void set_at_offset_int(TorchTensorHandle tensor_handle, int offset, int value);
// copy_to_cpu / copy_to_gpu return a newly allocated handle for the tensor
// converted to the CPU or to CUDA device `device`; release it with dispose_tensor.
TorchTensorHandle copy_to_cpu(TorchTensorHandle tensor_handle);
TorchTensorHandle copy_to_gpu(TorchTensorHandle tensor_handle, int device);
#ifdef __cplusplus
}
#endif

View File

@ -28,21 +28,21 @@ namespace ctorch
return torch::kInt32;
}
inline torch::Tensor &cast(TorchTensorHandle tensor_handle)
inline torch::Tensor &cast(const TorchTensorHandle &tensor_handle)
{
return *static_cast<torch::Tensor *>(tensor_handle);
}
template <typename Dtype>
inline torch::Tensor copy_from_blob(Dtype *data, int *shape, int dim)
inline torch::Tensor copy_from_blob(Dtype *data, int *shape, int dim, torch::Device device)
{
auto shape_vec = std::vector<int64_t>(dim);
shape_vec.assign(shape, shape + dim);
return torch::from_blob(data, shape_vec, dtype<Dtype>()).clone();
return torch::from_blob(data, shape_vec, dtype<Dtype>()).to(
torch::TensorOptions().layout(torch::kStrided).device(device), false, true);
}
template <typename IntArray>
inline int *to_dynamic_ints(IntArray arr)
inline int *to_dynamic_ints(const c10::IntArrayRef &arr)
{
size_t n = arr.size();
int *res = (int *)malloc(sizeof(int) * n);
@ -53,4 +53,29 @@ namespace ctorch
return res;
}
// Converts a flat element offset into one integer index per dimension.
//
// The strides are visited in the order given; for each one the quotient
// `offset / stride` becomes that dimension's index and the remainder is carried
// on to the next dimension.
//
// A stride of 0 would make the division undefined, so such a dimension gets
// index 0 and leaves the remaining offset untouched.
// NOTE(review): assumes strides are in decreasing order (contiguous layout) —
// confirm for tensors that are not freshly copied.
inline std::vector<at::indexing::TensorIndex> offset_to_index(int offset, const c10::IntArrayRef &strides)
{
    std::vector<at::indexing::TensorIndex> index;
    index.reserve(strides.size());
    for (const auto &stride : strides)
    {
        if (stride == 0)
        {
            // Guard against division by zero; no offset is consumed here.
            index.emplace_back(int64_t{0});
            continue;
        }
        index.emplace_back(offset / stride);
        offset %= stride;
    }
    return index;
}
template <typename NumType>
inline NumType get_at_offset(const TorchTensorHandle &tensor_handle, int offset)
{
auto ten = ctorch::cast(tensor_handle);
return ten.index(ctorch::offset_to_index(offset, ten.strides())).item<NumType>();
}
// Writes `value` into the single element located at flat offset `offset` of the
// tensor behind `tensor_handle`.
// The offset is translated into a per-dimension index using the tensor's
// strides; assigning to the indexed result stores the value.
template <typename NumType>
inline void set_at_offset(TorchTensorHandle &tensor_handle, int offset, NumType value)
{
    torch::Tensor &tensor = ctorch::cast(tensor_handle);
    const auto position = offset_to_index(offset, tensor.strides());
    tensor.index(position) = value;
}
} // namespace ctorch

View File

@ -27,19 +27,36 @@ void set_seed(int seed)
TorchTensorHandle copy_from_blob_double(double *data, int *shape, int dim)
{
return new torch::Tensor(ctorch::copy_from_blob<double>(data, shape, dim));
return new torch::Tensor(ctorch::copy_from_blob<double>(data, shape, dim, torch::kCPU));
}
TorchTensorHandle copy_from_blob_float(float *data, int *shape, int dim)
{
return new torch::Tensor(ctorch::copy_from_blob<float>(data, shape, dim));
return new torch::Tensor(ctorch::copy_from_blob<float>(data, shape, dim, torch::kCPU));
}
TorchTensorHandle copy_from_blob_long(long *data, int *shape, int dim)
{
return new torch::Tensor(ctorch::copy_from_blob<long>(data, shape, dim));
return new torch::Tensor(ctorch::copy_from_blob<long>(data, shape, dim, torch::kCPU));
}
TorchTensorHandle copy_from_blob_int(int *data, int *shape, int dim)
{
return new torch::Tensor(ctorch::copy_from_blob<int>(data, shape, dim));
return new torch::Tensor(ctorch::copy_from_blob<int>(data, shape, dim, torch::kCPU));
}
// Builds a double tensor from `data` with the `dim`-entry shape in `shape`,
// placed on CUDA device `device`. Returns a heap-allocated handle.
TorchTensorHandle copy_from_blob_to_gpu_double(double *data, int *shape, int dim, int device)
{
    const torch::Device target(torch::kCUDA, device);
    return new torch::Tensor(ctorch::copy_from_blob<double>(data, shape, dim, target));
}
// Builds a float tensor from `data` with the `dim`-entry shape in `shape`,
// placed on CUDA device `device`. Returns a heap-allocated handle.
TorchTensorHandle copy_from_blob_to_gpu_float(float *data, int *shape, int dim, int device)
{
    const torch::Device target(torch::kCUDA, device);
    return new torch::Tensor(ctorch::copy_from_blob<float>(data, shape, dim, target));
}
// Builds a long tensor from `data` with the `dim`-entry shape in `shape`,
// placed on CUDA device `device`. Returns a heap-allocated handle.
TorchTensorHandle copy_from_blob_to_gpu_long(long *data, int *shape, int dim, int device)
{
    const torch::Device target(torch::kCUDA, device);
    return new torch::Tensor(ctorch::copy_from_blob<long>(data, shape, dim, target));
}
// Builds an int tensor from `data` with the `dim`-entry shape in `shape`,
// placed on CUDA device `device`. Returns a heap-allocated handle.
TorchTensorHandle copy_from_blob_to_gpu_int(int *data, int *shape, int dim, int device)
{
    const torch::Device target(torch::kCUDA, device);
    return new torch::Tensor(ctorch::copy_from_blob<int>(data, shape, dim, target));
}
TorchTensorHandle copy_tensor(TorchTensorHandle tensor_handle)
@ -108,3 +125,45 @@ void dispose_tensor(TorchTensorHandle tensor_handle)
{
delete static_cast<torch::Tensor *>(tensor_handle);
}
// C entry point: returns the element at flat offset `offset` as a double.
double get_at_offset_double(TorchTensorHandle tensor_handle, int offset)
{
    const double element = ctorch::get_at_offset<double>(tensor_handle, offset);
    return element;
}
// C entry point: returns the element at flat offset `offset` as a float.
float get_at_offset_float(TorchTensorHandle tensor_handle, int offset)
{
    const float element = ctorch::get_at_offset<float>(tensor_handle, offset);
    return element;
}
// C entry point: returns the element at flat offset `offset` as a long.
long get_at_offset_long(TorchTensorHandle tensor_handle, int offset)
{
    const long element = ctorch::get_at_offset<long>(tensor_handle, offset);
    return element;
}
// C entry point: returns the element at flat offset `offset` as an int.
int get_at_offset_int(TorchTensorHandle tensor_handle, int offset)
{
    const int element = ctorch::get_at_offset<int>(tensor_handle, offset);
    return element;
}
// C entry point: stores the double `value` at flat offset `offset`.
void set_at_offset_double(TorchTensorHandle tensor_handle, int offset, double value)
{
    // NumType is deduced from `value`, i.e. double.
    ctorch::set_at_offset(tensor_handle, offset, value);
}
// C entry point: stores the float `value` at flat offset `offset`.
void set_at_offset_float(TorchTensorHandle tensor_handle, int offset, float value)
{
    // NumType is deduced from `value`, i.e. float.
    ctorch::set_at_offset(tensor_handle, offset, value);
}
// C entry point: stores the long `value` at flat offset `offset`.
void set_at_offset_long(TorchTensorHandle tensor_handle, int offset, long value)
{
    // NumType is deduced from `value`, i.e. long.
    ctorch::set_at_offset(tensor_handle, offset, value);
}
// C entry point: stores the int `value` at flat offset `offset`.
void set_at_offset_int(TorchTensorHandle tensor_handle, int offset, int value)
{
    // NumType is deduced from `value`, i.e. int.
    ctorch::set_at_offset(tensor_handle, offset, value);
}
// Returns a heap-allocated handle for the tensor converted to the CPU device.
// The two trailing flags are presumably non_blocking=false, copy=true —
// confirm against the libtorch Tensor::to overload.
TorchTensorHandle copy_to_cpu(TorchTensorHandle tensor_handle)
{
    const torch::Tensor &source = ctorch::cast(tensor_handle);
    return new torch::Tensor(source.to(torch::kCPU, false, true));
}
// Returns a heap-allocated handle for the tensor converted to CUDA device `device`.
// The two trailing flags are presumably non_blocking=false, copy=true —
// confirm against the libtorch Tensor::to overload.
TorchTensorHandle copy_to_gpu(TorchTensorHandle tensor_handle, int device)
{
    const torch::Tensor &source = ctorch::cast(tensor_handle);
    return new torch::Tensor(source.to(torch::Device(torch::kCUDA, device), false, true));
}

View File

@ -0,0 +1,25 @@
package kscience.kmath.torch
import kscience.kmath.structures.asBuffer
import kotlinx.cinterop.memScoped
import kotlin.test.*
/**
 * Tests for float tensors created on CUDA device 0.
 */
class TestTorchTensorGPU {

    /** CUDA must be reported as available for the remaining GPU tests to make sense. */
    @Test
    fun cudaAvailability() {
        val available = cudaAvailable()
        assertTrue(available)
    }

    /**
     * Creates a 2x2x2 float tensor on the GPU and checks that indexed access agrees
     * with [elements] and that the buffer content equals the source array.
     */
    @Test
    fun floatGPUTensorLayout() = memScoped {
        val shape = intArrayOf(2, 2, 2)
        // Values 2f, 4f, ..., 16f.
        val array = FloatArray(8) { position -> (position + 1) * 2f }
        val tensor = TorchTensor.copyFromFloatArrayToGPU(scope = this, array = array, shape = shape, device = 0)
        for ((index, value) in tensor.elements()) {
            assertEquals(tensor[index], value)
        }
        val expected = array.asBuffer()
        assertTrue(tensor.buffer.contentEquals(expected))
    }
}

View File

@ -5,9 +5,7 @@ import kscience.kmath.structures.*
import kotlinx.cinterop.*
import ctorch.*
public abstract class TorchTensor<T,
TVar : CPrimitiveVar,
TorchTensorBufferImpl : TorchTensorBuffer<T, TVar>> :
public abstract class TorchTensor<T, out TorchTensorBufferImpl : TorchTensorBuffer<T>> :
MutableNDBufferTrait<T, TorchTensorBufferImpl, TorchTensorStrides>() {
public companion object {
@ -23,6 +21,12 @@ public abstract class TorchTensor<T,
)!!
return TorchTensorInt(populateStridesFromNative(tensorHandle, rawShape = shape), scope, tensorHandle)
}
/**
 * Creates a [TorchTensorFloatGPU] from [array] with the given [shape] on CUDA device [device].
 *
 * The native tensor is created by `copy_from_blob_to_gpu_float`; the resulting handle is
 * passed, together with [scope], to the returned tensor.
 */
public fun copyFromFloatArrayToGPU(
    scope: DeferScope,
    array: FloatArray,
    shape: IntArray,
    device: Int
): TorchTensorFloatGPU {
    val nativeData = array.toCValues()
    val nativeShape = shape.toCValues()
    val tensorHandle: COpaquePointer = copy_from_blob_to_gpu_float(nativeData, nativeShape, shape.size, device)!!
    val strides = populateStridesFromNative(tensorHandle, rawShape = shape)
    return TorchTensorFloatGPU(strides, scope, tensorHandle)
}
}
override fun toString(): String {
@ -38,7 +42,7 @@ public class TorchTensorFloat internal constructor(
override val strides: TorchTensorStrides,
scope: DeferScope,
tensorHandle: COpaquePointer
): TorchTensor<Float, FloatVar, TorchTensorBufferFloat>() {
): TorchTensor<Float, TorchTensorBufferFloat>() {
override val buffer: TorchTensorBufferFloat = TorchTensorBufferFloat(scope, tensorHandle)
}
@ -46,7 +50,15 @@ public class TorchTensorInt internal constructor(
override val strides: TorchTensorStrides,
scope: DeferScope,
tensorHandle: COpaquePointer
): TorchTensor<Int, IntVar, TorchTensorBufferInt>() {
): TorchTensor<Int, TorchTensorBufferInt>() {
override val buffer: TorchTensorBufferInt = TorchTensorBufferInt(scope, tensorHandle)
}
/**
 * [TorchTensor] of `Float` values backed by a [TorchTensorBufferFloatGPU].
 *
 * @property strides the strides describing the tensor layout.
 */
public class TorchTensorFloatGPU internal constructor(
    override val strides: TorchTensorStrides,
    scope: DeferScope,
    tensorHandle: COpaquePointer
) : TorchTensor<Float, TorchTensorBufferFloatGPU>() {
    override val buffer: TorchTensorBufferFloatGPU =
        TorchTensorBufferFloatGPU(scope = scope, tensorHandle = tensorHandle)
}

View File

@ -5,31 +5,35 @@ import kscience.kmath.structures.MutableBuffer
import kotlinx.cinterop.*
import ctorch.*
public abstract class TorchTensorBuffer<T, TVar : CPrimitiveVar> internal constructor(
public abstract class TorchTensorBuffer<T> internal constructor(
internal val scope: DeferScope,
internal val tensorHandle: COpaquePointer
) : MutableBuffer<T> {
override val size: Int = get_numel(tensorHandle)
init {
scope.defer(::close)
}
internal fun close() {
protected fun close() {
dispose_tensor(tensorHandle)
}
protected abstract val tensorData: CPointer<TVar>
override val size: Int
get() = get_numel(tensorHandle)
internal abstract fun wrap(outScope: DeferScope, outTensorHandle: COpaquePointer): TorchTensorBuffer<T>
override fun copy(): TorchTensorBuffer<T> = wrap(
outScope = scope,
outTensorHandle = copy_tensor(tensorHandle)!!
)
}
public class TorchTensorBufferFloat internal constructor(
scope: DeferScope,
tensorHandle: COpaquePointer
) : TorchTensorBuffer<Float, FloatVar>(scope, tensorHandle) {
override val tensorData: CPointer<FloatVar> = get_data_float(tensorHandle)!!
) : TorchTensorBuffer<Float>(scope, tensorHandle) {
private val tensorData: CPointer<FloatVar> = get_data_float(tensorHandle)!!
override operator fun get(index: Int): Float = tensorData[index]
@ -39,17 +43,19 @@ public class TorchTensorBufferFloat internal constructor(
override operator fun iterator(): Iterator<Float> = (1..size).map { tensorData[it - 1] }.iterator()
override fun copy(): TorchTensorBufferFloat = TorchTensorBufferFloat(
scope = scope,
tensorHandle = copy_tensor(tensorHandle)!!
override fun wrap(outScope: DeferScope, outTensorHandle: COpaquePointer) = TorchTensorBufferFloat(
scope = outScope,
tensorHandle = outTensorHandle
)
}
public class TorchTensorBufferInt internal constructor(
scope: DeferScope,
tensorHandle: COpaquePointer
) : TorchTensorBuffer<Int, IntVar>(scope, tensorHandle) {
override val tensorData: CPointer<IntVar> = get_data_int(tensorHandle)!!
) : TorchTensorBuffer<Int>(scope, tensorHandle) {
private val tensorData: CPointer<IntVar> = get_data_int(tensorHandle)!!
override operator fun get(index: Int): Int = tensorData[index]
@ -59,9 +65,33 @@ public class TorchTensorBufferInt internal constructor(
override operator fun iterator(): Iterator<Int> = (1..size).map { tensorData[it - 1] }.iterator()
override fun copy(): TorchTensorBufferInt = TorchTensorBufferInt(
scope = scope,
tensorHandle = copy_tensor(tensorHandle)!!
override fun wrap(outScope: DeferScope, outTensorHandle: COpaquePointer) = TorchTensorBufferInt(
scope = outScope,
tensorHandle = outTensorHandle
)
}
/**
 * [TorchTensorBuffer] of `Float` values that accesses its elements through the native
 * `get_at_offset_float` / `set_at_offset_float` calls instead of a raw data pointer.
 *
 * Iteration takes a temporary CPU copy of the tensor and reads all values from it.
 */
public class TorchTensorBufferFloatGPU internal constructor(
    scope: DeferScope,
    tensorHandle: COpaquePointer
) : TorchTensorBuffer<Float>(scope, tensorHandle) {

    /** Returns the element at flat offset [index] via a native call. */
    override operator fun get(index: Int): Float = get_at_offset_float(tensorHandle, index)

    /** Stores [value] at flat offset [index] via a native call. */
    override operator fun set(index: Int, value: Float) {
        set_at_offset_float(tensorHandle, index, value)
    }

    /**
     * Returns an iterator over a snapshot of the values.
     *
     * The values are read eagerly from a temporary CPU copy, which is disposed before
     * this function returns, even when reading fails.
     */
    override operator fun iterator(): Iterator<Float> {
        val cpuCopy = copy_to_cpu(tensorHandle)!!
        try {
            val tensorCpuData = get_data_float(cpuCopy)!!
            // Materialise all values before the finally block releases the native copy.
            return List(size) { tensorCpuData[it] }.iterator()
        } finally {
            dispose_tensor(cpuCopy)
        }
    }

    /** Wraps [outTensorHandle] in a new buffer of this type registered with [outScope]. */
    override fun wrap(outScope: DeferScope, outTensorHandle: COpaquePointer): TorchTensorBufferFloatGPU =
        TorchTensorBufferFloatGPU(
            scope = outScope,
            tensorHandle = outTensorHandle
        )
}

View File

@ -10,8 +10,8 @@ internal class TestTorchTensor {
@Test
fun intTensorLayout() = memScoped {
val array = intArrayOf(7,8,9,2,6,5)
val shape = intArrayOf(3,2)
val array = (1..24).toList().toIntArray()
val shape = intArrayOf(3, 2, 4)
val tensor = TorchTensor.copyFromIntArray(scope = this, array = array, shape = shape)
tensor.elements().forEach {
assertEquals(tensor[it.first], it.second)
@ -21,8 +21,8 @@ internal class TestTorchTensor {
@Test
fun floatTensorLayout() = memScoped {
val array = floatArrayOf(7.5f,8.2f,9f,2.58f,6.5f,5f)
val shape = intArrayOf(2,3)
val array = (1..10).map { it + 50f }.toList().toFloatArray()
val shape = intArrayOf(10)
val tensor = TorchTensor.copyFromFloatArray(this, array, shape)
tensor.elements().forEach {
assertEquals(tensor[it.first], it.second)