Merge pull request #97 from EthicalML/python_extensions
Python extensions for end to end example
This commit is contained in:
commit
5a3e4da916
7 changed files with 223 additions and 157 deletions
BIN
docs/images/binder-python.jpg
Executable file
BIN
docs/images/binder-python.jpg
Executable file
Binary file not shown.
|
After Width: | Height: | Size: 424 KiB |
|
|
@ -2,13 +2,28 @@
|
|||
Python Package Overview
|
||||
=======================
|
||||
|
||||
This section provides an overview of the Python Package from a functionality perspective. If you wish to see all the classes and their respective functions you can find that in the `Python Class Reference Section <python-reference>`_.
|
||||
This section provides an overview of the Python Package from a functionality perspective. If you wish to see all the classes and their respective functions you can find that in the `Python Class Reference Section <python-reference.html>`_.
|
||||
|
||||
Below is a diagram that provides insights on the relationship between Vulkan Kompute objects and Vulkan resources, which primarily encompass ownership of either CPU and/or GPU memory.
|
||||
|
||||
.. image:: ../images/kompute-architecture.jpg
|
||||
:width: 70%
|
||||
|
||||
Package Installation
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Once you set up the package dependencies, you can install Kompute from ``PyPI`` using ``pip`` by running:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install kp
|
||||
|
||||
You can also install from master branch using:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install git+git://github.com/EthicalML/vulkan-kompute.git@master
|
||||
|
||||
Core Python Components
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
|
@ -272,28 +287,16 @@ Similar to the logistic regression implementation in the C++ examples section, b
|
|||
print(tensor_b_in.data())
|
||||
|
||||
|
||||
Package Installation
|
||||
^^^^^^^^^
|
||||
|
||||
The package can be installed through the top level `setup.py` by running:
|
||||
|
||||
```
|
||||
pip install kp
|
||||
```
|
||||
|
||||
You can also install from master branch using:
|
||||
|
||||
```
|
||||
pip install git+git://github.com/EthicalML/vulkan-kompute.git@master
|
||||
```
|
||||
Log Level Configuration
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
You can configure the log level with the function ``kp.log_level`` as outlined below.
|
||||
|
||||
The values are TRACE=0, DEBUG=1, INFO=2, WARN=3, ERROR=4. Kompute defaults to INFO.
|
||||
|
||||
```
|
||||
import kp
|
||||
kp.log_level(1)
|
||||
```
|
||||
.. code-block:: python
|
||||
:linenos:
|
||||
|
||||
import kp
|
||||
kp.log_level(1)
|
||||
|
||||
|
|
|
|||
17
examples/python/README.md
Normal file
17
examples/python/README.md
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# Kompute Python Example
|
||||
|
||||
This folder contains the accompanying code for the article "High Performance Python for GPU Accelerated Machine Learning in Cross-Vendor GPUs".
|
||||
|
||||
The easiest way to try this example is by using the [Google Colab Notebook](https://colab.research.google.com/drive/15uQ7qMZuOyk8JcXF-3SB2R5yNFW21I4P), which allows you to use a GPU for free and runs without much setup.
|
||||
|
||||
<a href="https://colab.research.google.com/drive/15uQ7qMZuOyk8JcXF-3SB2R5yNFW21I4P">
|
||||
<img src="https://raw.githubusercontent.com/EthicalML/vulkan-kompute/python_extensions/docs/images/binder-python.jpg">
|
||||
</a>
|
||||
|
||||
Alternatively, if you want to test the example yourself locally, you can get set up and started through the following links:
|
||||
|
||||
1. Install the [Kompute Python Package](https://kompute.cc/overview/python-package.html#package-installation)
|
||||
2. Run the [Array Multiplication Code](https://github.com/EthicalML/vulkan-kompute/blob/python_extensions/python/test/test_array_multiplication.py)
|
||||
3. Run the [Logistic Regression Code](https://github.com/EthicalML/vulkan-kompute/blob/python_extensions/python/test/test_logistic_regression.py)
|
||||
|
||||
|
||||
|
|
@ -39,12 +39,33 @@ PYBIND11_MODULE(kp, m) {
|
|||
return std::unique_ptr<kp::Tensor>(new kp::Tensor(data, tensorTypes));
|
||||
}), "Initialiser with list of data components and tensor GPU memory type.")
|
||||
.def("data", &kp::Tensor::data, DOC(kp, Tensor, data))
|
||||
.def("get", [](kp::Tensor &self, uint32_t index) -> float { return self.data()[index]; },
|
||||
.def("__getitem__", [](kp::Tensor &self, size_t index) -> float { return self.data()[index]; },
|
||||
"When only an index is necessary")
|
||||
.def("set", [](kp::Tensor &self, uint32_t index, float value) {
|
||||
.def("__setitem__", [](kp::Tensor &self, size_t index, float value) {
|
||||
self.data()[index] = value; })
|
||||
.def("set", &kp::Tensor::setData, "Overrides the data in the local Tensor memory.")
|
||||
.def("set_data", &kp::Tensor::setData, "Overrides the data in the local Tensor memory.")
|
||||
.def("__iter__", [](kp::Tensor &self) {
|
||||
return py::make_iterator(self.data().begin(), self.data().end());
|
||||
}, py::keep_alive<0, 1>(), // Required to keep alive iterator while exists
|
||||
"Iterator to enable looping within data structure as required.")
|
||||
.def("__contains__", [](kp::Tensor &self, float v) {
|
||||
for (size_t i = 0; i < self.data().size(); ++i) {
|
||||
if (v == self.data()[i]) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
})
|
||||
.def("__reversed__", [](kp::Tensor &self) {
|
||||
size_t size = self.data().size();
|
||||
std::vector<float> reversed(size);
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
reversed[size - i - 1] = self.data()[i];
|
||||
}
|
||||
return reversed;
|
||||
})
|
||||
.def("size", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.")
|
||||
.def("__len__", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.")
|
||||
.def("tensor_type", &kp::Tensor::tensorType, "Retreves the memory type of the tensor.")
|
||||
.def("is_init", &kp::Tensor::isInit, "Checks whether the tensor GPU memory has been initialised.")
|
||||
.def("map_data_from_host", &kp::Tensor::mapDataFromHostMemory, "Maps data into GPU memory from tensor local data.")
|
||||
|
|
|
|||
35
python/test/test_array_multiplication.py
Normal file
35
python/test/test_array_multiplication.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
import pyshader as ps
|
||||
import kp
|
||||
|
||||
|
||||
def test_array_multiplication():
    """Multiply two three-element tensors element-wise on the GPU and check the product."""

    # 1. Create Kompute Manager (selects device 0 by default)
    manager = kp.Manager()

    # 2. Create Kompute Tensors to hold data
    lhs = kp.Tensor([2, 2, 2])
    rhs = kp.Tensor([1, 2, 3])
    product = kp.Tensor([0, 0, 0])
    shader_tensors = [lhs, rhs, product]

    # 3. Initialise the Kompute Tensors in the GPU
    manager.eval_tensor_create_def(shader_tensors)

    # 4. Define the multiplication shader code to run on the GPU
    #    (buffer bindings 0, 1, 2 follow the order of shader_tensors)
    @ps.python2shader
    def compute_shader_multiply(index=("input", "GlobalInvocationId", ps.ivec3),
                                data1=("buffer", 0, ps.Array(ps.f32)),
                                data2=("buffer", 1, ps.Array(ps.f32)),
                                data3=("buffer", 2, ps.Array(ps.f32))):
        i = index.x
        data3[i] = data1[i] * data2[i]

    # 5. Run shader code against our previously defined tensors
    spirv = compute_shader_multiply.to_spirv()
    manager.eval_algo_data_def(shader_tensors, spirv)

    # 6. Sync tensor data from GPU back to local
    manager.eval_tensor_sync_local_def([product])

    expected = [2.0, 4.0, 6.0]
    assert product.data() == expected
|
||||
|
|
@ -1,9 +1,6 @@
|
|||
import os
|
||||
|
||||
from pyshader import python2shader, f32, ivec3, Array
|
||||
from pyshader.stdlib import exp, log
|
||||
|
||||
from kp import Tensor, Manager, Sequence
|
||||
import kp
|
||||
|
||||
DIRNAME = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
|
@ -12,11 +9,11 @@ def test_opmult():
|
|||
Test basic OpMult operation
|
||||
"""
|
||||
|
||||
tensor_in_a = Tensor([2, 2, 2])
|
||||
tensor_in_b = Tensor([1, 2, 3])
|
||||
tensor_out = Tensor([0, 0, 0])
|
||||
tensor_in_a = kp.Tensor([2, 2, 2])
|
||||
tensor_in_b = kp.Tensor([1, 2, 3])
|
||||
tensor_out = kp.Tensor([0, 0, 0])
|
||||
|
||||
mgr = Manager()
|
||||
mgr = kp.Manager()
|
||||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
|
|
@ -31,11 +28,11 @@ def test_opalgobase_data():
|
|||
Test basic OpAlgoBase operation
|
||||
"""
|
||||
|
||||
tensor_in_a = Tensor([2, 2, 2])
|
||||
tensor_in_b = Tensor([1, 2, 3])
|
||||
tensor_out = Tensor([0, 0, 0])
|
||||
tensor_in_a = kp.Tensor([2, 2, 2])
|
||||
tensor_in_b = kp.Tensor([1, 2, 3])
|
||||
tensor_out = kp.Tensor([0, 0, 0])
|
||||
|
||||
mgr = Manager()
|
||||
mgr = kp.Manager()
|
||||
|
||||
shaderData = """
|
||||
#version 450
|
||||
|
|
@ -67,11 +64,11 @@ def test_opalgobase_file():
|
|||
Test basic OpAlgoBase operation
|
||||
"""
|
||||
|
||||
tensor_in_a = Tensor([2, 2, 2])
|
||||
tensor_in_b = Tensor([1, 2, 3])
|
||||
tensor_out = Tensor([0, 0, 0])
|
||||
tensor_in_a = kp.Tensor([2, 2, 2])
|
||||
tensor_in_b = kp.Tensor([1, 2, 3])
|
||||
tensor_out = kp.Tensor([0, 0, 0])
|
||||
|
||||
mgr = Manager()
|
||||
mgr = kp.Manager()
|
||||
|
||||
shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp")
|
||||
|
||||
|
|
@ -87,11 +84,11 @@ def test_sequence():
|
|||
"""
|
||||
Test basic OpAlgoBase operation
|
||||
"""
|
||||
mgr = Manager(0, [2])
|
||||
mgr = kp.Manager(0, [2])
|
||||
|
||||
tensor_in_a = Tensor([2, 2, 2])
|
||||
tensor_in_b = Tensor([1, 2, 3])
|
||||
tensor_out = Tensor([0, 0, 0])
|
||||
tensor_in_a = kp.Tensor([2, 2, 2])
|
||||
tensor_in_b = kp.Tensor([1, 2, 3])
|
||||
tensor_out = kp.Tensor([0, 0, 0])
|
||||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
|
|
@ -109,118 +106,3 @@ def test_sequence():
|
|||
seq.eval()
|
||||
|
||||
assert tensor_out.data() == [2.0, 4.0, 6.0]
|
||||
|
||||
def test_pyshader_pyshader():
|
||||
|
||||
@python2shader
|
||||
def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
|
||||
data1=("buffer", 0, Array(f32)),
|
||||
data2=("buffer", 1, Array(f32)),
|
||||
data3=("buffer", 2, Array(f32))):
|
||||
i = index.x
|
||||
data3[i] = data1[i] * data2[i]
|
||||
|
||||
tensor_in_a = Tensor([2, 2, 2])
|
||||
tensor_in_b = Tensor([1, 2, 3])
|
||||
tensor_out = Tensor([0, 0, 0])
|
||||
|
||||
mgr = Manager()
|
||||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
mgr.eval_algo_data_def([tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
|
||||
mgr.eval_tensor_sync_local_def([tensor_out])
|
||||
|
||||
assert tensor_out.data() == [2.0, 4.0, 6.0]
|
||||
|
||||
def test_logistic_regression_pyshader():
|
||||
@python2shader
|
||||
def compute_shader(
|
||||
index = ("input", "GlobalInvocationId", ivec3),
|
||||
x_i = ("buffer", 0, Array(f32)),
|
||||
x_j = ("buffer", 1, Array(f32)),
|
||||
y = ("buffer", 2, Array(f32)),
|
||||
w_in = ("buffer", 3, Array(f32)),
|
||||
w_out_i = ("buffer", 4, Array(f32)),
|
||||
w_out_j = ("buffer", 5, Array(f32)),
|
||||
b_in = ("buffer", 6, Array(f32)),
|
||||
b_out = ("buffer", 7, Array(f32)),
|
||||
l_out = ("buffer", 8, Array(f32)),
|
||||
M = ("buffer", 9, Array(f32))):
|
||||
|
||||
i = index.x
|
||||
|
||||
m = M[0]
|
||||
|
||||
w_curr = vec2(w_in[0], w_in[1])
|
||||
b_curr = b_in[0]
|
||||
|
||||
x_curr = vec2(x_i[i], x_j[i])
|
||||
y_curr = y[i]
|
||||
|
||||
z_dot = w_curr @ x_curr
|
||||
z = z_dot + b_curr
|
||||
y_hat = 1.0 / (1.0 + exp(-z))
|
||||
|
||||
d_z = y_hat - y_curr
|
||||
d_w = (1.0 / m) * x_curr * d_z
|
||||
d_b = (1.0 / m) * d_z
|
||||
|
||||
loss = -((y_curr * log(y_hat)) + ((1.0 + y_curr) * log(1.0 - y_hat)))
|
||||
|
||||
w_out_i[i] = d_w.x
|
||||
w_out_j[i] = d_w.y
|
||||
b_out[i] = d_b
|
||||
l_out[i] = loss
|
||||
|
||||
|
||||
# First we create input and output tensors for shader
|
||||
tensor_x_i = Tensor([0.0, 1.0, 1.0, 1.0, 1.0])
|
||||
tensor_x_j = Tensor([0.0, 0.0, 0.0, 1.0, 1.0])
|
||||
|
||||
tensor_y = Tensor([0.0, 0.0, 0.0, 1.0, 1.0])
|
||||
|
||||
tensor_w_in = Tensor([0.001, 0.001])
|
||||
tensor_w_out_i = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
tensor_w_out_j = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
|
||||
tensor_b_in = Tensor([0.0])
|
||||
tensor_b_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
|
||||
tensor_l_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
|
||||
tensor_m = Tensor([ 5.0 ])
|
||||
|
||||
# We store them in an array for easier interaction
|
||||
params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
|
||||
tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m]
|
||||
|
||||
mgr = Manager()
|
||||
|
||||
mgr.eval_tensor_create_def(params)
|
||||
|
||||
# Record commands for efficient evaluation
|
||||
sq = mgr.create_sequence()
|
||||
sq.begin()
|
||||
sq.record_tensor_sync_device([tensor_w_in, tensor_b_in])
|
||||
sq.record_algo_data(params, compute_shader.to_spirv())
|
||||
sq.record_tensor_sync_local([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out])
|
||||
sq.end()
|
||||
|
||||
ITERATIONS = 100
|
||||
learning_rate = 0.1
|
||||
|
||||
# Perform machine learning training and inference across all input X and Y
|
||||
for i_iter in range(ITERATIONS):
|
||||
sq.eval()
|
||||
|
||||
# Calculate the parameters based on the respective derivatives calculated
|
||||
for j_iter in range(tensor_b_out.size()):
|
||||
tensor_w_in.set(0, tensor_w_in.get(0) - learning_rate * tensor_w_out_i.data()[j_iter])
|
||||
tensor_w_in.set(1, tensor_w_in.get(1) - learning_rate * tensor_w_out_j.data()[j_iter])
|
||||
tensor_b_in.set(0, tensor_b_in.get(0) - learning_rate * tensor_b_out.data()[j_iter])
|
||||
|
||||
assert tensor_w_in.data()[0] < 0.01
|
||||
assert tensor_w_in.data()[0] > 0.0
|
||||
assert tensor_w_in.data()[1] > 1.5
|
||||
assert tensor_b_in.data()[0] < 0.7
|
||||
|
||||
|
|
|
|||
108
python/test/test_logistic_regression.py
Normal file
108
python/test/test_logistic_regression.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
import pyshader as ps
|
||||
import kp
|
||||
|
||||
def test_logistic_regression():
    """Train a two-weight logistic regression with a GPU shader and check the result.

    A pyshader-compiled compute shader writes, for every sample index, the
    gradient contributions for both weights and the bias plus the sample's
    loss. The host loop then applies each per-sample contribution to the
    weights and bias with a fixed learning rate, and the final assertions
    bound the learned parameters.
    """

    @ps.python2shader
    def compute_shader(
            index=("input", "GlobalInvocationId", ps.ivec3),
            x_i=("buffer", 0, ps.Array(ps.f32)),
            x_j=("buffer", 1, ps.Array(ps.f32)),
            y=("buffer", 2, ps.Array(ps.f32)),
            w_in=("buffer", 3, ps.Array(ps.f32)),
            w_out_i=("buffer", 4, ps.Array(ps.f32)),
            w_out_j=("buffer", 5, ps.Array(ps.f32)),
            b_in=("buffer", 6, ps.Array(ps.f32)),
            b_out=("buffer", 7, ps.Array(ps.f32)),
            l_out=("buffer", 8, ps.Array(ps.f32)),
            M=("buffer", 9, ps.Array(ps.f32))):

        i = index.x

        m = M[0]

        w_curr = vec2(w_in[0], w_in[1])
        b_curr = b_in[0]

        x_curr = vec2(x_i[i], x_j[i])
        y_curr = y[i]

        # Sigmoid of the linear model w . x + b
        z_dot = w_curr @ x_curr
        z = z_dot + b_curr
        y_hat = 1.0 / (1.0 + exp(-z))

        # Per-sample gradient contributions, scaled by 1 / m
        d_z = y_hat - y_curr
        d_w = (1.0 / m) * x_curr * d_z
        d_b = (1.0 / m) * d_z

        # Binary cross-entropy: -(y * log(y_hat) + (1 - y) * log(1 - y_hat)).
        # The second term must be weighted by (1 - y), not (1 + y).
        loss = -((y_curr * log(y_hat)) + ((1.0 - y_curr) * log(1.0 - y_hat)))

        w_out_i[i] = d_w.x
        w_out_j[i] = d_w.y
        b_out[i] = d_b
        l_out[i] = loss

    mgr = kp.Manager(0)

    # First we create input and output tensors for shader
    tensor_x_i = kp.Tensor([0.0, 1.0, 1.0, 1.0, 1.0])
    tensor_x_j = kp.Tensor([0.0, 0.0, 0.0, 1.0, 1.0])

    tensor_y = kp.Tensor([0.0, 0.0, 0.0, 1.0, 1.0])

    tensor_w_in = kp.Tensor([0.001, 0.001])
    tensor_w_out_i = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
    tensor_w_out_j = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])

    tensor_b_in = kp.Tensor([0.0])
    tensor_b_out = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])

    tensor_l_out = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])

    # Number of samples, read by the shader as M[0]
    tensor_m = kp.Tensor([ tensor_y.size() ])

    # We store them in an array for easier interaction; the order matches the
    # buffer binding indices (0-9) declared in the shader signature
    params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
              tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m]

    mgr.eval_tensor_create_def(params)

    # Create a managed sequence
    sq = mgr.create_sequence()

    # Clear previous operations and begin recording for new operations
    sq.begin()

    # Record operation to sync memory from local to GPU memory
    sq.record_tensor_sync_device([tensor_w_in, tensor_b_in])

    # Record operation to execute GPU shader against all our parameters
    sq.record_algo_data(params, compute_shader.to_spirv())

    # Record operation to sync memory from GPU to local memory
    sq.record_tensor_sync_local([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out])

    # Stop recording operations
    sq.end()

    ITERATIONS = 100
    learning_rate = 0.1

    # Perform machine learning training and inference across all input X and Y
    for i_iter in range(ITERATIONS):

        # Execute an iteration of the algorithm
        sq.eval()

        # Calculate the parameters based on the respective derivatives calculated
        for j_iter in range(tensor_b_out.size()):
            tensor_w_in[0] -= learning_rate * tensor_w_out_i.data()[j_iter]
            tensor_w_in[1] -= learning_rate * tensor_w_out_j.data()[j_iter]
            tensor_b_in[0] -= learning_rate * tensor_b_out.data()[j_iter]

    assert tensor_w_in.data()[0] < 0.01
    assert tensor_w_in.data()[0] > 0.0
    assert tensor_w_in.data()[1] > 1.5
    assert tensor_b_in.data()[0] < 0.7
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue