Merge pull request #97 from EthicalML/python_extensions

Python extensions for end to end example
2020-11-11 08:46:46 +00:00 · 2020-11-11 08:46:46 +00:00 · 5a3e4da916
commit 5a3e4da916
parent 90bc86d0eb 67f8adb8e1
7 changed files with 223 additions and 157 deletions
--- a/docs/images/binder-python.jpg
+++ b/docs/images/binder-python.jpg
--- a/docs/overview/python-package.rst
+++ b/docs/overview/python-package.rst
@ -2,13 +2,28 @@
 Python Package Overview
 ========

-This section provides an overview of the Python Package from a functionality perspective. If you wish to see all the classes and their respective functions you can find that in the `Python Class Reference Section <python-reference>`_.
+This section provides an overview of the Python Package from a functionality perspective. If you wish to see all the classes and their respective functions you can find that in the `Python Class Reference Section <python-reference.html>`_.

 Below is a diagram that provides insights on the relationship between Vulkan Kompute objects and Vulkan resources, which primarily encompass ownership of either CPU and/or GPU memory.

 .. image:: ../images/kompute-architecture.jpg
   :width: 70%

+Package Installation
+^^^^^^^^^^^^^^^^^^^^
+
+Once you set up the package dependencies, you can install Kompute from ``PyPI`` using ``pip`` by running:
+
+.. code-block:: bash
+
+    pip install kp
+
+You can also install from master branch using:
+
+.. code-block:: bash
+
+    pip install git+https://github.com/EthicalML/vulkan-kompute.git@master
+
 Core Python Components
 ^^^^^^^^

@ -272,28 +287,16 @@ Similar to the logistic regression implementation in the C++ examples section, b
    print(tensor_b_in.data())


-Package Installation 
-^^^^^^^^^
-
-The package can be installed through the top level `setup.py` by running:
-
-```
-pip install kp
-```
-
-You can also install from master branch using:
-
-```
-pip install git+git://github.com/EthicalML/vulkan-kompute.git@master
-```
+Log Level Configuration
+^^^^^^^^^^^^^^^^^^^^^^^

 You can configure log level with the function `kp.log_level` as outlined below.

 The values are TRACE=0, DEBUG=1, INFO=2, WARN=3, ERROR=4. Kompute defaults to INFO.

-```
-import kp
-kp.log_level(1)
-```
+.. code-block:: python
+    :linenos:

+    import kp
+    kp.log_level(1)

--- a/examples/python/README.md
+++ b/examples/python/README.md
@ -0,0 +1,17 @@
+# Kompute Python Example
+
+This folder contains the accompanying code for the article "High Performance Python for GPU Accelerated Machine Learning in Cross-Vendor GPUs".
+
+The easiest way to try this example is by using the [Google Colab Notebook](https://colab.research.google.com/drive/15uQ7qMZuOyk8JcXF-3SB2R5yNFW21I4P), which lets you use a GPU for free and runs without much setup.
+
+<a href="https://colab.research.google.com/drive/15uQ7qMZuOyk8JcXF-3SB2R5yNFW21I4P">
+<img src="https://raw.githubusercontent.com/EthicalML/vulkan-kompute/python_extensions/docs/images/binder-python.jpg">
+</a>
+
+Alternatively, if you want to test the example yourself locally, you can get set up and started through the following links:
+
+1. Install the [Kompute Python Package](https://kompute.cc/overview/python-package.html#package-installation)
+2. Run the [Array Multiplication Code](https://github.com/EthicalML/vulkan-kompute/blob/python_extensions/python/test/test_array_multiplication.py)
+3. Run the [Logistic Regression Code](https://github.com/EthicalML/vulkan-kompute/blob/python_extensions/python/test/test_logistic_regression.py)
+
+
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@ -39,12 +39,33 @@ PYBIND11_MODULE(kp, m) {
                return std::unique_ptr<kp::Tensor>(new kp::Tensor(data, tensorTypes));
            }), "Initialiser with list of data components and tensor GPU memory type.")
        .def("data", &kp::Tensor::data, DOC(kp, Tensor, data))
-        .def("get", [](kp::Tensor &self, uint32_t index) -> float { return self.data()[index]; },
+        .def("__getitem__", [](kp::Tensor &self, size_t index) -> float { return self.data()[index]; },
                "When only an index is necessary")
-        .def("set", [](kp::Tensor &self, uint32_t index, float value) {
+        .def("__setitem__", [](kp::Tensor &self, size_t index, float value) {
                self.data()[index] = value; })
-        .def("set", &kp::Tensor::setData, "Overrides the data in the local Tensor memory.")
+        .def("set_data", &kp::Tensor::setData, "Overrides the data in the local Tensor memory.")
+        .def("__iter__", [](kp::Tensor &self) {
+                return py::make_iterator(self.data().begin(), self.data().end());
+            }, py::keep_alive<0, 1>(), // Required to keep alive iterator while exists
+            "Iterator to enable looping within data structure as required.")
+        .def("__contains__", [](kp::Tensor &self, float v) {
+                for (size_t i = 0; i < self.data().size(); ++i) {
+                    if (v == self.data()[i]) {
+                            return true;
+                        }
+                    }
+                return false;
+            })
+        .def("__reversed__", [](kp::Tensor &self) { 
+                size_t size = self.data().size();
+                std::vector<float> reversed(size);
+                for (size_t i = 0; i < size; i++) {
+                    reversed[size - i - 1] = self.data()[i];
+                }
+                return reversed;
+            })
        .def("size", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.")
+        .def("__len__", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.")
        .def("tensor_type", &kp::Tensor::tensorType, "Retreves the memory type of the tensor.")
        .def("is_init", &kp::Tensor::isInit, "Checks whether the tensor GPU memory has been initialised.")
        .def("map_data_from_host", &kp::Tensor::mapDataFromHostMemory, "Maps data into GPU memory from tensor local data.")
--- a/python/test/test_array_multiplication.py
+++ b/python/test/test_array_multiplication.py
@ -0,0 +1,35 @@
+import pyshader as ps
+import kp
+
+
+def test_array_multiplication():
+    # End-to-end example: multiply two tensors element-wise with a shader written in Python
+    # 1. Create Kompute Manager (selects device 0 by default)
+    mgr = kp.Manager()
+
+    # 2. Create Kompute Tensors to hold data
+    tensor_in_a = kp.Tensor([2, 2, 2])
+    tensor_in_b = kp.Tensor([1, 2, 3])
+    tensor_out = kp.Tensor([0, 0, 0])
+
+    # 3. Initialise the Kompute Tensors in the GPU
+    mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
+
+    # 4. Define the multiplication shader code to run on the GPU
+    @ps.python2shader
+    def compute_shader_multiply(index=("input", "GlobalInvocationId", ps.ivec3),
+                                data1=("buffer", 0, ps.Array(ps.f32)),
+                                data2=("buffer", 1, ps.Array(ps.f32)),
+                                data3=("buffer", 2, ps.Array(ps.f32))):
+        i = index.x
+        data3[i] = data1[i] * data2[i]
+
+    # 5. Run shader code against our previously defined tensors
+    mgr.eval_algo_data_def(
+      [tensor_in_a, tensor_in_b, tensor_out],
+      compute_shader_multiply.to_spirv())
+
+    # 6. Sync tensor data from GPU back to local
+    mgr.eval_tensor_sync_local_def([tensor_out])
+    # Expected result is the element-wise product of [2, 2, 2] and [1, 2, 3]
+    assert tensor_out.data() == [2.0, 4.0, 6.0]
--- a/python/test/test_kompute.py
+++ b/python/test/test_kompute.py
@ -1,9 +1,6 @@
 import os

-from pyshader import python2shader, f32, ivec3, Array
-from pyshader.stdlib import exp, log
-
-from kp import Tensor, Manager, Sequence
+import kp

 DIRNAME = os.path.dirname(os.path.abspath(__file__))

@ -12,11 +9,11 @@ def test_opmult():
    Test basic OpMult operation
    """

-    tensor_in_a = Tensor([2, 2, 2])
-    tensor_in_b = Tensor([1, 2, 3])
-    tensor_out = Tensor([0, 0, 0])
+    tensor_in_a = kp.Tensor([2, 2, 2])
+    tensor_in_b = kp.Tensor([1, 2, 3])
+    tensor_out = kp.Tensor([0, 0, 0])

-    mgr = Manager()
+    mgr = kp.Manager()

    mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])

@ -31,11 +28,11 @@ def test_opalgobase_data():
    Test basic OpAlgoBase operation
    """

-    tensor_in_a = Tensor([2, 2, 2])
-    tensor_in_b = Tensor([1, 2, 3])
-    tensor_out = Tensor([0, 0, 0])
+    tensor_in_a = kp.Tensor([2, 2, 2])
+    tensor_in_b = kp.Tensor([1, 2, 3])
+    tensor_out = kp.Tensor([0, 0, 0])

-    mgr = Manager()
+    mgr = kp.Manager()

    shaderData = """
        #version 450
@ -67,11 +64,11 @@ def test_opalgobase_file():
    Test basic OpAlgoBase operation
    """

-    tensor_in_a = Tensor([2, 2, 2])
-    tensor_in_b = Tensor([1, 2, 3])
-    tensor_out = Tensor([0, 0, 0])
+    tensor_in_a = kp.Tensor([2, 2, 2])
+    tensor_in_b = kp.Tensor([1, 2, 3])
+    tensor_out = kp.Tensor([0, 0, 0])

-    mgr = Manager()
+    mgr = kp.Manager()

    shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp")

@ -87,11 +84,11 @@ def test_sequence():
    """
    Test basic OpAlgoBase operation
    """
-    mgr = Manager(0, [2])
+    mgr = kp.Manager(0, [2])

-    tensor_in_a = Tensor([2, 2, 2])
-    tensor_in_b = Tensor([1, 2, 3])
-    tensor_out = Tensor([0, 0, 0])
+    tensor_in_a = kp.Tensor([2, 2, 2])
+    tensor_in_b = kp.Tensor([1, 2, 3])
+    tensor_out = kp.Tensor([0, 0, 0])

    mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])

@ -109,118 +106,3 @@ def test_sequence():
    seq.eval()

    assert tensor_out.data() == [2.0, 4.0, 6.0]
-
-def test_pyshader_pyshader():
-
-    @python2shader
-    def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
-                                data1=("buffer", 0, Array(f32)),
-                                data2=("buffer", 1, Array(f32)),
-                                data3=("buffer", 2, Array(f32))):
-        i = index.x
-        data3[i] = data1[i] * data2[i]
-
-    tensor_in_a = Tensor([2, 2, 2])
-    tensor_in_b = Tensor([1, 2, 3])
-    tensor_out = Tensor([0, 0, 0])
-
-    mgr = Manager()
-
-    mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
-    mgr.eval_algo_data_def([tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
-    mgr.eval_tensor_sync_local_def([tensor_out])
-
-    assert tensor_out.data() == [2.0, 4.0, 6.0]
-
-def test_logistic_regression_pyshader():
-    @python2shader
-    def compute_shader(
-            index   = ("input", "GlobalInvocationId", ivec3),
-            x_i     = ("buffer", 0, Array(f32)),
-            x_j     = ("buffer", 1, Array(f32)),
-            y       = ("buffer", 2, Array(f32)),
-            w_in    = ("buffer", 3, Array(f32)),
-            w_out_i = ("buffer", 4, Array(f32)),
-            w_out_j = ("buffer", 5, Array(f32)),
-            b_in    = ("buffer", 6, Array(f32)),
-            b_out   = ("buffer", 7, Array(f32)),
-            l_out   = ("buffer", 8, Array(f32)),
-            M       = ("buffer", 9, Array(f32))):
-
-        i = index.x
-
-        m = M[0]
-
-        w_curr = vec2(w_in[0], w_in[1])
-        b_curr = b_in[0]
-
-        x_curr = vec2(x_i[i], x_j[i])
-        y_curr = y[i]
-
-        z_dot = w_curr @ x_curr
-        z = z_dot + b_curr
-        y_hat = 1.0 / (1.0 + exp(-z))
-
-        d_z = y_hat - y_curr
-        d_w = (1.0 / m) * x_curr * d_z
-        d_b = (1.0 / m) * d_z
-
-        loss = -((y_curr * log(y_hat)) + ((1.0 + y_curr) * log(1.0 - y_hat)))
-
-        w_out_i[i] = d_w.x
-        w_out_j[i] = d_w.y
-        b_out[i] = d_b
-        l_out[i] = loss
-
-
-    # First we create input and ouput tensors for shader
-    tensor_x_i = Tensor([0.0, 1.0, 1.0, 1.0, 1.0])
-    tensor_x_j = Tensor([0.0, 0.0, 0.0, 1.0, 1.0])
-
-    tensor_y = Tensor([0.0, 0.0, 0.0, 1.0, 1.0])
-
-    tensor_w_in = Tensor([0.001, 0.001])
-    tensor_w_out_i = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
-    tensor_w_out_j = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
-
-    tensor_b_in = Tensor([0.0])
-    tensor_b_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
-
-    tensor_l_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
-
-    tensor_m = Tensor([ 5.0 ])
-
-    # We store them in an array for easier interaction
-    params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
-        tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m]
-
-    mgr = Manager()
-
-    mgr.eval_tensor_create_def(params)
-
-    # Record commands for efficient evaluation
-    sq = mgr.create_sequence()
-    sq.begin()
-    sq.record_tensor_sync_device([tensor_w_in, tensor_b_in])
-    sq.record_algo_data(params, compute_shader.to_spirv())
-    sq.record_tensor_sync_local([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out])
-    sq.end()
-
-    ITERATIONS = 100
-    learning_rate = 0.1
-
-    # Perform machine learning training and inference across all input X and Y
-    for i_iter in range(ITERATIONS):
-        sq.eval()
-
-        # Calculate the parameters based on the respective derivatives calculated
-        for j_iter in range(tensor_b_out.size()):
-            tensor_w_in.set(0, tensor_w_in.get(0) - learning_rate * tensor_w_out_i.data()[j_iter])
-            tensor_w_in.set(1, tensor_w_in.get(1) - learning_rate * tensor_w_out_j.data()[j_iter])
-            tensor_b_in.set(0, tensor_b_in.get(0) - learning_rate * tensor_b_out.data()[j_iter])
-
-    assert tensor_w_in.data()[0] < 0.01
-    assert tensor_w_in.data()[0] > 0.0
-    assert tensor_w_in.data()[1] > 1.5
-    assert tensor_b_in.data()[0] < 0.7
-
--- a/python/test/test_logistic_regression.py
+++ b/python/test/test_logistic_regression.py
@ -0,0 +1,108 @@
+import pyshader as ps
+import kp
+
+def test_logistic_regression():
+
+    @ps.python2shader
+    def compute_shader(
+            index   = ("input", "GlobalInvocationId", ps.ivec3),
+            x_i     = ("buffer", 0, ps.Array(ps.f32)),
+            x_j     = ("buffer", 1, ps.Array(ps.f32)),
+            y       = ("buffer", 2, ps.Array(ps.f32)),
+            w_in    = ("buffer", 3, ps.Array(ps.f32)),
+            w_out_i = ("buffer", 4, ps.Array(ps.f32)),
+            w_out_j = ("buffer", 5, ps.Array(ps.f32)),
+            b_in    = ("buffer", 6, ps.Array(ps.f32)),
+            b_out   = ("buffer", 7, ps.Array(ps.f32)),
+            l_out   = ("buffer", 8, ps.Array(ps.f32)),
+            M       = ("buffer", 9, ps.Array(ps.f32))):
+
+        i = index.x
+
+        m = M[0]
+
+        w_curr = vec2(w_in[0], w_in[1])
+        b_curr = b_in[0]
+
+        x_curr = vec2(x_i[i], x_j[i])
+        y_curr = y[i]
+
+        z_dot = w_curr @ x_curr
+        z = z_dot + b_curr
+        y_hat = 1.0 / (1.0 + exp(-z))
+
+        d_z = y_hat - y_curr
+        d_w = (1.0 / m) * x_curr * d_z
+        d_b = (1.0 / m) * d_z
+
+        loss = -((y_curr * log(y_hat)) + ((1.0 + y_curr) * log(1.0 - y_hat)))
+
+        w_out_i[i] = d_w.x
+        w_out_j[i] = d_w.y
+        b_out[i] = d_b
+        l_out[i] = loss
+
+
+    mgr = kp.Manager(0)
+
+    # First we create input and ouput tensors for shader
+    tensor_x_i = kp.Tensor([0.0, 1.0, 1.0, 1.0, 1.0])
+    tensor_x_j = kp.Tensor([0.0, 0.0, 0.0, 1.0, 1.0])
+
+    tensor_y = kp.Tensor([0.0, 0.0, 0.0, 1.0, 1.0])
+
+    tensor_w_in = kp.Tensor([0.001, 0.001])
+    tensor_w_out_i = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
+    tensor_w_out_j = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
+
+    tensor_b_in = kp.Tensor([0.0])
+    tensor_b_out = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
+
+    tensor_l_out = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
+
+    tensor_m = kp.Tensor([ tensor_y.size() ])
+
+    # We store them in an array for easier interaction
+    params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
+        tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m]
+
+    mgr.eval_tensor_create_def(params)
+
+    # Create a managed sequence
+    sq = mgr.create_sequence()
+
+    # Clear previous operations and begin recording for new operations
+    sq.begin()
+
+    # Record operation to sync memory from local to GPU memory
+    sq.record_tensor_sync_device([tensor_w_in, tensor_b_in])
+
+    # Record operation to execute GPU shader against all our parameters
+    sq.record_algo_data(params, compute_shader.to_spirv())
+
+    # Record operation to sync memory from GPU to local memory
+    sq.record_tensor_sync_local([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out])
+
+    # Stop recording operations
+    sq.end()
+
+    ITERATIONS = 100
+    learning_rate = 0.1
+
+    # Perform machine learning training and inference across all input X and Y
+    for i_iter in range(ITERATIONS):
+
+        # Execute an iteration of the algorithm
+        sq.eval()
+
+        # Calculate the parameters based on the respective derivatives calculated
+        for j_iter in range(tensor_b_out.size()):
+            tensor_w_in[0] -= learning_rate * tensor_w_out_i.data()[j_iter]
+            tensor_w_in[1] -= learning_rate * tensor_w_out_j.data()[j_iter]
+            tensor_b_in[0] -= learning_rate * tensor_b_out.data()[j_iter]
+
+    assert tensor_w_in.data()[0] < 0.01
+    assert tensor_w_in.data()[0] > 0.0
+    assert tensor_w_in.data()[1] > 1.5
+    assert tensor_b_in.data()[0] < 0.7
+