Updated examples in readme

2021-02-28 15:53:09 +00:00 · 2021-02-28 15:53:09 +00:00 · ddb77702ee
commit ddb77702ee
parent 38f356fdae
3 changed files with 221 additions and 36 deletions
--- a/README.md
+++ b/README.md
@ -56,35 +56,65 @@ int main() {
    // 2. Create and initialise Kompute Tensors through manager
    auto tensorInA = mgr.tensor({ 2., 2., 2. });
    auto tensorInB = mgr.tensor({ 1., 2., 3. });
-    auto tensorOut = mgr.tensor({ 0., 0., 0. });
+    auto tensorOutA = mgr.tensor({ 0., 0., 0. });
+    auto tensorOutB = mgr.tensor({ 0., 0., 0. });

-    // 3. Specify "multiply shader" code (can also be raw string, spir-v bytes or file path)
-    std::string shaderString = (R"(
+    std::vector<std::shared_ptr<kp::Tensor>> params = {tensorInA, tensorInB, tensorOutA, tensorOutB};
+
+    // 3. Create algorithm based on shader (supports buffers & push/spec constants)
+    std::string shader = (R"(
        #version 450

        layout (local_size_x = 1) in;

        // The input tensors bind index is relative to index in parameter passed
-        layout(set = 0, binding = 0) buffer bina { float tina[]; };
-        layout(set = 0, binding = 1) buffer binb { float tinb[]; };
-        layout(set = 0, binding = 2) buffer bout { float tout[]; };
+        layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
+        layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
+        layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
+        layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
+
+        // Kompute supports push constants updated on dispatch
+        layout(push_constant) uniform PushConstants {
+            float val;
+        } push_const;
+
+        // Kompute also supports spec constants on initialization
+        layout(constant_id = 0) const float const_one = 0;

        void main() {
            uint index = gl_GlobalInvocationID.x;
-            tout[index] = tina[index] * tinb[index];
+            out_a[index] += in_a[index] * in_b[index];
+            out_b[index] += const_one * push_const.val;
        }
    )");

-    // 3. Run operation with string shader synchronously
-    mgr.evalOpDefault<kp::OpAlgoBase>(
-        { tensorInA, tensorInB, tensorOut },
-        kp::Shader::compile_source(shaderString));
+    kp::Workgroup workgroup({3, 1, 1});
+    kp::Constants specConsts({ 2 });

-    // 4. Map results back from GPU memory to print the results
-    mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorInA, tensorInB, tensorOut });
+    auto algorithm = mgr.algorithm(params, kp::Shader::compile_source(shader), workgroup, specConsts);

-    // Prints the output which is Output: { 2, 4, 6 }
-    for (const float& elem : tensorOut->data()) std::cout << elem << "  ";
+    kp::Constants pushConstsA({ 2.0 });
+    kp::Constants pushConstsB({ 3.0 });
+
+    // 4. Run operation synchronously using sequence
+    mgr.sequence()
+        ->record<kp::OpTensorSyncDevice>(params)
+        ->record<kp::OpAlgoDispatch>(algorithm, pushConstsA)
+        ->record<kp::OpAlgoDispatch>(algorithm, pushConstsB)
+        ->eval();
+
+    // 5. Sync results from the GPU asynchronously
+    auto sq = mgr.sequence();
+    sq->evalAsync<kp::OpTensorSyncLocal>(params);
+
+    // ... Do other work asynchronously whilst GPU finishes
+
+    sq->evalAwait();
+
+    // Prints the first output which is: { 4, 8, 12 }
+    for (const float& elem : tensorOutA->data()) std::cout << elem << "  ";
+    // Prints the second output which is: { 10, 10, 10 }
+    for (const float& elem : tensorOutB->data()) std::cout << elem << "  ";
 }

 ```
@ -94,34 +124,72 @@ int main() {
 The [Python package](https://kompute.cc/overview/python-package.html) provides a [high level interactive interface](https://kompute.cc/overview/python-reference.html) that enables for experimentation whilst ensuring high performance and fast development workflows.

 ```python
+
 # 1. Create Kompute Manager with default settings (device 0 and first compute compatible queue)
-mgr = Manager()
+mgr = kp.Manager()

-# 2. Create and initialise Kompute Tensors (can be initialized with List[] or np.Array)
-tensor_in_a = Tensor([2, 2, 2])
-tensor_in_b = Tensor([1, 2, 3])
-tensor_out = Tensor([0, 0, 0])
+# 2. Create and initialise Kompute Tensors through manager
+tensor_in_a = mgr.tensor([2, 2, 2])
+tensor_in_b = mgr.tensor([1, 2, 3])
+tensor_out_a = mgr.tensor([0, 0, 0])
+tensor_out_b = mgr.tensor([0, 0, 0])

-mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
+params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b]

-# 3. Specify "multiply shader" code (can also be raw string, spir-v bytes or file path)
-@python2shader
-def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
-                            data1=("buffer", 0, Array(f32)),
-                            data2=("buffer", 1, Array(f32)),
-                            data3=("buffer", 2, Array(f32))):
-    i = index.x
-    data3[i] = data1[i] * data2[i]
+# 3. Create algorithm based on shader (supports buffers & push/spec constants)
+shader = """
+    #version 450

-# 4. Run multiplication operation synchronously
-mgr.eval_algo_data_def(
-    [tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
+    layout (local_size_x = 1) in;

-# 5. Map results back from GPU memory to print the results
-mgr.eval_tensor_sync_local_def([tensor_out])
+    // The input tensors bind index is relative to index in parameter passed
+    layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
+    layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
+    layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
+    layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
+
+    // Kompute supports push constants updated on dispatch
+    layout(push_constant) uniform PushConstants {
+        float val;
+    } push_const;
+
+    // Kompute also supports spec constants on initialization
+    layout(constant_id = 0) const float const_one = 0;
+
+    void main() {
+        uint index = gl_GlobalInvocationID.x;
+        out_a[index] += in_a[index] * in_b[index];
+        out_b[index] += const_one * push_const.val;
+    }
+"""
+
+workgroup = (3, 1, 1)
+spec_consts = [2]
+push_consts_a = [2]
+push_consts_b = [3]
+
+algo = mgr.algorithm(params, kp.Shader.compile_source(shader), workgroup, spec_consts)
+
+# 4. Run operation synchronously using sequence
+(mgr.sequence()
+    .record(kp.OpTensorSyncDevice(params))
+    .record(kp.OpAlgoDispatch(algo, push_consts_a))
+    .record(kp.OpAlgoDispatch(algo, push_consts_b))
+    .eval())
+
+# 5. Sync results from the GPU asynchronously
+sq = mgr.sequence()
+sq.eval_async(kp.OpTensorSyncLocal(params))
+
+# ... Do other work asynchronously whilst GPU finishes
+
+sq.eval_await()
+
+# Prints the first output which is: { 4, 8, 12 }
+print(tensor_out_a)
+# Prints the second output which is: { 10, 10, 10 }
+print(tensor_out_b)

-# Prints [2.0, 4.0, 6.0]
-print(tensor_out.data())
 ```

 ### Interactive Notebooks & Hands on Videos
--- a/python/test/test_kompute.py
+++ b/python/test/test_kompute.py
@ -30,6 +30,63 @@ kp_log = logging.getLogger("kp")
 #
 #    assert tensor_out.data() == [2.0, 4.0, 6.0]

+def test_end_to_end():
+
+    mgr = kp.Manager()
+
+    tensor_in_a = mgr.tensor([2, 2, 2])
+    tensor_in_b = mgr.tensor([1, 2, 3])
+    tensor_out_a = mgr.tensor([0, 0, 0])
+    tensor_out_b = mgr.tensor([0, 0, 0])
+
+    params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b]
+
+    shader = """
+        #version 450
+
+        layout (local_size_x = 1) in;
+
+        // The input tensors bind index is relative to index in parameter passed
+        layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
+        layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
+        layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
+        layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
+
+        // Kompute supports push constants updated on dispatch
+        layout(push_constant) uniform PushConstants {
+            float val;
+        } push_const;
+
+        // Kompute also supports spec constants on initialization
+        layout(constant_id = 0) const float const_one = 0;
+
+        void main() {
+            uint index = gl_GlobalInvocationID.x;
+            out_a[index] += in_a[index] * in_b[index];
+            out_b[index] += const_one * push_const.val;
+        }
+    """
+
+    workgroup = (3, 1, 1)
+    spec_consts = [2]
+    push_consts_a = [2]
+    push_consts_b = [3]
+
+    algo = mgr.algorithm(params, kp.Shader.compile_source(shader), workgroup, spec_consts)
+
+    (mgr.sequence()
+        .record(kp.OpTensorSyncDevice(params))
+        .record(kp.OpAlgoDispatch(algo, push_consts_a))
+        .record(kp.OpAlgoDispatch(algo, push_consts_b))
+        .eval())
+
+    sq = mgr.sequence()
+    sq.eval_async(kp.OpTensorSyncLocal(params))
+
+    sq.eval_await()
+
+    assert tensor_out_a.data().tolist() == [4, 8, 12]
+    assert tensor_out_b.data().tolist() == [10, 10, 10]


 def test_shader_str():
--- a/test/TestMultipleAlgoExecutions.cpp
+++ b/test/TestMultipleAlgoExecutions.cpp
@ -3,6 +3,66 @@

 #include "kompute/Kompute.hpp"

+TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) {
+
+    kp::Manager mgr; 
+
+    auto tensorInA = mgr.tensor({ 2., 2., 2. });
+    auto tensorInB = mgr.tensor({ 1., 2., 3. });
+    auto tensorOutA = mgr.tensor({ 0., 0., 0. });
+    auto tensorOutB = mgr.tensor({ 0., 0., 0. });
+
+    std::string shader = (R"(
+        #version 450
+
+        layout (local_size_x = 1) in;
+
+        // The input tensors bind index is relative to index in parameter passed
+        layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
+        layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
+        layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
+        layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
+
+        // Kompute supports push constants updated on dispatch
+        layout(push_constant) uniform PushConstants {
+            float val;
+        } push_const;
+
+        // Kompute also supports spec constants on initialization
+        layout(constant_id = 0) const float const_one = 0;
+
+        void main() {
+            uint index = gl_GlobalInvocationID.x;
+            out_a[index] += in_a[index] * in_b[index];
+            out_b[index] += const_one * push_const.val;
+        }
+    )");
+
+    std::vector<std::shared_ptr<kp::Tensor>> params = {tensorInA, tensorInB, tensorOutA, tensorOutB};
+
+    kp::Workgroup workgroup({3, 1, 1});
+    kp::Constants specConsts({ 2 });
+    kp::Constants pushConstsA({ 2.0 });
+    kp::Constants pushConstsB({ 3.0 });
+
+    auto algorithm = mgr.algorithm(params, kp::Shader::compile_source(shader), workgroup, specConsts);
+
+    // Run operation synchronously using sequence
+    mgr.sequence()
+        ->record<kp::OpTensorSyncDevice>(params)
+        ->record<kp::OpAlgoDispatch>(algorithm, pushConstsA)
+        ->record<kp::OpAlgoDispatch>(algorithm, pushConstsB)
+        ->eval();
+
+    auto sq = mgr.sequence();
+    sq->evalAsync<kp::OpTensorSyncLocal>(params);
+
+    sq->evalAwait();
+
+    EXPECT_EQ(tensorOutA->data(), std::vector<float>({ 4, 8, 12 }));
+    EXPECT_EQ(tensorOutB->data(), std::vector<float>({ 10, 10, 10 }));
+}
+
 TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
 {