diff --git a/examples/python_naive_matmul/README.md b/examples/python_naive_matmul/README.md
new file mode 100644
index 000000000..26a89172b
--- /dev/null
+++ b/examples/python_naive_matmul/README.md
@@ -0,0 +1,9 @@
+# Naive Matmul Implementation
+
+This example demonstrates a basic matrix multiplication (matmul) implementation using Python and vulkan-kompute.
+
+To test the implementation, simply run the `matmul.py` script:
+
+```
+python matmul.py
+```
diff --git a/examples/python_naive_matmul/matmul.py b/examples/python_naive_matmul/matmul.py
new file mode 100644
index 000000000..1e7caa871
--- /dev/null
+++ b/examples/python_naive_matmul/matmul.py
@@ -0,0 +1,75 @@
+import kp
+import numpy as np
+
+
+def main():
+    """Multiply two square matrices on the GPU and print the result.
+
+    Builds two upper-triangular input tensors and a zeroed output tensor,
+    compiles a GLSL compute shader in which each invocation computes one
+    element of the product, runs it through a Kompute sequence and prints
+    the input and output matrices.
+    """
+    mgr = kp.Manager()
+
+    tensor_size = 4
+    tensor_shape = [tensor_size, tensor_size]
+    tensor_in_1 = mgr.tensor(np.triu(np.ones(tensor_shape)))
+    tensor_in_2 = mgr.tensor(np.triu(np.ones(tensor_shape)))
+    tensor_out = mgr.tensor(np.zeros(tensor_shape))
+
+    print(f'Input tensors:\n'
+          f'{tensor_in_1.data().reshape(tensor_shape)}\n'
+          f'{tensor_in_2.data().reshape(tensor_shape)}\n')
+
+    params = [tensor_in_1, tensor_in_2, tensor_out]
+
+    # The shader indexes the flat buffers in row-major order, i.e. element
+    # [row, col] is at row * tensor_size + col, which matches the
+    # reshape(tensor_shape) used on the host side to display the tensors.
+    # NOTE(review): assumes mgr.tensor flattens the array in C order - confirm.
+    matmul_shader = kp.Shader.compile_source('''
+#version 450
+
+layout (local_size_x = 1, local_size_y = 1) in;
+
+layout (set = 0, binding = 0) readonly buffer buf_in_tensor_1 { float in_tensor_1[]; };
+layout (set = 0, binding = 1) readonly buffer buf_in_tensor_2 { float in_tensor_2[]; };
+layout (set = 0, binding = 2) writeonly buffer buf_out_tensor { float out_tensor[]; };
+
+layout (constant_id = 0) const float tensor_size_f = 0;
+
+
+void main()
+{
+    uint globalRow = gl_GlobalInvocationID.x;
+    uint globalCol = gl_GlobalInvocationID.y;
+    uint tensor_size = uint(tensor_size_f);
+    float acc = 0.0;
+    // out[row][col] = sum over k of in_1[row][k] * in_2[k][col]
+    for(uint k = 0u; k < tensor_size; k++)
+        acc += in_tensor_1[(globalRow * tensor_size) + k] * in_tensor_2[(k * tensor_size) + globalCol];
+    out_tensor[(globalRow * tensor_size) + globalCol] = acc;
+}''')
+
+    # The local size is 1x1, so dispatching one workgroup per output element
+    # gives exactly one shader invocation per element of the result.
+    algo = mgr.algorithm(
+        params,  # params
+        matmul_shader,  # spirv
+        (*tensor_shape, 1),  # workgroup
+        [float(tensor_size)],  # spec_consts (cast to uint in the shader)
+        [])  # push_consts
+
+    # Copy the tensors to the device, run the shader, then copy them back.
+    (mgr.sequence()
+     .record(kp.OpTensorSyncDevice(params))
+     .record(kp.OpAlgoDispatch(algo))
+     .record(kp.OpTensorSyncLocal(params))
+     .eval())
+
+    print(f'Output :\n{tensor_out.data().reshape(tensor_shape)}')
+
+
+if __name__ == '__main__':
+    main()