Updated examples to new interface

This commit is contained in:
Alejandro Saucedo 2021-03-01 21:13:08 +00:00
parent f163aaf5e8
commit 7f686b47da
6 changed files with 410 additions and 607 deletions

View file

@ -14,17 +14,19 @@ Then you can interact with it from your interpreter. Below is the same sample as
.. code-block:: python
:linenos:
from kp import Manager, Tensor
from kp import Manager, Tensor, OpTensorSyncDevice, OpTensorSyncLocal, OpAlgoDispatch
from pyshader import python2shader, ivec3, f32, Array
mgr = Manager()
# Can be initialized with List[] or np.Array
tensor_in_a = Tensor([2, 2, 2])
tensor_in_b = Tensor([1, 2, 3])
tensor_out = Tensor([0, 0, 0])
tensor_in_a = mgr.tensor([2, 2, 2])
tensor_in_b = mgr.tensor([1, 2, 3])
tensor_out = mgr.tensor([0, 0, 0])
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
sq = mgr.sequence()
sq.eval(OpTensorSyncDevice([tensor_in_a, tensor_in_b, tensor_out]))
# Define the function via PyShader or directly as glsl string or spirv bytes
@python2shader
@ -35,15 +37,13 @@ Then you can interact with it from your interpreter. Below is the same sample as
i = index.x
data3[i] = data1[i] * data2[i]
algo = mgr.algorithm([tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
# Run shader operation synchronously
mgr.eval_algo_data_def(
[tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
sq.eval(OpAlgoDispatch(algo))
sq.eval(OpTensorSyncLocal([tensor_out]))
mgr.eval_await_def()
mgr.eval_tensor_sync_local_def([tensor_out])
assert tensor_out.data() == [2.0, 4.0, 6.0]
assert tensor_out.data().tolist() == [2.0, 4.0, 6.0]
Python Example (Extended)
@ -55,6 +55,7 @@ Similarly you can find the same extended example as above:
:linenos:
from kp import Manager, Tensor
import kp
from pyshader import python2shader, ivec3, f32, Array
mgr = Manager(0, [2])
@ -77,20 +78,19 @@ Similarly you can find the same extended example as above:
i = index.x
data3[i] = data1[i] * data2[i]
# Run shader operation asynchronously and then await
mgr.eval_async_algo_data_def(
[tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
mgr.eval_await_def()
algo = mgr.algorithm([tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
seq.begin()
seq.record_tensor_sync_local([tensor_in_a])
seq.record_tensor_sync_local([tensor_in_b])
seq.record_tensor_sync_local([tensor_out])
seq.end()
# Run shader operation asynchronously and then await
mgr.eval_async(kp.OpAlgoDispatch(algo))
mgr.eval_await()
seq.record(kp.OpTensorSyncLocal([tensor_in_a]))
seq.record(kp.OpTensorSyncLocal([tensor_in_b]))
seq.record(kp.OpTensorSyncLocal([tensor_out]))
seq.eval()
assert tensor_out.data() == [2.0, 4.0, 6.0]
assert tensor_out.data().tolist() == [2.0, 4.0, 6.0]
Kompute Operation Capabilities
^^^^^
@ -101,33 +101,29 @@ Handling multiple capabilities of processing can be done by compute shaders being
:linenos:
from kp import Manager
import kp
# We'll assume we have the shader data available
from my_spv_shader_data import mult_shader, sum_shader
mgr = Manager()
t1 = mgr.build_tensor([2,2,2])
t2 = mgr.build_tensor([1,2,3])
t3 = mgr.build_tensor([1,2,3])
t1 = mgr.tensor([2,2,2])
t2 = mgr.tensor([1,2,3])
t3 = mgr.tensor([1,2,3])
mgr.sequence().eval(kp.OpTensorSyncLocal([t1, t3]))
# Create multiple separate sequences
sq_mult = mgr.create_sequence("SQ_MULT")
sq_sum = mgr.create_sequence("SQ_SUM")
sq_sync = mgr.create_sequence("SQ_SYNC")
sq_mult = mgr.sequence()
sq_sum = mgr.sequence()
sq_sync = mgr.sequence()
# Initialize sq_mult
sq_mult.begin()
sq_mult.record_algo_data([t1, t2, t3], add_shader)
sq_mult.end()
sq_mult.record(kp.OpAlgoDispatch(mgr.algorithm([t1, t2, t3], mult_shader)))
sq_sum.begin()
sq_sum.record_algo_data([t3, t2, t1], sum_shader)
sq_sum.end()
sq_sum.record(kp.OpAlgoDispatch(mgr.algorithm([t3, t2, t1], sum_shader)))
sq_sync.begin()
sq_sync.record_tensor_sync_local([t1, t3])
sq_sync.end()
sq_sync.record(kp.OpTensorSyncLocal([t1, t3]))
# Run multiple iterations
for i in range(10):
@ -147,6 +143,7 @@ Similar to the logistic regression implementation in the C++ examples section, b
:linenos:
from kp import Manager, Tensor
import kp
from pyshader import python2shader, ivec3, f32, Array
@python2shader
@ -189,38 +186,37 @@ Similar to the logistic regression implementation in the C++ examples section, b
l_out[i] = loss
mgr = Manager()
# First we create input and output tensors for shader
tensor_x_i = Tensor([0.0, 1.0, 1.0, 1.0, 1.0])
tensor_x_j = Tensor([0.0, 0.0, 0.0, 1.0, 1.0])
tensor_x_i = mgr.tensor([0.0, 1.0, 1.0, 1.0, 1.0])
tensor_x_j = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0])
tensor_y = Tensor([0.0, 0.0, 0.0, 1.0, 1.0])
tensor_y = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0])
tensor_w_in = Tensor([0.001, 0.001])
tensor_w_out_i = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
tensor_w_out_j = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
tensor_w_in = mgr.tensor([0.001, 0.001])
tensor_w_out_i = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
tensor_w_out_j = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
tensor_b_in = Tensor([0.0])
tensor_b_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
tensor_b_in = mgr.tensor([0.0])
tensor_b_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
tensor_l_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
tensor_l_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
tensor_m = Tensor([ 5.0 ])
tensor_m = mgr.tensor([ 5.0 ])
# We store them in an array for easier interaction
params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m]
mgr = Manager()
mgr.eval_tensor_create_def(params)
mgr.sequence().eval(kp.OpTensorSyncDevice(params))
# Record commands for efficient evaluation
sq = mgr.create_sequence()
sq.begin()
sq.record_tensor_sync_device([tensor_w_in, tensor_b_in])
sq.record_algo_data(params, compute_shader.to_spirv())
sq.record_tensor_sync_local([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out])
sq.end()
sq = mgr.sequence()
sq.record(kp.OpTensorSyncDevice([tensor_w_in, tensor_b_in]))
sq.record(kp.OpAlgoDispatch(mgr.algorithm(params, compute_shader.to_spirv())))
sq.record(kp.OpTensorSyncLocal([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out]))
ITERATIONS = 100
learning_rate = 0.1