Merge pull request #91 from EthicalML/python_shader_extension
Python shader extension
This commit is contained in:
commit
de6ddd6740
8 changed files with 1277 additions and 43 deletions
29
README.md
29
README.md
|
|
@ -306,8 +306,18 @@ tensor_out = Tensor([0, 0, 0])
|
|||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
shaderFilePath = "shaders/glsl/opmult.comp"
|
||||
mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath)
|
||||
# Define the function via PyShader or directly as glsl string or spirv bytes
|
||||
@python2shader
|
||||
def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
|
||||
data1=("buffer", 0, Array(f32)),
|
||||
data2=("buffer", 1, Array(f32)),
|
||||
data3=("buffer", 2, Array(f32))):
|
||||
i = index.x
|
||||
data3[i] = data1[i] * data2[i]
|
||||
|
||||
# Run shader operation synchronously
|
||||
mgr.eval_algo_data_def(
|
||||
[tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
|
||||
|
||||
# Alternatively can pass raw string/bytes:
|
||||
# shaderFileData = """ shader code here... """
|
||||
|
|
@ -332,13 +342,22 @@ tensor_in_a = Tensor([2, 2, 2])
|
|||
tensor_in_b = Tensor([1, 2, 3])
|
||||
tensor_out = Tensor([0, 0, 0])
|
||||
|
||||
shaderFilePath = "../../shaders/glsl/opmult.comp"
|
||||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
seq = mgr.create_sequence("op")
|
||||
|
||||
mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath)
|
||||
# Define the function via PyShader or directly as glsl string or spirv bytes
|
||||
@python2shader
|
||||
def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
|
||||
data1=("buffer", 0, Array(f32)),
|
||||
data2=("buffer", 1, Array(f32)),
|
||||
data3=("buffer", 2, Array(f32))):
|
||||
i = index.x
|
||||
data3[i] = data1[i] * data2[i]
|
||||
|
||||
# Run shader operation asynchronously and then await
|
||||
mgr.eval_async_algo_data_def(
|
||||
[tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
|
||||
mgr.eval_await_def()
|
||||
|
||||
seq.begin()
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ Below is a diagram that provides insights on the relationship between Vulkan Kom
|
|||
.. image:: ../images/kompute-architecture.jpg
|
||||
:width: 70%
|
||||
|
||||
Python Components
|
||||
Core Python Components
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The Python package exposes three main classes:
|
||||
|
|
@ -30,7 +30,89 @@ More specifically, it can be through the following functions:
|
|||
* mgr.eval_async_<opname>_def - Runs operation asynchronously under a new anonymous sequence
|
||||
* seq.record_<opname> - Records operation in sequence (requires sequence to be in recording mode)
|
||||
|
||||
You can see these operations being used in the `Simple Python example <https://kompute.cc/index.html#python-example-simple>`_ and in the `Extended Python Example <https://kompute.cc/index.html#python-example-extended>`_.
|
||||
Python Example (Simple)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Then you can interact with it from your interpreter. Below is the same sample as above "Your First Kompute (Simple Version)" but in Python:
|
||||
|
||||
.. code-block:: python
|
||||
:linenos:
|
||||
|
||||
mgr = Manager()
|
||||
|
||||
# Can be initialized with List[] or np.Array
|
||||
tensor_in_a = Tensor([2, 2, 2])
|
||||
tensor_in_b = Tensor([1, 2, 3])
|
||||
tensor_out = Tensor([0, 0, 0])
|
||||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
# Define the function via PyShader or directly as glsl string or spirv bytes
|
||||
@python2shader
|
||||
def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
|
||||
data1=("buffer", 0, Array(f32)),
|
||||
data2=("buffer", 1, Array(f32)),
|
||||
data3=("buffer", 2, Array(f32))):
|
||||
i = index.x
|
||||
data3[i] = data1[i] * data2[i]
|
||||
|
||||
# Run shader operation synchronously
|
||||
mgr.eval_algo_data_def(
|
||||
[tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
|
||||
|
||||
# Alternatively can pass raw string/bytes:
|
||||
# shaderFileData = """ shader code here... """
|
||||
# mgr.eval_algo_data_def([tensor_in_a, tensor_in_b, tensor_out], list(shaderFileData))
|
||||
|
||||
mgr.eval_await_def()
|
||||
|
||||
mgr.eval_tensor_sync_local_def([tensor_out])
|
||||
|
||||
assert tensor_out.data() == [2.0, 4.0, 6.0]
|
||||
|
||||
|
||||
Python Example (Extended)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Similarly you can find the same extended example as above:
|
||||
|
||||
.. code-block:: python
|
||||
:linenos:
|
||||
|
||||
mgr = Manager(0, [2])
|
||||
|
||||
# Can be initialized with List[] or np.Array
|
||||
tensor_in_a = Tensor([2, 2, 2])
|
||||
tensor_in_b = Tensor([1, 2, 3])
|
||||
tensor_out = Tensor([0, 0, 0])
|
||||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
seq = mgr.create_sequence("op")
|
||||
|
||||
# Define the function via PyShader or directly as glsl string or spirv bytes
|
||||
@python2shader
|
||||
def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
|
||||
data1=("buffer", 0, Array(f32)),
|
||||
data2=("buffer", 1, Array(f32)),
|
||||
data3=("buffer", 2, Array(f32))):
|
||||
i = index.x
|
||||
data3[i] = data1[i] * data2[i]
|
||||
|
||||
# Run shader operation asynchronously and then await
|
||||
mgr.eval_async_algo_data_def(
|
||||
[tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
|
||||
mgr.eval_await_def()
|
||||
|
||||
seq.begin()
|
||||
seq.record_tensor_sync_local([tensor_in_a])
|
||||
seq.record_tensor_sync_local([tensor_in_b])
|
||||
seq.record_tensor_sync_local([tensor_out])
|
||||
seq.end()
|
||||
|
||||
seq.eval()
|
||||
|
||||
assert tensor_out.data() == [2.0, 4.0, 6.0]
|
||||
|
||||
Kompute Operation Capabilities
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
|
@ -38,7 +120,8 @@ Kompute Operation Capabilities
|
|||
Handling multiple capabilities of processing can be done by compute shaders being loaded into separate sequences. The example below shows how this can be done:
|
||||
|
||||
.. code-block:: python
|
||||
:linenos:
|
||||
:linenos:
|
||||
|
||||
from kp import Manager
|
||||
|
||||
# We'll assume we have the shader data available
|
||||
|
|
@ -77,6 +160,117 @@ Handling multiple capabilites of processing can be done by compute shaders being
|
|||
|
||||
print(t1.data(), t2.data(), t3.data())
|
||||
|
||||
Machine Learning Logistic Regression Implementation
|
||||
^^^^^^
|
||||
|
||||
Similar to the logistic regression implementation in the C++ examples section, below you can find the Python implementation of the Logistic Regression algorithm.
|
||||
|
||||
.. code-block:: python
|
||||
:linenos:
|
||||
|
||||
@python2shader
|
||||
def compute_shader(
|
||||
index = ("input", "GlobalInvocationId", ivec3),
|
||||
x_i = ("buffer", 0, Array(f32)),
|
||||
x_j = ("buffer", 1, Array(f32)),
|
||||
y = ("buffer", 2, Array(f32)),
|
||||
w_in = ("buffer", 3, Array(f32)),
|
||||
w_out_i = ("buffer", 4, Array(f32)),
|
||||
w_out_j = ("buffer", 5, Array(f32)),
|
||||
b_in = ("buffer", 6, Array(f32)),
|
||||
b_out = ("buffer", 7, Array(f32)),
|
||||
l_out = ("buffer", 8, Array(f32)),
|
||||
M = ("buffer", 9, Array(f32))):
|
||||
|
||||
i = index.x
|
||||
|
||||
m = M[0]
|
||||
|
||||
w_curr = vec2(w_in[0], w_in[1])
|
||||
b_curr = b_in[0]
|
||||
|
||||
x_curr = vec2(x_i[i], x_j[i])
|
||||
y_curr = y[i]
|
||||
|
||||
z_dot = w_curr @ x_curr
|
||||
z = z_dot + b_curr
|
||||
y_hat = 1.0 / (1.0 + exp(-z))
|
||||
|
||||
d_z = y_hat - y_curr
|
||||
d_w = (1.0 / m) * x_curr * d_z
|
||||
d_b = (1.0 / m) * d_z
|
||||
|
||||
loss = -((y_curr * log(y_hat)) + ((1.0 - y_curr) * log(1.0 - y_hat)))
|
||||
|
||||
w_out_i[i] = d_w.x
|
||||
w_out_j[i] = d_w.y
|
||||
b_out[i] = d_b
|
||||
l_out[i] = loss
|
||||
|
||||
|
||||
# First we create input and output tensors for shader
|
||||
tensor_x_i = Tensor([0.0, 1.0, 1.0, 1.0, 1.0])
|
||||
tensor_x_j = Tensor([0.0, 0.0, 0.0, 1.0, 1.0])
|
||||
|
||||
tensor_y = Tensor([0.0, 0.0, 0.0, 1.0, 1.0])
|
||||
|
||||
tensor_w_in = Tensor([0.001, 0.001])
|
||||
tensor_w_out_i = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
tensor_w_out_j = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
|
||||
tensor_b_in = Tensor([0.0])
|
||||
tensor_b_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
|
||||
tensor_l_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
|
||||
tensor_m = Tensor([ 5.0 ])
|
||||
|
||||
# We store them in an array for easier interaction
|
||||
params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
|
||||
tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m]
|
||||
|
||||
mgr = Manager()
|
||||
|
||||
mgr.eval_tensor_create_def(params)
|
||||
|
||||
# Record commands for efficient evaluation
|
||||
sq = mgr.create_sequence()
|
||||
sq.begin()
|
||||
sq.record_tensor_sync_device([tensor_w_in, tensor_b_in])
|
||||
sq.record_algo_data(params, compute_shader.to_spirv())
|
||||
sq.record_tensor_sync_local([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out])
|
||||
sq.end()
|
||||
|
||||
ITERATIONS = 100
|
||||
learning_rate = 0.1
|
||||
|
||||
# Perform machine learning training and inference across all input X and Y
|
||||
for i_iter in range(ITERATIONS):
|
||||
sq.eval()
|
||||
|
||||
# Calculate the parameters based on the respective derivatives calculated
|
||||
w_in_i_val = tensor_w_in.data()[0]
|
||||
w_in_j_val = tensor_w_in.data()[1]
|
||||
b_in_val = tensor_b_in.data()[0]
|
||||
|
||||
for j_iter in range(tensor_b_out.size()):
|
||||
w_in_i_val -= learning_rate * tensor_w_out_i.data()[j_iter]
|
||||
w_in_j_val -= learning_rate * tensor_w_out_j.data()[j_iter]
|
||||
b_in_val -= learning_rate * tensor_b_out.data()[j_iter]
|
||||
|
||||
# Update the parameters to process inference again
|
||||
tensor_w_in.set_data([w_in_i_val, w_in_j_val])
|
||||
tensor_b_in.set_data([b_in_val])
|
||||
|
||||
assert tensor_w_in.data()[0] < 0.01
|
||||
assert tensor_w_in.data()[0] > 0.0
|
||||
assert tensor_w_in.data()[1] > 1.5
|
||||
assert tensor_b_in.data()[0] < 0.7
|
||||
|
||||
# Print outputs
|
||||
print(tensor_w_in.data())
|
||||
print(tensor_b_in.data())
|
||||
|
||||
|
||||
Package Installation
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
|
|
|||
|
|
@ -6,9 +6,6 @@ Python Class Documentation & Reference
|
|||
This section provides a breakdown of the Python classes and what each of their functions provide.
|
||||
Below is a diagram that provides insights on the relationship between Vulkan Kompute objects and Vulkan resources, which primarily encompass ownership of either CPU and/or GPU memory.
|
||||
|
||||
.. image:: ../images/kompute-architecture.jpg
|
||||
:width: 70%
|
||||
|
||||
Manager
|
||||
-------
|
||||
|
||||
|
|
|
|||
846
python/src/docstrings.hpp
Normal file
846
python/src/docstrings.hpp
Normal file
|
|
@ -0,0 +1,846 @@
|
|||
/*
|
||||
This file contains docstrings for use in the Python bindings.
|
||||
Do not edit! They were automatically extracted by pybind11_mkdoc.
|
||||
*/
|
||||
|
||||
#define __EXPAND(x) x
|
||||
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
|
||||
#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
|
||||
#define __CAT1(a, b) a ## b
|
||||
#define __CAT2(a, b) __CAT1(a, b)
|
||||
#define __DOC1(n1) __doc_##n1
|
||||
#define __DOC2(n1, n2) __doc_##n1##_##n2
|
||||
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
|
||||
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
|
||||
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
|
||||
#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
|
||||
#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
|
||||
#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
|
||||
|
||||
#if defined(__GNUG__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-variable"
|
||||
#endif
|
||||
|
||||
|
||||
static const char *__doc_kp_Algorithm =
|
||||
R"doc(Abstraction for compute shaders that are run on top of tensors grouped
|
||||
via ParameterGroups (which group descriptorsets))doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_Algorithm =
|
||||
R"doc(Base constructor for Algorithm. Should not be used unless explicitly
|
||||
intended.)doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_Algorithm_2 =
|
||||
R"doc(Default constructor for Algorithm
|
||||
|
||||
@param device The Vulkan device to use for creating resources @param
|
||||
commandBuffer The vulkan command buffer to bind the pipeline and
|
||||
shaders)doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_createDescriptorPool = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_createParameters = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_createPipeline = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_createShaderModule = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_init =
|
||||
R"doc(Initialiser for the shader data provided to the algorithm as well as
|
||||
tensor parameters that will be used in shader.
|
||||
|
||||
@param shaderFileData The bytes in spir-v format of the shader
|
||||
@param tensorParams The Tensors to be used in the Algorithm / shader for
|
||||
processing)doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mCommandBuffer = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mDescriptorPool = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mDescriptorSet = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mDescriptorSetLayout = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mDevice = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mFreeDescriptorPool = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mFreeDescriptorSet = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mFreeDescriptorSetLayout = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mFreePipeline = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mFreePipelineCache = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mFreePipelineLayout = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mFreeShaderModule = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mPipeline = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mPipelineCache = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mPipelineLayout = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_mShaderModule = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_recordDispatch =
|
||||
R"doc(Records the dispatch function with the provided template parameters or
|
||||
alternatively using the size of the tensor by default.
|
||||
|
||||
@param x Layout X dispatch value @param y Layout Y dispatch value
|
||||
@param z Layout Z dispatch value)doc";
|
||||
|
||||
static const char *__doc_kp_Manager =
|
||||
R"doc(Base orchestrator which creates and manages device and child
|
||||
components)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_Manager =
|
||||
R"doc(Base constructor and default used which creates the base resources
|
||||
including choosing the device 0 by default.)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_Manager_2 =
|
||||
R"doc(Similar to base constructor but allows the user to provide the device
|
||||
they would like to create the resources on.
|
||||
|
||||
@param physicalDeviceIndex The index of the physical device to use
|
||||
@param familyQueueIndices (Optional) List of queue indices to add for
|
||||
explicit allocation @param totalQueues The total number of compute
|
||||
queues to create.)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_Manager_3 =
|
||||
R"doc(Manager constructor which allows your own vulkan application to
|
||||
integrate with the vulkan kompute use.
|
||||
|
||||
@param instance Vulkan compute instance to base this application
|
||||
@param physicalDevice Vulkan physical device to use for application
|
||||
@param device Vulkan logical device to use for all base resources
|
||||
@param physicalDeviceIndex Index for vulkan physical device used)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_buildTensor =
|
||||
R"doc(Function that simplifies the common workflow of tensor creation and
|
||||
initialization. It will take the constructor parameters for a Tensor
|
||||
and will use it to create a new Tensor and then create it using
|
||||
the OpCreateTensor command.
|
||||
|
||||
@param data The data to initialize the tensor with @param tensorType
|
||||
The type of tensor to initialize @returns Initialized Tensor with
|
||||
memory Syncd to GPU device)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_createDevice = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_createInstance = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_createManagedSequence =
|
||||
R"doc(Create a new managed Kompute sequence so it's available within the
|
||||
manager.
|
||||
|
||||
@param sequenceName The name for the named sequence to be created, if
|
||||
empty then default indexed value is used @param queueIndex The queue
|
||||
to use from the available queues @return Weak pointer to the manager
|
||||
owned sequence resource)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_evalOp =
|
||||
R"doc(Function that evaluates operation against named sequence.
|
||||
|
||||
@param tensors The tensors to be used in the operation recorded @param
|
||||
sequenceName The name of the sequence to be retrieved or created
|
||||
@param TArgs Template parameters that will be used to initialise
|
||||
Operation to allow for extensible configurations on initialisation)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_evalOpAsync =
|
||||
R"doc(Function that evaluates operation against named sequence
|
||||
asynchronously.
|
||||
|
||||
@param tensors The tensors to be used in the operation recorded @param
|
||||
sequenceName The name of the sequence to be retrieved or created
|
||||
@param params Template parameters that will be used to initialise
|
||||
Operation to allow for extensible configurations on initialisation)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_evalOpAsyncDefault =
|
||||
R"doc(Operation that evaluates operation against default sequence
|
||||
asynchronously.
|
||||
|
||||
@param tensors The tensors to be used in the operation recorded @param
|
||||
params Template parameters that will be used to initialise Operation
|
||||
to allow for extensible configurations on initialisation)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_evalOpAwait =
|
||||
R"doc(Operation that awaits for named sequence to finish.
|
||||
|
||||
@param sequenceName The name of the sequence to wait for termination
|
||||
@param waitFor The amount of time to wait before timing out)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_evalOpAwaitDefault =
|
||||
R"doc(Operation that awaits for default sequence to finish.
|
||||
|
||||
@param tensors The tensors to be used in the operation recorded @param
|
||||
params Template parameters that will be used to initialise Operation
|
||||
to allow for extensible configurations on initialisation)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_evalOpDefault =
|
||||
R"doc(Function that evaluates operation against a newly created sequence.
|
||||
|
||||
@param tensors The tensors to be used in the operation recorded @param
|
||||
TArgs Template parameters that will be used to initialise Operation to
|
||||
allow for extensible configurations on initialisation)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_getOrCreateManagedSequence =
|
||||
R"doc(Get or create a managed Sequence that will be contained by this
|
||||
manager. If the named sequence does not currently exist, it would be
|
||||
created and initialised.
|
||||
|
||||
@param sequenceName The name for the named sequence to be retrieved or
|
||||
created @return Shared pointer to the manager owned sequence resource)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_mComputeQueueFamilyIndices = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_mComputeQueues = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_mCurrentSequenceIndex = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_mDevice = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_mFreeDevice = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_mFreeInstance = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_mInstance = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_mManagedSequences = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_mPhysicalDevice = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_mPhysicalDeviceIndex = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase =
|
||||
R"doc(Operation that provides a general abstraction that simplifies the use
|
||||
of algorithm and parameter components which can be used with shaders.
|
||||
By default it enables the user to provide a dynamic number of tensors
|
||||
which are then passed as inputs.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_x = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_y = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_z = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_OpAlgoBase = R"doc(Base constructor, should not be used unless explicitly intended.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_OpAlgoBase_2 =
|
||||
R"doc(Default constructor with parameters that provides the bare minimum
|
||||
requirements for the operations to be able to create and manage their
|
||||
sub-components.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that are to be used in this operation @param
|
||||
shaderFilePath Optional parameter to specify the shader to load
|
||||
(either in spirv or raw format) @param komputeWorkgroup Optional
|
||||
parameter to specify the layout for processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_OpAlgoBase_3 =
|
||||
R"doc(Constructor that enables a file to be passed to the operation with the
|
||||
contents of the shader. This can be either in raw format or in
|
||||
compiled SPIR-V binary format.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that are to be used in this operation @param
|
||||
shaderFilePath Parameter to specify the shader to load (either in
|
||||
spirv or raw format) @param komputeWorkgroup Optional parameter to
|
||||
specify the layout for processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_OpAlgoBase_4 =
|
||||
R"doc(Constructor that enables raw shader data to be passed to the main
|
||||
operation which can be either in raw shader glsl code or in compiled
|
||||
SPIR-V binary.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that are to be used in this operation @param
|
||||
shaderDataRaw Optional parameter to specify the shader data either in
|
||||
binary or raw form @param komputeWorkgroup Optional parameter to
|
||||
specify the layout for processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_fetchSpirvBinaryData = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_init =
|
||||
R"doc(The init function is responsible for the initialisation of the
|
||||
algorithm component based on the parameters specified, and allows for
|
||||
extensibility on the options provided. Further dependent classes can
|
||||
perform more specific checks such as ensuring tensors provided are
|
||||
initialised, etc.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mAlgorithm = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mFreeAlgorithm = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mKomputeWorkgroup = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mShaderDataRaw =
|
||||
R"doc(< Optional member variable which can be provided to contain either the
|
||||
raw shader content or the spirv binary content)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mShaderFilePath =
|
||||
R"doc(< Optional member variable which can be provided for the OpAlgoBase to
|
||||
find the data automatically and load for processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_postEval =
|
||||
R"doc(Executes after the recorded commands are submitted, and performs a
|
||||
copy of the GPU Device memory into the staging buffer so the output
|
||||
data can be retrieved.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_preEval = R"doc(Does not perform any preEval commands.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_record =
|
||||
R"doc(This records the commands that are to be sent to the GPU. This
|
||||
includes the barriers that ensure the memory has been copied before
|
||||
going in and out of the shader, as well as the dispatch operation that
|
||||
sends the shader processing to the gpu. This function also records the
|
||||
GPU memory copy of the output data for the staging buffer so it can be
|
||||
read by the host.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoLhsRhsOut =
|
||||
R"doc(Operation base class to simplify the creation of operations that
|
||||
require right hand and left hand side datapoints together with a
|
||||
single output. The expected data passed is two input tensors and one
|
||||
output tensor.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoLhsRhsOut_OpAlgoLhsRhsOut = R"doc(Base constructor, should not be used unless explicitly intended.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoLhsRhsOut_OpAlgoLhsRhsOut_2 =
|
||||
R"doc(Default constructor with parameters that provides the bare minimum
|
||||
requirements for the operations to be able to create and manage their
|
||||
sub-components.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that are to be used in this operation @param
|
||||
freeTensors Whether operation manages the memory of the Tensors @param
|
||||
komputeWorkgroup Optional parameter to specify the layout for
|
||||
processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoLhsRhsOut_init =
|
||||
R"doc(The init function is responsible for ensuring that all of the tensors
|
||||
provided are aligned with requirements such as LHS, RHS and Output
|
||||
tensors, and creates the algorithm component which processes the
|
||||
computation.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorLHS =
|
||||
R"doc(< Reference to the parameter used in the left hand side equation of
|
||||
the shader)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorOutput =
|
||||
R"doc(< Reference to the parameter used in the output of the shader and will
|
||||
be copied with a staging vector)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorOutputStaging = R"doc(< Staging temporary tensor used to copy the output of the tensor)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorRHS =
|
||||
R"doc(< Reference to the parameter used in the right hand side equation of
|
||||
the shader)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoLhsRhsOut_postEval =
|
||||
R"doc(Executes after the recorded commands are submitted, and performs a
|
||||
copy of the GPU Device memory into the staging buffer so the output
|
||||
data can be retrieved.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoLhsRhsOut_record =
|
||||
R"doc(This records the commands that are to be sent to the GPU. This
|
||||
includes the barriers that ensure the memory has been copied before
|
||||
going in and out of the shader, as well as the dispatch operation that
|
||||
sends the shader processing to the gpu. This function also records the
|
||||
GPU memory copy of the output data for the staging buffer so it can be
|
||||
read by the host.)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase =
|
||||
R"doc(Base Operation which provides the high level interface that Kompute
|
||||
operations implement in order to perform a set of actions in the GPU.
|
||||
|
||||
Operations can perform actions on tensors, and optionally can also own
|
||||
an Algorithm with respective parameters. kp::Operations with
|
||||
kp::Algorithms would inherit from kp::OpAlgoBase.)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase_OpBase = R"doc(Base constructor, should not be used unless explicitly intended.)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase_OpBase_2 =
|
||||
R"doc(Default constructor with parameters that provides the bare minimum
|
||||
requirements for the operations to be able to create and manage their
|
||||
sub-components.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that are to be used in this operation @param
|
||||
freeTensors Whether operation manages the memory of the Tensors)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase_init =
|
||||
R"doc(The init function is responsible for setting up all the resources and
|
||||
should be called after the Operation has been created.)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase_mCommandBuffer = R"doc(< Vulkan Command Buffer)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase_mDevice = R"doc(< Vulkan Logical Device)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase_mFreeTensors =
|
||||
R"doc(< Explicit boolean that specifies whether the < tensors are freed (if
|
||||
they are managed))doc";
|
||||
|
||||
static const char *__doc_kp_OpBase_mPhysicalDevice = R"doc(< Vulkan Physical Device)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase_mTensors =
|
||||
R"doc(< Tensors referenced by operation that can be managed < optionally by
|
||||
operation)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase_postEval =
|
||||
R"doc(Post eval is called after the Sequence has called eval and submitted
|
||||
the commands to the GPU for processing, and can be used to perform any
|
||||
tear-down steps required as the computation iteration finishes. It's
|
||||
worth noting that there are situations where eval can be called
|
||||
multiple times, so the resources that are destroyed should not require
|
||||
a re-init unless explicitly provided by the user.)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase_preEval =
|
||||
R"doc(Pre eval is called before the Sequence has called eval and submitted
|
||||
the commands to the GPU for processing, and can be used to perform any
|
||||
per-eval setup steps required as the computation iteration begins.
|
||||
It's worth noting that there are situations where eval can be called
|
||||
multiple times, so the resources that are created should be idempotent
|
||||
in case it's called multiple times in a row.)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase_record =
|
||||
R"doc(The record function is intended to only send a record command or run
|
||||
commands that are expected to record operations that are to be
|
||||
submitted as a batch into the GPU.)doc";
|
||||
|
||||
static const char *__doc_kp_OpMult =
|
||||
R"doc(Operation that performs multiplication on two tensors and outputs on
|
||||
third tensor.)doc";
|
||||
|
||||
static const char *__doc_kp_OpMult_OpMult = R"doc(Base constructor, should not be used unless explicitly intended.)doc";
|
||||
|
||||
static const char *__doc_kp_OpMult_OpMult_2 =
|
||||
R"doc(Default constructor with parameters that provides the bare minimum
|
||||
requirements for the operations to be able to create and manage their
|
||||
sub-components.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that are to be used in this operation @param
|
||||
komputeWorkgroup Optional parameter to specify the layout for
|
||||
processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCopy =
|
||||
R"doc(Operation that copies the data from the first tensor to the rest of
|
||||
the tensors provided, using a record command for all the vectors. This
|
||||
operation does not own/manage the memory of the tensors passed to it.
|
||||
The operation must only receive tensors of type)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCopy_OpTensorCopy = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCopy_OpTensorCopy_2 =
|
||||
R"doc(Default constructor with parameters that provides the core vulkan
|
||||
resources and the tensors that will be used in the operation.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that will be used to create in operation.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCopy_init =
|
||||
R"doc(Performs basic checks such as ensuring there are at least two tensors
|
||||
provided, that they are initialised and that they are not of type
|
||||
TensorTypes::eStorage.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCopy_postEval =
|
||||
R"doc(Copies the local vectors for all the tensors to sync the data with the
|
||||
gpu.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCopy_preEval = R"doc(Does not perform any preEval commands.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCopy_record =
|
||||
R"doc(Records the copy commands from the first tensor into all the other
|
||||
tensors provided. Also optionally records a barrier.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCreate =
|
||||
R"doc(Operation that creates tensor and manages the memory of the components
|
||||
created)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCreate_OpTensorCreate = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCreate_OpTensorCreate_2 =
|
||||
R"doc(Default constructor with parameters that provides the bare minimum
|
||||
requirements for the operations to be able to create and manage their
|
||||
sub-components.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that will be used to create in operation.
|
||||
@param freeTensors Whether operation manages the memory of the Tensors)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCreate_init =
|
||||
R"doc(In charge of initialising the primary Tensor as well as the staging
|
||||
tensor as required. It will only initialise a staging tensor if the
|
||||
Primary tensor is of type Device. For staging tensors it performs a
|
||||
mapDataIntoHostMemory which would perform immediately as opposed to on
|
||||
sequence eval/submission.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCreate_mStagingTensors = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCreate_postEval =
|
||||
R"doc(Performs a copy back into the main tensor to ensure that the data
|
||||
contained is the one that is now being stored in the GPU.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCreate_preEval = R"doc(Does not perform any preEval commands.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCreate_record =
|
||||
R"doc(Record runs the core actions to create the tensors. For device tensors
|
||||
it records a copyCommand to move the data from the staging tensor to
|
||||
the device tensor. The mapping for staging tensors happens in the init
|
||||
function not in the record function.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice =
|
||||
R"doc(Operation that syncs tensor's device by mapping local data into the
|
||||
device memory. For TensorTypes::eDevice it will use a staging tensor
|
||||
to perform the copy. For TensorTypes::eStaging it will only copy the
|
||||
data and perform a map, which will be executed during the record (as
|
||||
opposed to during the sequence eval/submit). This function cannot be
|
||||
carried out for TensorTypes::eStaging.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice_OpTensorSyncDevice = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice_OpTensorSyncDevice_2 =
|
||||
R"doc(Default constructor with parameters that provides the core vulkan
|
||||
resources and the tensors that will be used in the operation. The
|
||||
tensors provided cannot be of type TensorTypes::eStorage.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that will be used to create in operation.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice_init =
|
||||
R"doc(Performs basic checks such as ensuring that there is at least one
|
||||
tensor provided, that they are initialized and that they are not of
|
||||
type TensorTypes::eStaging. For staging tensors in host memory, the map
|
||||
is performed during the init function.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice_mStagingTensors = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice_postEval = R"doc(Does not perform any postEval commands.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice_preEval = R"doc(Does not perform any preEval commands.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice_record =
|
||||
R"doc(For device tensors, it records the copy command to the device tensor
|
||||
from the temporary staging tensor.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal =
|
||||
R"doc(Operation that syncs tensor's local data by mapping the data from
|
||||
device memory into the local vector. For TensorTypes::eDevice it will
|
||||
use a staging tensor to perform the copy. For TensorTypes::eStaging it
|
||||
will only copy the data and perform a map, which will be executed
|
||||
during the postSubmit (there will be no copy during the sequence
|
||||
eval/submit). This function cannot be carried out for
|
||||
TensorTypes::eStaging.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal_2 =
|
||||
R"doc(Default constructor with parameters that provides the core vulkan
|
||||
resources and the tensors that will be used in the operation. The
|
||||
tensors provided cannot be of type TensorTypes::eStorage.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that will be used to create in operation.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal_init =
|
||||
R"doc(Performs basic checks such as ensuring that there is at least one
|
||||
tensor provided, that they are initialized and that they are not of
|
||||
type TensorTypes::eStaging.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal_mStagingTensors = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal_postEval =
|
||||
R"doc(For host tensors it performs the map command from the host memory into
|
||||
local memory.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal_preEval = R"doc(Does not perform any preEval commands.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal_record =
|
||||
R"doc(For device tensors, it records the copy command into the staging
|
||||
tensor from the device tensor.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence = R"doc(Container of operations that can be sent to GPU as batch)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_Sequence =
|
||||
R"doc(Base constructor for Sequence. Should not be used unless explicitly
|
||||
intended.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_Sequence_2 =
|
||||
R"doc(Main constructor for sequence which requires core vulkan components to
|
||||
generate all dependent resources.
|
||||
|
||||
@param physicalDevice Vulkan physical device @param device Vulkan
|
||||
logical device @param computeQueue Vulkan compute queue @param
|
||||
queueIndex Vulkan compute queue index in device)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_begin =
|
||||
R"doc(Begins recording commands for commands to be submitted into the
|
||||
command buffer.
|
||||
|
||||
@return Boolean stating whether execution was successful.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_createCommandBuffer = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_createCommandPool = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_end =
|
||||
R"doc(Ends the recording and stops recording commands when the record
|
||||
command is sent.
|
||||
|
||||
@return Boolean stating whether execution was successful.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_eval =
|
||||
R"doc(Eval sends all the recorded and stored operations in the vector of
|
||||
operations into the gpu as a submit job with a barrier.
|
||||
|
||||
@return Boolean stating whether execution was successful.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_evalAsync =
|
||||
R"doc(Eval Async sends all the recorded and stored operations in the vector
|
||||
of operations into the gpu as a submit job with a barrier. EvalAwait()
|
||||
must be called after to ensure the sequence is terminated correctly.
|
||||
|
||||
@return Boolean stating whether execution was successful.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_evalAwait =
|
||||
R"doc(Eval Await waits for the fence to finish processing and then once it
|
||||
finishes, it runs the postEval of all operations.
|
||||
|
||||
@param waitFor Number of milliseconds to wait before timing out.
|
||||
@return Boolean stating whether execution was successful.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_freeMemoryDestroyGPUResources =
|
||||
R"doc(Destroys and frees the GPU resources which include the buffer and
|
||||
memory and sets the sequence as init=False.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_init =
|
||||
R"doc(Initialises sequence including the creation of the command pool and
|
||||
the command buffer.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_isInit =
|
||||
R"doc(Returns true if the sequence has been successfully initialised.
|
||||
|
||||
@return Boolean stating if sequence has been initialised.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_isRecording =
|
||||
R"doc(Returns true if the sequence is currently recording.
|
||||
|
||||
@return Boolean stating if recording ongoing.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_isRunning =
|
||||
R"doc(Returns true if the sequence is currently running - mostly used for
|
||||
async workloads.
|
||||
|
||||
@return Boolean stating if currently running.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_mCommandBuffer = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_mCommandPool = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_mComputeQueue = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_mDevice = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_mFence = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_mFreeCommandBuffer = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_mFreeCommandPool = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_mIsInit = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_mIsRunning = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_mOperations = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_mPhysicalDevice = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_mQueueIndex = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_mRecording = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_record =
|
||||
R"doc(Record function for operation to be added to the GPU queue in batch.
|
||||
This template requires classes to be derived from the OpBase class.
|
||||
This function also requires the Sequence to be recording, otherwise it
|
||||
will not be able to add the operation.
|
||||
|
||||
@param tensors Vector of tensors to use for the operation @param TArgs
|
||||
Template parameters that are used to initialise operation which allows
|
||||
for extensible configurations on initialisation.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor =
|
||||
R"doc(Structured data used in GPU operations.
|
||||
|
||||
Tensors are the base building block in Kompute to perform operations
|
||||
across GPUs. Each tensor would have a respective Vulkan memory and
|
||||
buffer, which would be used to store their respective data. The
|
||||
tensors can be used for GPU data storage or transfer.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_Tensor = R"doc(Base constructor, should not be used unless explicitly intended.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_Tensor_2 =
|
||||
R"doc(Default constructor with data provided which would be used to create
|
||||
the respective vulkan buffer and memory.
|
||||
|
||||
@param data Vector of data that will be used by the tensor @param
|
||||
tensorType Type for the tensor which is of type TensorTypes)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_TensorTypes =
|
||||
R"doc(Type for tensors created: Device allows memory to be transferred from
|
||||
staging buffers. Staging are host memory visible. Storage are device
|
||||
visible but are not set up to transfer or receive data (only for
|
||||
shader storage).)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_TensorTypes_eDevice = R"doc(< Type is device memory, source and destination)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_TensorTypes_eStaging = R"doc(< Type is host memory, source and destination)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_TensorTypes_eStorage = R"doc(< Type is Device memory (only))doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_constructDescriptorBufferInfo =
|
||||
R"doc(Constructs a vulkan descriptor buffer info which can be used to
|
||||
specify and reference the underlying buffer component of the tensor
|
||||
without exposing it.
|
||||
|
||||
@return Descriptor buffer info with own buffer)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_createBuffer = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_data =
|
||||
R"doc(Returns the vector of data currently contained by the Tensor. It is
|
||||
important to ensure that there is no out-of-sync data with the GPU
|
||||
memory.
|
||||
|
||||
@return Reference to vector of elements representing the data in the
|
||||
tensor.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_freeMemoryDestroyGPUResources =
|
||||
R"doc(Destroys and frees the GPU resources which include the buffer and
|
||||
memory.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_getBufferUsageFlags = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_getMemoryPropertyFlags = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_init =
|
||||
R"doc(Initialiser which calls the initialisation for all the respective
|
||||
tensors as well as creates the respective staging tensors. The staging
|
||||
tensors would only be created for the tensors of type
|
||||
TensorType::eDevice as otherwise there is no need to copy from host
|
||||
memory.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_isInit =
|
||||
R"doc(Returns true if the tensor initialisation function has been carried
|
||||
out successfully, which would mean that the buffer and memory will have
|
||||
been provisioned.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mBuffer = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mData = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mDevice = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mFreeBuffer = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mFreeMemory = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mIsInit = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mMemory = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mPhysicalDevice = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mShape = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mTensorType = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mapDataFromHostMemory =
|
||||
R"doc(Maps data from the Host Visible GPU memory into the data vector. It
|
||||
requires the Tensor to be of staging type for it to work.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mapDataIntoHostMemory =
|
||||
R"doc(Maps data from the data vector into the Host Visible GPU memory. It
|
||||
requires the tensor to be of staging type for it to work.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_memorySize = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_operator_array =
|
||||
R"doc(Overrides the subscript operator to expose the underlying data's
|
||||
subscript operator which in this case would be its underlying
|
||||
vector's.
|
||||
|
||||
@param i The index where the element will be returned from. @return
|
||||
Returns the element in the position requested.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_recordBufferMemoryBarrier =
|
||||
R"doc(Records the buffer memory barrier into the command buffer which
|
||||
ensures that relevant data transfers are carried out correctly.
|
||||
|
||||
@param commandBuffer Vulkan Command Buffer to record the commands into
|
||||
@param srcAccessMask Access flags for source access mask @param
|
||||
dstAccessMask Access flags for destination access mask @param
|
||||
scrStageMask Pipeline stage flags for source stage mask @param
|
||||
dstStageMask Pipeline stage flags for destination stage mask)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_recordCopyFrom =
|
||||
R"doc(Records a copy from the memory of the tensor provided to the current
|
||||
tensor. This is intended to pass memory into a processing, to perform
|
||||
a staging buffer transfer, or to gather output (between others).
|
||||
|
||||
@param commandBuffer Vulkan Command Buffer to record the commands into
|
||||
@param copyFromTensor Tensor to copy the data from @param
|
||||
createBarrier Whether to create a barrier that ensures the data is
|
||||
copied before further operations. Default is true.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_setData =
|
||||
R"doc(Sets / resets the vector data of the tensor. This function does not
|
||||
perform any copies into GPU memory and is only performed on the host.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_shape =
|
||||
R"doc(Returns the shape of the tensor, which includes the number of
|
||||
dimensions and the size per dimension.
|
||||
|
||||
@return Array containing the sizes for each dimension. Zero means
|
||||
respective dimension is not active.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_size =
|
||||
R"doc(Returns the size/magnitude of the Tensor, which will be the total
|
||||
number of elements across all dimensions
|
||||
|
||||
@return Unsigned integer representing the total number of elements)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_tensorType =
|
||||
R"doc(Retrieve the tensor type of the Tensor
|
||||
|
||||
@return Tensor type of tensor)doc";
|
||||
|
||||
#if defined(__GNUG__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include <kompute/Kompute.hpp>
|
||||
|
||||
#include "docstrings.hpp"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
PYBIND11_MODULE(kp, m) {
|
||||
|
|
@ -21,22 +23,22 @@ PYBIND11_MODULE(kp, m) {
|
|||
#endif
|
||||
});
|
||||
|
||||
py::enum_<kp::Tensor::TensorTypes>(m, "TensorTypes", "Enum with GPU memory types for Tensor.")
|
||||
py::enum_<kp::Tensor::TensorTypes>(m, "TensorTypes", DOC(kp, Tensor, TensorTypes))
|
||||
.value("device", kp::Tensor::TensorTypes::eDevice, "Tensor holding data in GPU memory.")
|
||||
.value("staging", kp::Tensor::TensorTypes::eStaging, "Tensor used for transfer of data to device.")
|
||||
.value("storage", kp::Tensor::TensorTypes::eStorage, "Tensor with host visible gpu memory.")
|
||||
.export_values();
|
||||
|
||||
py::class_<kp::Tensor, std::shared_ptr<kp::Tensor>>(m, "Tensor", "Structured data used in GPU operations.")
|
||||
py::class_<kp::Tensor, std::shared_ptr<kp::Tensor>>(m, "Tensor", DOC(kp, Tensor))
|
||||
.def(py::init(
|
||||
[](const std::vector<float>& data) {
|
||||
return std::unique_ptr<kp::Tensor>(new kp::Tensor(data));
|
||||
}), "Initialiser with only list of data components.")
|
||||
}), DOC(kp, Tensor, Tensor, 2))
|
||||
.def(py::init(
|
||||
[](const std::vector<float>& data, kp::Tensor::TensorTypes tensorTypes) {
|
||||
return std::unique_ptr<kp::Tensor>(new kp::Tensor(data, tensorTypes));
|
||||
}), "Initialiser with list of data components and tensor GPU memory type.")
|
||||
.def("data", &kp::Tensor::data, "Retrieves the data as a list containing the local Tensor memory data.")
|
||||
.def("data", &kp::Tensor::data, DOC(kp, Tensor, data))
|
||||
.def("size", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.")
|
||||
.def("tensor_type", &kp::Tensor::tensorType, "Retreves the memory type of the tensor.")
|
||||
.def("is_init", &kp::Tensor::isInit, "Checks whether the tensor GPU memory has been initialised.")
|
||||
|
|
@ -70,7 +72,17 @@ PYBIND11_MODULE(kp, m) {
|
|||
"Records operation to run multiplication compute shader to two input tensors and an output tensor")
|
||||
.def("record_algo_file", &kp::Sequence::record<kp::OpAlgoBase, std::string>,
|
||||
"Records an operation using a custom shader provided from a shader path")
|
||||
.def("record_algo_data", &kp::Sequence::record<kp::OpAlgoBase, std::vector<char>>,
|
||||
.def("record_algo_data", [](kp::Sequence &self,
|
||||
std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
py::bytes &bytes) {
|
||||
// Bytes have to be converted into std::vector
|
||||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
self.record<kp::OpAlgoBase>(
|
||||
tensors,
|
||||
std::vector<char>(data, data + length));
|
||||
},
|
||||
"Records an operation using a custom shader provided as raw string or spirv bytes")
|
||||
.def("record_algo_lro", &kp::Sequence::record<kp::OpAlgoLhsRhsOut>,
|
||||
"Records operation to run left right out operation with custom shader");
|
||||
|
|
@ -87,7 +99,7 @@ PYBIND11_MODULE(kp, m) {
|
|||
}), "Manager initialiser can provide specified device and array of GPU queueFamilies to load.")
|
||||
.def("get_create_sequence", &kp::Manager::getOrCreateManagedSequence, "Get a Sequence or create a new one with given name")
|
||||
.def("create_sequence", &kp::Manager::createManagedSequence,
|
||||
py::arg("name"), py::arg("queueIndex") = 0, "Create a sequence with specific name and specified index of available queues")
|
||||
py::arg("name") = "", py::arg("queueIndex") = 0, "Create a sequence with specific name and specified index of available queues")
|
||||
.def("build_tensor", &kp::Manager::buildTensor,
|
||||
py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice,
|
||||
"Build and initialise tensor")
|
||||
|
|
@ -110,8 +122,20 @@ PYBIND11_MODULE(kp, m) {
|
|||
"Evaluates operation to run multiplication compute shader to two input tensors and an output tensor with new anonymous Sequence")
|
||||
.def("eval_algo_file_def", &kp::Manager::evalOpDefault<kp::OpAlgoBase, std::string>,
|
||||
"Evaluates an operation using a custom shader provided from a shader path with new anonymous Sequence")
|
||||
.def("eval_algo_data_def", &kp::Manager::evalOpDefault<kp::OpAlgoBase, std::vector<char>>,
|
||||
"Evaluates an operation using a custom shader provided as raw string or spirv bytes with new anonymous Sequence")
|
||||
.def("eval_algo_str_def", &kp::Manager::evalOpDefault<kp::OpAlgoBase, std::vector<char>>,
|
||||
"Evaluates an operation using a custom shader provided as string provided as list of characters with new anonymous Sequence")
|
||||
.def("eval_algo_data_def", [](kp::Manager &self,
|
||||
std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
py::bytes &bytes) {
|
||||
// Bytes have to be converted into std::vector
|
||||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
self.evalOpDefault<kp::OpAlgoBase>(
|
||||
tensors,
|
||||
std::vector<char>(data, data + length));
|
||||
},
|
||||
"Evaluates an operation using a custom shader provided as spirv bytes with new anonymous Sequence")
|
||||
.def("eval_algo_lro_def", &kp::Manager::evalOpDefault<kp::OpAlgoLhsRhsOut>,
|
||||
"Evaluates operation to run left right out operation with custom shader with new anonymous Sequence")
|
||||
// eval
|
||||
|
|
@ -127,8 +151,22 @@ PYBIND11_MODULE(kp, m) {
|
|||
"Evaluates operation to run multiplication compute shader to two input tensors and an output tensor with explicitly named Sequence")
|
||||
.def("eval_algo_file", &kp::Manager::evalOp<kp::OpAlgoBase, std::string>,
|
||||
"Evaluates an operation using a custom shader provided from a shader path with explicitly named Sequence")
|
||||
.def("eval_algo_data", &kp::Manager::evalOp<kp::OpAlgoBase, std::vector<char>>,
|
||||
"Evaluates an operation using a custom shader provided as raw string or spirv bytes with explicitly named Sequence")
|
||||
.def("eval_algo_str", &kp::Manager::evalOp<kp::OpAlgoBase, std::vector<char>>,
|
||||
"Evaluates an operation using a custom shader provided as string provided as list of characters with explicitly named Sequence")
|
||||
.def("eval_algo_data", [](kp::Manager &self,
|
||||
std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
std::string sequenceName,
|
||||
py::bytes &bytes) {
|
||||
// Bytes have to be converted into std::vector
|
||||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
self.evalOp<kp::OpAlgoBase>(
|
||||
tensors,
|
||||
sequenceName,
|
||||
std::vector<char>(data, data + length));
|
||||
},
|
||||
"Evaluates an operation using a custom shader provided as spirv bytes with explicitly named Sequence")
|
||||
.def("eval_algo_lro", &kp::Manager::evalOp<kp::OpAlgoLhsRhsOut>,
|
||||
"Evaluates operation to run left right out operation with custom shader with explicitly named Sequence")
|
||||
// eval async default
|
||||
|
|
@ -144,7 +182,19 @@ PYBIND11_MODULE(kp, m) {
|
|||
"Evaluates asynchronously operation to run multiplication compute shader to two input tensors and an output tensor with anonymous Sequence")
|
||||
.def("eval_async_algo_file_def", &kp::Manager::evalOpAsyncDefault<kp::OpAlgoBase, std::string>,
|
||||
"Evaluates asynchronously an operation using a custom shader provided from a shader path with anonymous Sequence")
|
||||
.def("eval_async_algo_data_def", &kp::Manager::evalOpAsyncDefault<kp::OpAlgoBase, std::vector<char>>,
|
||||
.def("eval_async_algo_str_def", &kp::Manager::evalOpAsyncDefault<kp::OpAlgoBase, std::vector<char>>,
|
||||
"Evaluates Asynchronously an operation using a custom shader provided as string provided as list of characters with new anonymous Sequence")
|
||||
.def("eval_async_algo_data_def", [](kp::Manager &self,
|
||||
std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
py::bytes &bytes) {
|
||||
// Bytes have to be converted into std::vector
|
||||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
self.evalOpAsyncDefault<kp::OpAlgoBase>(
|
||||
tensors,
|
||||
std::vector<char>(data, data + length));
|
||||
},
|
||||
"Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with anonymous Sequence")
|
||||
.def("eval_async_algo_lro_def", &kp::Manager::evalOpAsyncDefault<kp::OpAlgoLhsRhsOut>,
|
||||
"Evaluates asynchronously operation to run left right out operation with custom shader with anonymous Sequence")
|
||||
|
|
@ -161,7 +211,21 @@ PYBIND11_MODULE(kp, m) {
|
|||
"Evaluates asynchronously operation to run multiplication compute shader to two input tensors and an output tensor with explicitly named Sequence")
|
||||
.def("eval_async_algo_file", &kp::Manager::evalOpAsync<kp::OpAlgoBase, std::string>,
|
||||
"Evaluates asynchronously an operation using a custom shader provided from a shader path with explicitly named Sequence")
|
||||
.def("eval_async_algo_data", &kp::Manager::evalOpAsync<kp::OpAlgoBase, std::vector<char>>,
|
||||
.def("eval_async_algo_str", &kp::Manager::evalOpAsync<kp::OpAlgoBase, std::vector<char>>,
|
||||
"Evaluates Asynchronous an operation using a custom shader provided as string provided as list of characters with explicitly named Sequence")
|
||||
.def("eval_async_algo_data", [](kp::Manager &self,
|
||||
std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
std::string sequenceName,
|
||||
py::bytes &bytes) {
|
||||
// Bytes have to be converted into std::vector
|
||||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
self.evalOpAsync<kp::OpAlgoBase>(
|
||||
tensors,
|
||||
sequenceName,
|
||||
std::vector<char>(data, data + length));
|
||||
},
|
||||
"Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with explicitly named Sequence")
|
||||
.def("eval_async_algo_lro", &kp::Manager::evalOpAsync<kp::OpAlgoLhsRhsOut>,
|
||||
"Evaluates asynchronously operation to run left right out operation with custom shader with explicitly named Sequence");
|
||||
|
|
|
|||
1
python/test/requirements-dev.txt
Normal file
1
python/test/requirements-dev.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
pyshader==0.7.0
|
||||
|
|
@ -1,4 +1,7 @@
|
|||
|
||||
from pyshader import python2shader, f32, ivec3, Array
|
||||
from pyshader.stdlib import exp, log
|
||||
|
||||
from kp import Tensor, Manager, Sequence
|
||||
|
||||
def test_opmult():
|
||||
|
|
@ -49,7 +52,7 @@ def test_opalgobase_data():
|
|||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
mgr.eval_algo_data_def([tensor_in_a, tensor_in_b, tensor_out], list(shaderData))
|
||||
mgr.eval_algo_str_def([tensor_in_a, tensor_in_b, tensor_out], list(shaderData))
|
||||
|
||||
mgr.eval_tensor_sync_local_def([tensor_out])
|
||||
|
||||
|
|
@ -81,30 +84,142 @@ def test_sequence():
|
|||
"""
|
||||
Test basic OpAlgoBase operation
|
||||
"""
|
||||
|
||||
mgr = Manager(0, [2])
|
||||
|
||||
tensor_in_a = Tensor([2, 2, 2])
|
||||
tensor_in_b = Tensor([1, 2, 3])
|
||||
tensor_out = Tensor([0, 0, 0])
|
||||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
seq = mgr.create_sequence("op")
|
||||
|
||||
shaderFilePath = "../../shaders/glsl/opmult.comp"
|
||||
mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath)
|
||||
mgr.eval_await_def()
|
||||
|
||||
seq.begin()
|
||||
seq.record_tensor_sync_local([tensor_in_a])
|
||||
seq.record_tensor_sync_local([tensor_in_b])
|
||||
seq.record_tensor_sync_local([tensor_out])
|
||||
seq.end()
|
||||
|
||||
seq.eval()
|
||||
assert tensor_out.data() == [2.0, 4.0, 6.0]
|
||||
|
||||
def test_pyshader_pyshader():
    """
    Multiply two tensors element-wise with a shader written via pyshader.

    The Python function is compiled to SPIR-V with ``to_spirv()`` and run
    through the Manager's default sequence; the output tensor is then
    synced back and compared with the expected products.
    """

    # NOTE: the shader body is compiled by pyshader, so it is kept minimal.
    @python2shader
    def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
                                data1=("buffer", 0, Array(f32)),
                                data2=("buffer", 1, Array(f32)),
                                data3=("buffer", 2, Array(f32))):
        i = index.x
        data3[i] = data1[i] * data2[i]

    lhs = Tensor([2, 2, 2])
    rhs = Tensor([1, 2, 3])
    product = Tensor([0, 0, 0])
    # Order matters: it maps onto buffer bindings 0, 1 and 2 of the shader
    tensors = [lhs, rhs, product]

    manager = Manager()
    manager.eval_tensor_create_def(tensors)

    spirv = compute_shader_multiply.to_spirv()
    manager.eval_algo_data_def(tensors, spirv)
    manager.eval_tensor_sync_local_def([product])

    expected = [2.0, 4.0, 6.0]
    assert product.data() == expected
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_sequence()
|
||||
def test_logistic_regression_pyshader():
    """
    Train a two-weight logistic regression with a pyshader compute shader.

    Each shader invocation handles one sample and writes that sample's
    gradient contributions (and loss) into the output tensors. The host loop
    then applies the gradients to the weights and bias and re-runs the
    recorded sequence. After all iterations the learned parameters are
    checked against fixed bounds.
    """

    # NOTE: the shader body is compiled by pyshader, so only the statements
    # needed for the computation are placed inside it.
    @python2shader
    def compute_shader(
            index=("input", "GlobalInvocationId", ivec3),
            x_i=("buffer", 0, Array(f32)),
            x_j=("buffer", 1, Array(f32)),
            y=("buffer", 2, Array(f32)),
            w_in=("buffer", 3, Array(f32)),
            w_out_i=("buffer", 4, Array(f32)),
            w_out_j=("buffer", 5, Array(f32)),
            b_in=("buffer", 6, Array(f32)),
            b_out=("buffer", 7, Array(f32)),
            l_out=("buffer", 8, Array(f32)),
            M=("buffer", 9, Array(f32))):

        i = index.x

        m = M[0]

        w_curr = vec2(w_in[0], w_in[1])
        b_curr = b_in[0]

        x_curr = vec2(x_i[i], x_j[i])
        y_curr = y[i]

        # Forward pass: sigmoid of the linear combination
        z_dot = w_curr @ x_curr
        z = z_dot + b_curr
        y_hat = 1.0 / (1.0 + exp(-z))

        # Gradient contribution of this sample, scaled by 1 / m
        d_z = y_hat - y_curr
        d_w = (1.0 / m) * x_curr * d_z
        d_b = (1.0 / m) * d_z

        # Binary cross-entropy; the second term is weighted by (1 - y)
        loss = -((y_curr * log(y_hat)) + ((1.0 - y_curr) * log(1.0 - y_hat)))

        w_out_i[i] = d_w.x
        w_out_j[i] = d_w.y
        b_out[i] = d_b
        l_out[i] = loss

    # First we create input and output tensors for the shader
    tensor_x_i = Tensor([0.0, 1.0, 1.0, 1.0, 1.0])
    tensor_x_j = Tensor([0.0, 0.0, 0.0, 1.0, 1.0])

    tensor_y = Tensor([0.0, 0.0, 0.0, 1.0, 1.0])

    tensor_w_in = Tensor([0.001, 0.001])
    tensor_w_out_i = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
    tensor_w_out_j = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])

    tensor_b_in = Tensor([0.0])
    tensor_b_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])

    tensor_l_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])

    tensor_m = Tensor([5.0])

    # We store them in a list for easier interaction; the order matches the
    # buffer binding indices 0..9 declared in the shader signature
    params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
              tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m]

    mgr = Manager()

    mgr.eval_tensor_create_def(params)

    # Record commands once so every iteration only has to re-evaluate them
    sq = mgr.create_sequence()
    sq.begin()
    sq.record_tensor_sync_device([tensor_w_in, tensor_b_in])
    sq.record_algo_data(params, compute_shader.to_spirv())
    sq.record_tensor_sync_local(
        [tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out])
    sq.end()

    iterations = 100
    learning_rate = 0.1

    # Perform machine learning training and inference across all input X and Y
    for _ in range(iterations):
        sq.eval()

        # Start from the current parameters...
        w_in_i_val, w_in_j_val = tensor_w_in.data()
        b_in_val = tensor_b_in.data()[0]

        # ...and apply each sample's gradient in order. The data is fetched
        # once per iteration instead of once per element.
        grads = zip(tensor_w_out_i.data(),
                    tensor_w_out_j.data(),
                    tensor_b_out.data())
        for d_w_i, d_w_j, d_b in grads:
            w_in_i_val -= learning_rate * d_w_i
            w_in_j_val -= learning_rate * d_w_j
            b_in_val -= learning_rate * d_b

        # Update the parameters to process inference again
        tensor_w_in.set_data([w_in_i_val, w_in_j_val])
        tensor_b_in.set_data([b_in_val])

    learned_w = tensor_w_in.data()
    learned_b = tensor_b_in.data()

    assert learned_w[0] < 0.01
    assert learned_w[0] > 0.0
    assert learned_w[1] > 1.5
    assert learned_b[0] < 0.7
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
//#include <spdlog/fmt/bundled/ranges.h>
|
||||
#include "kompute/Kompute.hpp"
|
||||
|
||||
TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression)
|
||||
|
|
@ -73,12 +72,11 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression)
|
|||
EXPECT_LT(wIn->data()[0], 0.01);
|
||||
EXPECT_GT(wIn->data()[1], 1.0);
|
||||
EXPECT_LT(bIn->data()[0], 0.0);
|
||||
EXPECT_LT(bIn->data()[0], 0.0);
|
||||
|
||||
// SPDLOG_WARN("Result wIn: {}, bIn: {}, loss: {}",
|
||||
// wIn->data(),
|
||||
// bIn->data(),
|
||||
// lOut->data());
|
||||
SPDLOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}",
|
||||
wIn->data()[0],
|
||||
wIn->data()[1],
|
||||
bIn->data()[0]);
|
||||
}
|
||||
|
||||
TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy)
|
||||
|
|
@ -156,8 +154,8 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy)
|
|||
EXPECT_GT(wIn->data()[1], 1.0);
|
||||
EXPECT_LT(bIn->data()[0], 0.0);
|
||||
|
||||
// SPDLOG_WARN("Result wIn: {}, bIn: {}, loss: {}",
|
||||
// wIn->data(),
|
||||
// bIn->data(),
|
||||
// lOut->data());
|
||||
SPDLOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}",
|
||||
wIn->data()[0],
|
||||
wIn->data()[1],
|
||||
bIn->data()[0]);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue