Merge pull request #177 from EthicalML/add_tensor_types

Add support for bool, double, int32, uint32 and float32 on Tensors via TensorT
This commit is contained in:
Alejandro Saucedo 2021-03-07 14:25:19 +00:00 committed by GitHub
commit 1d2d33b269
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
41 changed files with 1262 additions and 690 deletions

View file

@ -55,10 +55,13 @@ void kompute(const std::string& shader) {
kp::Manager mgr;
// 2. Create and initialise Kompute Tensors through manager
// Default tensor constructor simplifies creation of float values
auto tensorInA = mgr.tensor({ 2., 2., 2. });
auto tensorInB = mgr.tensor({ 1., 2., 3. });
auto tensorOutA = mgr.tensor({ 0., 0., 0. });
auto tensorOutB = mgr.tensor({ 0., 0., 0. });
// Explicit type constructor supports uint32, int32, double, float and bool
auto tensorOutA = mgr.tensorT<uint32_t>({ 0, 0, 0 });
auto tensorOutB = mgr.tensorT<uint32_t>({ 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = {tensorInA, tensorInB, tensorOutA, tensorOutB};
@ -109,8 +112,8 @@ int main() {
// The input tensors bind index is relative to index in parameter passed
layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
// Kompute supports push constants updated on dispatch
layout(push_constant) uniform PushConstants {
@ -122,8 +125,8 @@ int main() {
void main() {
uint index = gl_GlobalInvocationID.x;
out_a[index] += in_a[index] * in_b[index];
out_b[index] += const_one * push_const.val;
out_a[index] += uint( in_a[index] * in_b[index] );
out_b[index] += uint( const_one * push_const.val );
}
)");
@ -144,10 +147,13 @@ def kompute(shader):
mgr = kp.Manager()
# 2. Create and initialise Kompute Tensors through manager
# Default tensor constructor simplifies creation of float values
tensor_in_a = mgr.tensor([2, 2, 2])
tensor_in_b = mgr.tensor([1, 2, 3])
tensor_out_a = mgr.tensor([0, 0, 0])
tensor_out_b = mgr.tensor([0, 0, 0])
# Explicit type constructor supports uint32, int32, double, float and bool
tensor_out_a = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32))
tensor_out_b = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32))
params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b]
@ -194,8 +200,8 @@ if __name__ == "__main__":
// The input tensors bind index is relative to index in parameter passed
layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
// Kompute supports push constants updated on dispatch
layout(push_constant) uniform PushConstants {
@ -207,8 +213,8 @@ if __name__ == "__main__":
void main() {
uint index = gl_GlobalInvocationID.x;
out_a[index] += in_a[index] * in_b[index];
out_b[index] += const_one * push_const.val;
out_a[index] += uint( in_a[index] * in_b[index] );
out_b[index] += uint( const_one * push_const.val );
}
"""

View file

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.17.0)
cmake_minimum_required(VERSION 3.4.1)
project(kompute_array_mult VERSION 0.1.0)
set(CMAKE_CXX_STANDARD 14)
@ -23,10 +23,6 @@ endif()
find_package(Vulkan REQUIRED)
if(KOMPUTE_OPT_ENABLE_SPDLOG)
find_package(spdlog REQUIRED)
endif()
add_executable(kompute_array_mult
src/Main.cpp)

View file

@ -15,8 +15,11 @@ This project has the option to either import the Kompute dependency relative to
To build you just need to run the cmake command in this folder as follows:
```
cmake \
-Bbuild
cmake -Bbuild/ \
-DCMAKE_BUILD_TYPE=Debug \
-DKOMPUTE_OPT_INSTALL=0 \
-DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \
-DKOMPUTE_OPT_ENABLE_SPDLOG=1
```
You can pass the following optional parameters based on your desired configuration:

View file

@ -39,16 +39,17 @@ int main()
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorInA, tensorInB, tensorOut };
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, kp::Shader::compile_source(shader));
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, kp::Shader::compileSource(shader));
mgr.sequence()
->record<kp::OpTensorSyncDevice>(params)
->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncLocal>(params);
->record<kp::OpTensorSyncLocal>(params)
->eval();
// prints "Output: { 0 4 12 }"
std::cout<< "Output: { ";
for (const float& elem : tensorOut->data()) {
for (const float& elem : tensorOut->vector()) {
std::cout << elem << " ";
}
std::cout << "}" << std::endl;

View file

@ -54,7 +54,7 @@ void KomputeSummatorNode::_init() {
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm(
{ this->mPrimaryTensor, this->mSecondaryTensor },
kp::Shader::compile_source(shader));
kp::Shader::compileSource(shader));
// First we ensure secondary tensor loads to GPU

View file

@ -58,7 +58,7 @@ void KomputeSummator::_init() {
// Then we run the operation with both tensors
this->mSequence->record<kp::OpAlgoCreate>(
{ this->mPrimaryTensor, this->mSecondaryTensor },
kp::Shader::compile_source(shader));
kp::Shader::compileSource(shader));
// We map the result back to local
this->mSequence->record<kp::OpTensorSyncLocal>(

View file

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.17.0)
cmake_minimum_required(VERSION 3.4.1)
project(kompute_linear_reg VERSION 0.1.0)
set(CMAKE_CXX_STANDARD 14)
@ -23,10 +23,6 @@ endif()
find_package(Vulkan REQUIRED)
if(KOMPUTE_OPT_ENABLE_SPDLOG)
find_package(spdlog REQUIRED)
endif()
add_executable(kompute_linear_reg
src/Main.cpp)
@ -39,7 +35,7 @@ include_directories(
../../single_include/)
if(KOMPUTE_OPT_ENABLE_SPDLOG)
target_link_libraries(kompute_array_mult
target_link_libraries(kompute_linear_reg
spdlog::spdlog)
endif()

View file

@ -15,8 +15,11 @@ This project has the option to either import the Kompute dependency relative to
To build you just need to run the cmake command in this folder as follows:
```
cmake \
-Bbuild
cmake -Bbuild/ \
-DCMAKE_BUILD_TYPE=Debug \
-DKOMPUTE_OPT_INSTALL=0 \
-DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \
-DKOMPUTE_OPT_ENABLE_SPDLOG=1
```
You can pass the following optional parameters based on your desired configuration:

View file

@ -17,19 +17,19 @@ int main()
kp::Manager mgr;
std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
auto xI = mgr.tensor({ 0, 1, 1, 1, 1 });
auto xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
auto y = mgr.tensor({ 0, 0, 0, 1, 1 });
std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
auto wIn = mgr.tensor({ 0.001, 0.001 });
auto wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
auto wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
auto bIn = mgr.tensor({ 0 });
auto bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
auto lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
wIn, wOutI, wOutJ,
@ -40,7 +40,8 @@ int main()
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv);
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 }));
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);

View file

@ -247,10 +247,16 @@ static const char *__doc_kp_Manager_sequence =
R"doc(Create a managed sequence that will be destroyed by this manager if it
hasn't been destroyed by its reference count going to zero.
@param queueIndex The queue to use from the available queues @returns
Shared pointer with initialised sequence)doc";
@param queueIndex The queue to use from the available queues @param
nrOfTimestamps The maximum number of timestamps to allocate. If zero
(default), disables latching of timestamps. @returns Shared pointer
with initialised sequence)doc";
static const char *__doc_kp_Manager_tensor =
static const char *__doc_kp_Manager_tensor = R"doc()doc";
static const char *__doc_kp_Manager_tensor_2 = R"doc()doc";
static const char *__doc_kp_Manager_tensorT =
R"doc(Create a managed tensor that will be destroyed by this manager if it
hasn't been destroyed by its reference count going to zero.
@ -264,18 +270,26 @@ of algorithm and parameter components which can be used with shaders.
By default it enables the user to provide a dynamic number of tensors
which are then passed as inputs.)doc";
static const char *__doc_kp_OpAlgoDispatch_OpAlgoDispatch = R"doc()doc";
static const char *__doc_kp_OpAlgoDispatch_OpAlgoDispatch =
R"doc(Constructor that stores the algorithm to use as well as the relevant
push constants to override when recording.
@param algorithm The algorithm object to use for dispatch @param
pushConstants The push constants to use for override)doc";
static const char *__doc_kp_OpAlgoDispatch_mAlgorithm = R"doc()doc";
static const char *__doc_kp_OpAlgoDispatch_mPushConstants = R"doc()doc";
static const char *__doc_kp_OpAlgoDispatch_postEval =
R"doc(Executes after the recorded commands are submitted, and performs a
copy of the GPU Device memory into the staging buffer so the output
data can be retrieved.)doc";
R"doc(Does not perform any postEval commands.
static const char *__doc_kp_OpAlgoDispatch_preEval = R"doc(Does not perform any preEval commands.)doc";
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpAlgoDispatch_preEval =
R"doc(Does not perform any preEval commands.
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpAlgoDispatch_record =
R"doc(This records the commands that are to be sent to the GPU. This
@ -283,7 +297,9 @@ includes the barriers that ensure the memory has been copied before
going in and out of the shader, as well as the dispatch operation that
sends the shader processing to the gpu. This function also records the
GPU memory copy of the output data for the staging buffer so it can be
read by the host.)doc";
read by the host.
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpBase =
R"doc(Base Operation which provides the high level interface that Kompute
@ -299,7 +315,9 @@ the commands to the GPU for processing, and can be used to perform any
tear-down steps required as the computation iteration finishes. It's
worth noting that there are situations where eval can be called
multiple times, so the resources that are destroyed should not require
a re-init unless explicitly provided by the user.)doc";
a re-init unless explicitly provided by the user.
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpBase_preEval =
R"doc(Pre eval is called before the Sequence has called eval and submitted
@ -307,12 +325,16 @@ the commands to the GPU for processing, and can be used to perform any
per-eval setup steps required as the computation iteration begins.
It's worth noting that there are situations where eval can be called
multiple times, so the resources that are created should be idempotent
in case it's called multiple times in a row.)doc";
in case it's called multiple times in a row.
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpBase_record =
R"doc(The record function is intended to only send a record command or run
commands that are expected to record operations that are to be
submitted as a batch into the GPU.)doc";
submitted as a batch into the GPU.
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpMult =
R"doc(Operation that performs multiplication on two tensors and outputs on
@ -323,12 +345,9 @@ R"doc(Default constructor with parameters that provides the bare minimum
requirements for the operations to be able to create and manage their
sub-components.
@param physicalDevice Vulkan physical device used to find device
queues @param device Vulkan logical device for passing to Algorithm
@param commandBuffer Vulkan Command Buffer to record commands into
@param tensors Tensors that are to be used in this operation @param
komputeWorkgroup Optional parameter to specify the layout for
processing)doc";
algorithm An algorithm that will be overridden with the OpMult shader
data and the tensors provided which are expected to be 3)doc";
static const char *__doc_kp_OpTensorCopy =
R"doc(Operation that copies the data from the first tensor to the rest of
@ -340,84 +359,95 @@ static const char *__doc_kp_OpTensorCopy_OpTensorCopy =
R"doc(Default constructor with parameters that provides the core vulkan
resources and the tensors that will be used in the operation.
@param physicalDevice Vulkan physical device used to find device
queues @param device Vulkan logical device for passing to Algorithm
@param commandBuffer Vulkan Command Buffer to record commands into
@param tensors Tensors that will be used to create in operation.)doc";
static const char *__doc_kp_OpTensorCopy_mTensors = R"doc()doc";
static const char *__doc_kp_OpTensorCopy_postEval =
R"doc(Copies the local vectors for all the tensors to sync the data with the
gpu.)doc";
gpu.
static const char *__doc_kp_OpTensorCopy_preEval = R"doc(Does not perform any preEval commands.)doc";
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpTensorCopy_preEval =
R"doc(Does not perform any preEval commands.
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpTensorCopy_record =
R"doc(Records the copy commands from the first tensor into all the other
tensors provided. Also optionally records a barrier.)doc";
tensors provided. Also optionally records a barrier.
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpTensorSyncDevice =
R"doc(Operation that syncs tensor's device by mapping local data into the
device memory. For TensorTypes::eDevice it will use a record operation
for the memory to be syncd into GPU memory which means that the
operation will be done in sync with GPU commands. For
TensorTypes::eStaging it will only map the data into host memory which
TensorTypes::eHost it will only map the data into host memory which
will happen during preEval before the recorded commands are
dispatched. This operation won't have any effect on
TensorTypes::eStaging.)doc";
dispatched.)doc";
static const char *__doc_kp_OpTensorSyncDevice_OpTensorSyncDevice =
R"doc(Default constructor with parameters that provides the core vulkan
resources and the tensors that will be used in the operation. The
tensors provided cannot be of type TensorTypes::eStorage.
@param physicalDevice Vulkan physical device used to find device
queues @param device Vulkan logical device for passing to Algorithm
@param commandBuffer Vulkan Command Buffer to record commands into
@param tensors Tensors that will be used to create in operation.)doc";
static const char *__doc_kp_OpTensorSyncDevice_mTensors = R"doc()doc";
static const char *__doc_kp_OpTensorSyncDevice_postEval = R"doc(Does not perform any postEval commands.)doc";
static const char *__doc_kp_OpTensorSyncDevice_postEval =
R"doc(Does not perform any postEval commands.
static const char *__doc_kp_OpTensorSyncDevice_preEval = R"doc(Does not perform any preEval commands.)doc";
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpTensorSyncDevice_preEval =
R"doc(Does not perform any preEval commands.
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpTensorSyncDevice_record =
R"doc(For device tensors, it records the copy command for the tensor to copy
the data from its staging to device memory.)doc";
the data from its staging to device memory.
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpTensorSyncLocal =
R"doc(Operation that syncs tensor's local memory by mapping device data into
the local CPU memory. For TensorTypes::eDevice it will use a record
operation for the memory to be syncd into GPU memory which means that
the operation will be done in sync with GPU commands. For
TensorTypes::eStaging it will only map the data into host memory which
TensorTypes::eHost it will only map the data into host memory which
will happen during preEval before the recorded commands are
dispatched. This operation won't have any effect on
TensorTypes::eStaging.)doc";
dispatched.)doc";
static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal =
R"doc(Default constructor with parameters that provides the core vulkan
resources and the tensors that will be used in the operation. The
tensors provided cannot be of type TensorTypes::eStorage.
@param physicalDevice Vulkan physical device used to find device
queues @param device Vulkan logical device for passing to Algorithm
@param commandBuffer Vulkan Command Buffer to record commands into
@param tensors Tensors that will be used to create in operation.)doc";
static const char *__doc_kp_OpTensorSyncLocal_mTensors = R"doc()doc";
static const char *__doc_kp_OpTensorSyncLocal_postEval =
R"doc(For host tensors it performs the map command from the host memory into
local memory.)doc";
local memory.
static const char *__doc_kp_OpTensorSyncLocal_preEval = R"doc(Does not perform any preEval commands.)doc";
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpTensorSyncLocal_preEval =
R"doc(Does not perform any preEval commands.
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_OpTensorSyncLocal_record =
R"doc(For device tensors, it records the copy command for the tensor to copy
the data from its device to staging memory.)doc";
the data from its device to staging memory.
@param commandBuffer The command buffer to record the command into.)doc";
static const char *__doc_kp_Sequence = R"doc(Container of operations that can be sent to GPU as batch)doc";
@ -427,7 +457,8 @@ generate all dependent resources.
@param physicalDevice Vulkan physical device @param device Vulkan
logical device @param computeQueue Vulkan compute queue @param
queueIndex Vulkan compute queue index in device)doc";
queueIndex Vulkan compute queue index in device @param totalTimestamps
Maximum number of timestamps to allocate)doc";
static const char *__doc_kp_Sequence_begin =
R"doc(Begins recording commands for commands to be submitted into the
@ -443,6 +474,8 @@ static const char *__doc_kp_Sequence_createCommandBuffer = R"doc()doc";
static const char *__doc_kp_Sequence_createCommandPool = R"doc()doc";
static const char *__doc_kp_Sequence_createTimestampQueryPool = R"doc()doc";
static const char *__doc_kp_Sequence_destroy =
R"doc(Destroys and frees the GPU resources which include the buffer and
memory and sets the sequence as init=False.)doc";
@ -528,6 +561,10 @@ finishes, it runs the postEval of all operations.
@param waitFor Number of milliseconds to wait before timing out.
@return shared_ptr<Sequence> of the Sequence class itself)doc";
static const char *__doc_kp_Sequence_getTimestamps =
R"doc(Return the timestamps that were latched at the beginning and after
each operation during the last eval() call.)doc";
static const char *__doc_kp_Sequence_isInit =
R"doc(Returns true if the sequence has been initialised, and it's based on
the GPU resources being referenced.
@ -607,9 +644,11 @@ R"doc(Clears command buffer and triggers re-record of all the current
operations saved, which is useful if the underlying kp::Tensors or
kp::Algorithms are modified and need to be re-recorded.)doc";
static const char *__doc_kp_Sequence_timestampQueryPool = R"doc()doc";
static const char *__doc_kp_Shader = R"doc(Shader utility class with functions to compile and process glsl files.)doc";
static const char *__doc_kp_Shader_compile_source =
static const char *__doc_kp_Shader_compileSource =
R"doc(Compile a single glslang source from string value. Currently this
function uses the glslang C++ interface which is not thread safe so
this function should not be called from multiple threads concurrently.
@ -622,7 +661,7 @@ List of pairs containing key value definitions @param resourcesLimit A
list that contains the resource limits for the GLSL compiler @return
The compiled SPIR-V binary in unsigned int32 format)doc";
static const char *__doc_kp_Shader_compile_sources =
static const char *__doc_kp_Shader_compileSources =
R"doc(Compile multiple sources with optional filenames. Currently this
function uses the glslang C++ interface which is not thread safe so
this function should not be called from multiple threads concurrently.
@ -644,14 +683,42 @@ across GPUs. Each tensor would have a respective Vulkan memory and
buffer, which would be used to store their respective data. The
tensors can be used for GPU data storage or transfer.)doc";
static const char *__doc_kp_Tensor_Tensor =
R"doc(Default constructor with data provided which would be used to create
the respective vulkan buffer and memory.
static const char *__doc_kp_TensorT = R"doc()doc";
static const char *__doc_kp_TensorT_TensorT = R"doc()doc";
static const char *__doc_kp_TensorT_data = R"doc()doc";
static const char *__doc_kp_TensorT_dataType = R"doc()doc";
static const char *__doc_kp_TensorT_operator_array = R"doc()doc";
static const char *__doc_kp_TensorT_setData = R"doc()doc";
static const char *__doc_kp_TensorT_vector = R"doc()doc";
static const char *__doc_kp_Tensor_Tensor =
R"doc(Constructor with data provided which would be used to create the
respective vulkan buffer and memory.
@param physicalDevice The physical device to use to fetch properties
@param device The device to use to create the buffer and memory from
@param data Non-zero-sized vector of data that will be used by the
tensor @param tensorType Type for the tensor which is of type
tensor @param tensorTypes Type for the tensor which is of type
TensorTypes)doc";
static const char *__doc_kp_Tensor_TensorDataTypes = R"doc()doc";
static const char *__doc_kp_Tensor_TensorDataTypes_eBool = R"doc()doc";
static const char *__doc_kp_Tensor_TensorDataTypes_eDouble = R"doc()doc";
static const char *__doc_kp_Tensor_TensorDataTypes_eFloat = R"doc()doc";
static const char *__doc_kp_Tensor_TensorDataTypes_eInt = R"doc()doc";
static const char *__doc_kp_Tensor_TensorDataTypes_eUnsignedInt = R"doc()doc";
static const char *__doc_kp_Tensor_TensorTypes =
R"doc(Type for tensors created: Device allows memory to be transferred from
staging buffers. Staging are host memory visible. Storage are device
@ -677,13 +744,14 @@ without exposing it.
static const char *__doc_kp_Tensor_createBuffer = R"doc()doc";
static const char *__doc_kp_Tensor_data =
R"doc(Returns the vector of data currently contained by the Tensor. It is
important to ensure that there is no out-of-sync data with the GPU
memory.
static const char *__doc_kp_Tensor_data = R"doc()doc";
@return Reference to vector of elements representing the data in the
tensor.)doc";
static const char *__doc_kp_Tensor_dataType =
R"doc(Retrieve the underlying data type of the Tensor
@return Data type of tensor of type kp::Tensor::TensorDataTypes)doc";
static const char *__doc_kp_Tensor_dataTypeMemorySize = R"doc()doc";
static const char *__doc_kp_Tensor_destroy =
R"doc(Destroys and frees the GPU resources which include the buffer and
@ -697,9 +765,15 @@ static const char *__doc_kp_Tensor_getStagingBufferUsageFlags = R"doc()doc";
static const char *__doc_kp_Tensor_getStagingMemoryPropertyFlags = R"doc()doc";
static const char *__doc_kp_Tensor_isInit = R"doc()doc";
static const char *__doc_kp_Tensor_isInit =
R"doc(Check whether tensor is initialized based on the created gpu
resources.
static const char *__doc_kp_Tensor_mData = R"doc()doc";
@returns Boolean stating whether tensor is initialized)doc";
static const char *__doc_kp_Tensor_mDataType = R"doc()doc";
static const char *__doc_kp_Tensor_mDataTypeMemorySize = R"doc()doc";
static const char *__doc_kp_Tensor_mDevice = R"doc()doc";
@ -717,36 +791,28 @@ static const char *__doc_kp_Tensor_mPrimaryBuffer = R"doc()doc";
static const char *__doc_kp_Tensor_mPrimaryMemory = R"doc()doc";
static const char *__doc_kp_Tensor_mRawData = R"doc()doc";
static const char *__doc_kp_Tensor_mSize = R"doc()doc";
static const char *__doc_kp_Tensor_mStagingBuffer = R"doc()doc";
static const char *__doc_kp_Tensor_mStagingMemory = R"doc()doc";
static const char *__doc_kp_Tensor_mTensorType = R"doc()doc";
static const char *__doc_kp_Tensor_mapDataFromHostMemory =
R"doc(Maps data from the Host Visible GPU memory into the data vector. It
requires the Tensor to be of staging type for it to work.)doc";
static const char *__doc_kp_Tensor_mapDataIntoHostMemory =
R"doc(Maps data from the data vector into the Host Visible GPU memory. It
requires the tensor to be of staging type for it to work.)doc";
static const char *__doc_kp_Tensor_mapRawData = R"doc()doc";
static const char *__doc_kp_Tensor_memorySize = R"doc()doc";
static const char *__doc_kp_Tensor_operator_array =
R"doc(Overrides the subscript operator to expose the underlying data's
subscript operator which in this case would be its underlying
vector's.
@param i The index where the element will be returned from. @return
Returns the element in the position requested.)doc";
static const char *__doc_kp_Tensor_rawData = R"doc()doc";
static const char *__doc_kp_Tensor_rebuild =
R"doc(Initialiser which calls the initialisation for all the respective
tensors as well as creates the respective staging tensors. The staging
tensors would only be created for the tensors of type
TensorType::eDevice as otherwise there is no need to copy from host
memory.)doc";
R"doc(Function to trigger reinitialisation of the tensor buffer and memory
with new data as well as new potential device type.
@param data Vector of data to use to initialise vector from @param
tensorType The type to use for the tensor)doc";
static const char *__doc_kp_Tensor_recordBufferMemoryBarrier =
R"doc(Records the buffer memory barrier into the command buffer which
@ -788,7 +854,7 @@ would only be relevant for kp::Tensors of type eDevice.
@param createBarrier Whether to create a barrier that ensures the data
is copied before further operations. Default is true.)doc";
static const char *__doc_kp_Tensor_setData =
static const char *__doc_kp_Tensor_setRawData =
R"doc(Sets / resets the vector data of the tensor. This function does not
perform any copies into GPU memory and is only performed on the host.)doc";
@ -803,6 +869,10 @@ R"doc(Retrieve the tensor type of the Tensor
@return Tensor type of tensor)doc";
static const char *__doc_kp_Tensor_unmapRawData = R"doc()doc";
static const char *__doc_kp_Tensor_vector = R"doc()doc";
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

View file

@ -26,9 +26,9 @@ PYBIND11_MODULE(kp, m) {
py::module_ np = py::module_::import("numpy");
py::enum_<kp::Tensor::TensorTypes>(m, "TensorTypes")
.value("device", kp::Tensor::TensorTypes::eDevice, "Tensor holding data in GPU memory.")
.value("host", kp::Tensor::TensorTypes::eHost, "Tensor used for CPU visible GPU data.")
.value("storage", kp::Tensor::TensorTypes::eStorage, "Tensor with host visible gpu memory.")
.value("device", kp::Tensor::TensorTypes::eDevice, DOC(kp, Tensor, TensorTypes, eDevice))
.value("host", kp::Tensor::TensorTypes::eHost, DOC(kp, Tensor, TensorTypes, eHost))
.value("storage", kp::Tensor::TensorTypes::eStorage, DOC(kp, Tensor, TensorTypes, eStorage))
.export_values();
#if !defined(KOMPUTE_DISABLE_SHADER_UTILS) || !KOMPUTE_DISABLE_SHADER_UTILS
@ -37,119 +37,168 @@ PYBIND11_MODULE(kp, m) {
const std::string& source,
const std::string& entryPoint,
const std::vector<std::pair<std::string,std::string>>& definitions) {
std::vector<uint32_t> spirv = kp::Shader::compile_source(source, entryPoint, definitions);
std::vector<uint32_t> spirv = kp::Shader::compileSource(source, entryPoint, definitions);
return py::bytes((const char*)spirv.data(), spirv.size() * sizeof(uint32_t));
},
"Compiles string source provided and returns the value in bytes",
py::arg("source"), py::arg("entryPoint") = "main", py::arg("definitions") = std::vector<std::pair<std::string,std::string>>() )
DOC(kp, Shader, compileSource),
py::arg("source"),
py::arg("entryPoint") = "main",
py::arg("definitions") = std::vector<std::pair<std::string,std::string>>() )
.def_static("compile_sources", [](
const std::vector<std::string>& source,
const std::vector<std::string>& files,
const std::string& entryPoint,
const std::vector<std::pair<std::string,std::string>>& definitions) {
std::vector<uint32_t> spirv = kp::Shader::compile_sources(source, files, entryPoint, definitions);
std::vector<uint32_t> spirv = kp::Shader::compileSources(source, files, entryPoint, definitions);
return py::bytes((const char*)spirv.data(), spirv.size() * sizeof(uint32_t));
},
"Compiles sources provided with file names and returns the value in bytes",
py::arg("sources"), py::arg("files") = std::vector<std::string>(), py::arg("entryPoint") = "main", py::arg("definitions") = std::vector<std::pair<std::string,std::string>>() );
DOC(kp, Shader, compileSources),
py::arg("sources"),
py::arg("files") = std::vector<std::string>(),
py::arg("entryPoint") = "main",
py::arg("definitions") = std::vector<std::pair<std::string,std::string>>() );
#endif // KOMPUTE_DISABLE_SHADER_UTILS
py::class_<kp::OpBase, std::shared_ptr<kp::OpBase>>(m, "OpBase");
py::class_<kp::OpBase, std::shared_ptr<kp::OpBase>>(m, "OpBase", DOC(kp, OpBase));
py::class_<kp::OpTensorSyncDevice, std::shared_ptr<kp::OpTensorSyncDevice>>(m, "OpTensorSyncDevice", py::base<kp::OpBase>())
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>());
py::class_<kp::OpTensorSyncDevice, std::shared_ptr<kp::OpTensorSyncDevice>>(
m, "OpTensorSyncDevice", py::base<kp::OpBase>(), DOC(kp, OpTensorSyncDevice))
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>(), DOC(kp, OpTensorSyncDevice, OpTensorSyncDevice));
py::class_<kp::OpTensorSyncLocal, std::shared_ptr<kp::OpTensorSyncLocal>>(m, "OpTensorSyncLocal", py::base<kp::OpBase>())
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>());
py::class_<kp::OpTensorSyncLocal, std::shared_ptr<kp::OpTensorSyncLocal>>(
m, "OpTensorSyncLocal", py::base<kp::OpBase>(), DOC(kp, OpTensorSyncLocal))
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>(), DOC(kp, OpTensorSyncLocal, OpTensorSyncLocal));
py::class_<kp::OpTensorCopy, std::shared_ptr<kp::OpTensorCopy>>(m, "OpTensorCopy", py::base<kp::OpBase>())
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>());
py::class_<kp::OpTensorCopy, std::shared_ptr<kp::OpTensorCopy>>(
m, "OpTensorCopy", py::base<kp::OpBase>(), DOC(kp, OpTensorCopy))
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>(), DOC(kp, OpTensorCopy, OpTensorCopy));
py::class_<kp::OpAlgoDispatch, std::shared_ptr<kp::OpAlgoDispatch>>(m, "OpAlgoDispatch", py::base<kp::OpBase>())
py::class_<kp::OpAlgoDispatch, std::shared_ptr<kp::OpAlgoDispatch>>(
m, "OpAlgoDispatch", py::base<kp::OpBase>(), DOC(kp, OpAlgoDispatch))
.def(py::init<const std::shared_ptr<kp::Algorithm>&,const kp::Constants&>(),
DOC(kp, OpAlgoDispatch, OpAlgoDispatch),
py::arg("algorithm"), py::arg("push_consts") = kp::Constants());
py::class_<kp::OpMult, std::shared_ptr<kp::OpMult>>(m, "OpMult", py::base<kp::OpBase>())
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&,const std::shared_ptr<kp::Algorithm>&>());
py::class_<kp::OpMult, std::shared_ptr<kp::OpMult>>(
m, "OpMult", py::base<kp::OpBase>(), DOC(kp, OpMult))
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&,const std::shared_ptr<kp::Algorithm>&>(),
DOC(kp, OpMult, OpMult));
py::class_<kp::Algorithm, std::shared_ptr<kp::Algorithm>>(m, "Algorithm")
.def("get_tensors", &kp::Algorithm::getTensors)
.def("destroy", &kp::Algorithm::destroy)
.def("get_spec_consts", &kp::Algorithm::getSpecializationConstants)
.def("is_init", &kp::Algorithm::isInit);
py::class_<kp::Algorithm, std::shared_ptr<kp::Algorithm>>(m, "Algorithm", DOC(kp, Algorithm, Algorithm))
.def("get_tensors", &kp::Algorithm::getTensors, DOC(kp, Algorithm, getTensors))
.def("destroy", &kp::Algorithm::destroy, DOC(kp, Algorithm, destroy))
.def("get_spec_consts", &kp::Algorithm::getSpecializationConstants, DOC(kp, Algorithm, getSpecializationConstants))
.def("is_init", &kp::Algorithm::isInit, DOC(kp, Algorithm, isInit));
py::class_<kp::Tensor, std::shared_ptr<kp::Tensor>>(m, "Tensor", DOC(kp, Tensor))
.def("data", [](kp::Tensor& self) {
return py::array(self.data().size(), self.data().data());
}, "Returns stored data as a new numpy array.")
.def("__getitem__", [](kp::Tensor &self, size_t index) -> float { return self.data()[index]; },
"When only an index is necessary")
.def("__setitem__", [](kp::Tensor &self, size_t index, float value) {
self.data()[index] = value; })
.def("set_data", [np](kp::Tensor &self, const py::array_t<float> data){
const py::array_t<float> flatdata = np.attr("ravel")(data);
const py::buffer_info info = flatdata.request();
const float* ptr = (float*) info.ptr;
self.setData(std::vector<float>(ptr, ptr+flatdata.size()));
}, "Overrides the data in the local Tensor memory.")
.def("__iter__", [](kp::Tensor &self) {
return py::make_iterator(self.data().begin(), self.data().end());
}, py::keep_alive<0, 1>(), // Required to keep alive iterator while exists
"Iterator to enable looping within data structure as required.")
.def("__contains__", [](kp::Tensor &self, float v) {
for (size_t i = 0; i < self.data().size(); ++i) {
if (v == self.data()[i]) {
return true;
}
}
return false;
})
.def("__reversed__", [](kp::Tensor &self) {
size_t size = self.data().size();
std::vector<float> reversed(size);
for (size_t i = 0; i < size; i++) {
reversed[size - i - 1] = self.data()[i];
// Non-owning container exposing the underlying pointer
py::str dummyDataOwner; // Explicitly request data to not be owned by np
switch (self.dataType()) {
case kp::Tensor::TensorDataTypes::eFloat:
return py::array(self.size(), self.data<float>(), dummyDataOwner);
case kp::Tensor::TensorDataTypes::eUnsignedInt:
return py::array(self.size(), self.data<uint32_t>(), dummyDataOwner);
case kp::Tensor::TensorDataTypes::eInt:
return py::array(self.size(), self.data<int32_t>(), dummyDataOwner);
case kp::Tensor::TensorDataTypes::eDouble:
return py::array(self.size(), self.data<double>(), dummyDataOwner);
case kp::Tensor::TensorDataTypes::eBool:
return py::array(self.size(), self.data<bool>(), dummyDataOwner);
default:
throw std::runtime_error("Kompute Python data type not supported");
}
return reversed;
})
.def("size", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.")
.def("__len__", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.")
.def("tensor_type", &kp::Tensor::tensorType, "Retreves the memory type of the tensor.")
.def("is_init", &kp::Tensor::isInit, "Checks whether the tensor GPU memory has been initialised.")
.def("destroy", &kp::Tensor::destroy, "Destroy tensor GPU resources.");
}, DOC(kp, Tensor, data))
.def("size", &kp::Tensor::size, DOC(kp, Tensor, size))
.def("__len__", &kp::Tensor::size, DOC(kp, Tensor, size))
.def("tensor_type", &kp::Tensor::tensorType, DOC(kp, Tensor, tensorType))
.def("data_type", &kp::Tensor::dataType, DOC(kp, Tensor, dataType))
.def("is_init", &kp::Tensor::isInit, DOC(kp, Tensor, isInit))
.def("destroy", &kp::Tensor::destroy, DOC(kp, Tensor, destroy));
py::class_<kp::Sequence, std::shared_ptr<kp::Sequence>>(m, "Sequence")
.def("record", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.record(op); })
.def("eval", [](kp::Sequence& self) { return self.eval(); })
.def("eval", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.eval(op); })
.def("eval_async", [](kp::Sequence& self) { return self.eval(); })
.def("eval_async", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.evalAsync(op); })
.def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); })
.def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); })
.def("is_recording", &kp::Sequence::isRecording)
.def("is_running", &kp::Sequence::isRunning)
.def("is_init", &kp::Sequence::isInit)
.def("get_timestamps", &kp::Sequence::getTimestamps)
.def("clear", &kp::Sequence::clear)
.def("destroy", &kp::Sequence::destroy);
.def("record", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.record(op); },
DOC(kp, Sequence, record))
.def("eval", [](kp::Sequence& self) { return self.eval(); },
DOC(kp, Sequence, eval))
.def("eval", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.eval(op); },
DOC(kp, Sequence, eval_2))
// The no-argument eval_async overload dispatches to evalAsync() so that it
// matches its name and the overload taking an op (it previously called the
// eval() entry point and carried the evalAwait docstring).
// NOTE(review): assumes kp::Sequence exposes a no-argument evalAsync() - confirm.
.def("eval_async", [](kp::Sequence& self) { return self.evalAsync(); },
DOC(kp, Sequence, evalAsync))
.def("eval_async", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.evalAsync(op); },
DOC(kp, Sequence, evalAsync))
.def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); },
DOC(kp, Sequence, evalAwait))
.def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); },
DOC(kp, Sequence, evalAwait))
.def("is_recording", &kp::Sequence::isRecording,
DOC(kp, Sequence, isRecording))
.def("is_running", &kp::Sequence::isRunning,
DOC(kp, Sequence, isRunning))
.def("is_init", &kp::Sequence::isInit,
DOC(kp, Sequence, isInit))
.def("clear", &kp::Sequence::clear,
DOC(kp, Sequence, clear))
.def("rerecord", &kp::Sequence::rerecord,
DOC(kp, Sequence, rerecord))
.def("get_timestamps", &kp::Sequence::getTimestamps,
DOC(kp, Sequence, getTimestamps))
.def("destroy", &kp::Sequence::destroy,
DOC(kp, Sequence, destroy));
py::class_<kp::Manager, std::shared_ptr<kp::Manager>>(m, "Manager")
.def(py::init())
.def(py::init<uint32_t>())
py::class_<kp::Manager, std::shared_ptr<kp::Manager>>(m, "Manager", DOC(kp, Manager))
.def(py::init(), DOC(kp, Manager, Manager))
.def(py::init<uint32_t>(), DOC(kp, Manager, Manager_2))
.def(py::init<uint32_t,const std::vector<uint32_t>&,const std::vector<std::string>&>(),
DOC(kp, Manager, Manager_2),
py::arg("device") = 0,
py::arg("family_queue_indices") = std::vector<uint32_t>(),
py::arg("desired_extensions") = std::vector<std::string>())
.def("sequence", &kp::Manager::sequence, py::arg("queue_index") = 0, py::arg("total_timestamps") = 0)
.def("sequence", &kp::Manager::sequence, DOC(kp, Manager, sequence),
py::arg("queue_index") = 0, py::arg("total_timestamps") = 0)
.def("tensor", [np](kp::Manager& self,
const py::array_t<float> data,
const py::array_t<float>& data,
kp::Tensor::TensorTypes tensor_type) {
const py::array_t<float> flatdata = np.attr("ravel")(data);
const py::array_t<float>& flatdata = np.attr("ravel")(data);
const py::buffer_info info = flatdata.request();
const float* ptr = (float*) info.ptr;
return self.tensor(std::vector<float>(ptr, ptr+flatdata.size()), tensor_type);
KP_LOG_DEBUG("Kompute Python Manager tensor() creating tensor float with data size {}", flatdata.size());
return self.tensor(
info.ptr,
flatdata.size(),
sizeof(float),
kp::Tensor::TensorDataTypes::eFloat,
tensor_type);
},
"Tensor initialisation function with data and tensor type",
DOC(kp, Manager, tensor),
py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice)
// Creates a tensor from an arbitrary numpy array, selecting the Kompute data
// type from the array's dtype. Supported dtypes: float32, uint32, int32,
// float64 and bool; anything else raises a runtime error.
.def("tensor_t", [np](kp::Manager& self,
                      const py::array& data,
                      kp::Tensor::TensorTypes tensor_type) {
        // TODO: Support strides in numpy format
        const py::array flatdata = np.attr("ravel")(data);
        const py::buffer_info info = flatdata.request();
        const py::dtype dataDtype = flatdata.dtype();
        KP_LOG_DEBUG("Kompute Python Manager creating tensor_T with data size {} dtype {}",
                     flatdata.size(), std::string(py::str(dataDtype)));
        // Compare against the concrete C++ types: std::float_t / std::double_t
        // are only aliases of float / double when FLT_EVAL_METHOD is 0, so
        // using them could mismatch the sizeof(float) / sizeof(double) passed on.
        if (dataDtype == py::dtype::of<float>()) {
            return self.tensor(
              info.ptr, flatdata.size(), sizeof(float), kp::Tensor::TensorDataTypes::eFloat, tensor_type);
        } else if (dataDtype == py::dtype::of<std::uint32_t>()) {
            return self.tensor(
              info.ptr, flatdata.size(), sizeof(uint32_t), kp::Tensor::TensorDataTypes::eUnsignedInt, tensor_type);
        } else if (dataDtype == py::dtype::of<std::int32_t>()) {
            return self.tensor(
              info.ptr, flatdata.size(), sizeof(int32_t), kp::Tensor::TensorDataTypes::eInt, tensor_type);
        } else if (dataDtype == py::dtype::of<double>()) {
            return self.tensor(
              info.ptr, flatdata.size(), sizeof(double), kp::Tensor::TensorDataTypes::eDouble, tensor_type);
        } else if (dataDtype == py::dtype::of<bool>()) {
            return self.tensor(
              info.ptr, flatdata.size(), sizeof(bool), kp::Tensor::TensorDataTypes::eBool, tensor_type);
        } else {
            throw std::runtime_error("Kompute Python no valid dtype supported");
        }
    },
    DOC(kp, Manager, tensorT),
    py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice)
.def("algorithm", [](kp::Manager& self,
const std::vector<std::shared_ptr<kp::Tensor>>& tensors,
@ -163,8 +212,12 @@ PYBIND11_MODULE(kp, m) {
std::vector<uint32_t> spirvVec((uint32_t*)data, (uint32_t*)(data + length));
return self.algorithm(tensors, spirvVec, workgroup, spec_consts, push_consts);
},
"Algorithm initialisation function",
py::arg("tensors"), py::arg("spirv"), py::arg("workgroup") = kp::Workgroup(), py::arg("spec_consts") = kp::Constants(), py::arg("push_consts") = kp::Constants());
DOC(kp, Manager, algorithm),
py::arg("tensors"),
py::arg("spirv"),
py::arg("workgroup") = kp::Workgroup(),
py::arg("spec_consts") = kp::Constants(),
py::arg("push_consts") = kp::Constants());
#ifdef VERSION_INFO
m.attr("__version__") = VERSION_INFO;

View file

@ -9,9 +9,9 @@ def test_array_multiplication():
mgr = kp.Manager()
# 2. Create Kompute Tensors to hold data
tensor_in_a = mgr.tensor([2, 2, 2])
tensor_in_b = mgr.tensor([1, 2, 3])
tensor_out = mgr.tensor([0, 0, 0])
tensor_in_a = mgr.tensor(np.array([2, 2, 2]))
tensor_in_b = mgr.tensor(np.array([1, 2, 3]))
tensor_out = mgr.tensor(np.array([0, 0, 0]))
params = [tensor_in_a, tensor_in_b, tensor_out]

View file

@ -9,35 +9,15 @@ DIRNAME = os.path.dirname(os.path.abspath(__file__))
kp_log = logging.getLogger("kp")
# TODO: Add example with file
#def test_opalgobase_file():
# """
# Test basic OpMult operation
# """
#
# tensor_in_a = kp.Tensor([2, 2, 2])
# tensor_in_b = kp.Tensor([1, 2, 3])
# tensor_out = kp.Tensor([0, 0, 0])
#
# mgr = kp.Manager()
# mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
#
# shader_path = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp.spv")
#
# mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shader_path)
#
# mgr.eval_tensor_sync_local_def([tensor_out])
#
# assert tensor_out.data() == [2.0, 4.0, 6.0]
def test_end_to_end():
mgr = kp.Manager()
tensor_in_a = mgr.tensor([2, 2, 2])
tensor_in_b = mgr.tensor([1, 2, 3])
tensor_out_a = mgr.tensor([0, 0, 0])
tensor_out_b = mgr.tensor([0, 0, 0])
# Explicit type constructor supports uint32, int32, double, float and bool
tensor_out_a = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32))
tensor_out_b = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32))
params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b]
@ -49,8 +29,8 @@ def test_end_to_end():
// The input tensors bind index is relative to index in parameter passed
layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
// Kompute supports push constants updated on dispatch
layout(push_constant) uniform PushConstants {
@ -62,8 +42,8 @@ def test_end_to_end():
void main() {
uint index = gl_GlobalInvocationID.x;
out_a[index] += in_a[index] * in_b[index];
out_b[index] += const_one * push_const.val;
out_a[index] += uint( in_a[index] * in_b[index] );
out_b[index] += uint( const_one * push_const.val );
}
"""

View file

@ -1,4 +1,5 @@
import pyshader as ps
import numpy as np
import kp
def test_logistic_regression():
@ -46,21 +47,21 @@ def test_logistic_regression():
mgr = kp.Manager(0)
# First we create input and output tensors for shader
tensor_x_i = mgr.tensor([0.0, 1.0, 1.0, 1.0, 1.0])
tensor_x_j = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0])
tensor_x_i = mgr.tensor(np.array([0.0, 1.0, 1.0, 1.0, 1.0]))
tensor_x_j = mgr.tensor(np.array([0.0, 0.0, 0.0, 1.0, 1.0]))
tensor_y = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0])
tensor_y = mgr.tensor(np.array([0.0, 0.0, 0.0, 1.0, 1.0]))
tensor_w_in = mgr.tensor([0.001, 0.001])
tensor_w_out_i = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
tensor_w_out_j = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
tensor_w_in = mgr.tensor(np.array([0.001, 0.001]))
tensor_w_out_i = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
tensor_w_out_j = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
tensor_b_in = mgr.tensor([0.0])
tensor_b_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
tensor_b_in = mgr.tensor(np.array([0.0]))
tensor_b_out = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
tensor_l_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
tensor_l_out = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
tensor_m = mgr.tensor([ tensor_y.size() ])
tensor_m = mgr.tensor(np.array([ tensor_y.size() ]))
# We store them in an array for easier interaction
params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
@ -91,9 +92,9 @@ def test_logistic_regression():
# Calculate the parameters based on the respective derivatives calculated
for j_iter in range(tensor_b_out.size()):
tensor_w_in[0] -= learning_rate * tensor_w_out_i.data()[j_iter]
tensor_w_in[1] -= learning_rate * tensor_w_out_j.data()[j_iter]
tensor_b_in[0] -= learning_rate * tensor_b_out.data()[j_iter]
tensor_w_in.data()[0] -= learning_rate * tensor_w_out_i.data()[j_iter]
tensor_w_in.data()[1] -= learning_rate * tensor_w_out_j.data()[j_iter]
tensor_b_in.data()[0] -= learning_rate * tensor_b_out.data()[j_iter]
assert tensor_w_in.data()[0] < 0.01
assert tensor_w_in.data()[0] > 0.0

View file

@ -0,0 +1,206 @@
import pyshader as ps
import os
import pytest
import kp
import numpy as np
def test_type_float():
    """Multiply two float32 tensors element-wise through a compute shader
    and check the result against the numpy product."""
    glsl_source = """
#version 450
layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];};
layout(set = 0, binding = 1) buffer tensorRhs {float valuesRhs[];};
layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];};
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
void main()
{
uint index = gl_GlobalInvocationID.x;
valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
}
"""
    compiled = kp.Shader.compile_source(glsl_source)

    lhs = np.array([123., 153., 231.], dtype=np.float32)
    rhs = np.array([9482, 1208, 1238], dtype=np.float32)
    zeros = np.array([0, 0, 0], dtype=np.float32)

    manager = kp.Manager()

    # The default tensor() constructor is used here (float data)
    tensor_lhs = manager.tensor(lhs)
    tensor_rhs = manager.tensor(rhs)
    tensor_result = manager.tensor(zeros)

    bindings = [tensor_lhs, tensor_rhs, tensor_result]

    # Upload inputs, run the shader, then bring the output back to the host
    sequence = manager.sequence()
    sequence.record(kp.OpTensorSyncDevice(bindings))
    sequence.record(kp.OpAlgoDispatch(manager.algorithm(bindings, compiled)))
    sequence.record(kp.OpTensorSyncLocal([tensor_result]))
    sequence.eval()

    expected = lhs * rhs
    assert np.all(tensor_result.data() == expected)
def test_type_float_double_incorrect():
    """Feed a uint32 tensor into a shader that declares float buffers and
    check that no element of the output equals the numpy product."""
    glsl_source = """
#version 450
layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];};
layout(set = 0, binding = 1) buffer tensorRhs {float valuesRhs[];};
layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];};
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
void main()
{
uint index = gl_GlobalInvocationID.x;
valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
}
"""
    compiled = kp.Shader.compile_source(glsl_source)

    lhs = np.array([123., 153., 231.], dtype=np.float32)
    # Deliberately uint32 while the shader buffer is declared as float
    rhs = np.array([9482, 1208, 1238], dtype=np.uint32)
    zeros = np.array([0, 0, 0], dtype=np.float32)

    manager = kp.Manager()

    tensor_lhs = manager.tensor_t(lhs)
    tensor_rhs = manager.tensor_t(rhs)
    tensor_result = manager.tensor_t(zeros)

    bindings = [tensor_lhs, tensor_rhs, tensor_result]

    sequence = manager.sequence()
    sequence.record(kp.OpTensorSyncDevice(bindings))
    sequence.record(kp.OpAlgoDispatch(manager.algorithm(bindings, compiled)))
    sequence.record(kp.OpTensorSyncLocal([tensor_result]))
    sequence.eval()

    expected = lhs * rhs
    assert np.all(tensor_result.data() != expected)
# Default to "" so the skip condition can be evaluated when VK_ICD_FILENAMES
# is not set (os.environ.get would otherwise return None and `in` would raise
# TypeError while the module is being collected).
@pytest.mark.skipif("swiftshader" in os.environ.get("VK_ICD_FILENAMES", ""),
                    reason="Swiftshader doesn't support double")
def test_type_double():
    """Multiply two float64 tensors element-wise through a compute shader
    and check the result against the numpy product."""

    shader = """
#version 450
layout(set = 0, binding = 0) buffer tensorLhs { double valuesLhs[]; };
layout(set = 0, binding = 1) buffer tensorRhs { double valuesRhs[]; };
layout(set = 0, binding = 2) buffer tensorOutput { double valuesOutput[]; };
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
void main()
{
uint index = gl_GlobalInvocationID.x;
valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
}
"""

    spirv = kp.Shader.compile_source(shader)

    arr_in_a = np.array([123., 153., 231.], dtype=np.float64)
    arr_in_b = np.array([9482, 1208, 1238], dtype=np.float64)
    arr_out = np.array([0, 0, 0], dtype=np.float64)

    mgr = kp.Manager()

    # tensor_t picks the tensor data type from the numpy dtype
    tensor_in_a = mgr.tensor_t(arr_in_a)
    tensor_in_b = mgr.tensor_t(arr_in_b)
    tensor_out = mgr.tensor_t(arr_out)

    params = [tensor_in_a, tensor_in_b, tensor_out]

    # Upload inputs, run the shader, then bring the output back to the host
    (mgr.sequence()
        .record(kp.OpTensorSyncDevice(params))
        .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv)))
        .record(kp.OpTensorSyncLocal([tensor_out]))
        .eval())

    print(f"Dtype value {tensor_out.data().dtype}")

    assert np.all(tensor_out.data() == arr_in_a * arr_in_b)
def test_type_int():
    """Multiply two int32 tensors element-wise through a compute shader
    and check the result against the numpy product."""
    glsl_source = """
#version 450
layout(set = 0, binding = 0) buffer tensorLhs { int valuesLhs[]; };
layout(set = 0, binding = 1) buffer tensorRhs { int valuesRhs[]; };
layout(set = 0, binding = 2) buffer tensorOutput { int valuesOutput[]; };
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
void main()
{
uint index = gl_GlobalInvocationID.x;
valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
}
"""
    compiled = kp.Shader.compile_source(glsl_source)

    lhs = np.array([123, 153, 231], dtype=np.int32)
    rhs = np.array([9482, 1208, 1238], dtype=np.int32)
    zeros = np.array([0, 0, 0], dtype=np.int32)

    manager = kp.Manager()

    tensor_lhs = manager.tensor_t(lhs)
    tensor_rhs = manager.tensor_t(rhs)
    tensor_result = manager.tensor_t(zeros)

    bindings = [tensor_lhs, tensor_rhs, tensor_result]

    sequence = manager.sequence()
    sequence.record(kp.OpTensorSyncDevice(bindings))
    sequence.record(kp.OpAlgoDispatch(manager.algorithm(bindings, compiled)))
    sequence.record(kp.OpTensorSyncLocal([tensor_result]))
    sequence.eval()

    print(f"Dtype value {tensor_result.data().dtype}")

    expected = lhs * rhs
    assert np.all(tensor_result.data() == expected)
def test_type_unsigned_int():
    """Multiply two uint32 tensors element-wise through a compute shader
    and check the result against the numpy product."""
    glsl_source = """
#version 450
layout(set = 0, binding = 0) buffer tensorLhs { uint valuesLhs[]; };
layout(set = 0, binding = 1) buffer tensorRhs { uint valuesRhs[]; };
layout(set = 0, binding = 2) buffer tensorOutput { uint valuesOutput[]; };
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
void main()
{
uint index = gl_GlobalInvocationID.x;
valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
}
"""
    compiled = kp.Shader.compile_source(glsl_source)

    lhs = np.array([123, 153, 231], dtype=np.uint32)
    rhs = np.array([9482, 1208, 1238], dtype=np.uint32)
    zeros = np.array([0, 0, 0], dtype=np.uint32)

    manager = kp.Manager()

    tensor_lhs = manager.tensor_t(lhs)
    tensor_rhs = manager.tensor_t(rhs)
    tensor_result = manager.tensor_t(zeros)

    bindings = [tensor_lhs, tensor_rhs, tensor_result]

    sequence = manager.sequence()
    sequence.record(kp.OpTensorSyncDevice(bindings))
    sequence.record(kp.OpAlgoDispatch(manager.algorithm(bindings, compiled)))
    sequence.record(kp.OpTensorSyncLocal([tensor_result]))
    sequence.eval()

    print(f"Dtype value {tensor_result.data().dtype}")

    expected = lhs * rhs
    assert np.all(tensor_result.data() == expected)

View file

@ -57,7 +57,7 @@ class CMakeBuild(build_ext):
else:
cmake_args += ['-DKOMPUTE_EXTRA_CXX_FLAGS="-fPIC"']
cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
build_args += ['--', '-j2']
build_args += ['--', '-j']
env = os.environ.copy()
env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''),

View file

@ -762,7 +762,7 @@ class Shader
* GLSL compiler
* @return The compiled SPIR-V binary in unsigned int32 format
*/
static std::vector<uint32_t> compile_sources(
static std::vector<uint32_t> compileSources(
const std::vector<std::string>& sources,
const std::vector<std::string>& files = {},
const std::string& entryPoint = "main",
@ -783,7 +783,7 @@ class Shader
* GLSL compiler
* @return The compiled SPIR-V binary in unsigned int32 format
*/
static std::vector<uint32_t> compile_source(
static std::vector<uint32_t> compileSource(
const std::string& source,
const std::string& entryPoint = "main",
std::vector<std::pair<std::string, std::string>> definitions = {},
@ -818,6 +818,14 @@ class Tensor
eHost = 1, ///< Type is host memory, source and destination
eStorage = 2, ///< Type is Device memory (only)
};
enum class TensorDataTypes
{
eBool = 0,
eInt = 1,
eUnsignedInt = 2,
eFloat = 3,
eDouble = 4,
};
/**
* Constructor with data provided which would be used to create the
@ -831,14 +839,17 @@ class Tensor
*/
Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
const std::vector<float>& data,
void* data,
uint32_t elementTotalCount,
uint32_t elementMemorySize,
const TensorDataTypes& dataType,
const TensorTypes& tensorType = TensorTypes::eDevice);
/**
* Destructor which is in charge of freeing vulkan resources unless they
* have been provided externally.
*/
~Tensor();
virtual ~Tensor();
/**
* Function to trigger reinitialisation of the tensor buffer and memory with
@ -847,8 +858,9 @@ class Tensor
* @param data Vector of data to use to initialise vector from
* @param tensorType The type to use for the tensor
*/
void rebuild(const std::vector<float>& data,
TensorTypes tensorType = TensorTypes::eDevice);
void rebuild(void* data,
uint32_t elementTotalCount,
uint32_t elementMemorySize);
/**
* Destroys and frees the GPU resources which include the buffer and memory.
@ -862,32 +874,6 @@ class Tensor
*/
bool isInit();
/**
* Returns the vector of data currently contained by the Tensor. It is
* important to ensure that there is no out-of-sync data with the GPU
* memory.
*
* @return Reference to vector of elements representing the data in the
* tensor.
*/
std::vector<float>& data();
/**
* Overrides the subscript operator to expose the underlying data's
* subscript operator which in this case would be its underlying
* vector's.
*
* @param i The index where the element will be returned from.
* @return Returns the element in the position requested.
*/
float& operator[](int index);
/**
* Returns the size/magnitude of the Tensor, which will be the total number
* of elements across all dimensions
*
* @return Unsigned integer representing the total number of elements
*/
uint32_t size();
/**
* Retrieve the tensor type of the Tensor
*
@ -895,12 +881,6 @@ class Tensor
*/
TensorTypes tensorType();
/**
* Sets / resets the vector data of the tensor. This function does not
* perform any copies into GPU memory and is only performed on the host.
*/
void setData(const std::vector<float>& data);
/**
* Records a copy from the memory of the tensor provided to the current
* tensor. This is intended to pass memory into a processing, to perform
@ -963,18 +943,118 @@ class Tensor
* @return Descriptor buffer info with own buffer
*/
vk::DescriptorBufferInfo constructDescriptorBufferInfo();
/**
* Maps data from the Host Visible GPU memory into the data vector. It
* requires the Tensor to be of staging type for it to work.
* Returns the size/magnitude of the Tensor, which will be the total number
* of elements across all dimensions
*
* @return Unsigned integer representing the total number of elements
*/
void mapDataFromHostMemory();
// TODO: move to cpp
// Returns mSize, the element count of the tensor (memorySize() gives bytes).
uint32_t size() {
return this->mSize;
}
// TODO: move to cpp
// Returns mDataTypeMemorySize, the per-element size that memorySize()
// multiplies by the element count.
uint32_t dataTypeMemorySize() {
return this->mDataTypeMemorySize;
}
// TODO: move to cpp
// Returns the total size of the tensor data: element count multiplied by
// the per-element size.
// NOTE(review): both operands and the result are uint32_t, so the product
// wraps for tensors whose total size does not fit in 32 bits.
uint32_t memorySize() {
return this->mSize * this->mDataTypeMemorySize;
}
/**
* Maps data from the data vector into the Host Visible GPU memory. It
* requires the tensor to be of staging type for it to work.
* Retrieve the underlying data type of the Tensor
*
* @return Data type of tensor of type kp::Tensor::TensorDataTypes
*/
void mapDataIntoHostMemory();
// Returns the mDataType member, i.e. which TensorDataTypes value this
// tensor was tagged with.
TensorDataTypes dataType() {
return this->mDataType;
}
// Returns the untyped mRawData pointer as-is; no copy is made.
void* rawData() {
return this->mRawData;
}
// TODO: move to cpp
// Returns mRawData cast to T*. The cast is unchecked: T is not compared
// against dataType(), so the caller must request the matching type.
template <typename T>
T* data() {
return (T*)this->mRawData;
}
// Returns a new std::vector<T> holding a copy of size() elements read from
// mRawData interpreted as T. As with data<T>(), T is not checked against
// dataType().
template <typename T>
std::vector<T> vector() {
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
}
/**
* Overwrites the tensor contents by copying memorySize() bytes from the
* provided pointer into mRawData. No GPU transfer command is recorded or
* submitted by this function.
*
* @param data Source buffer; no length is passed, so it must provide at
* least memorySize() readable bytes.
*/
void setRawData(const void* data)
{
// Byte-wise copy; the destination size is fixed by the tensor itself
memcpy(this->mRawData, data, this->memorySize());
}
protected:
// -------------- ALWAYS OWNED RESOURCES
TensorTypes mTensorType;
TensorDataTypes mDataType;
uint32_t mSize;
uint32_t mDataTypeMemorySize;
void* mRawData;
private:
void mapRawData() {
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
if (this->mTensorType == TensorTypes::eHost) {
hostVisibleMemory = this->mPrimaryMemory;
} else if (this->mTensorType == TensorTypes::eDevice) {
hostVisibleMemory = this->mStagingMemory;
} else {
KP_LOG_WARN(
"Kompute Tensor mapping data not supported on storage tensor");
return;
}
vk::DeviceSize bufferSize = this->memorySize();
// Given we request coherent host memory we don't need to invalidate / flush
this->mRawData = this->mDevice->mapMemory(
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
}
void unmapRawData() {
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
if (this->mTensorType == TensorTypes::eHost) {
hostVisibleMemory = this->mPrimaryMemory;
} else if (this->mTensorType == TensorTypes::eDevice) {
hostVisibleMemory = this->mStagingMemory;
} else {
KP_LOG_WARN(
"Kompute Tensor mapping data not supported on storage tensor");
return;
}
vk::DeviceSize bufferSize = this->memorySize();
vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
this->mDevice->unmapMemory(*hostVisibleMemory);
}
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
std::shared_ptr<vk::Device> mDevice;
@ -989,11 +1069,6 @@ class Tensor
std::shared_ptr<vk::DeviceMemory> mStagingMemory;
bool mFreeStagingMemory = false;
// -------------- ALWAYS OWNED RESOURCES
std::vector<float> mData;
TensorTypes mTensorType = TensorTypes::eDevice;
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
void createBuffer(std::shared_ptr<vk::Buffer> buffer,
vk::BufferUsageFlags bufferUsageFlags);
@ -1012,7 +1087,60 @@ class Tensor
vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
vk::BufferUsageFlags getStagingBufferUsageFlags();
vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
uint64_t memorySize();
};
// TODO: Limit T to be only float, bool, double, etc
// Typed wrapper over Tensor: it forwards a std::vector<T> to the untyped
// Tensor constructor and exposes the raw buffer as T.
template <typename T>
class TensorT: public Tensor
{
public:
// Builds the base Tensor from the vector's buffer, its element count and
// sizeof(T).
// NOTE(review): this->dataType() is invoked inside the base-class
// mem-initializer, i.e. before the Tensor base has been constructed;
// the C++ standard treats member calls at that point as undefined
// behaviour - confirm and consider a static/free mapping from T instead.
TensorT(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
const std::vector<T>& data,
const TensorTypes& tensorType = TensorTypes::eDevice)
: Tensor(physicalDevice,
device,
(void*)data.data(),
data.size(),
sizeof(T),
this->dataType(),
tensorType)
{
KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size());
}
// Only logs; this destructor body releases nothing itself.
~TensorT() {
KP_LOG_DEBUG("Kompute TensorT destructor");
}
// Returns mRawData viewed as T*; no copy is made.
T* data() {
return (T*)this->mRawData;
}
// Returns a new vector holding a copy of size() elements from mRawData.
std::vector<T> vector() {
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
}
// Unchecked element access: index is not validated against size().
T& operator[](int index) {
return *(((T*)this->mRawData) + index);
}
// Copies the vector contents into the tensor buffer. Throws
// std::runtime_error when the element count differs from mSize.
void setData(const std::vector<T>& data) {
KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", data.size());
if (data.size() != this->mSize) {
throw std::runtime_error(
"Kompute TensorT Cannot set data of different sizes");
}
Tensor::setRawData(data.data());
}
// Declared only; no definition is visible in this chunk (presumably
// provided per element type elsewhere - confirm).
TensorDataTypes dataType();
};
} // End namespace kp
@ -1873,7 +2001,7 @@ class Manager
* If zero (default), disables latching of timestamps.
* @returns Shared pointer with initialised sequence
*/
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0, uint32_t nrOfTimestamps = 0);
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0, uint32_t totalTimestamps = 0);
/**
* Create a managed tensor that will be destroyed by this manager
@ -1883,9 +2011,46 @@ class Manager
* @param tensorType The type of tensor to initialize
* @returns Shared pointer with initialised tensor
*/
std::shared_ptr<Tensor> tensor(
template <typename T>
std::shared_ptr<TensorT<T>> tensorT(
  const std::vector<T>& data,
  Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
{
    KP_LOG_DEBUG("Kompute Manager tensor creation triggered");

    // make_shared avoids a naked `new` and allocates the object together
    // with its control block.
    std::shared_ptr<TensorT<T>> tensor = std::make_shared<kp::TensorT<T>>(
      this->mPhysicalDevice, this->mDevice, data, tensorType);

    // Track the tensor only when this manager is configured to manage
    // the resources it creates.
    if (this->mManageResources) {
        this->mManagedTensors.push_back(tensor);
    }

    return tensor;
}
std::shared_ptr<TensorT<float>> tensor(
const std::vector<float>& data,
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice);
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
{
return this->tensorT<float>(data, tensorType);
}
/**
 * Create a tensor from an untyped buffer. All arguments are forwarded
 * unchanged to the kp::Tensor constructor.
 *
 * @param data Pointer to the source elements
 * @param elementTotalCount Number of elements pointed to by data
 * @param elementMemorySize Size of a single element
 * @param dataType Data type tag to store on the tensor
 * @param tensorType The type of tensor to initialize
 * @returns Shared pointer with initialised tensor
 */
std::shared_ptr<Tensor> tensor(
  void* data,
  uint32_t elementTotalCount,
  uint32_t elementMemorySize,
  const Tensor::TensorDataTypes& dataType,
  Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
{
    // make_shared avoids a naked `new` and allocates the object together
    // with its control block.
    std::shared_ptr<Tensor> tensor = std::make_shared<kp::Tensor>(
      this->mPhysicalDevice,
      this->mDevice,
      data,
      elementTotalCount,
      elementMemorySize,
      dataType,
      tensorType);

    // Track the tensor only when this manager is configured to manage
    // the resources it creates.
    if (this->mManageResources) {
        this->mManagedTensors.push_back(tensor);
    }

    return tensor;
}
/**
* Create a managed algorithm that will be destroyed by this manager

View file

@ -395,21 +395,6 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
KP_LOG_DEBUG("Kompute Manager compute queue obtained");
}
std::shared_ptr<Tensor>
Manager::tensor(const std::vector<float>& data, Tensor::TensorTypes tensorType)
{
KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
std::shared_ptr<Tensor> tensor{ new kp::Tensor(
this->mPhysicalDevice, this->mDevice, data, tensorType) };
if (this->mManageResources) {
this->mManagedTensors.push_back(tensor);
}
return tensor;
}
std::shared_ptr<Algorithm>
Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
const std::vector<uint32_t>& spirv,

View file

@ -13,6 +13,20 @@ OpTensorCopy::OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors)
throw std::runtime_error(
"Kompute OpTensorCopy called with less than 2 tensor");
}
kp::Tensor::TensorDataTypes dataType = this->mTensors[0]->dataType();
uint32_t size = this->mTensors[0]->size();
for (const std::shared_ptr<Tensor>& tensor : tensors) {
if (tensor->dataType() != dataType) {
throw std::runtime_error(fmt::format("Attempting to copy tensors of different types from {} to {}",
dataType, tensor->dataType()));
}
if (tensor->size() != size) {
throw std::runtime_error(fmt::format("Attempting to copy tensors of different sizes from {} to {}",
size, tensor->size()));
}
}
}
OpTensorCopy::~OpTensorCopy()
@ -43,9 +57,15 @@ OpTensorCopy::postEval(const vk::CommandBuffer& commandBuffer)
{
KP_LOG_DEBUG("Kompute OpTensorCopy postEval called");
// TODO: Simplify with a copyRawData
uint32_t size = this->mTensors[0]->size();
uint32_t dataTypeMemSize = this->mTensors[0]->dataTypeMemorySize();
uint32_t memSize = size * dataTypeMemSize;
void* data = this->mTensors[0]->rawData();
// Copy the data from the first tensor into all the tensors
for (size_t i = 1; i < this->mTensors.size(); i++) {
this->mTensors[i]->setData(this->mTensors[0]->data());
this->mTensors[i]->setRawData(data);
}
}

View file

@ -41,12 +41,6 @@ OpTensorSyncDevice::preEval(const vk::CommandBuffer& commandBuffer)
{
KP_LOG_DEBUG("Kompute OpTensorSyncDevice preEval called");
// Performing sync of data as eval can be called multiple times with same op
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) {
this->mTensors[i]->mapDataIntoHostMemory();
}
}
}
void

View file

@ -48,11 +48,6 @@ OpTensorSyncLocal::postEval(const vk::CommandBuffer& commandBuffer)
KP_LOG_DEBUG("Kompute OpTensorSyncLocal postEval called");
KP_LOG_DEBUG("Kompute OpTensorSyncLocal mapping data into tensor local");
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) {
this->mTensors[i]->mapDataFromHostMemory();
}
}
}
}

View file

@ -5,7 +5,7 @@
namespace kp {
std::vector<uint32_t>
Shader::compile_sources(
Shader::compileSources(
const std::vector<std::string>& sources,
const std::vector<std::string>& files,
const std::string& entryPoint,
@ -92,13 +92,13 @@ Shader::compile_sources(
}
std::vector<uint32_t>
Shader::compile_source(
Shader::compileSource(
const std::string& source,
const std::string& entryPoint,
std::vector<std::pair<std::string, std::string>> definitions,
const TBuiltInResource& resource)
{
return compile_sources({ source },
return compileSources({ source },
std::vector<std::string>({}),
entryPoint,
definitions,

View file

@ -5,17 +5,22 @@ namespace kp {
Tensor::Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
const std::vector<float>& data,
void* data,
uint32_t elementTotalCount,
uint32_t elementMemorySize,
const TensorDataTypes& dataType,
const TensorTypes& tensorType)
{
KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}",
data.size(),
elementTotalCount,
tensorType);
this->mPhysicalDevice = physicalDevice;
this->mDevice = device;
this->mDataType = dataType;
this->mTensorType = tensorType;
this->rebuild(data, tensorType);
this->rebuild(data, elementTotalCount, elementMemorySize);
}
Tensor::~Tensor()
@ -29,12 +34,14 @@ Tensor::~Tensor()
}
void
Tensor::rebuild(const std::vector<float>& data, TensorTypes tensorType)
Tensor::rebuild(void* data,
uint32_t elementTotalCount,
uint32_t elementMemorySize)
{
KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", data.size());
KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", elementTotalCount);
this->mData = data;
this->mTensorType = tensorType;
this->mSize = elementTotalCount;
this->mDataTypeMemorySize = elementMemorySize;
if (this->mPrimaryBuffer || this->mPrimaryMemory) {
KP_LOG_DEBUG(
@ -43,30 +50,9 @@ Tensor::rebuild(const std::vector<float>& data, TensorTypes tensorType)
}
this->allocateMemoryCreateGPUResources();
}
this->mapRawData();
std::vector<float>&
Tensor::data()
{
return this->mData;
}
float&
Tensor::operator[](int index)
{
return this->mData[index];
}
uint64_t
Tensor::memorySize()
{
return this->size() * sizeof(float);
}
uint32_t
Tensor::size()
{
return static_cast<uint32_t>(this->mData.size());
memcpy(this->mRawData, data, this->memorySize());
}
Tensor::TensorTypes
@ -78,18 +64,12 @@ Tensor::tensorType()
bool
Tensor::isInit()
{
return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory;
return this->mDevice
&& this->mPrimaryBuffer
&& this->mPrimaryMemory
&& this->mRawData;
}
void
Tensor::setData(const std::vector<float>& data)
{
    // The replacement must have the same element count as the stored data.
    const bool sameLength = (data.size() == this->mData.size());
    if (!sameLength) {
        throw std::runtime_error(
          "Kompute Tensor Cannot set data of different sizes");
    }

    // Plain copy-assignment into the member vector.
    this->mData = data;
}
void
Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
@ -195,66 +175,13 @@ Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::DescriptorBufferInfo
Tensor::constructDescriptorBufferInfo()
{
    KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}", this->memorySize());

    // The descriptor spans the primary buffer from offset 0 for memorySize()
    // bytes.
    const vk::DeviceSize offset = 0;
    const vk::DeviceSize range = this->memorySize();
    return vk::DescriptorBufferInfo(*this->mPrimaryBuffer, offset, range);
}
void
Tensor::mapDataFromHostMemory()
{
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
if (this->mTensorType == TensorTypes::eHost) {
hostVisibleMemory = this->mPrimaryMemory;
} else if (this->mTensorType == TensorTypes::eDevice) {
hostVisibleMemory = this->mStagingMemory;
} else {
KP_LOG_WARN(
"Kompute Tensor mapping data not supported on storage tensor");
return;
}
vk::DeviceSize bufferSize = this->memorySize();
void* mapped = this->mDevice->mapMemory(
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange);
memcpy(this->mData.data(), mapped, bufferSize);
this->mDevice->unmapMemory(*hostVisibleMemory);
}
void
Tensor::mapDataIntoHostMemory()
{
KP_LOG_DEBUG("Kompute Tensor local mapping tensor data to host buffer");
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
if (this->mTensorType == TensorTypes::eHost) {
hostVisibleMemory = this->mPrimaryMemory;
} else if (this->mTensorType == TensorTypes::eDevice) {
hostVisibleMemory = this->mStagingMemory;
} else {
KP_LOG_WARN(
"Kompute Tensor mapping data not supported on storage tensor");
return;
}
vk::DeviceSize bufferSize = this->memorySize();
void* mapped = this->mDevice->mapMemory(
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
memcpy(mapped, this->mData.data(), bufferSize);
vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
this->mDevice->unmapMemory(*hostVisibleMemory);
}
vk::BufferUsageFlags
Tensor::getPrimaryBufferUsageFlags()
{
@ -285,7 +212,8 @@ Tensor::getPrimaryMemoryPropertyFlags()
return vk::MemoryPropertyFlagBits::eDeviceLocal;
break;
case TensorTypes::eHost:
return vk::MemoryPropertyFlagBits::eHostVisible;
return vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent;
break;
case TensorTypes::eStorage:
return vk::MemoryPropertyFlagBits::eDeviceLocal;
@ -435,12 +363,20 @@ Tensor::destroy()
{
KP_LOG_DEBUG("Kompute Tensor started destroy()");
// Setting raw data to null regardless whether device is available to invalidate Tensor
this->mRawData = nullptr;
this->mSize = 0;
this->mDataTypeMemorySize = 0;
if (!this->mDevice) {
KP_LOG_WARN(
"Kompute Tensor destructor reached with null Device pointer");
return;
}
// Unmap the current memory data
this->unmapRawData();
if (this->mFreePrimaryBuffer) {
if (!this->mPrimaryBuffer) {
KP_LOG_WARN("Kompose Tensor expected to destroy primary buffer "
@ -504,4 +440,34 @@ Tensor::destroy()
KP_LOG_DEBUG("Kompute Tensor successful destroy()");
}
// Explicit specialisations of TensorT<T>::dataType(): each supported element
// type reports its matching Tensor::TensorDataTypes tag.

template<>
Tensor::TensorDataTypes TensorT<bool>::dataType()
{
    return Tensor::TensorDataTypes::eBool;
}

template<>
Tensor::TensorDataTypes TensorT<int32_t>::dataType()
{
    return Tensor::TensorDataTypes::eInt;
}

template<>
Tensor::TensorDataTypes TensorT<uint32_t>::dataType()
{
    return Tensor::TensorDataTypes::eUnsignedInt;
}

template<>
Tensor::TensorDataTypes TensorT<float>::dataType()
{
    return Tensor::TensorDataTypes::eFloat;
}

template<>
Tensor::TensorDataTypes TensorT<double>::dataType()
{
    return Tensor::TensorDataTypes::eDouble;
}
}

View file

@ -74,9 +74,46 @@ class Manager
* @param tensorType The type of tensor to initialize
* @returns Shared pointer with initialised tensor
*/
std::shared_ptr<Tensor> tensor(
template <typename T>
std::shared_ptr<TensorT<T>> tensorT(
  const std::vector<T>& data,
  Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
{
    KP_LOG_DEBUG("Kompute Manager tensor creation triggered");

    // Build the typed tensor on this manager's physical / logical device.
    std::shared_ptr<TensorT<T>> created(new kp::TensorT<T>(
      this->mPhysicalDevice, this->mDevice, data, tensorType));

    // Register the tensor with the manager only when resource management is on.
    if (this->mManageResources) {
        this->mManagedTensors.push_back(created);
    }

    return created;
}
std::shared_ptr<TensorT<float>> tensor(
const std::vector<float>& data,
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice);
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
{
return this->tensorT<float>(data, tensorType);
}
std::shared_ptr<Tensor> tensor(
  void* data,
  uint32_t elementTotalCount,
  uint32_t elementMemorySize,
  const Tensor::TensorDataTypes& dataType,
  Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
{
    // Untyped creation path: the caller supplies the raw pointer together
    // with the element count, per-element byte size and data type tag.
    std::shared_ptr<Tensor> created(new kp::Tensor(this->mPhysicalDevice,
                                                   this->mDevice,
                                                   data,
                                                   elementTotalCount,
                                                   elementMemorySize,
                                                   dataType,
                                                   tensorType));

    // Register the tensor with the manager only when resource management is on.
    if (this->mManageResources) {
        this->mManagedTensors.push_back(created);
    }

    return created;
}
/**
* Create a managed algorithm that will be destroyed by this manager

View file

@ -39,7 +39,7 @@ class Shader
* GLSL compiler
* @return The compiled SPIR-V binary in unsigned int32 format
*/
static std::vector<uint32_t> compile_sources(
static std::vector<uint32_t> compileSources(
const std::vector<std::string>& sources,
const std::vector<std::string>& files = {},
const std::string& entryPoint = "main",
@ -60,7 +60,7 @@ class Shader
* GLSL compiler
* @return The compiled SPIR-V binary in unsigned int32 format
*/
static std::vector<uint32_t> compile_source(
static std::vector<uint32_t> compileSource(
const std::string& source,
const std::string& entryPoint = "main",
std::vector<std::pair<std::string, std::string>> definitions = {},

View file

@ -27,6 +27,14 @@ class Tensor
eHost = 1, ///< Type is host memory, source and destination
eStorage = 2, ///< Type is Device memory (only)
};
/**
 * Tag identifying the element type stored in a tensor. Each value is the one
 * reported by the corresponding TensorT<T>::dataType() specialisation.
 */
enum class TensorDataTypes
{
eBool = 0, ///< Reported by TensorT<bool>
eInt = 1, ///< Reported by TensorT<int32_t>
eUnsignedInt = 2, ///< Reported by TensorT<uint32_t>
eFloat = 3, ///< Reported by TensorT<float>
eDouble = 4, ///< Reported by TensorT<double>
};
/**
* Constructor with data provided which would be used to create the
@ -40,14 +48,17 @@ class Tensor
*/
Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
const std::vector<float>& data,
void* data,
uint32_t elementTotalCount,
uint32_t elementMemorySize,
const TensorDataTypes& dataType,
const TensorTypes& tensorType = TensorTypes::eDevice);
/**
* Destructor which is in charge of freeing vulkan resources unless they
* have been provided externally.
*/
~Tensor();
virtual ~Tensor();
/**
* Function to trigger reinitialisation of the tensor buffer and memory with
@ -56,8 +67,9 @@ class Tensor
* @param data Vector of data to use to initialise vector from
* @param tensorType The type to use for the tensor
*/
void rebuild(const std::vector<float>& data,
TensorTypes tensorType = TensorTypes::eDevice);
void rebuild(void* data,
uint32_t elementTotalCount,
uint32_t elementMemorySize);
/**
* Destroys and frees the GPU resources which include the buffer and memory.
@ -71,32 +83,6 @@ class Tensor
*/
bool isInit();
/**
* Returns the vector of data currently contained by the Tensor. It is
* important to ensure that there is no out-of-sync data with the GPU
* memory.
*
* @return Reference to vector of elements representing the data in the
* tensor.
*/
std::vector<float>& data();
/**
* Overrides the subscript operator to expose the underlying data's
* subscript operator which in this case would be its underlying
* vector's.
*
* @param i The index where the element will be returned from.
* @return Returns the element in the position requested.
*/
float& operator[](int index);
/**
* Returns the size/magnitude of the Tensor, which will be the total number
* of elements across all dimensions
*
* @return Unsigned integer representing the total number of elements
*/
uint32_t size();
/**
* Retrieve the tensor type of the Tensor
*
@ -104,12 +90,6 @@ class Tensor
*/
TensorTypes tensorType();
/**
* Sets / resets the vector data of the tensor. This function does not
* perform any copies into GPU memory and is only performed on the host.
*/
void setData(const std::vector<float>& data);
/**
* Records a copy from the memory of the tensor provided to the current
* tensor. This is intended to pass memory into a processing, to perform
@ -172,18 +152,118 @@ class Tensor
* @return Descriptor buffer info with own buffer
*/
vk::DescriptorBufferInfo constructDescriptorBufferInfo();
/**
* Maps data from the Host Visible GPU memory into the data vector. It
* requires the Tensor to be of staging type for it to work.
* Returns the size/magnitude of the Tensor, which will be the total number
* of elements across all dimensions
*
* @return Unsigned integer representing the total number of elements
*/
void mapDataFromHostMemory();
// TODO: move to cpp
uint32_t size()
{
    // Element count as recorded in mSize.
    const uint32_t elementCount = this->mSize;
    return elementCount;
}
// TODO: move to cpp
uint32_t dataTypeMemorySize()
{
    // Size in bytes of a single element, as recorded in mDataTypeMemorySize.
    const uint32_t bytesPerElement = this->mDataTypeMemorySize;
    return bytesPerElement;
}
// TODO: move to cpp
uint32_t memorySize()
{
    // Total byte size: element count times bytes per element.
    // NOTE(review): both operands are uint32_t, so the product is computed
    // (and wraps) in 32 bits - confirm this is acceptable for large tensors.
    const uint32_t elementCount = this->mSize;
    const uint32_t bytesPerElement = this->mDataTypeMemorySize;
    return elementCount * bytesPerElement;
}
/**
* Maps data from the data vector into the Host Visible GPU memory. It
* requires the tensor to be of staging type for it to work.
* Retrieve the underlying data type of the Tensor
*
* @return Data type of tensor of type kp::Tensor::TensorDataTypes
*/
void mapDataIntoHostMemory();
TensorDataTypes dataType()
{
    // Data type tag stored at construction time.
    const TensorDataTypes storedType = this->mDataType;
    return storedType;
}
// Returns the untyped raw data pointer held in mRawData.
void* rawData()
{
    void* raw = this->mRawData;
    return raw;
}
// TODO: move to cpp
// Returns the raw data pointer viewed as a pointer to the requested type T.
// No check is made here that T matches the stored data type tag.
template <typename T>
T* data()
{
    T* typedData = static_cast<T*>(this->mRawData);
    return typedData;
}
/**
 * Copies the tensor contents into a freshly built std::vector<T>.
 *
 * The vector is built with the iterator-range constructor, spelled with
 * parentheses on purpose: a braced `{ first, last }` return would select the
 * std::initializer_list<T> constructor whenever a T* converts to T (T = bool),
 * producing two elements instead of a copy of the data.
 *
 * @return Vector holding size() elements read from the raw data pointer
 */
template <typename T>
std::vector<T> vector()
{
    T* first = static_cast<T*>(this->mRawData);
    T* last = first + this->size();
    return std::vector<T>(first, last);
}
/**
* Sets / resets the vector data of the tensor. This function does not
* perform any copies into GPU memory and is only performed on the host.
*/
void setRawData(const void* data)
{
    // Copy exactly memorySize() bytes from the caller's buffer into the
    // location mRawData points at; the source must provide at least that many.
    const auto byteCount = this->memorySize();
    memcpy(this->mRawData, data, byteCount);
}
protected:
// -------------- ALWAYS OWNED RESOURCES
TensorTypes mTensorType;
TensorDataTypes mDataType;
uint32_t mSize;
uint32_t mDataTypeMemorySize;
void* mRawData;
private:
/**
 * Maps the tensor's host-visible memory and stores the mapped pointer in
 * mRawData. Host tensors map their primary memory and device tensors map
 * their staging memory; for any other tensor type a warning is logged and
 * mRawData is left untouched.
 */
void mapRawData()
{
    KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");

    std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;

    if (this->mTensorType == TensorTypes::eHost) {
        hostVisibleMemory = this->mPrimaryMemory;
    } else if (this->mTensorType == TensorTypes::eDevice) {
        hostVisibleMemory = this->mStagingMemory;
    } else {
        KP_LOG_WARN(
          "Kompute Tensor mapping data not supported on storage tensor");
        return;
    }

    vk::DeviceSize bufferSize = this->memorySize();

    // Given we request coherent host memory we don't need to invalidate / flush
    this->mRawData = this->mDevice->mapMemory(
      *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
}
/**
 * Flushes and then unmaps the tensor's host-visible memory. Host tensors use
 * their primary memory and device tensors use their staging memory; for any
 * other tensor type a warning is logged and nothing is unmapped.
 */
void unmapRawData()
{
    // Log text now names the operation actually performed (was a copy of the
    // message used when mapping).
    KP_LOG_DEBUG("Kompute Tensor unmapping data from host buffer");

    std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;

    if (this->mTensorType == TensorTypes::eHost) {
        hostVisibleMemory = this->mPrimaryMemory;
    } else if (this->mTensorType == TensorTypes::eDevice) {
        hostVisibleMemory = this->mStagingMemory;
    } else {
        KP_LOG_WARN(
          "Kompute Tensor mapping data not supported on storage tensor");
        return;
    }

    // The flushed range covers memorySize() bytes from offset 0.
    vk::DeviceSize bufferSize = this->memorySize();
    vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
    this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
    this->mDevice->unmapMemory(*hostVisibleMemory);
}
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
std::shared_ptr<vk::Device> mDevice;
@ -198,11 +278,6 @@ class Tensor
std::shared_ptr<vk::DeviceMemory> mStagingMemory;
bool mFreeStagingMemory = false;
// -------------- ALWAYS OWNED RESOURCES
std::vector<float> mData;
TensorTypes mTensorType = TensorTypes::eDevice;
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
void createBuffer(std::shared_ptr<vk::Buffer> buffer,
vk::BufferUsageFlags bufferUsageFlags);
@ -221,7 +296,60 @@ class Tensor
vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
vk::BufferUsageFlags getStagingBufferUsageFlags();
vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
uint64_t memorySize();
};
// TODO: Limit T to be only float, bool, double, etc
/**
 * Typed wrapper over Tensor: forwards a std::vector<T> to the untyped base
 * constructor (pointer, element count, sizeof(T), data type tag) and exposes
 * the raw data pointer through T-typed accessors.
 */
template <typename T>
class TensorT: public Tensor
{
  public:
    /**
     * Builds the tensor from a vector of T.
     *
     * @param physicalDevice Physical device handle forwarded to Tensor
     * @param device Logical device handle forwarded to Tensor
     * @param data Initial element values
     * @param tensorType Tensor memory type, eDevice by default
     */
    TensorT(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
            std::shared_ptr<vk::Device> device,
            const std::vector<T>& data,
            const TensorTypes& tensorType = TensorTypes::eDevice)
      // NOTE(review): this->dataType() is evaluated inside the base-class
      // initialiser, i.e. before Tensor has been constructed - confirm this
      // is well-defined for the supported toolchains.
      : Tensor(physicalDevice,
               device,
               (void*)data.data(),
               data.size(),
               sizeof(T),
               this->dataType(),
               tensorType)
    {
        KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size());
    }

    ~TensorT()
    {
        KP_LOG_DEBUG("Kompute TensorT destructor");
    }

    /** @return The raw data pointer viewed as a pointer to T */
    T* data()
    {
        return static_cast<T*>(this->mRawData);
    }

    /**
     * Copies the tensor contents into a freshly built std::vector<T>.
     *
     * Uses the iterator-range constructor with parentheses on purpose: a
     * braced `{ first, last }` return would select the
     * std::initializer_list<T> constructor whenever a T* converts to T
     * (T = bool), producing two elements instead of a copy of the data.
     *
     * @return Vector holding size() elements read from the raw data pointer
     */
    std::vector<T> vector()
    {
        T* first = static_cast<T*>(this->mRawData);
        T* last = first + this->size();
        return std::vector<T>(first, last);
    }

    /** Element access by position; no bounds check is made. */
    T& operator[](int index)
    {
        return static_cast<T*>(this->mRawData)[index];
    }

    /**
     * Overwrites the tensor contents with the values of the given vector.
     *
     * @param data Replacement values; must hold exactly as many elements as
     * the tensor does
     * @throws std::runtime_error when the element counts differ
     */
    void setData(const std::vector<T>& data)
    {
        KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", data.size());

        if (data.size() != this->mSize) {
            throw std::runtime_error(
              "Kompute TensorT Cannot set data of different sizes");
        }

        Tensor::setRawData(data.data());
    }

    // Declared here only; the definitions are explicit specialisations per T.
    TensorDataTypes dataType();
};
} // End namespace kp

View file

@ -37,7 +37,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
}
)");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
std::vector<float> data(size, 0.0);
std::vector<float> resultSync(size, 100000000);
@ -73,7 +73,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
sq->eval<kp::OpTensorSyncLocal>(inputsSyncB);
for (uint32_t i = 0; i < numParallel; i++) {
EXPECT_EQ(inputsSyncB[i]->data(), resultSync);
EXPECT_EQ(inputsSyncB[i]->vector<float>(), resultSync);
}
kp::Manager mgrAsync(0, { 0, 2 });
@ -111,7 +111,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
sq->eval<kp::OpTensorSyncLocal>({ inputsAsyncB });
for (uint32_t i = 0; i < numParallel; i++) {
EXPECT_EQ(inputsAsyncB[i]->data(), resultAsync);
EXPECT_EQ((inputsAsyncB[i]->vector<float>()), resultAsync);
}
// The speedup should be at least 40%
@ -145,15 +145,15 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
}
)");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
std::vector<float> data(size, 0.0);
std::vector<float> resultAsync(size, 100000000);
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(data);
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(data);
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(data);
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(data);
std::shared_ptr<kp::Sequence> sq1 = mgr.sequence();
std::shared_ptr<kp::Sequence> sq2 = mgr.sequence();
@ -172,6 +172,6 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
sq1->evalAsync<kp::OpTensorSyncLocal>({ tensorA, tensorB });
sq1->evalAwait();
EXPECT_EQ(tensorA->data(), resultAsync);
EXPECT_EQ(tensorB->data(), resultAsync);
EXPECT_EQ(tensorA->vector(), resultAsync);
EXPECT_EQ(tensorB->vector(), resultAsync);
}

View file

@ -5,9 +5,9 @@
TEST(TestDestroy, TestDestroyTensorSingle)
{
std::shared_ptr<kp::Tensor> tensorA = nullptr;
std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
std::string shader(R"(
std::string shader(R"(
#version 450
layout (local_size_x = 1) in;
layout(set = 0, binding = 0) buffer a { float pa[]; };
@ -16,7 +16,7 @@ TEST(TestDestroy, TestDestroyTensorSingle)
pa[index] = pa[index] + 1;
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
{
std::shared_ptr<kp::Sequence> sq = nullptr;
@ -34,18 +34,19 @@ TEST(TestDestroy, TestDestroyTensorSingle)
->eval()
->eval<kp::OpTensorSyncLocal>(algo->getTensors());
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 1, 1, 1 }));
tensorA->destroy();
EXPECT_FALSE(tensorA->isInit());
}
EXPECT_FALSE(tensorA->isInit());
}
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
}
TEST(TestDestroy, TestDestroyTensorVector)
{
std::shared_ptr<kp::Tensor> tensorA = nullptr;
std::shared_ptr<kp::Tensor> tensorB = nullptr;
std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
std::shared_ptr<kp::TensorT<float>> tensorB = nullptr;
std::string shader(R"(
#version 450
@ -57,7 +58,7 @@ TEST(TestDestroy, TestDestroyTensorVector)
pa[index] = pa[index] + 1;
pb[index] = pb[index] + 2;
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
{
std::shared_ptr<kp::Sequence> sq = nullptr;
@ -77,6 +78,9 @@ TEST(TestDestroy, TestDestroyTensorVector)
->record<kp::OpTensorSyncLocal>(algo->getTensors())
->eval();
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 2, 2, 2 }));
EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 3, 3 }));
tensorA->destroy();
tensorB->destroy();
@ -84,13 +88,11 @@ TEST(TestDestroy, TestDestroyTensorVector)
EXPECT_FALSE(tensorB->isInit());
}
}
EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 3, 3 }));
}
TEST(TestDestroy, TestDestroySequenceSingle)
{
std::shared_ptr<kp::Tensor> tensorA = nullptr;
std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
std::string shader(R"(
#version 450
@ -101,7 +103,7 @@ TEST(TestDestroy, TestDestroySequenceSingle)
pa[index] = pa[index] + 1;
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
{
std::shared_ptr<kp::Sequence> sq = nullptr;
@ -121,7 +123,8 @@ TEST(TestDestroy, TestDestroySequenceSingle)
sq->destroy();
EXPECT_FALSE(sq->isInit());
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 1, 1, 1 }));
}
}
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
}

View file

@ -14,19 +14,19 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
std::shared_ptr<kp::TensorT<float>> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
std::shared_ptr<kp::TensorT<float>> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
std::shared_ptr<kp::TensorT<float>> y = mgr.tensor({ 0, 0, 0, 1, 1 });
std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> wIn = mgr.tensor({ 0.001, 0.001 });
std::shared_ptr<kp::TensorT<float>> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> bIn = mgr.tensor({ 0 });
std::shared_ptr<kp::TensorT<float>> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
wIn, wOutI, wOutJ,
@ -88,21 +88,21 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
std::shared_ptr<kp::TensorT<float>> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
std::shared_ptr<kp::TensorT<float>> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
std::shared_ptr<kp::TensorT<float>> y = mgr.tensor({ 0, 0, 0, 1, 1 });
std::shared_ptr<kp::Tensor> wIn =
std::shared_ptr<kp::TensorT<float>> wIn =
mgr.tensor({ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::Tensor> bIn =
std::shared_ptr<kp::TensorT<float>> bIn =
mgr.tensor({ 0 }, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
wIn, wOutI, wOutJ,
@ -136,8 +136,6 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
wIn->data()[1] -= learningRate * wOutJ->data()[j];
bIn->data()[0] -= learningRate * bOut->data()[j];
}
wIn->mapDataIntoHostMemory();
bIn->mapDataIntoHostMemory();
}
// Based on the inputs the outputs should be at least:

View file

@ -7,9 +7,9 @@ TEST(TestManager, EndToEndOpMultEvalFlow)
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorLHS = mgr.tensor({ 0, 1, 2 });
std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorLHS = mgr.tensor({ 0, 1, 2 });
std::shared_ptr<kp::TensorT<float>> tensorRHS = mgr.tensor({ 2, 4, 6 });
std::shared_ptr<kp::TensorT<float>> tensorOutput = mgr.tensor({ 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
tensorRHS,
@ -20,16 +20,16 @@ TEST(TestManager, EndToEndOpMultEvalFlow)
->eval<kp::OpMult>(params, mgr.algorithm())
->eval<kp::OpTensorSyncLocal>(params);
EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
EXPECT_EQ(tensorOutput->vector(), std::vector<float>({ 0, 4, 12 }));
}
TEST(TestManager, EndToEndOpMultSeqFlow)
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorLHS = mgr.tensor({ 0, 1, 2 });
std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorLHS = mgr.tensor({ 0, 1, 2 });
std::shared_ptr<kp::TensorT<float>> tensorRHS = mgr.tensor({ 2, 4, 6 });
std::shared_ptr<kp::TensorT<float>> tensorOutput = mgr.tensor({ 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
tensorRHS,
@ -41,16 +41,16 @@ TEST(TestManager, EndToEndOpMultSeqFlow)
->record<kp::OpTensorSyncLocal>(params)
->eval();
EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
EXPECT_EQ(tensorOutput->vector(), std::vector<float>({ 0, 4, 12 }));
}
TEST(TestManager, TestMultipleSequences)
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorLHS = mgr.tensor({ 0, 1, 2 });
std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorLHS = mgr.tensor({ 0, 1, 2 });
std::shared_ptr<kp::TensorT<float>> tensorRHS = mgr.tensor({ 2, 4, 6 });
std::shared_ptr<kp::TensorT<float>> tensorOutput = mgr.tensor({ 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
tensorRHS,
@ -60,5 +60,5 @@ TEST(TestManager, TestMultipleSequences)
mgr.sequence()->eval<kp::OpMult>(params, mgr.algorithm());
mgr.sequence()->eval<kp::OpTensorSyncLocal>(params);
EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
EXPECT_EQ(tensorOutput->vector(), std::vector<float>({ 0, 4, 12 }));
}

View file

@ -8,10 +8,12 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
kp::Manager mgr;
// Default tensor constructor simplifies creation of float values
auto tensorInA = mgr.tensor({ 2., 2., 2. });
auto tensorInB = mgr.tensor({ 1., 2., 3. });
auto tensorOutA = mgr.tensor({ 0., 0., 0. });
auto tensorOutB = mgr.tensor({ 0., 0., 0. });
// Explicit type constructor supports uint32, int32, double, float and bool
auto tensorOutA = mgr.tensorT<uint32_t>({ 0, 0, 0 });
auto tensorOutB = mgr.tensorT<uint32_t>({ 0, 0, 0 });
std::string shader = (R"(
#version 450
@ -21,8 +23,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
// The input tensors bind index is relative to index in parameter passed
layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
// Kompute supports push constants updated on dispatch
layout(push_constant) uniform PushConstants {
@ -34,8 +36,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
void main() {
uint index = gl_GlobalInvocationID.x;
out_a[index] += in_a[index] * in_b[index];
out_b[index] += const_one * push_const.val;
out_a[index] += uint( in_a[index] * in_b[index] );
out_b[index] += uint( const_one * push_const.val );
}
)");
@ -49,7 +51,7 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
kp::Constants pushConstsB({ 3.0 });
auto algorithm = mgr.algorithm(
params, kp::Shader::compile_source(shader), workgroup, specConsts, pushConstsA);
params, kp::Shader::compileSource(shader), workgroup, specConsts, pushConstsA);
// 3. Run operation with string shader synchronously
mgr.sequence()
@ -64,8 +66,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
sq->evalAwait();
EXPECT_EQ(tensorOutA->data(), std::vector<float>({ 4, 8, 12 }));
EXPECT_EQ(tensorOutB->data(), std::vector<float>({ 10, 10, 10 }));
EXPECT_EQ(tensorOutA->vector(), std::vector<uint32_t>({ 4, 8, 12 }));
EXPECT_EQ(tensorOutB->vector(), std::vector<uint32_t>({ 10, 10, 10 }));
}
TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
@ -73,7 +75,7 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
std::string shader(R"(
#version 450
@ -84,7 +86,7 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
pa[index] = pa[index] + 1;
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
{
mgr.sequence()
@ -96,14 +98,14 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
->eval();
}
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
}
TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
std::string shader(R"(
#version 450
@ -114,7 +116,7 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
pa[index] = pa[index] + 1;
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
std::shared_ptr<kp::Algorithm> algorithm =
mgr.algorithm({ tensorA }, spirv);
@ -131,7 +133,7 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
mgr.sequence()->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
}
TEST(TestMultipleAlgoExecutions, MultipleSequences)
@ -139,7 +141,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
std::string shader(R"(
#version 450
@ -150,7 +152,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
pa[index] = pa[index] + 1;
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
std::shared_ptr<kp::Algorithm> algorithm =
mgr.algorithm({ tensorA }, spirv);
@ -167,14 +169,14 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
}
TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
std::string shader(R"(
#version 450
@ -185,7 +187,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
pa[index] = pa[index] + 1;
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
std::shared_ptr<kp::Algorithm> algorithm =
mgr.algorithm({ tensorA }, spirv);
@ -198,43 +200,6 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
}
// Runs an increment shader three times on a tensor created by a kp::Manager
// that lives in an inner scope, then reads the tensor's host data after both
// the manager and the sequence handle have gone out of scope.
TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope)
{
// Declared in the outermost scope so it is still reachable for the final
// assertion.
std::shared_ptr<kp::Tensor> tensorA = nullptr;
{
// The sequence handle is scoped one level outside the manager, so the
// manager is destroyed first and this handle second.
std::shared_ptr<kp::Sequence> sq = nullptr;
{
kp::Manager mgr;
tensorA = mgr.tensor({ 0, 0, 0 });
// Compute shader: each invocation adds 1 to the element at its global
// invocation index.
std::string shader(R"(
#version 450
layout (local_size_x = 1) in;
layout(set = 0, binding = 0) buffer a { float pa[]; };
void main() {
uint index = gl_GlobalInvocationID.x;
pa[index] = pa[index] + 1;
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::shared_ptr<kp::Algorithm> algorithm =
mgr.algorithm({ tensorA }, spirv);
sq = mgr.sequence();
sq->record<kp::OpTensorSyncDevice>({ tensorA })->eval();
// The dispatch is recorded once and eval() is chained three times; the
// assertion at the end expects each element to be 3.
sq->record<kp::OpAlgoDispatch>(algorithm)->eval()->eval()->eval();
sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
}
}
// Both inner scopes are closed here; only the tensor handle remains.
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
}

View file

@ -9,8 +9,8 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor)
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 3, 4, 5 });
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 3, 4, 5 });
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
std::string shader(R"(
#version 450
@ -27,7 +27,7 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor)
}
)");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA, tensorB };
@ -36,16 +36,16 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor)
->eval<kp::OpAlgoDispatch>(mgr.algorithm(params, spirv))
->eval<kp::OpTensorSyncLocal>(params);
EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 0, 1, 2 }));
EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 4, 5 }));
}
TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 3, 4, 5 });
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 3, 4, 5 });
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
std::vector<uint32_t> spirv = std::vector<uint32_t>(
(uint32_t*)
@ -62,8 +62,8 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
->eval<kp::OpAlgoDispatch>(mgr.algorithm(params, spirv))
->eval<kp::OpTensorSyncLocal>(params);
EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 0, 1, 2 }));
EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 4, 5 }));
}
// TODO: Add support to read from file for shader
@ -71,8 +71,8 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
//{
// kp::Manager mgr;
//
// std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
// std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
// std::shared_ptr<kp::TensorT<float>> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
// std::shared_ptr<kp::TensorT<float>> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
// mgr.rebuild({ tensorA, tensorB });
//
// mgr.evalOpDefault<kp::OpAlgoCreate>(
@ -81,6 +81,6 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
//
// mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
//
// EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
// EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
// EXPECT_EQ(tensorA->vector(), std::vector<float>({ 0, 1, 2 }));
// EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 4, 5 }));
//}

View file

@ -11,8 +11,8 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor)
std::vector<float> testVecA{ 1, 2, 3 };
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(testVecB);
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
@ -22,8 +22,8 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor)
->eval<kp::OpTensorCopy>({ tensorA, tensorB })
->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB });
// Making sure the GPU holds the same data
EXPECT_EQ(tensorA->data(), tensorB->data());
// Making sure the GPU holds the same data
EXPECT_EQ(tensorA->vector(), tensorB->vector());
}
TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
@ -35,9 +35,9 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
std::vector<float> testVecB{ 0, 0, 0 };
std::vector<float> testVecC{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
std::shared_ptr<kp::Tensor> tensorC = mgr.tensor(testVecC);
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(testVecB);
std::shared_ptr<kp::TensorT<float>> tensorC = mgr.tensor(testVecC);
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
@ -47,14 +47,14 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB, tensorC })
->eval<kp::OpTensorCopy>({ tensorA, tensorB, tensorC });
EXPECT_EQ(tensorA->data(), tensorB->data());
EXPECT_EQ(tensorA->data(), tensorC->data());
EXPECT_EQ(tensorA->vector(), tensorB->vector());
EXPECT_EQ(tensorA->vector(), tensorC->vector());
// Making sure the GPU holds the same data
// Making sure the GPU holds the same data
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB, tensorC });
EXPECT_EQ(tensorA->data(), tensorB->data());
EXPECT_EQ(tensorA->data(), tensorC->data());
EXPECT_EQ(tensorA->vector(), tensorB->vector());
EXPECT_EQ(tensorA->vector(), tensorC->vector());
}
TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
@ -65,8 +65,8 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
std::vector<float> testVecA{ 3, 4, 5 };
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
std::shared_ptr<kp::Tensor> tensorB =
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
std::shared_ptr<kp::TensorT<float>> tensorB =
mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost);
// Only calling sync on device type tensor
@ -77,11 +77,11 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
mgr.sequence()->eval<kp::OpTensorCopy>({ tensorA, tensorB });
EXPECT_EQ(tensorA->data(), tensorB->data());
EXPECT_EQ(tensorA->vector(), tensorB->vector());
// Making sure the GPU holds the same data
// Making sure the GPU holds the same data
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
EXPECT_EQ(tensorA->data(), tensorB->data());
EXPECT_EQ(tensorA->vector(), tensorB->vector());
}
TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
@ -92,9 +92,9 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
std::vector<float> testVecA{ 4, 5, 6 };
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA =
std::shared_ptr<kp::TensorT<float>> tensorA =
mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(testVecB);
// Only calling sync on device type tensor
mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB });
@ -104,11 +104,11 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
mgr.sequence()->eval<kp::OpTensorCopy>({ tensorA, tensorB });
EXPECT_EQ(tensorA->data(), tensorB->data());
EXPECT_EQ(tensorA->vector(), tensorB->vector());
// Making sure the GPU holds the same data
// Making sure the GPU holds the same data
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
EXPECT_EQ(tensorA->data(), tensorB->data());
EXPECT_EQ(tensorA->vector(), tensorB->vector());
}
TEST(TestOpTensorCopy, CopyHostToHostTensor)
@ -119,9 +119,9 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
std::vector<float> testVecA{ 5, 6, 7 };
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA =
std::shared_ptr<kp::TensorT<float>> tensorA =
mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> tensorB =
std::shared_ptr<kp::TensorT<float>> tensorB =
mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost);
EXPECT_TRUE(tensorA->isInit());
@ -131,11 +131,11 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
->eval<kp::OpTensorSyncDevice>({ tensorA })
->eval<kp::OpTensorCopy>({ tensorA, tensorB });
EXPECT_EQ(tensorA->data(), tensorB->data());
EXPECT_EQ(tensorA->vector(), tensorB->vector());
// Making sure the GPU holds the same data
// Making sure the GPU holds the same data
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
EXPECT_EQ(tensorA->data(), tensorB->data());
EXPECT_EQ(tensorA->vector(), tensorB->vector());
}
TEST(TestOpTensorCopy, SingleTensorShouldFail)
@ -145,7 +145,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail)
std::vector<float> testVecA{ 6, 7, 8 };
std::shared_ptr<kp::Tensor> tensorA =
std::shared_ptr<kp::TensorT<float>> tensorA =
mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
EXPECT_TRUE(tensorA->isInit());

View file

@ -6,7 +6,7 @@
TEST(TestOpTensorCreate, CreateSingleTensorSingleOp)
{
std::vector<float> testVecA{ 9, 8, 7 };
std::shared_ptr<kp::Tensor> tensorA = nullptr;
std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
{
kp::Manager mgr;
@ -15,7 +15,7 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp)
EXPECT_TRUE(tensorA->isInit());
EXPECT_EQ(tensorA->data(), testVecA);
EXPECT_EQ(tensorA->vector(), testVecA);
}
EXPECT_FALSE(tensorA->isInit());
@ -29,11 +29,11 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore)
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(testVecB);
EXPECT_EQ(tensorA->data(), testVecA);
EXPECT_EQ(tensorB->data(), testVecB);
EXPECT_EQ(tensorA->vector(), testVecA);
EXPECT_EQ(tensorB->vector(), testVecB);
tensorA->destroy();
tensorB->destroy();
@ -49,7 +49,7 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor)
kp::Manager mgr;
try {
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
} catch (const std::runtime_error& err) {
// check exception
ASSERT_TRUE(std::string(err.what()).find("zero-sized") !=

View file

@ -11,7 +11,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor)
std::vector<float> testVecPreA{ 0, 0, 0 };
std::vector<float> testVecPostA{ 9, 8, 7 };
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecPreA);
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecPreA);
EXPECT_TRUE(tensorA->isInit());
@ -21,7 +21,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor)
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorA });
EXPECT_EQ(tensorA->data(), testVecPostA);
EXPECT_EQ(tensorA->vector(), testVecPostA);
}
TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
@ -31,9 +31,9 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
std::vector<float> testVec{ 9, 8, 7 };
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::Tensor> tensorC = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorC = mgr.tensor({ 0, 0, 0 });
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
@ -47,7 +47,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB, tensorC });
EXPECT_EQ(tensorA->data(), testVec);
EXPECT_EQ(tensorB->data(), testVec);
EXPECT_EQ(tensorC->data(), testVec);
EXPECT_EQ(tensorA->vector(), testVec);
EXPECT_EQ(tensorB->vector(), testVec);
EXPECT_EQ(tensorC->vector(), testVec);
}

View file

@ -22,14 +22,14 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride)
pa[2] += pcs.z;
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
std::shared_ptr<kp::Sequence> sq = nullptr;
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensor = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0, 0.0, 0.0 });
@ -42,7 +42,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride)
sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
sq->eval<kp::OpTensorSyncLocal>({ tensor });
EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 }));
EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 }));
}
}
}
@ -65,14 +65,14 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride)
pa[2] += pcs.z;
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
std::shared_ptr<kp::Sequence> sq = nullptr;
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensor = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.1, 0.2, 0.3 });
@ -85,7 +85,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride)
sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
sq->eval<kp::OpTensorSyncLocal>({ tensor });
EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 }));
EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 }));
}
}
}
@ -108,14 +108,14 @@ TEST(TestPushConstants, TestConstantsWrongSize)
pa[2] += pcs.z;
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
std::shared_ptr<kp::Sequence> sq = nullptr;
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensor = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 });

View file

@ -60,13 +60,13 @@ TEST(TestSequence, RerecordSequence)
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({1, 2, 3});
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({2, 2, 2});
std::shared_ptr<kp::Tensor> tensorOut = mgr.tensor({0, 0, 0});
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({1, 2, 3});
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({2, 2, 2});
std::shared_ptr<kp::TensorT<float>> tensorOut = mgr.tensor({0, 0, 0});
sq->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB, tensorOut });
std::vector<uint32_t> spirv = kp::Shader::compile_source(R"(
std::vector<uint32_t> spirv = kp::Shader::compileSource(R"(
#version 450
layout (local_size_x = 1) in;
@ -90,7 +90,7 @@ TEST(TestSequence, RerecordSequence)
sq->eval();
EXPECT_EQ(tensorOut->data(), std::vector<float>({2, 4, 6}));
EXPECT_EQ(tensorOut->vector(), std::vector<float>({2, 4, 6}));
algo->rebuild({tensorOut, tensorA, tensorB}, spirv);
@ -98,7 +98,7 @@ TEST(TestSequence, RerecordSequence)
sq->rerecord();
sq->eval();
EXPECT_EQ(tensorB->data(), std::vector<float>({2, 8, 18}));
EXPECT_EQ(tensorB->vector(), std::vector<float>({2, 8, 18}));
}
@ -117,7 +117,7 @@ TEST(TestSequence, SequenceTimestamps)
pa[index] = pa[index] + 1;
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
auto seq = mgr.sequence(0, 100); //100 timestamps
seq->record<kp::OpTensorSyncDevice>({ tensorA })

View file

@ -25,7 +25,7 @@ static const std::string shaderString = (R"(
)");
void compileShaderWithGivenResources(const std::string shaderString, const TBuiltInResource resources) {
kp::Shader::compile_source(shaderString, std::string("main"), std::vector<std::pair<std::string,std::string>>({}), resources);
kp::Shader::compileSource(shaderString, std::string("main"), std::vector<std::pair<std::string,std::string>>({}), resources);
}

View file

@ -18,15 +18,15 @@ TEST(TestSpecializationConstants, TestTwoConstants)
pb[index] = cTwo;
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
std::shared_ptr<kp::Sequence> sq = nullptr;
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
tensorB };
@ -42,8 +42,8 @@ TEST(TestSpecializationConstants, TestTwoConstants)
->record<kp::OpTensorSyncLocal>(params)
->eval();
EXPECT_EQ(tensorA->data(), std::vector<float>({ 5, 5, 5 }));
EXPECT_EQ(tensorB->data(), std::vector<float>({ 0.3, 0.3, 0.3 }));
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 5, 5, 5 }));
EXPECT_EQ(tensorB->vector(), std::vector<float>({ 0.3, 0.3, 0.3 }));
}
}
}

View file

@ -7,7 +7,7 @@ TEST(TestTensor, ConstructorData)
{
kp::Manager mgr;
std::vector<float> vec{ 0, 1, 2 };
std::shared_ptr<kp::Tensor> tensor = mgr.tensor(vec);
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor(vec);
EXPECT_EQ(tensor->size(), vec.size());
EXPECT_EQ(tensor->data(), vec);
EXPECT_EQ(tensor->vector(), vec);
}

View file

@ -7,8 +7,8 @@
TEST(TestWorkgroup, TestSimpleWorkgroup)
{
std::shared_ptr<kp::Tensor> tensorA = nullptr;
std::shared_ptr<kp::Tensor> tensorB = nullptr;
std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
std::shared_ptr<kp::TensorT<float>> tensorB = nullptr;
{
std::shared_ptr<kp::Sequence> sq = nullptr;
@ -39,29 +39,29 @@ TEST(TestWorkgroup, TestSimpleWorkgroup)
sq->record<kp::OpAlgoDispatch>(algorithm);
sq->record<kp::OpTensorSyncLocal>(params);
sq->eval();
std::vector<float> expectedA = {
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15
};
std::vector<float> expectedB = {
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1,
2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
};
EXPECT_EQ(tensorA->vector(), expectedA);
EXPECT_EQ(tensorB->vector(), expectedB);
}
}
std::vector<float> expectedA = {
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15
};
std::vector<float> expectedB = {
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1,
2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
};
EXPECT_EQ(tensorA->data(), expectedA);
EXPECT_EQ(tensorB->data(), expectedB);
}