Merge pull request #177 from EthicalML/add_tensor_types
Add support for bool, double, int32, uint32 and float32 on Tensors via TensorT
This commit is contained in:
commit
1d2d33b269
41 changed files with 1262 additions and 690 deletions
30
README.md
30
README.md
|
|
@ -55,10 +55,13 @@ void kompute(const std::string& shader) {
|
|||
kp::Manager mgr;
|
||||
|
||||
// 2. Create and initialise Kompute Tensors through manager
|
||||
|
||||
// Default tensor constructor simplifies creation of float values
|
||||
auto tensorInA = mgr.tensor({ 2., 2., 2. });
|
||||
auto tensorInB = mgr.tensor({ 1., 2., 3. });
|
||||
auto tensorOutA = mgr.tensor({ 0., 0., 0. });
|
||||
auto tensorOutB = mgr.tensor({ 0., 0., 0. });
|
||||
// Explicit type constructor supports uint32, int32, double, float and bool
|
||||
auto tensorOutA = mgr.tensorT<uint32_t>({ 0, 0, 0 });
|
||||
auto tensorOutB = mgr.tensorT<uint32_t>({ 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = {tensorInA, tensorInB, tensorOutA, tensorOutB};
|
||||
|
||||
|
|
@ -109,8 +112,8 @@ int main() {
|
|||
// The input tensors bind index is relative to index in parameter passed
|
||||
layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
|
||||
layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
|
||||
layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
|
||||
layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
|
||||
layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
|
||||
layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
|
||||
|
||||
// Kompute supports push constants updated on dispatch
|
||||
layout(push_constant) uniform PushConstants {
|
||||
|
|
@ -122,8 +125,8 @@ int main() {
|
|||
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
out_a[index] += in_a[index] * in_b[index];
|
||||
out_b[index] += const_one * push_const.val;
|
||||
out_a[index] += uint( in_a[index] * in_b[index] );
|
||||
out_b[index] += uint( const_one * push_const.val );
|
||||
}
|
||||
)");
|
||||
|
||||
|
|
@ -144,10 +147,13 @@ def kompute(shader):
|
|||
mgr = kp.Manager()
|
||||
|
||||
# 2. Create and initialise Kompute Tensors through manager
|
||||
|
||||
# Default tensor constructor simplifies creation of float values
|
||||
tensor_in_a = mgr.tensor([2, 2, 2])
|
||||
tensor_in_b = mgr.tensor([1, 2, 3])
|
||||
tensor_out_a = mgr.tensor([0, 0, 0])
|
||||
tensor_out_b = mgr.tensor([0, 0, 0])
|
||||
# Explicit type constructor supports uint32, int32, double, float and bool
|
||||
tensor_out_a = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32))
|
||||
tensor_out_b = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32))
|
||||
|
||||
params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b]
|
||||
|
||||
|
|
@ -194,8 +200,8 @@ if __name__ == "__main__":
|
|||
// The input tensors bind index is relative to index in parameter passed
|
||||
layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
|
||||
layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
|
||||
layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
|
||||
layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
|
||||
layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
|
||||
layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
|
||||
|
||||
// Kompute supports push constants updated on dispatch
|
||||
layout(push_constant) uniform PushConstants {
|
||||
|
|
@ -207,8 +213,8 @@ if __name__ == "__main__":
|
|||
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
out_a[index] += in_a[index] * in_b[index];
|
||||
out_b[index] += const_one * push_const.val;
|
||||
out_a[index] += uint( in_a[index] * in_b[index] );
|
||||
out_b[index] += uint( const_one * push_const.val );
|
||||
}
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
cmake_minimum_required(VERSION 3.17.0)
|
||||
cmake_minimum_required(VERSION 3.4.1)
|
||||
project(kompute_array_mult VERSION 0.1.0)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
|
|
@ -23,10 +23,6 @@ endif()
|
|||
|
||||
find_package(Vulkan REQUIRED)
|
||||
|
||||
if(KOMPUTE_OPT_ENABLE_SPDLOG)
|
||||
find_package(spdlog REQUIRED)
|
||||
endif()
|
||||
|
||||
add_executable(kompute_array_mult
|
||||
src/Main.cpp)
|
||||
|
||||
|
|
|
|||
|
|
@ -15,8 +15,11 @@ This project has the option to either import the Kompute dependency relative to
|
|||
To build you just need to run the cmake command in this folder as follows:
|
||||
|
||||
```
|
||||
cmake \
|
||||
-Bbuild
|
||||
cmake -Bbuild/ \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DKOMPUTE_OPT_INSTALL=0 \
|
||||
-DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \
|
||||
-DKOMPUTE_OPT_ENABLE_SPDLOG=1
|
||||
```
|
||||
|
||||
You can pass the following optional parameters based on your desired configuration:
|
||||
|
|
|
|||
|
|
@ -39,16 +39,17 @@ int main()
|
|||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorInA, tensorInB, tensorOut };
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, kp::Shader::compile_source(shader));
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, kp::Shader::compileSource(shader));
|
||||
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>(params)
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>(params);
|
||||
->record<kp::OpTensorSyncLocal>(params)
|
||||
->eval();
|
||||
|
||||
// prints "Output: { 0 4 12 }"
|
||||
std::cout<< "Output: { ";
|
||||
for (const float& elem : tensorOut->data()) {
|
||||
for (const float& elem : tensorOut->vector()) {
|
||||
std::cout << elem << " ";
|
||||
}
|
||||
std::cout << "}" << std::endl;
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ void KomputeSummatorNode::_init() {
|
|||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm(
|
||||
{ this->mPrimaryTensor, this->mSecondaryTensor },
|
||||
kp::Shader::compile_source(shader));
|
||||
kp::Shader::compileSource(shader));
|
||||
|
||||
|
||||
// First we ensure secondary tensor loads to GPU
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ void KomputeSummator::_init() {
|
|||
// Then we run the operation with both tensors
|
||||
this->mSequence->record<kp::OpAlgoCreate>(
|
||||
{ this->mPrimaryTensor, this->mSecondaryTensor },
|
||||
kp::Shader::compile_source(shader));
|
||||
kp::Shader::compileSource(shader));
|
||||
|
||||
// We map the result back to local
|
||||
this->mSequence->record<kp::OpTensorSyncLocal>(
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
cmake_minimum_required(VERSION 3.17.0)
|
||||
cmake_minimum_required(VERSION 3.4.1)
|
||||
project(kompute_linear_reg VERSION 0.1.0)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
|
|
@ -23,10 +23,6 @@ endif()
|
|||
|
||||
find_package(Vulkan REQUIRED)
|
||||
|
||||
if(KOMPUTE_OPT_ENABLE_SPDLOG)
|
||||
find_package(spdlog REQUIRED)
|
||||
endif()
|
||||
|
||||
add_executable(kompute_linear_reg
|
||||
src/Main.cpp)
|
||||
|
||||
|
|
@ -39,7 +35,7 @@ include_directories(
|
|||
../../single_include/)
|
||||
|
||||
if(KOMPUTE_OPT_ENABLE_SPDLOG)
|
||||
target_link_libraries(kompute_array_mult
|
||||
target_link_libraries(kompute_linear_reg
|
||||
spdlog::spdlog)
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -15,8 +15,11 @@ This project has the option to either import the Kompute dependency relative to
|
|||
To build you just need to run the cmake command in this folder as follows:
|
||||
|
||||
```
|
||||
cmake \
|
||||
-Bbuild
|
||||
cmake -Bbuild/ \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DKOMPUTE_OPT_INSTALL=0 \
|
||||
-DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \
|
||||
-DKOMPUTE_OPT_ENABLE_SPDLOG=1
|
||||
```
|
||||
|
||||
You can pass the following optional parameters based on your desired configuration:
|
||||
|
|
|
|||
|
|
@ -17,19 +17,19 @@ int main()
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
|
||||
std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
auto xI = mgr.tensor({ 0, 1, 1, 1, 1 });
|
||||
auto xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
auto y = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
|
||||
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
auto wIn = mgr.tensor({ 0.001, 0.001 });
|
||||
auto wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
auto wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
|
||||
std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
auto bIn = mgr.tensor({ 0 });
|
||||
auto bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
auto lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
|
|
@ -40,7 +40,8 @@ int main()
|
|||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
|
||||
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv);
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
|
||||
params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 }));
|
||||
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
|
||||
|
|
|
|||
|
|
@ -247,10 +247,16 @@ static const char *__doc_kp_Manager_sequence =
|
|||
R"doc(Create a managed sequence that will be destroyed by this manager if it
|
||||
hasn't been destroyed by its reference count going to zero.
|
||||
|
||||
@param queueIndex The queue to use from the available queues @returns
|
||||
Shared pointer with initialised sequence)doc";
|
||||
@param queueIndex The queue to use from the available queues @param
|
||||
nrOfTimestamps The maximum number of timestamps to allocate. If zero
|
||||
(default), disables latching of timestamps. @returns Shared pointer
|
||||
with initialised sequence)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_tensor =
|
||||
static const char *__doc_kp_Manager_tensor = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_tensor_2 = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_tensorT =
|
||||
R"doc(Create a managed tensor that will be destroyed by this manager if it
|
||||
hasn't been destroyed by its reference count going to zero.
|
||||
|
||||
|
|
@ -264,18 +270,26 @@ of algorithm and parameter components which can be used with shaders.
|
|||
By default it enables the user to provide a dynamic number of tensors
|
||||
which are then passed as inputs.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoDispatch_OpAlgoDispatch = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoDispatch_OpAlgoDispatch =
|
||||
R"doc(Constructor that stores the algorithm to use as well as the relevant
|
||||
push constants to override when recording.
|
||||
|
||||
@param algorithm The algorithm object to use for dispatch @param
|
||||
pushConstants The push constants to use for override)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoDispatch_mAlgorithm = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoDispatch_mPushConstants = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoDispatch_postEval =
|
||||
R"doc(Executes after the recorded commands are submitted, and performs a
|
||||
copy of the GPU Device memory into the staging buffer so the output
|
||||
data can be retrieved.)doc";
|
||||
R"doc(Does not perform any postEval commands.
|
||||
|
||||
static const char *__doc_kp_OpAlgoDispatch_preEval = R"doc(Does not perform any preEval commands.)doc";
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoDispatch_preEval =
|
||||
R"doc(Does not perform any preEval commands.
|
||||
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoDispatch_record =
|
||||
R"doc(This records the commands that are to be sent to the GPU. This
|
||||
|
|
@ -283,7 +297,9 @@ includes the barriers that ensure the memory has been copied before
|
|||
going in and out of the shader, as well as the dispatch operation that
|
||||
sends the shader processing to the gpu. This function also records the
|
||||
GPU memory copy of the output data for the staging buffer so it can be
|
||||
read by the host.)doc";
|
||||
read by the host.
|
||||
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase =
|
||||
R"doc(Base Operation which provides the high level interface that Kompute
|
||||
|
|
@ -299,7 +315,9 @@ the commands to the GPU for processing, and can be used to perform any
|
|||
tear-down steps required as the computation iteration finishes. It's
|
||||
worth noting that there are situations where eval can be called
|
||||
multiple times, so the resources that are destroyed should not require
|
||||
a re-init unless explicitly provided by the user.)doc";
|
||||
a re-init unless explicitly provided by the user.
|
||||
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase_preEval =
|
||||
R"doc(Pre eval is called before the Sequence has called eval and submitted
|
||||
|
|
@ -307,12 +325,16 @@ the commands to the GPU for processing, and can be used to perform any
|
|||
per-eval setup steps required as the computation iteration begins.
|
||||
It's worth noting that there are situations where eval can be called
|
||||
multiple times, so the resources that are created should be idempotent
|
||||
in case it's called multiple times in a row.)doc";
|
||||
in case it's called multiple times in a row.
|
||||
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpBase_record =
|
||||
R"doc(The record function is intended to only send a record command or run
|
||||
commands that are expected to record operations that are to be
|
||||
submitted as a batch into the GPU.)doc";
|
||||
submitted as a batch into the GPU.
|
||||
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpMult =
|
||||
R"doc(Operation that performs multiplication on two tensors and outpus on
|
||||
|
|
@ -323,12 +345,9 @@ R"doc(Default constructor with parameters that provides the bare minimum
|
|||
requirements for the operations to be able to create and manage their
|
||||
sub-components.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that are to be used in this operation @param
|
||||
komputeWorkgroup Optional parameter to specify the layout for
|
||||
processing)doc";
|
||||
algorithm An algorithm that will be overridden with the OpMult shader
|
||||
data and the tensors provided which are expected to be 3)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCopy =
|
||||
R"doc(Operation that copies the data from the first tensor to the rest of
|
||||
|
|
@ -340,84 +359,95 @@ static const char *__doc_kp_OpTensorCopy_OpTensorCopy =
|
|||
R"doc(Default constructor with parameters that provides the core vulkan
|
||||
resources and the tensors that will be used in the operation.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that will be used to create in operation.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCopy_mTensors = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCopy_postEval =
|
||||
R"doc(Copies the local vectors for all the tensors to sync the data with the
|
||||
gpu.)doc";
|
||||
gpu.
|
||||
|
||||
static const char *__doc_kp_OpTensorCopy_preEval = R"doc(Does not perform any preEval commands.)doc";
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCopy_preEval =
|
||||
R"doc(Does not perform any preEval commands.
|
||||
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorCopy_record =
|
||||
R"doc(Records the copy commands from the first tensor into all the other
|
||||
tensors provided. Also optionally records a barrier.)doc";
|
||||
tensors provided. Also optionally records a barrier.
|
||||
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice =
|
||||
R"doc(Operation that syncs tensor's device by mapping local data into the
|
||||
device memory. For TensorTypes::eDevice it will use a record operation
|
||||
for the memory to be syncd into GPU memory which means that the
|
||||
operation will be done in sync with GPU commands. For
|
||||
TensorTypes::eStaging it will only map the data into host memory which
|
||||
TensorTypes::eHost it will only map the data into host memory which
|
||||
will happen during preEval before the recorded commands are
|
||||
dispatched. This operation won't have any effect on
|
||||
TensorTypes::eStaging.)doc";
|
||||
dispatched.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice_OpTensorSyncDevice =
|
||||
R"doc(Default constructor with parameters that provides the core vulkan
|
||||
resources and the tensors that will be used in the operation. The
|
||||
tensors provided cannot be of type TensorTypes::eStorage.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that will be used to create in operation.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice_mTensors = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice_postEval = R"doc(Does not perform any postEval commands.)doc";
|
||||
static const char *__doc_kp_OpTensorSyncDevice_postEval =
|
||||
R"doc(Does not perform any postEval commands.
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice_preEval = R"doc(Does not perform any preEval commands.)doc";
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice_preEval =
|
||||
R"doc(Does not perform any preEval commands.
|
||||
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncDevice_record =
|
||||
R"doc(For device tensors, it records the copy command for the tensor to copy
|
||||
the data from its staging to device memory.)doc";
|
||||
the data from its staging to device memory.
|
||||
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal =
|
||||
R"doc(Operation that syncs tensor's local memory by mapping device data into
|
||||
the local CPU memory. For TensorTypes::eDevice it will use a record
|
||||
operation for the memory to be syncd into GPU memory which means that
|
||||
the operation will be done in sync with GPU commands. For
|
||||
TensorTypes::eStaging it will only map the data into host memory which
|
||||
TensorTypes::eHost it will only map the data into host memory which
|
||||
will happen during preEval before the recorded commands are
|
||||
dispatched. This operation won't have any effect on
|
||||
TensorTypes::eStaging.)doc";
|
||||
dispatched.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal =
|
||||
R"doc(Default constructor with parameters that provides the core vulkan
|
||||
resources and the tensors that will be used in the operation. The
|
||||
tensors provided cannot be of type TensorTypes::eStorage.
|
||||
|
||||
@param physicalDevice Vulkan physical device used to find device
|
||||
queues @param device Vulkan logical device for passing to Algorithm
|
||||
@param commandBuffer Vulkan Command Buffer to record commands into
|
||||
@param tensors Tensors that will be used to create in operation.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal_mTensors = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal_postEval =
|
||||
R"doc(For host tensors it performs the map command from the host memory into
|
||||
local memory.)doc";
|
||||
local memory.
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal_preEval = R"doc(Does not perform any preEval commands.)doc";
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal_preEval =
|
||||
R"doc(Does not perform any preEval commands.
|
||||
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_OpTensorSyncLocal_record =
|
||||
R"doc(For device tensors, it records the copy command for the tensor to copy
|
||||
the data from its device to staging memory.)doc";
|
||||
the data from its device to staging memory.
|
||||
|
||||
@param commandBuffer The command buffer to record the command into.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence = R"doc(Container of operations that can be sent to GPU as batch)doc";
|
||||
|
||||
|
|
@ -427,7 +457,8 @@ generate all dependent resources.
|
|||
|
||||
@param physicalDevice Vulkan physical device @param device Vulkan
|
||||
logical device @param computeQueue Vulkan compute queue @param
|
||||
queueIndex Vulkan compute queue index in device)doc";
|
||||
queueIndex Vulkan compute queue index in device @param totalTimestamps
|
||||
Maximum number of timestamps to allocate)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_begin =
|
||||
R"doc(Begins recording commands for commands to be submitted into the
|
||||
|
|
@ -443,6 +474,8 @@ static const char *__doc_kp_Sequence_createCommandBuffer = R"doc()doc";
|
|||
|
||||
static const char *__doc_kp_Sequence_createCommandPool = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_createTimestampQueryPool = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_destroy =
|
||||
R"doc(Destroys and frees the GPU resources which include the buffer and
|
||||
memory and sets the sequence as init=False.)doc";
|
||||
|
|
@ -528,6 +561,10 @@ finishes, it runs the postEval of all operations.
|
|||
@param waitFor Number of milliseconds to wait before timing out.
|
||||
@return shared_ptr<Sequence> of the Sequence class itself)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_getTimestamps =
|
||||
R"doc(Return the timestamps that were latched at the beginning and after
|
||||
each operation during the last eval() call.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_isInit =
|
||||
R"doc(Returns true if the sequence has been initialised, and it's based on
|
||||
the GPU resources being referenced.
|
||||
|
|
@ -607,9 +644,11 @@ R"doc(Clears command buffer and triggers re-record of all the current
|
|||
operations saved, which is useful if the underlying kp::Tensors or
|
||||
kp::Algorithms are modified and need to be re-recorded.)doc";
|
||||
|
||||
static const char *__doc_kp_Sequence_timestampQueryPool = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Shader = R"doc(Shader utily class with functions to compile and process glsl files.)doc";
|
||||
|
||||
static const char *__doc_kp_Shader_compile_source =
|
||||
static const char *__doc_kp_Shader_compileSource =
|
||||
R"doc(Compile a single glslang source from string value. Currently this
|
||||
function uses the glslang C++ interface which is not thread safe so
|
||||
this function should not be called from multiple threads concurrently.
|
||||
|
|
@ -622,7 +661,7 @@ List of pairs containing key value definitions @param resourcesLimit A
|
|||
list that contains the resource limits for the GLSL compiler @return
|
||||
The compiled SPIR-V binary in unsigned int32 format)doc";
|
||||
|
||||
static const char *__doc_kp_Shader_compile_sources =
|
||||
static const char *__doc_kp_Shader_compileSources =
|
||||
R"doc(Compile multiple sources with optional filenames. Currently this
|
||||
function uses the glslang C++ interface which is not thread safe so
|
||||
this function should not be called from multiple threads concurrently.
|
||||
|
|
@ -644,14 +683,42 @@ across GPUs. Each tensor would have a respective Vulkan memory and
|
|||
buffer, which would be used to store their respective data. The
|
||||
tensors can be used for GPU data storage or transfer.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_Tensor =
|
||||
R"doc(Default constructor with data provided which would be used to create
|
||||
the respective vulkan buffer and memory.
|
||||
static const char *__doc_kp_TensorT = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_TensorT_TensorT = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_TensorT_data = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_TensorT_dataType = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_TensorT_operator_array = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_TensorT_setData = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_TensorT_vector = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_Tensor =
|
||||
R"doc(Constructor with data provided which would be used to create the
|
||||
respective vulkan buffer and memory.
|
||||
|
||||
@param physicalDevice The physical device to use to fetch properties
|
||||
@param device The device to use to create the buffer and memory from
|
||||
@param data Non-zero-sized vector of data that will be used by the
|
||||
tensor @param tensorType Type for the tensor which is of type
|
||||
tensor @param tensorTypes Type for the tensor which is of type
|
||||
TensorTypes)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_TensorDataTypes = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_TensorDataTypes_eBool = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_TensorDataTypes_eDouble = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_TensorDataTypes_eFloat = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_TensorDataTypes_eInt = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_TensorDataTypes_eUnsignedInt = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_TensorTypes =
|
||||
R"doc(Type for tensors created: Device allows memory to be transferred from
|
||||
staging buffers. Staging are host memory visible. Storage are device
|
||||
|
|
@ -677,13 +744,14 @@ without exposing it.
|
|||
|
||||
static const char *__doc_kp_Tensor_createBuffer = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_data =
|
||||
R"doc(Returns the vector of data currently contained by the Tensor. It is
|
||||
important to ensure that there is no out-of-sync data with the GPU
|
||||
memory.
|
||||
static const char *__doc_kp_Tensor_data = R"doc()doc";
|
||||
|
||||
@return Reference to vector of elements representing the data in the
|
||||
tensor.)doc";
|
||||
static const char *__doc_kp_Tensor_dataType =
|
||||
R"doc(Retrieve the underlying data type of the Tensor
|
||||
|
||||
@return Data type of tensor of type kp::Tensor::TensorDataTypes)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_dataTypeMemorySize = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_destroy =
|
||||
R"doc(Destroys and frees the GPU resources which include the buffer and
|
||||
|
|
@ -697,9 +765,15 @@ static const char *__doc_kp_Tensor_getStagingBufferUsageFlags = R"doc()doc";
|
|||
|
||||
static const char *__doc_kp_Tensor_getStagingMemoryPropertyFlags = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_isInit = R"doc()doc";
|
||||
static const char *__doc_kp_Tensor_isInit =
|
||||
R"doc(Check whether tensor is initialized based on the created gpu
|
||||
resources.
|
||||
|
||||
static const char *__doc_kp_Tensor_mData = R"doc()doc";
|
||||
@returns Boolean stating whether tensor is initialized)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mDataType = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mDataTypeMemorySize = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mDevice = R"doc()doc";
|
||||
|
||||
|
|
@ -717,36 +791,28 @@ static const char *__doc_kp_Tensor_mPrimaryBuffer = R"doc()doc";
|
|||
|
||||
static const char *__doc_kp_Tensor_mPrimaryMemory = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mRawData = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mSize = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mStagingBuffer = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mStagingMemory = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mTensorType = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mapDataFromHostMemory =
|
||||
R"doc(Maps data from the Host Visible GPU memory into the data vector. It
|
||||
requires the Tensor to be of staging type for it to work.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_mapDataIntoHostMemory =
|
||||
R"doc(Maps data from the data vector into the Host Visible GPU memory. It
|
||||
requires the tensor to be of staging type for it to work.)doc";
|
||||
static const char *__doc_kp_Tensor_mapRawData = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_memorySize = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_operator_array =
|
||||
R"doc(Overrides the subscript operator to expose the underlying data's
|
||||
subscript operator which in this case would be its underlying
|
||||
vector's.
|
||||
|
||||
@param i The index where the element will be returned from. @return
|
||||
Returns the element in the position requested.)doc";
|
||||
static const char *__doc_kp_Tensor_rawData = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_rebuild =
|
||||
R"doc(Initialiser which calls the initialisation for all the respective
|
||||
tensors as well as creates the respective staging tensors. The staging
|
||||
tensors would only be created for the tensors of type
|
||||
TensorType::eDevice as otherwise there is no need to copy from host
|
||||
memory.)doc";
|
||||
R"doc(Function to trigger reinitialisation of the tensor buffer and memory
|
||||
with new data as well as new potential device type.
|
||||
|
||||
@param data Vector of data to use to initialise vector from @param
|
||||
tensorType The type to use for the tensor)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_recordBufferMemoryBarrier =
|
||||
R"doc(Records the buffer memory barrier into the command buffer which
|
||||
|
|
@ -788,7 +854,7 @@ would only be relevant for kp::Tensors of type eDevice.
|
|||
@param createBarrier Whether to create a barrier that ensures the data
|
||||
is copied before further operations. Default is true.)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_setData =
|
||||
static const char *__doc_kp_Tensor_setRawData =
|
||||
R"doc(Sets / resets the vector data of the tensor. This function does not
|
||||
perform any copies into GPU memory and is only performed on the host.)doc";
|
||||
|
||||
|
|
@ -803,6 +869,10 @@ R"doc(Retrieve the tensor type of the Tensor
|
|||
|
||||
@return Tensor type of tensor)doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_unmapRawData = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Tensor_vector = R"doc()doc";
|
||||
|
||||
#if defined(__GNUG__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -26,9 +26,9 @@ PYBIND11_MODULE(kp, m) {
|
|||
py::module_ np = py::module_::import("numpy");
|
||||
|
||||
py::enum_<kp::Tensor::TensorTypes>(m, "TensorTypes")
|
||||
.value("device", kp::Tensor::TensorTypes::eDevice, "Tensor holding data in GPU memory.")
|
||||
.value("host", kp::Tensor::TensorTypes::eHost, "Tensor used for CPU visible GPU data.")
|
||||
.value("storage", kp::Tensor::TensorTypes::eStorage, "Tensor with host visible gpu memory.")
|
||||
.value("device", kp::Tensor::TensorTypes::eDevice, DOC(kp, Tensor, TensorTypes, eDevice))
|
||||
.value("host", kp::Tensor::TensorTypes::eHost, DOC(kp, Tensor, TensorTypes, eHost))
|
||||
.value("storage", kp::Tensor::TensorTypes::eStorage, DOC(kp, Tensor, TensorTypes, eStorage))
|
||||
.export_values();
|
||||
|
||||
#if !defined(KOMPUTE_DISABLE_SHADER_UTILS) || !KOMPUTE_DISABLE_SHADER_UTILS
|
||||
|
|
@ -37,119 +37,168 @@ PYBIND11_MODULE(kp, m) {
|
|||
const std::string& source,
|
||||
const std::string& entryPoint,
|
||||
const std::vector<std::pair<std::string,std::string>>& definitions) {
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(source, entryPoint, definitions);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(source, entryPoint, definitions);
|
||||
return py::bytes((const char*)spirv.data(), spirv.size() * sizeof(uint32_t));
|
||||
},
|
||||
"Compiles string source provided and returns the value in bytes",
|
||||
py::arg("source"), py::arg("entryPoint") = "main", py::arg("definitions") = std::vector<std::pair<std::string,std::string>>() )
|
||||
DOC(kp, Shader, compileSource),
|
||||
py::arg("source"),
|
||||
py::arg("entryPoint") = "main",
|
||||
py::arg("definitions") = std::vector<std::pair<std::string,std::string>>() )
|
||||
.def_static("compile_sources", [](
|
||||
const std::vector<std::string>& source,
|
||||
const std::vector<std::string>& files,
|
||||
const std::string& entryPoint,
|
||||
const std::vector<std::pair<std::string,std::string>>& definitions) {
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_sources(source, files, entryPoint, definitions);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSources(source, files, entryPoint, definitions);
|
||||
return py::bytes((const char*)spirv.data(), spirv.size() * sizeof(uint32_t));
|
||||
},
|
||||
"Compiles sources provided with file names and returns the value in bytes",
|
||||
py::arg("sources"), py::arg("files") = std::vector<std::string>(), py::arg("entryPoint") = "main", py::arg("definitions") = std::vector<std::pair<std::string,std::string>>() );
|
||||
DOC(kp, Shader, compileSources),
|
||||
py::arg("sources"),
|
||||
py::arg("files") = std::vector<std::string>(),
|
||||
py::arg("entryPoint") = "main",
|
||||
py::arg("definitions") = std::vector<std::pair<std::string,std::string>>() );
|
||||
#endif // KOMPUTE_DISABLE_SHADER_UTILS
|
||||
|
||||
py::class_<kp::OpBase, std::shared_ptr<kp::OpBase>>(m, "OpBase");
|
||||
py::class_<kp::OpBase, std::shared_ptr<kp::OpBase>>(m, "OpBase", DOC(kp, OpBase));
|
||||
|
||||
py::class_<kp::OpTensorSyncDevice, std::shared_ptr<kp::OpTensorSyncDevice>>(m, "OpTensorSyncDevice", py::base<kp::OpBase>())
|
||||
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>());
|
||||
py::class_<kp::OpTensorSyncDevice, std::shared_ptr<kp::OpTensorSyncDevice>>(
|
||||
m, "OpTensorSyncDevice", py::base<kp::OpBase>(), DOC(kp, OpTensorSyncDevice))
|
||||
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>(), DOC(kp, OpTensorSyncDevice, OpTensorSyncDevice));
|
||||
|
||||
py::class_<kp::OpTensorSyncLocal, std::shared_ptr<kp::OpTensorSyncLocal>>(m, "OpTensorSyncLocal", py::base<kp::OpBase>())
|
||||
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>());
|
||||
py::class_<kp::OpTensorSyncLocal, std::shared_ptr<kp::OpTensorSyncLocal>>(
|
||||
m, "OpTensorSyncLocal", py::base<kp::OpBase>(), DOC(kp, OpTensorSyncLocal))
|
||||
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>(), DOC(kp, OpTensorSyncLocal, OpTensorSyncLocal));
|
||||
|
||||
py::class_<kp::OpTensorCopy, std::shared_ptr<kp::OpTensorCopy>>(m, "OpTensorCopy", py::base<kp::OpBase>())
|
||||
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>());
|
||||
py::class_<kp::OpTensorCopy, std::shared_ptr<kp::OpTensorCopy>>(
|
||||
m, "OpTensorCopy", py::base<kp::OpBase>(), DOC(kp, OpTensorCopy))
|
||||
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>(), DOC(kp, OpTensorCopy, OpTensorCopy));
|
||||
|
||||
py::class_<kp::OpAlgoDispatch, std::shared_ptr<kp::OpAlgoDispatch>>(m, "OpAlgoDispatch", py::base<kp::OpBase>())
|
||||
py::class_<kp::OpAlgoDispatch, std::shared_ptr<kp::OpAlgoDispatch>>(
|
||||
m, "OpAlgoDispatch", py::base<kp::OpBase>(), DOC(kp, OpAlgoDispatch))
|
||||
.def(py::init<const std::shared_ptr<kp::Algorithm>&,const kp::Constants&>(),
|
||||
DOC(kp, OpAlgoDispatch, OpAlgoDispatch),
|
||||
py::arg("algorithm"), py::arg("push_consts") = kp::Constants());
|
||||
|
||||
py::class_<kp::OpMult, std::shared_ptr<kp::OpMult>>(m, "OpMult", py::base<kp::OpBase>())
|
||||
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&,const std::shared_ptr<kp::Algorithm>&>());
|
||||
py::class_<kp::OpMult, std::shared_ptr<kp::OpMult>>(
|
||||
m, "OpMult", py::base<kp::OpBase>(), DOC(kp, OpMult))
|
||||
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&,const std::shared_ptr<kp::Algorithm>&>(),
|
||||
DOC(kp, OpMult, OpMult));
|
||||
|
||||
py::class_<kp::Algorithm, std::shared_ptr<kp::Algorithm>>(m, "Algorithm")
|
||||
.def("get_tensors", &kp::Algorithm::getTensors)
|
||||
.def("destroy", &kp::Algorithm::destroy)
|
||||
.def("get_spec_consts", &kp::Algorithm::getSpecializationConstants)
|
||||
.def("is_init", &kp::Algorithm::isInit);
|
||||
py::class_<kp::Algorithm, std::shared_ptr<kp::Algorithm>>(m, "Algorithm", DOC(kp, Algorithm, Algorithm))
|
||||
.def("get_tensors", &kp::Algorithm::getTensors, DOC(kp, Algorithm, getTensors))
|
||||
.def("destroy", &kp::Algorithm::destroy, DOC(kp, Algorithm, destroy))
|
||||
.def("get_spec_consts", &kp::Algorithm::getSpecializationConstants, DOC(kp, Algorithm, getSpecializationConstants))
|
||||
.def("is_init", &kp::Algorithm::isInit, DOC(kp, Algorithm, isInit));
|
||||
|
||||
py::class_<kp::Tensor, std::shared_ptr<kp::Tensor>>(m, "Tensor", DOC(kp, Tensor))
|
||||
.def("data", [](kp::Tensor& self) {
|
||||
return py::array(self.data().size(), self.data().data());
|
||||
}, "Returns stored data as a new numpy array.")
|
||||
.def("__getitem__", [](kp::Tensor &self, size_t index) -> float { return self.data()[index]; },
|
||||
"When only an index is necessary")
|
||||
.def("__setitem__", [](kp::Tensor &self, size_t index, float value) {
|
||||
self.data()[index] = value; })
|
||||
.def("set_data", [np](kp::Tensor &self, const py::array_t<float> data){
|
||||
const py::array_t<float> flatdata = np.attr("ravel")(data);
|
||||
const py::buffer_info info = flatdata.request();
|
||||
const float* ptr = (float*) info.ptr;
|
||||
self.setData(std::vector<float>(ptr, ptr+flatdata.size()));
|
||||
}, "Overrides the data in the local Tensor memory.")
|
||||
.def("__iter__", [](kp::Tensor &self) {
|
||||
return py::make_iterator(self.data().begin(), self.data().end());
|
||||
}, py::keep_alive<0, 1>(), // Required to keep alive iterator while exists
|
||||
"Iterator to enable looping within data structure as required.")
|
||||
.def("__contains__", [](kp::Tensor &self, float v) {
|
||||
for (size_t i = 0; i < self.data().size(); ++i) {
|
||||
if (v == self.data()[i]) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
})
|
||||
.def("__reversed__", [](kp::Tensor &self) {
|
||||
size_t size = self.data().size();
|
||||
std::vector<float> reversed(size);
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
reversed[size - i - 1] = self.data()[i];
|
||||
// Non-owning container exposing the underlying pointer
|
||||
py::str dummyDataOwner; // Explicitly request data to not be owned by np
|
||||
switch (self.dataType()) {
|
||||
case kp::Tensor::TensorDataTypes::eFloat:
|
||||
return py::array(self.size(), self.data<float>(), dummyDataOwner);
|
||||
case kp::Tensor::TensorDataTypes::eUnsignedInt:
|
||||
return py::array(self.size(), self.data<uint32_t>(), dummyDataOwner);
|
||||
case kp::Tensor::TensorDataTypes::eInt:
|
||||
return py::array(self.size(), self.data<int32_t>(), dummyDataOwner);
|
||||
case kp::Tensor::TensorDataTypes::eDouble:
|
||||
return py::array(self.size(), self.data<double>(), dummyDataOwner);
|
||||
case kp::Tensor::TensorDataTypes::eBool:
|
||||
return py::array(self.size(), self.data<bool>(), dummyDataOwner);
|
||||
default:
|
||||
throw std::runtime_error("Kompute Python data type not supported");
|
||||
}
|
||||
return reversed;
|
||||
})
|
||||
.def("size", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.")
|
||||
.def("__len__", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.")
|
||||
.def("tensor_type", &kp::Tensor::tensorType, "Retreves the memory type of the tensor.")
|
||||
.def("is_init", &kp::Tensor::isInit, "Checks whether the tensor GPU memory has been initialised.")
|
||||
.def("destroy", &kp::Tensor::destroy, "Destroy tensor GPU resources.");
|
||||
}, DOC(kp, Tensor, data))
|
||||
.def("size", &kp::Tensor::size, DOC(kp, Tensor, size))
|
||||
.def("__len__", &kp::Tensor::size, DOC(kp, Tensor, size))
|
||||
.def("tensor_type", &kp::Tensor::tensorType, DOC(kp, Tensor, tensorType))
|
||||
.def("data_type", &kp::Tensor::dataType, DOC(kp, Tensor, dataType))
|
||||
.def("is_init", &kp::Tensor::isInit, DOC(kp, Tensor, isInit))
|
||||
.def("destroy", &kp::Tensor::destroy, DOC(kp, Tensor, destroy));
|
||||
|
||||
py::class_<kp::Sequence, std::shared_ptr<kp::Sequence>>(m, "Sequence")
|
||||
.def("record", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.record(op); })
|
||||
.def("eval", [](kp::Sequence& self) { return self.eval(); })
|
||||
.def("eval", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.eval(op); })
|
||||
.def("eval_async", [](kp::Sequence& self) { return self.eval(); })
|
||||
.def("eval_async", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.evalAsync(op); })
|
||||
.def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); })
|
||||
.def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); })
|
||||
.def("is_recording", &kp::Sequence::isRecording)
|
||||
.def("is_running", &kp::Sequence::isRunning)
|
||||
.def("is_init", &kp::Sequence::isInit)
|
||||
.def("get_timestamps", &kp::Sequence::getTimestamps)
|
||||
.def("clear", &kp::Sequence::clear)
|
||||
.def("destroy", &kp::Sequence::destroy);
|
||||
.def("record", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.record(op); },
|
||||
DOC(kp, Sequence, record))
|
||||
.def("eval", [](kp::Sequence& self) { return self.eval(); },
|
||||
DOC(kp, Sequence, eval))
|
||||
.def("eval", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.eval(op); },
|
||||
DOC(kp, Sequence, eval_2))
|
||||
.def("eval_async", [](kp::Sequence& self) { return self.eval(); },
|
||||
DOC(kp, Sequence, evalAwait))
|
||||
.def("eval_async", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.evalAsync(op); },
|
||||
DOC(kp, Sequence, evalAsync))
|
||||
.def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); },
|
||||
DOC(kp, Sequence, evalAwait))
|
||||
.def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); },
|
||||
DOC(kp, Sequence, evalAwait))
|
||||
.def("is_recording", &kp::Sequence::isRecording,
|
||||
DOC(kp, Sequence, isRecording))
|
||||
.def("is_running", &kp::Sequence::isRunning,
|
||||
DOC(kp, Sequence, isRunning))
|
||||
.def("is_init", &kp::Sequence::isInit,
|
||||
DOC(kp, Sequence, isInit))
|
||||
.def("clear", &kp::Sequence::clear,
|
||||
DOC(kp, Sequence, clear))
|
||||
.def("rerecord", &kp::Sequence::rerecord,
|
||||
DOC(kp, Sequence, rerecord))
|
||||
.def("get_timestamps", &kp::Sequence::getTimestamps,
|
||||
DOC(kp, Sequence, getTimestamps))
|
||||
.def("destroy", &kp::Sequence::destroy,
|
||||
DOC(kp, Sequence, destroy));
|
||||
|
||||
py::class_<kp::Manager, std::shared_ptr<kp::Manager>>(m, "Manager")
|
||||
.def(py::init())
|
||||
.def(py::init<uint32_t>())
|
||||
py::class_<kp::Manager, std::shared_ptr<kp::Manager>>(m, "Manager", DOC(kp, Manager))
|
||||
.def(py::init(), DOC(kp, Manager, Manager))
|
||||
.def(py::init<uint32_t>(), DOC(kp, Manager, Manager_2))
|
||||
.def(py::init<uint32_t,const std::vector<uint32_t>&,const std::vector<std::string>&>(),
|
||||
DOC(kp, Manager, Manager_2),
|
||||
py::arg("device") = 0,
|
||||
py::arg("family_queue_indices") = std::vector<uint32_t>(),
|
||||
py::arg("desired_extensions") = std::vector<std::string>())
|
||||
.def("sequence", &kp::Manager::sequence, py::arg("queue_index") = 0, py::arg("total_timestamps") = 0)
|
||||
.def("sequence", &kp::Manager::sequence, DOC(kp, Manager, sequence),
|
||||
py::arg("queue_index") = 0, py::arg("total_timestamps") = 0)
|
||||
.def("tensor", [np](kp::Manager& self,
|
||||
const py::array_t<float> data,
|
||||
const py::array_t<float>& data,
|
||||
kp::Tensor::TensorTypes tensor_type) {
|
||||
const py::array_t<float> flatdata = np.attr("ravel")(data);
|
||||
const py::array_t<float>& flatdata = np.attr("ravel")(data);
|
||||
const py::buffer_info info = flatdata.request();
|
||||
const float* ptr = (float*) info.ptr;
|
||||
return self.tensor(std::vector<float>(ptr, ptr+flatdata.size()), tensor_type);
|
||||
KP_LOG_DEBUG("Kompute Python Manager tensor() creating tensor float with data size {}", flatdata.size());
|
||||
return self.tensor(
|
||||
info.ptr,
|
||||
flatdata.size(),
|
||||
sizeof(float),
|
||||
kp::Tensor::TensorDataTypes::eFloat,
|
||||
tensor_type);
|
||||
},
|
||||
"Tensor initialisation function with data and tensor type",
|
||||
DOC(kp, Manager, tensor),
|
||||
py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice)
|
||||
.def("tensor_t", [np](kp::Manager& self,
|
||||
const py::array& data,
|
||||
kp::Tensor::TensorTypes tensor_type) {
|
||||
// TODO: Support strides in numpy format
|
||||
const py::array& flatdata = np.attr("ravel")(data);
|
||||
const py::buffer_info info = flatdata.request();
|
||||
KP_LOG_DEBUG("Kompute Python Manager creating tensor_T with data size {} dtype {}",
|
||||
flatdata.size(), std::string(py::str(flatdata.dtype())));
|
||||
if (flatdata.dtype() == py::dtype::of<std::float_t>()) {
|
||||
return self.tensor(
|
||||
info.ptr, flatdata.size(), sizeof(float), kp::Tensor::TensorDataTypes::eFloat, tensor_type);
|
||||
} else if (flatdata.dtype() == py::dtype::of<std::uint32_t>()) {
|
||||
return self.tensor(
|
||||
info.ptr, flatdata.size(), sizeof(uint32_t), kp::Tensor::TensorDataTypes::eUnsignedInt, tensor_type);
|
||||
} else if (flatdata.dtype() == py::dtype::of<std::int32_t>()) {
|
||||
return self.tensor(
|
||||
info.ptr, flatdata.size(), sizeof(int32_t), kp::Tensor::TensorDataTypes::eInt, tensor_type);
|
||||
} else if (flatdata.dtype() == py::dtype::of<std::double_t>()) {
|
||||
return self.tensor(
|
||||
info.ptr, flatdata.size(), sizeof(double), kp::Tensor::TensorDataTypes::eDouble, tensor_type);
|
||||
} else if (flatdata.dtype() == py::dtype::of<bool>()) {
|
||||
return self.tensor(
|
||||
info.ptr, flatdata.size(), sizeof(bool), kp::Tensor::TensorDataTypes::eBool, tensor_type);
|
||||
} else {
|
||||
throw std::runtime_error("Kompute Python no valid dtype supported");
|
||||
}
|
||||
},
|
||||
DOC(kp, Manager, tensorT),
|
||||
py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice)
|
||||
.def("algorithm", [](kp::Manager& self,
|
||||
const std::vector<std::shared_ptr<kp::Tensor>>& tensors,
|
||||
|
|
@ -163,8 +212,12 @@ PYBIND11_MODULE(kp, m) {
|
|||
std::vector<uint32_t> spirvVec((uint32_t*)data, (uint32_t*)(data + length));
|
||||
return self.algorithm(tensors, spirvVec, workgroup, spec_consts, push_consts);
|
||||
},
|
||||
"Algorithm initialisation function",
|
||||
py::arg("tensors"), py::arg("spirv"), py::arg("workgroup") = kp::Workgroup(), py::arg("spec_consts") = kp::Constants(), py::arg("push_consts") = kp::Constants());
|
||||
DOC(kp, Manager, algorithm),
|
||||
py::arg("tensors"),
|
||||
py::arg("spirv"),
|
||||
py::arg("workgroup") = kp::Workgroup(),
|
||||
py::arg("spec_consts") = kp::Constants(),
|
||||
py::arg("push_consts") = kp::Constants());
|
||||
|
||||
#ifdef VERSION_INFO
|
||||
m.attr("__version__") = VERSION_INFO;
|
||||
|
|
|
|||
|
|
@ -9,9 +9,9 @@ def test_array_multiplication():
|
|||
mgr = kp.Manager()
|
||||
|
||||
# 2. Create Kompute Tensors to hold data
|
||||
tensor_in_a = mgr.tensor([2, 2, 2])
|
||||
tensor_in_b = mgr.tensor([1, 2, 3])
|
||||
tensor_out = mgr.tensor([0, 0, 0])
|
||||
tensor_in_a = mgr.tensor(np.array([2, 2, 2]))
|
||||
tensor_in_b = mgr.tensor(np.array([1, 2, 3]))
|
||||
tensor_out = mgr.tensor(np.array([0, 0, 0]))
|
||||
|
||||
params = [tensor_in_a, tensor_in_b, tensor_out]
|
||||
|
||||
|
|
|
|||
|
|
@ -9,35 +9,15 @@ DIRNAME = os.path.dirname(os.path.abspath(__file__))
|
|||
|
||||
kp_log = logging.getLogger("kp")
|
||||
|
||||
# TODO: Add example with file
|
||||
#def test_opalgobase_file():
|
||||
# """
|
||||
# Test basic OpMult operation
|
||||
# """
|
||||
#
|
||||
# tensor_in_a = kp.Tensor([2, 2, 2])
|
||||
# tensor_in_b = kp.Tensor([1, 2, 3])
|
||||
# tensor_out = kp.Tensor([0, 0, 0])
|
||||
#
|
||||
# mgr = kp.Manager()
|
||||
# mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
|
||||
#
|
||||
# shader_path = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp.spv")
|
||||
#
|
||||
# mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shader_path)
|
||||
#
|
||||
# mgr.eval_tensor_sync_local_def([tensor_out])
|
||||
#
|
||||
# assert tensor_out.data() == [2.0, 4.0, 6.0]
|
||||
|
||||
def test_end_to_end():
|
||||
|
||||
mgr = kp.Manager()
|
||||
|
||||
tensor_in_a = mgr.tensor([2, 2, 2])
|
||||
tensor_in_b = mgr.tensor([1, 2, 3])
|
||||
tensor_out_a = mgr.tensor([0, 0, 0])
|
||||
tensor_out_b = mgr.tensor([0, 0, 0])
|
||||
# Explicit type constructor supports uint32, int32, double, float and bool
|
||||
tensor_out_a = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32))
|
||||
tensor_out_b = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32))
|
||||
|
||||
params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b]
|
||||
|
||||
|
|
@ -49,8 +29,8 @@ def test_end_to_end():
|
|||
// The input tensors bind index is relative to index in parameter passed
|
||||
layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
|
||||
layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
|
||||
layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
|
||||
layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
|
||||
layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
|
||||
layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
|
||||
|
||||
// Kompute supports push constants updated on dispatch
|
||||
layout(push_constant) uniform PushConstants {
|
||||
|
|
@ -62,8 +42,8 @@ def test_end_to_end():
|
|||
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
out_a[index] += in_a[index] * in_b[index];
|
||||
out_b[index] += const_one * push_const.val;
|
||||
out_a[index] += uint( in_a[index] * in_b[index] );
|
||||
out_b[index] += uint( const_one * push_const.val );
|
||||
}
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import pyshader as ps
|
||||
import numpy as np
|
||||
import kp
|
||||
|
||||
def test_logistic_regression():
|
||||
|
|
@ -46,21 +47,21 @@ def test_logistic_regression():
|
|||
mgr = kp.Manager(0)
|
||||
|
||||
# First we create input and output tensors for shader
|
||||
tensor_x_i = mgr.tensor([0.0, 1.0, 1.0, 1.0, 1.0])
|
||||
tensor_x_j = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0])
|
||||
tensor_x_i = mgr.tensor(np.array([0.0, 1.0, 1.0, 1.0, 1.0]))
|
||||
tensor_x_j = mgr.tensor(np.array([0.0, 0.0, 0.0, 1.0, 1.0]))
|
||||
|
||||
tensor_y = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0])
|
||||
tensor_y = mgr.tensor(np.array([0.0, 0.0, 0.0, 1.0, 1.0]))
|
||||
|
||||
tensor_w_in = mgr.tensor([0.001, 0.001])
|
||||
tensor_w_out_i = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
tensor_w_out_j = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
tensor_w_in = mgr.tensor(np.array([0.001, 0.001]))
|
||||
tensor_w_out_i = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
|
||||
tensor_w_out_j = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
|
||||
|
||||
tensor_b_in = mgr.tensor([0.0])
|
||||
tensor_b_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
tensor_b_in = mgr.tensor(np.array([0.0]))
|
||||
tensor_b_out = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
|
||||
|
||||
tensor_l_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
tensor_l_out = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
|
||||
|
||||
tensor_m = mgr.tensor([ tensor_y.size() ])
|
||||
tensor_m = mgr.tensor(np.array([ tensor_y.size() ]))
|
||||
|
||||
# We store them in an array for easier interaction
|
||||
params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
|
||||
|
|
@ -91,9 +92,9 @@ def test_logistic_regression():
|
|||
|
||||
# Calculate the parameters based on the respective derivatives calculated
|
||||
for j_iter in range(tensor_b_out.size()):
|
||||
tensor_w_in[0] -= learning_rate * tensor_w_out_i.data()[j_iter]
|
||||
tensor_w_in[1] -= learning_rate * tensor_w_out_j.data()[j_iter]
|
||||
tensor_b_in[0] -= learning_rate * tensor_b_out.data()[j_iter]
|
||||
tensor_w_in.data()[0] -= learning_rate * tensor_w_out_i.data()[j_iter]
|
||||
tensor_w_in.data()[1] -= learning_rate * tensor_w_out_j.data()[j_iter]
|
||||
tensor_b_in.data()[0] -= learning_rate * tensor_b_out.data()[j_iter]
|
||||
|
||||
assert tensor_w_in.data()[0] < 0.01
|
||||
assert tensor_w_in.data()[0] > 0.0
|
||||
|
|
|
|||
206
python/test/test_tensor_types.py
Normal file
206
python/test/test_tensor_types.py
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
import pyshader as ps
|
||||
import os
|
||||
import pytest
|
||||
import kp
|
||||
import numpy as np
|
||||
|
||||
|
||||
def test_type_float():
    """
    Multiply two float32 arrays element-wise in a compute shader, using
    tensors created through the default ``mgr.tensor`` constructor, and
    check the synced-back output against the numpy product.
    """
    glsl_source = """
        #version 450
        layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];};
        layout(set = 0, binding = 1) buffer tensorRhs {float valuesRhs[];};
        layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];};
        layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

        void main()
        {
            uint index = gl_GlobalInvocationID.x;
            valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
        }
    """

    compiled = kp.Shader.compile_source(glsl_source)

    lhs = np.array([123., 153., 231.], dtype=np.float32)
    rhs = np.array([9482, 1208, 1238], dtype=np.float32)
    zeros = np.array([0, 0, 0], dtype=np.float32)

    manager = kp.Manager()

    lhs_tensor = manager.tensor(lhs)
    rhs_tensor = manager.tensor(rhs)
    out_tensor = manager.tensor(zeros)

    bindings = [lhs_tensor, rhs_tensor, out_tensor]

    # Upload inputs, run the shader, then bring only the output back to host.
    seq = manager.sequence()
    seq = seq.record(kp.OpTensorSyncDevice(bindings))
    seq = seq.record(kp.OpAlgoDispatch(manager.algorithm(bindings, compiled)))
    seq = seq.record(kp.OpTensorSyncLocal([out_tensor]))
    seq.eval()

    assert np.all(out_tensor.data() == lhs * rhs)
|
||||
|
||||
|
||||
def test_type_float_double_incorrect():
    """
    Feed a uint32 tensor into a shader binding declared as ``float`` and
    check that the output does NOT match the numpy product of the inputs
    for any element.
    """
    glsl_source = """
        #version 450
        layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];};
        layout(set = 0, binding = 1) buffer tensorRhs {float valuesRhs[];};
        layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];};
        layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

        void main()
        {
            uint index = gl_GlobalInvocationID.x;
            valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
        }
    """

    compiled = kp.Shader.compile_source(glsl_source)

    lhs = np.array([123., 153., 231.], dtype=np.float32)
    # Deliberately uint32 while the shader declares this buffer as float.
    rhs = np.array([9482, 1208, 1238], dtype=np.uint32)
    zeros = np.array([0, 0, 0], dtype=np.float32)

    manager = kp.Manager()

    lhs_tensor = manager.tensor_t(lhs)
    rhs_tensor = manager.tensor_t(rhs)
    out_tensor = manager.tensor_t(zeros)

    bindings = [lhs_tensor, rhs_tensor, out_tensor]

    seq = manager.sequence()
    seq = seq.record(kp.OpTensorSyncDevice(bindings))
    seq = seq.record(kp.OpAlgoDispatch(manager.algorithm(bindings, compiled)))
    seq = seq.record(kp.OpTensorSyncLocal([out_tensor]))
    seq.eval()

    assert np.all(out_tensor.data() != lhs * rhs)
|
||||
|
||||
# Default to "" so that an unset VK_ICD_FILENAMES does not raise
# "TypeError: argument of type 'NoneType' is not iterable" at import time,
# which would break collection of every test in this module.
@pytest.mark.skipif("swiftshader" in os.environ.get("VK_ICD_FILENAMES", ""),
                    reason="Swiftshader doesn't support double")
def test_type_double():
    """
    Multiply two float64 arrays element-wise in a compute shader using
    tensors created with ``mgr.tensor_t`` and compare the synced-back
    output with the numpy product.

    Skipped when the VK_ICD_FILENAMES environment variable mentions
    "swiftshader".
    """

    shader = """
        #version 450
        layout(set = 0, binding = 0) buffer tensorLhs { double valuesLhs[]; };
        layout(set = 0, binding = 1) buffer tensorRhs { double valuesRhs[]; };
        layout(set = 0, binding = 2) buffer tensorOutput { double valuesOutput[]; };
        layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

        void main()
        {
            uint index = gl_GlobalInvocationID.x;
            valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
        }
    """

    spirv = kp.Shader.compile_source(shader)

    arr_in_a = np.array([123., 153., 231.], dtype=np.float64)
    arr_in_b = np.array([9482, 1208, 1238], dtype=np.float64)
    arr_out = np.array([0, 0, 0], dtype=np.float64)

    mgr = kp.Manager()

    tensor_in_a = mgr.tensor_t(arr_in_a)
    tensor_in_b = mgr.tensor_t(arr_in_b)
    tensor_out = mgr.tensor_t(arr_out)

    params = [tensor_in_a, tensor_in_b, tensor_out]

    # Upload inputs, dispatch the shader, then sync only the output to host.
    (mgr.sequence()
        .record(kp.OpTensorSyncDevice(params))
        .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv)))
        .record(kp.OpTensorSyncLocal([tensor_out]))
        .eval())

    print(f"Dtype value {tensor_out.data().dtype}")

    assert np.all(tensor_out.data() == arr_in_a * arr_in_b)
|
||||
|
||||
def test_type_int():
    """
    Multiply two int32 arrays element-wise in a compute shader using
    tensors created with ``mgr.tensor_t`` and compare the synced-back
    output with the numpy product.
    """
    glsl_source = """
        #version 450
        layout(set = 0, binding = 0) buffer tensorLhs { int valuesLhs[]; };
        layout(set = 0, binding = 1) buffer tensorRhs { int valuesRhs[]; };
        layout(set = 0, binding = 2) buffer tensorOutput { int valuesOutput[]; };
        layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

        void main()
        {
            uint index = gl_GlobalInvocationID.x;
            valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
        }
    """

    compiled = kp.Shader.compile_source(glsl_source)

    lhs = np.array([123, 153, 231], dtype=np.int32)
    rhs = np.array([9482, 1208, 1238], dtype=np.int32)
    zeros = np.array([0, 0, 0], dtype=np.int32)

    manager = kp.Manager()

    lhs_tensor = manager.tensor_t(lhs)
    rhs_tensor = manager.tensor_t(rhs)
    out_tensor = manager.tensor_t(zeros)

    bindings = [lhs_tensor, rhs_tensor, out_tensor]

    seq = manager.sequence()
    seq = seq.record(kp.OpTensorSyncDevice(bindings))
    seq = seq.record(kp.OpAlgoDispatch(manager.algorithm(bindings, compiled)))
    seq = seq.record(kp.OpTensorSyncLocal([out_tensor]))
    seq.eval()

    print(f"Dtype value {out_tensor.data().dtype}")

    assert np.all(out_tensor.data() == lhs * rhs)
|
||||
|
||||
def test_type_unsigned_int():
    """
    Multiply two uint32 arrays element-wise in a compute shader using
    tensors created with ``mgr.tensor_t`` and compare the synced-back
    output with the numpy product.
    """
    glsl_source = """
        #version 450
        layout(set = 0, binding = 0) buffer tensorLhs { uint valuesLhs[]; };
        layout(set = 0, binding = 1) buffer tensorRhs { uint valuesRhs[]; };
        layout(set = 0, binding = 2) buffer tensorOutput { uint valuesOutput[]; };
        layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

        void main()
        {
            uint index = gl_GlobalInvocationID.x;
            valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
        }
    """

    compiled = kp.Shader.compile_source(glsl_source)

    lhs = np.array([123, 153, 231], dtype=np.uint32)
    rhs = np.array([9482, 1208, 1238], dtype=np.uint32)
    zeros = np.array([0, 0, 0], dtype=np.uint32)

    manager = kp.Manager()

    lhs_tensor = manager.tensor_t(lhs)
    rhs_tensor = manager.tensor_t(rhs)
    out_tensor = manager.tensor_t(zeros)

    bindings = [lhs_tensor, rhs_tensor, out_tensor]

    seq = manager.sequence()
    seq = seq.record(kp.OpTensorSyncDevice(bindings))
    seq = seq.record(kp.OpAlgoDispatch(manager.algorithm(bindings, compiled)))
    seq = seq.record(kp.OpTensorSyncLocal([out_tensor]))
    seq.eval()

    print(f"Dtype value {out_tensor.data().dtype}")

    assert np.all(out_tensor.data() == lhs * rhs)
|
||||
|
||||
2
setup.py
2
setup.py
|
|
@ -57,7 +57,7 @@ class CMakeBuild(build_ext):
|
|||
else:
|
||||
cmake_args += ['-DKOMPUTE_EXTRA_CXX_FLAGS="-fPIC"']
|
||||
cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
|
||||
build_args += ['--', '-j2']
|
||||
build_args += ['--', '-j']
|
||||
|
||||
env = os.environ.copy()
|
||||
env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''),
|
||||
|
|
|
|||
|
|
@ -762,7 +762,7 @@ class Shader
|
|||
* GLSL compiler
|
||||
* @return The compiled SPIR-V binary in unsigned int32 format
|
||||
*/
|
||||
static std::vector<uint32_t> compile_sources(
|
||||
static std::vector<uint32_t> compileSources(
|
||||
const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& files = {},
|
||||
const std::string& entryPoint = "main",
|
||||
|
|
@ -783,7 +783,7 @@ class Shader
|
|||
* GLSL compiler
|
||||
* @return The compiled SPIR-V binary in unsigned int32 format
|
||||
*/
|
||||
static std::vector<uint32_t> compile_source(
|
||||
static std::vector<uint32_t> compileSource(
|
||||
const std::string& source,
|
||||
const std::string& entryPoint = "main",
|
||||
std::vector<std::pair<std::string, std::string>> definitions = {},
|
||||
|
|
@ -818,6 +818,14 @@ class Tensor
|
|||
eHost = 1, ///< Type is host memory, source and destination
|
||||
eStorage = 2, ///< Type is Device memory (only)
|
||||
};
|
||||
enum class TensorDataTypes
|
||||
{
|
||||
eBool = 0,
|
||||
eInt = 1,
|
||||
eUnsignedInt = 2,
|
||||
eFloat = 3,
|
||||
eDouble = 4,
|
||||
};
|
||||
|
||||
/**
|
||||
* Constructor with data provided which would be used to create the
|
||||
|
|
@ -831,14 +839,17 @@ class Tensor
|
|||
*/
|
||||
Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<float>& data,
|
||||
void* data,
|
||||
uint32_t elementTotalCount,
|
||||
uint32_t elementMemorySize,
|
||||
const TensorDataTypes& dataType,
|
||||
const TensorTypes& tensorType = TensorTypes::eDevice);
|
||||
|
||||
/**
|
||||
* Destructor which is in charge of freeing vulkan resources unless they
|
||||
* have been provided externally.
|
||||
*/
|
||||
~Tensor();
|
||||
virtual ~Tensor();
|
||||
|
||||
/**
|
||||
* Function to trigger reinitialisation of the tensor buffer and memory with
|
||||
|
|
@ -847,8 +858,9 @@ class Tensor
|
|||
* @param data Vector of data to use to initialise vector from
|
||||
* @param tensorType The type to use for the tensor
|
||||
*/
|
||||
void rebuild(const std::vector<float>& data,
|
||||
TensorTypes tensorType = TensorTypes::eDevice);
|
||||
void rebuild(void* data,
|
||||
uint32_t elementTotalCount,
|
||||
uint32_t elementMemorySize);
|
||||
|
||||
/**
|
||||
* Destroys and frees the GPU resources which include the buffer and memory.
|
||||
|
|
@ -862,32 +874,6 @@ class Tensor
|
|||
*/
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Returns the vector of data currently contained by the Tensor. It is
|
||||
* important to ensure that there is no out-of-sync data with the GPU
|
||||
* memory.
|
||||
*
|
||||
* @return Reference to vector of elements representing the data in the
|
||||
* tensor.
|
||||
*/
|
||||
std::vector<float>& data();
|
||||
/**
|
||||
* Overrides the subscript operator to expose the underlying data's
|
||||
* subscript operator which in this case would be its underlying
|
||||
* vector's.
|
||||
*
|
||||
* @param i The index where the element will be returned from.
|
||||
* @return Returns the element in the position requested.
|
||||
*/
|
||||
float& operator[](int index);
|
||||
/**
|
||||
* Returns the size/magnitude of the Tensor, which will be the total number
|
||||
* of elements across all dimensions
|
||||
*
|
||||
* @return Unsigned integer representing the total number of elements
|
||||
*/
|
||||
uint32_t size();
|
||||
|
||||
/**
|
||||
* Retrieve the tensor type of the Tensor
|
||||
*
|
||||
|
|
@ -895,12 +881,6 @@ class Tensor
|
|||
*/
|
||||
TensorTypes tensorType();
|
||||
|
||||
/**
|
||||
* Sets / resets the vector data of the tensor. This function does not
|
||||
* perform any copies into GPU memory and is only performed on the host.
|
||||
*/
|
||||
void setData(const std::vector<float>& data);
|
||||
|
||||
/**
|
||||
* Records a copy from the memory of the tensor provided to the current
|
||||
* tensor. This is intended to pass memory into a processing, to perform
|
||||
|
|
@ -963,18 +943,118 @@ class Tensor
|
|||
* @return Descriptor buffer info with own buffer
|
||||
*/
|
||||
vk::DescriptorBufferInfo constructDescriptorBufferInfo();
|
||||
|
||||
/**
|
||||
* Maps data from the Host Visible GPU memory into the data vector. It
|
||||
* requires the Tensor to be of staging type for it to work.
|
||||
* Returns the size/magnitude of the Tensor, which will be the total number
|
||||
* of elements across all dimensions
|
||||
*
|
||||
* @return Unsigned integer representing the total number of elements
|
||||
*/
|
||||
void mapDataFromHostMemory();
|
||||
// TODO: move to cpp
|
||||
uint32_t size() {
|
||||
return this->mSize;
|
||||
}
|
||||
|
||||
// TODO: move to cpp
|
||||
uint32_t dataTypeMemorySize() {
|
||||
return this->mDataTypeMemorySize;
|
||||
}
|
||||
|
||||
// TODO: move to cpp
|
||||
uint32_t memorySize() {
|
||||
return this->mSize * this->mDataTypeMemorySize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps data from the data vector into the Host Visible GPU memory. It
|
||||
* requires the tensor to be of staging type for it to work.
|
||||
* Retrieve the underlying data type of the Tensor
|
||||
*
|
||||
* @return Data type of tensor of type kp::Tensor::TensorDataTypes
|
||||
*/
|
||||
void mapDataIntoHostMemory();
|
||||
TensorDataTypes dataType() {
|
||||
return this->mDataType;
|
||||
}
|
||||
|
||||
void* rawData() {
|
||||
return this->mRawData;
|
||||
}
|
||||
|
||||
// TODO: move to cpp
|
||||
template <typename T>
|
||||
T* data() {
|
||||
return (T*)this->mRawData;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> vector() {
|
||||
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets / resets the vector data of the tensor. This function does not
|
||||
* perform any copies into GPU memory and is only performed on the host.
|
||||
*/
|
||||
void setRawData(const void* data)
|
||||
{
|
||||
// Copy data
|
||||
memcpy(this->mRawData, data, this->memorySize());
|
||||
}
|
||||
|
||||
protected:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
TensorTypes mTensorType;
|
||||
TensorDataTypes mDataType;
|
||||
uint32_t mSize;
|
||||
uint32_t mDataTypeMemorySize;
|
||||
void* mRawData;
|
||||
|
||||
private:
|
||||
void mapRawData() {
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
|
||||
|
||||
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
|
||||
|
||||
if (this->mTensorType == TensorTypes::eHost) {
|
||||
hostVisibleMemory = this->mPrimaryMemory;
|
||||
} else if (this->mTensorType == TensorTypes::eDevice) {
|
||||
hostVisibleMemory = this->mStagingMemory;
|
||||
} else {
|
||||
KP_LOG_WARN(
|
||||
"Kompute Tensor mapping data not supported on storage tensor");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::DeviceSize bufferSize = this->memorySize();
|
||||
|
||||
// Given we request coherent host memory we don't need to invalidate / flush
|
||||
this->mRawData = this->mDevice->mapMemory(
|
||||
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
|
||||
|
||||
vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
|
||||
}
|
||||
|
||||
void unmapRawData() {
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
|
||||
|
||||
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
|
||||
|
||||
if (this->mTensorType == TensorTypes::eHost) {
|
||||
hostVisibleMemory = this->mPrimaryMemory;
|
||||
} else if (this->mTensorType == TensorTypes::eDevice) {
|
||||
hostVisibleMemory = this->mStagingMemory;
|
||||
} else {
|
||||
KP_LOG_WARN(
|
||||
"Kompute Tensor mapping data not supported on storage tensor");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::DeviceSize bufferSize = this->memorySize();
|
||||
vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
|
||||
this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
|
||||
this->mDevice->unmapMemory(*hostVisibleMemory);
|
||||
}
|
||||
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
|
|
@ -989,11 +1069,6 @@ class Tensor
|
|||
std::shared_ptr<vk::DeviceMemory> mStagingMemory;
|
||||
bool mFreeStagingMemory = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<float> mData;
|
||||
|
||||
TensorTypes mTensorType = TensorTypes::eDevice;
|
||||
|
||||
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
|
||||
void createBuffer(std::shared_ptr<vk::Buffer> buffer,
|
||||
vk::BufferUsageFlags bufferUsageFlags);
|
||||
|
|
@ -1012,7 +1087,60 @@ class Tensor
|
|||
vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
|
||||
vk::BufferUsageFlags getStagingBufferUsageFlags();
|
||||
vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
|
||||
uint64_t memorySize();
|
||||
|
||||
};
|
||||
|
||||
// TODO: Limit T to be only float, bool, double, etc
|
||||
template <typename T>
|
||||
class TensorT: public Tensor
|
||||
{
|
||||
|
||||
public:
|
||||
TensorT(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<T>& data,
|
||||
const TensorTypes& tensorType = TensorTypes::eDevice)
|
||||
: Tensor(physicalDevice,
|
||||
device,
|
||||
(void*)data.data(),
|
||||
data.size(),
|
||||
sizeof(T),
|
||||
this->dataType(),
|
||||
tensorType)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size());
|
||||
}
|
||||
|
||||
~TensorT() {
|
||||
KP_LOG_DEBUG("Kompute TensorT destructor");
|
||||
}
|
||||
|
||||
T* data() {
|
||||
return (T*)this->mRawData;
|
||||
}
|
||||
|
||||
std::vector<T> vector() {
|
||||
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
|
||||
}
|
||||
|
||||
T& operator[](int index) {
|
||||
return *(((T*)this->mRawData) + index);
|
||||
}
|
||||
|
||||
void setData(const std::vector<T>& data) {
|
||||
|
||||
KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", data.size());
|
||||
|
||||
if (data.size() != this->mSize) {
|
||||
throw std::runtime_error(
|
||||
"Kompute TensorT Cannot set data of different sizes");
|
||||
}
|
||||
|
||||
Tensor::setRawData(data.data());
|
||||
}
|
||||
|
||||
TensorDataTypes dataType();
|
||||
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
@ -1873,7 +2001,7 @@ class Manager
|
|||
* If zero (default), disables latching of timestamps.
|
||||
* @returns Shared pointer with initialised sequence
|
||||
*/
|
||||
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0, uint32_t nrOfTimestamps = 0);
|
||||
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0, uint32_t totalTimestamps = 0);
|
||||
|
||||
/**
|
||||
* Create a managed tensor that will be destroyed by this manager
|
||||
|
|
@ -1883,9 +2011,46 @@ class Manager
|
|||
* @param tensorType The type of tensor to initialize
|
||||
* @returns Shared pointer with initialised tensor
|
||||
*/
|
||||
std::shared_ptr<Tensor> tensor(
|
||||
template <typename T>
|
||||
std::shared_ptr<TensorT<T>> tensorT(
|
||||
const std::vector<T>& data,
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
|
||||
|
||||
std::shared_ptr<TensorT<T>> tensor{ new kp::TensorT<T>(
|
||||
this->mPhysicalDevice, this->mDevice, data, tensorType) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
this->mManagedTensors.push_back(tensor);
|
||||
}
|
||||
|
||||
return tensor;
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorT<float>> tensor(
|
||||
const std::vector<float>& data,
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice);
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
|
||||
{
|
||||
return this->tensorT<float>(data, tensorType);
|
||||
}
|
||||
|
||||
std::shared_ptr<Tensor> tensor(
|
||||
void* data,
|
||||
uint32_t elementTotalCount,
|
||||
uint32_t elementMemorySize,
|
||||
const Tensor::TensorDataTypes& dataType,
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
|
||||
{
|
||||
std::shared_ptr<Tensor> tensor{ new kp::Tensor(
|
||||
this->mPhysicalDevice, this->mDevice, data, elementTotalCount, elementMemorySize, dataType, tensorType) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
this->mManagedTensors.push_back(tensor);
|
||||
}
|
||||
|
||||
return tensor;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a managed algorithm that will be destroyed by this manager
|
||||
|
|
|
|||
|
|
@ -395,21 +395,6 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
|
|||
KP_LOG_DEBUG("Kompute Manager compute queue obtained");
|
||||
}
|
||||
|
||||
std::shared_ptr<Tensor>
|
||||
Manager::tensor(const std::vector<float>& data, Tensor::TensorTypes tensorType)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
|
||||
|
||||
std::shared_ptr<Tensor> tensor{ new kp::Tensor(
|
||||
this->mPhysicalDevice, this->mDevice, data, tensorType) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
this->mManagedTensors.push_back(tensor);
|
||||
}
|
||||
|
||||
return tensor;
|
||||
}
|
||||
|
||||
std::shared_ptr<Algorithm>
|
||||
Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
|
|
|
|||
|
|
@ -13,6 +13,20 @@ OpTensorCopy::OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors)
|
|||
throw std::runtime_error(
|
||||
"Kompute OpTensorCopy called with less than 2 tensor");
|
||||
}
|
||||
|
||||
kp::Tensor::TensorDataTypes dataType = this->mTensors[0]->dataType();
|
||||
uint32_t size = this->mTensors[0]->size();
|
||||
for (const std::shared_ptr<Tensor>& tensor : tensors) {
|
||||
if (tensor->dataType() != dataType) {
|
||||
throw std::runtime_error(fmt::format("Attempting to copy tensors of different types from {} to {}",
|
||||
dataType, tensor->dataType()));
|
||||
}
|
||||
if (tensor->size() != size) {
|
||||
throw std::runtime_error(fmt::format("Attempting to copy tensors of different sizes from {} to {}",
|
||||
size, tensor->size()));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OpTensorCopy::~OpTensorCopy()
|
||||
|
|
@ -43,9 +57,15 @@ OpTensorCopy::postEval(const vk::CommandBuffer& commandBuffer)
|
|||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy postEval called");
|
||||
|
||||
// TODO: Simplify with a copyRawData
|
||||
uint32_t size = this->mTensors[0]->size();
|
||||
uint32_t dataTypeMemSize = this->mTensors[0]->dataTypeMemorySize();
|
||||
uint32_t memSize = size * dataTypeMemSize;
|
||||
void* data = this->mTensors[0]->rawData();
|
||||
|
||||
// Copy the data from the first tensor into all the tensors
|
||||
for (size_t i = 1; i < this->mTensors.size(); i++) {
|
||||
this->mTensors[i]->setData(this->mTensors[0]->data());
|
||||
this->mTensors[i]->setRawData(data);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -41,12 +41,6 @@ OpTensorSyncDevice::preEval(const vk::CommandBuffer& commandBuffer)
|
|||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice preEval called");
|
||||
|
||||
// Performing sync of data as eval can be called multiple times with same op
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) {
|
||||
this->mTensors[i]->mapDataIntoHostMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -48,11 +48,6 @@ OpTensorSyncLocal::postEval(const vk::CommandBuffer& commandBuffer)
|
|||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal postEval called");
|
||||
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal mapping data into tensor local");
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) {
|
||||
this->mTensors[i]->mapDataFromHostMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
namespace kp {
|
||||
|
||||
std::vector<uint32_t>
|
||||
Shader::compile_sources(
|
||||
Shader::compileSources(
|
||||
const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& files,
|
||||
const std::string& entryPoint,
|
||||
|
|
@ -92,13 +92,13 @@ Shader::compile_sources(
|
|||
}
|
||||
|
||||
std::vector<uint32_t>
|
||||
Shader::compile_source(
|
||||
Shader::compileSource(
|
||||
const std::string& source,
|
||||
const std::string& entryPoint,
|
||||
std::vector<std::pair<std::string, std::string>> definitions,
|
||||
const TBuiltInResource& resource)
|
||||
{
|
||||
return compile_sources({ source },
|
||||
return compileSources({ source },
|
||||
std::vector<std::string>({}),
|
||||
entryPoint,
|
||||
definitions,
|
||||
|
|
|
|||
156
src/Tensor.cpp
156
src/Tensor.cpp
|
|
@ -5,17 +5,22 @@ namespace kp {
|
|||
|
||||
Tensor::Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<float>& data,
|
||||
void* data,
|
||||
uint32_t elementTotalCount,
|
||||
uint32_t elementMemorySize,
|
||||
const TensorDataTypes& dataType,
|
||||
const TensorTypes& tensorType)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}",
|
||||
data.size(),
|
||||
elementTotalCount,
|
||||
tensorType);
|
||||
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
this->mDataType = dataType;
|
||||
this->mTensorType = tensorType;
|
||||
|
||||
this->rebuild(data, tensorType);
|
||||
this->rebuild(data, elementTotalCount, elementMemorySize);
|
||||
}
|
||||
|
||||
Tensor::~Tensor()
|
||||
|
|
@ -29,12 +34,14 @@ Tensor::~Tensor()
|
|||
}
|
||||
|
||||
void
|
||||
Tensor::rebuild(const std::vector<float>& data, TensorTypes tensorType)
|
||||
Tensor::rebuild(void* data,
|
||||
uint32_t elementTotalCount,
|
||||
uint32_t elementMemorySize)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", data.size());
|
||||
KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", elementTotalCount);
|
||||
|
||||
this->mData = data;
|
||||
this->mTensorType = tensorType;
|
||||
this->mSize = elementTotalCount;
|
||||
this->mDataTypeMemorySize = elementMemorySize;
|
||||
|
||||
if (this->mPrimaryBuffer || this->mPrimaryMemory) {
|
||||
KP_LOG_DEBUG(
|
||||
|
|
@ -43,30 +50,9 @@ Tensor::rebuild(const std::vector<float>& data, TensorTypes tensorType)
|
|||
}
|
||||
|
||||
this->allocateMemoryCreateGPUResources();
|
||||
}
|
||||
this->mapRawData();
|
||||
|
||||
std::vector<float>&
|
||||
Tensor::data()
|
||||
{
|
||||
return this->mData;
|
||||
}
|
||||
|
||||
float&
|
||||
Tensor::operator[](int index)
|
||||
{
|
||||
return this->mData[index];
|
||||
}
|
||||
|
||||
uint64_t
|
||||
Tensor::memorySize()
|
||||
{
|
||||
return this->size() * sizeof(float);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
Tensor::size()
|
||||
{
|
||||
return static_cast<uint32_t>(this->mData.size());
|
||||
memcpy(this->mRawData, data, this->memorySize());
|
||||
}
|
||||
|
||||
Tensor::TensorTypes
|
||||
|
|
@ -78,18 +64,12 @@ Tensor::tensorType()
|
|||
bool
|
||||
Tensor::isInit()
|
||||
{
|
||||
return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory;
|
||||
return this->mDevice
|
||||
&& this->mPrimaryBuffer
|
||||
&& this->mPrimaryMemory
|
||||
&& this->mRawData;
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::setData(const std::vector<float>& data)
|
||||
{
|
||||
if (data.size() != this->mData.size()) {
|
||||
throw std::runtime_error(
|
||||
"Kompute Tensor Cannot set data of different sizes");
|
||||
}
|
||||
this->mData = data;
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
|
||||
|
|
@ -195,66 +175,13 @@ Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
|
|||
vk::DescriptorBufferInfo
|
||||
Tensor::constructDescriptorBufferInfo()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}", this->memorySize());
|
||||
vk::DeviceSize bufferSize = this->memorySize();
|
||||
return vk::DescriptorBufferInfo(*this->mPrimaryBuffer,
|
||||
0, // offset
|
||||
bufferSize);
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::mapDataFromHostMemory()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
|
||||
|
||||
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
|
||||
|
||||
if (this->mTensorType == TensorTypes::eHost) {
|
||||
hostVisibleMemory = this->mPrimaryMemory;
|
||||
} else if (this->mTensorType == TensorTypes::eDevice) {
|
||||
hostVisibleMemory = this->mStagingMemory;
|
||||
} else {
|
||||
KP_LOG_WARN(
|
||||
"Kompute Tensor mapping data not supported on storage tensor");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::DeviceSize bufferSize = this->memorySize();
|
||||
void* mapped = this->mDevice->mapMemory(
|
||||
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
|
||||
vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
|
||||
this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange);
|
||||
memcpy(this->mData.data(), mapped, bufferSize);
|
||||
this->mDevice->unmapMemory(*hostVisibleMemory);
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::mapDataIntoHostMemory()
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor local mapping tensor data to host buffer");
|
||||
|
||||
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
|
||||
|
||||
if (this->mTensorType == TensorTypes::eHost) {
|
||||
hostVisibleMemory = this->mPrimaryMemory;
|
||||
} else if (this->mTensorType == TensorTypes::eDevice) {
|
||||
hostVisibleMemory = this->mStagingMemory;
|
||||
} else {
|
||||
KP_LOG_WARN(
|
||||
"Kompute Tensor mapping data not supported on storage tensor");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::DeviceSize bufferSize = this->memorySize();
|
||||
|
||||
void* mapped = this->mDevice->mapMemory(
|
||||
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
|
||||
memcpy(mapped, this->mData.data(), bufferSize);
|
||||
vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
|
||||
this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
|
||||
this->mDevice->unmapMemory(*hostVisibleMemory);
|
||||
}
|
||||
|
||||
vk::BufferUsageFlags
|
||||
Tensor::getPrimaryBufferUsageFlags()
|
||||
{
|
||||
|
|
@ -285,7 +212,8 @@ Tensor::getPrimaryMemoryPropertyFlags()
|
|||
return vk::MemoryPropertyFlagBits::eDeviceLocal;
|
||||
break;
|
||||
case TensorTypes::eHost:
|
||||
return vk::MemoryPropertyFlagBits::eHostVisible;
|
||||
return vk::MemoryPropertyFlagBits::eHostVisible |
|
||||
vk::MemoryPropertyFlagBits::eHostCoherent;
|
||||
break;
|
||||
case TensorTypes::eStorage:
|
||||
return vk::MemoryPropertyFlagBits::eDeviceLocal;
|
||||
|
|
@ -435,12 +363,20 @@ Tensor::destroy()
|
|||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor started destroy()");
|
||||
|
||||
// Setting raw data to null regardless whether device is available to invalidate Tensor
|
||||
this->mRawData = nullptr;
|
||||
this->mSize = 0;
|
||||
this->mDataTypeMemorySize = 0;
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_WARN(
|
||||
"Kompute Tensor destructor reached with null Device pointer");
|
||||
return;
|
||||
}
|
||||
|
||||
// Unmap the current memory data
|
||||
this->unmapRawData();
|
||||
|
||||
if (this->mFreePrimaryBuffer) {
|
||||
if (!this->mPrimaryBuffer) {
|
||||
KP_LOG_WARN("Kompose Tensor expected to destroy primary buffer "
|
||||
|
|
@ -504,4 +440,34 @@ Tensor::destroy()
|
|||
KP_LOG_DEBUG("Kompute Tensor successful destroy()");
|
||||
}
|
||||
|
||||
template<>
|
||||
Tensor::TensorDataTypes
|
||||
TensorT<bool>::dataType() {
|
||||
return Tensor::TensorDataTypes::eBool;
|
||||
}
|
||||
|
||||
template<>
|
||||
Tensor::TensorDataTypes
|
||||
TensorT<int32_t>::dataType() {
|
||||
return Tensor::TensorDataTypes::eInt;
|
||||
}
|
||||
|
||||
template<>
|
||||
Tensor::TensorDataTypes
|
||||
TensorT<uint32_t>::dataType() {
|
||||
return Tensor::TensorDataTypes::eUnsignedInt;
|
||||
}
|
||||
|
||||
template<>
|
||||
Tensor::TensorDataTypes
|
||||
TensorT<float>::dataType() {
|
||||
return Tensor::TensorDataTypes::eFloat;
|
||||
}
|
||||
|
||||
template<>
|
||||
Tensor::TensorDataTypes
|
||||
TensorT<double>::dataType() {
|
||||
return Tensor::TensorDataTypes::eDouble;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -74,9 +74,46 @@ class Manager
|
|||
* @param tensorType The type of tensor to initialize
|
||||
* @returns Shared pointer with initialised tensor
|
||||
*/
|
||||
std::shared_ptr<Tensor> tensor(
|
||||
template <typename T>
|
||||
std::shared_ptr<TensorT<T>> tensorT(
|
||||
const std::vector<T>& data,
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
|
||||
|
||||
std::shared_ptr<TensorT<T>> tensor{ new kp::TensorT<T>(
|
||||
this->mPhysicalDevice, this->mDevice, data, tensorType) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
this->mManagedTensors.push_back(tensor);
|
||||
}
|
||||
|
||||
return tensor;
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorT<float>> tensor(
|
||||
const std::vector<float>& data,
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice);
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
|
||||
{
|
||||
return this->tensorT<float>(data, tensorType);
|
||||
}
|
||||
|
||||
std::shared_ptr<Tensor> tensor(
|
||||
void* data,
|
||||
uint32_t elementTotalCount,
|
||||
uint32_t elementMemorySize,
|
||||
const Tensor::TensorDataTypes& dataType,
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
|
||||
{
|
||||
std::shared_ptr<Tensor> tensor{ new kp::Tensor(
|
||||
this->mPhysicalDevice, this->mDevice, data, elementTotalCount, elementMemorySize, dataType, tensorType) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
this->mManagedTensors.push_back(tensor);
|
||||
}
|
||||
|
||||
return tensor;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a managed algorithm that will be destroyed by this manager
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ class Shader
|
|||
* GLSL compiler
|
||||
* @return The compiled SPIR-V binary in unsigned int32 format
|
||||
*/
|
||||
static std::vector<uint32_t> compile_sources(
|
||||
static std::vector<uint32_t> compileSources(
|
||||
const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& files = {},
|
||||
const std::string& entryPoint = "main",
|
||||
|
|
@ -60,7 +60,7 @@ class Shader
|
|||
* GLSL compiler
|
||||
* @return The compiled SPIR-V binary in unsigned int32 format
|
||||
*/
|
||||
static std::vector<uint32_t> compile_source(
|
||||
static std::vector<uint32_t> compileSource(
|
||||
const std::string& source,
|
||||
const std::string& entryPoint = "main",
|
||||
std::vector<std::pair<std::string, std::string>> definitions = {},
|
||||
|
|
|
|||
|
|
@ -27,6 +27,14 @@ class Tensor
|
|||
eHost = 1, ///< Type is host memory, source and destination
|
||||
eStorage = 2, ///< Type is Device memory (only)
|
||||
};
|
||||
/**
 * Element data types a Tensor can be tagged with. The numeric values are
 * spelled out explicitly and are kept exactly as declared.
 */
enum class TensorDataTypes
{
    eBool = 0,        ///< Elements are bool
    eInt = 1,         ///< Elements are signed 32-bit integers (int32_t)
    eUnsignedInt = 2, ///< Elements are unsigned 32-bit integers (uint32_t)
    eFloat = 3,       ///< Elements are float
    eDouble = 4,      ///< Elements are double
};
|
||||
|
||||
/**
|
||||
* Constructor with data provided which would be used to create the
|
||||
|
|
@ -40,14 +48,17 @@ class Tensor
|
|||
*/
|
||||
Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<float>& data,
|
||||
void* data,
|
||||
uint32_t elementTotalCount,
|
||||
uint32_t elementMemorySize,
|
||||
const TensorDataTypes& dataType,
|
||||
const TensorTypes& tensorType = TensorTypes::eDevice);
|
||||
|
||||
/**
|
||||
* Destructor which is in charge of freeing vulkan resources unless they
|
||||
* have been provided externally.
|
||||
*/
|
||||
~Tensor();
|
||||
virtual ~Tensor();
|
||||
|
||||
/**
|
||||
* Function to trigger reinitialisation of the tensor buffer and memory with
|
||||
|
|
@ -56,8 +67,9 @@ class Tensor
|
|||
* @param data Vector of data to use to initialise vector from
|
||||
* @param tensorType The type to use for the tensor
|
||||
*/
|
||||
void rebuild(const std::vector<float>& data,
|
||||
TensorTypes tensorType = TensorTypes::eDevice);
|
||||
void rebuild(void* data,
|
||||
uint32_t elementTotalCount,
|
||||
uint32_t elementMemorySize);
|
||||
|
||||
/**
|
||||
* Destroys and frees the GPU resources which include the buffer and memory.
|
||||
|
|
@ -71,32 +83,6 @@ class Tensor
|
|||
*/
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Returns the vector of data currently contained by the Tensor. It is
|
||||
* important to ensure that there is no out-of-sync data with the GPU
|
||||
* memory.
|
||||
*
|
||||
* @return Reference to vector of elements representing the data in the
|
||||
* tensor.
|
||||
*/
|
||||
std::vector<float>& data();
|
||||
/**
|
||||
* Overrides the subscript operator to expose the underlying data's
|
||||
* subscript operator which in this case would be its underlying
|
||||
* vector's.
|
||||
*
|
||||
* @param i The index where the element will be returned from.
|
||||
* @return Returns the element in the position requested.
|
||||
*/
|
||||
float& operator[](int index);
|
||||
/**
|
||||
* Returns the size/magnitude of the Tensor, which will be the total number
|
||||
* of elements across all dimensions
|
||||
*
|
||||
* @return Unsigned integer representing the total number of elements
|
||||
*/
|
||||
uint32_t size();
|
||||
|
||||
/**
|
||||
* Retrieve the tensor type of the Tensor
|
||||
*
|
||||
|
|
@ -104,12 +90,6 @@ class Tensor
|
|||
*/
|
||||
TensorTypes tensorType();
|
||||
|
||||
/**
|
||||
* Sets / resets the vector data of the tensor. This function does not
|
||||
* perform any copies into GPU memory and is only performed on the host.
|
||||
*/
|
||||
void setData(const std::vector<float>& data);
|
||||
|
||||
/**
|
||||
* Records a copy from the memory of the tensor provided to the current
|
||||
* tensor. This is intended to pass memory into a processing, to perform
|
||||
|
|
@ -172,18 +152,118 @@ class Tensor
|
|||
* @return Descriptor buffer info with own buffer
|
||||
*/
|
||||
vk::DescriptorBufferInfo constructDescriptorBufferInfo();
|
||||
|
||||
/**
|
||||
* Maps data from the Host Visible GPU memory into the data vector. It
|
||||
* requires the Tensor to be of staging type for it to work.
|
||||
* Returns the size/magnitude of the Tensor, which will be the total number
|
||||
* of elements across all dimensions
|
||||
*
|
||||
* @return Unsigned integer representing the total number of elements
|
||||
*/
|
||||
void mapDataFromHostMemory();
|
||||
// TODO: move to cpp
|
||||
uint32_t size() {
|
||||
return this->mSize;
|
||||
}
|
||||
|
||||
// TODO: move to cpp
|
||||
uint32_t dataTypeMemorySize() {
|
||||
return this->mDataTypeMemorySize;
|
||||
}
|
||||
|
||||
// TODO: move to cpp
|
||||
uint32_t memorySize() {
|
||||
return this->mSize * this->mDataTypeMemorySize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps data from the data vector into the Host Visible GPU memory. It
|
||||
* requires the tensor to be of staging type for it to work.
|
||||
* Retrieve the underlying data type of the Tensor
|
||||
*
|
||||
* @return Data type of tensor of type kp::Tensor::TensorDataTypes
|
||||
*/
|
||||
void mapDataIntoHostMemory();
|
||||
TensorDataTypes dataType() {
|
||||
return this->mDataType;
|
||||
}
|
||||
|
||||
void* rawData() {
|
||||
return this->mRawData;
|
||||
}
|
||||
|
||||
// TODO: move to cpp
|
||||
template <typename T>
|
||||
T* data() {
|
||||
return (T*)this->mRawData;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> vector() {
|
||||
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets / resets the vector data of the tensor. This function does not
|
||||
* perform any copies into GPU memory and is only performed on the host.
|
||||
*/
|
||||
void setRawData(const void* data)
|
||||
{
|
||||
// Copy data
|
||||
memcpy(this->mRawData, data, this->memorySize());
|
||||
}
|
||||
|
||||
protected:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
TensorTypes mTensorType;
|
||||
TensorDataTypes mDataType;
|
||||
uint32_t mSize;
|
||||
uint32_t mDataTypeMemorySize;
|
||||
void* mRawData;
|
||||
|
||||
private:
|
||||
void mapRawData() {
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
|
||||
|
||||
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
|
||||
|
||||
if (this->mTensorType == TensorTypes::eHost) {
|
||||
hostVisibleMemory = this->mPrimaryMemory;
|
||||
} else if (this->mTensorType == TensorTypes::eDevice) {
|
||||
hostVisibleMemory = this->mStagingMemory;
|
||||
} else {
|
||||
KP_LOG_WARN(
|
||||
"Kompute Tensor mapping data not supported on storage tensor");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::DeviceSize bufferSize = this->memorySize();
|
||||
|
||||
// Given we request coherent host memory we don't need to invalidate / flush
|
||||
this->mRawData = this->mDevice->mapMemory(
|
||||
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
|
||||
|
||||
vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
|
||||
}
|
||||
|
||||
void unmapRawData() {
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
|
||||
|
||||
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
|
||||
|
||||
if (this->mTensorType == TensorTypes::eHost) {
|
||||
hostVisibleMemory = this->mPrimaryMemory;
|
||||
} else if (this->mTensorType == TensorTypes::eDevice) {
|
||||
hostVisibleMemory = this->mStagingMemory;
|
||||
} else {
|
||||
KP_LOG_WARN(
|
||||
"Kompute Tensor mapping data not supported on storage tensor");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::DeviceSize bufferSize = this->memorySize();
|
||||
vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
|
||||
this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
|
||||
this->mDevice->unmapMemory(*hostVisibleMemory);
|
||||
}
|
||||
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
|
|
@ -198,11 +278,6 @@ class Tensor
|
|||
std::shared_ptr<vk::DeviceMemory> mStagingMemory;
|
||||
bool mFreeStagingMemory = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<float> mData;
|
||||
|
||||
TensorTypes mTensorType = TensorTypes::eDevice;
|
||||
|
||||
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
|
||||
void createBuffer(std::shared_ptr<vk::Buffer> buffer,
|
||||
vk::BufferUsageFlags bufferUsageFlags);
|
||||
|
|
@ -221,7 +296,60 @@ class Tensor
|
|||
vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
|
||||
vk::BufferUsageFlags getStagingBufferUsageFlags();
|
||||
vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
|
||||
uint64_t memorySize();
|
||||
|
||||
};
|
||||
|
||||
// TODO: Limit T to be only float, bool, double, etc
|
||||
template <typename T>
|
||||
class TensorT: public Tensor
|
||||
{
|
||||
|
||||
public:
|
||||
TensorT(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<T>& data,
|
||||
const TensorTypes& tensorType = TensorTypes::eDevice)
|
||||
: Tensor(physicalDevice,
|
||||
device,
|
||||
(void*)data.data(),
|
||||
data.size(),
|
||||
sizeof(T),
|
||||
this->dataType(),
|
||||
tensorType)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size());
|
||||
}
|
||||
|
||||
~TensorT() {
|
||||
KP_LOG_DEBUG("Kompute TensorT destructor");
|
||||
}
|
||||
|
||||
T* data() {
|
||||
return (T*)this->mRawData;
|
||||
}
|
||||
|
||||
std::vector<T> vector() {
|
||||
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
|
||||
}
|
||||
|
||||
T& operator[](int index) {
|
||||
return *(((T*)this->mRawData) + index);
|
||||
}
|
||||
|
||||
void setData(const std::vector<T>& data) {
|
||||
|
||||
KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", data.size());
|
||||
|
||||
if (data.size() != this->mSize) {
|
||||
throw std::runtime_error(
|
||||
"Kompute TensorT Cannot set data of different sizes");
|
||||
}
|
||||
|
||||
Tensor::setRawData(data.data());
|
||||
}
|
||||
|
||||
TensorDataTypes dataType();
|
||||
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
}
|
||||
)");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
std::vector<float> data(size, 0.0);
|
||||
std::vector<float> resultSync(size, 100000000);
|
||||
|
|
@ -73,7 +73,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
sq->eval<kp::OpTensorSyncLocal>(inputsSyncB);
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
EXPECT_EQ(inputsSyncB[i]->data(), resultSync);
|
||||
EXPECT_EQ(inputsSyncB[i]->vector<float>(), resultSync);
|
||||
}
|
||||
|
||||
kp::Manager mgrAsync(0, { 0, 2 });
|
||||
|
|
@ -111,7 +111,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
sq->eval<kp::OpTensorSyncLocal>({ inputsAsyncB });
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
EXPECT_EQ(inputsAsyncB[i]->data(), resultAsync);
|
||||
EXPECT_EQ((inputsAsyncB[i]->vector<float>()), resultAsync);
|
||||
}
|
||||
|
||||
// The speedup should be at least 40%
|
||||
|
|
@ -145,15 +145,15 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
|
|||
}
|
||||
)");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
std::vector<float> data(size, 0.0);
|
||||
std::vector<float> resultAsync(size, 100000000);
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(data);
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(data);
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(data);
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(data);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq1 = mgr.sequence();
|
||||
std::shared_ptr<kp::Sequence> sq2 = mgr.sequence();
|
||||
|
|
@ -172,6 +172,6 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
|
|||
sq1->evalAsync<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
sq1->evalAwait();
|
||||
|
||||
EXPECT_EQ(tensorA->data(), resultAsync);
|
||||
EXPECT_EQ(tensorB->data(), resultAsync);
|
||||
EXPECT_EQ(tensorA->vector(), resultAsync);
|
||||
EXPECT_EQ(tensorB->vector(), resultAsync);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,9 +5,9 @@
|
|||
|
||||
TEST(TestDestroy, TestDestroyTensorSingle)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
|
||||
|
||||
std::string shader(R"(
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
|
|
@ -16,7 +16,7 @@ TEST(TestDestroy, TestDestroyTensorSingle)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
|
@ -34,18 +34,19 @@ TEST(TestDestroy, TestDestroyTensorSingle)
|
|||
->eval()
|
||||
->eval<kp::OpTensorSyncLocal>(algo->getTensors());
|
||||
|
||||
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 1, 1, 1 }));
|
||||
|
||||
tensorA->destroy();
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
}
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroyTensorVector)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
std::shared_ptr<kp::Tensor> tensorB = nullptr;
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = nullptr;
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -57,7 +58,7 @@ TEST(TestDestroy, TestDestroyTensorVector)
|
|||
pa[index] = pa[index] + 1;
|
||||
pb[index] = pb[index] + 2;
|
||||
})");
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
|
@ -77,6 +78,9 @@ TEST(TestDestroy, TestDestroyTensorVector)
|
|||
->record<kp::OpTensorSyncLocal>(algo->getTensors())
|
||||
->eval();
|
||||
|
||||
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 2, 2, 2 }));
|
||||
EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 3, 3 }));
|
||||
|
||||
tensorA->destroy();
|
||||
tensorB->destroy();
|
||||
|
||||
|
|
@ -84,13 +88,11 @@ TEST(TestDestroy, TestDestroyTensorVector)
|
|||
EXPECT_FALSE(tensorB->isInit());
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceSingle)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -101,7 +103,7 @@ TEST(TestDestroy, TestDestroySequenceSingle)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
|
@ -121,7 +123,8 @@ TEST(TestDestroy, TestDestroySequenceSingle)
|
|||
sq->destroy();
|
||||
|
||||
EXPECT_FALSE(sq->isInit());
|
||||
|
||||
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,19 +14,19 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
|
||||
std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
std::shared_ptr<kp::TensorT<float>> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
|
||||
std::shared_ptr<kp::TensorT<float>> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
std::shared_ptr<kp::TensorT<float>> y = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
|
||||
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> wIn = mgr.tensor({ 0.001, 0.001 });
|
||||
std::shared_ptr<kp::TensorT<float>> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
|
||||
std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> bIn = mgr.tensor({ 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
|
|
@ -88,21 +88,21 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
|
||||
std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
std::shared_ptr<kp::TensorT<float>> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
|
||||
std::shared_ptr<kp::TensorT<float>> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
std::shared_ptr<kp::TensorT<float>> y = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> wIn =
|
||||
std::shared_ptr<kp::TensorT<float>> wIn =
|
||||
mgr.tensor({ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost);
|
||||
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> bIn =
|
||||
std::shared_ptr<kp::TensorT<float>> bIn =
|
||||
mgr.tensor({ 0 }, kp::Tensor::TensorTypes::eHost);
|
||||
std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
|
|
@ -136,8 +136,6 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
|
|||
wIn->data()[1] -= learningRate * wOutJ->data()[j];
|
||||
bIn->data()[0] -= learningRate * bOut->data()[j];
|
||||
}
|
||||
wIn->mapDataIntoHostMemory();
|
||||
bIn->mapDataIntoHostMemory();
|
||||
}
|
||||
|
||||
// Based on the inputs the outputs should be at least:
|
||||
|
|
|
|||
|
|
@ -7,9 +7,9 @@ TEST(TestManager, EndToEndOpMultEvalFlow)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorLHS = mgr.tensor({ 0, 1, 2 });
|
||||
std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
|
||||
std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorLHS = mgr.tensor({ 0, 1, 2 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorRHS = mgr.tensor({ 2, 4, 6 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorOutput = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
|
||||
tensorRHS,
|
||||
|
|
@ -20,16 +20,16 @@ TEST(TestManager, EndToEndOpMultEvalFlow)
|
|||
->eval<kp::OpMult>(params, mgr.algorithm())
|
||||
->eval<kp::OpTensorSyncLocal>(params);
|
||||
|
||||
EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
|
||||
EXPECT_EQ(tensorOutput->vector(), std::vector<float>({ 0, 4, 12 }));
|
||||
}
|
||||
|
||||
TEST(TestManager, EndToEndOpMultSeqFlow)
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorLHS = mgr.tensor({ 0, 1, 2 });
|
||||
std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
|
||||
std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorLHS = mgr.tensor({ 0, 1, 2 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorRHS = mgr.tensor({ 2, 4, 6 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorOutput = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
|
||||
tensorRHS,
|
||||
|
|
@ -41,16 +41,16 @@ TEST(TestManager, EndToEndOpMultSeqFlow)
|
|||
->record<kp::OpTensorSyncLocal>(params)
|
||||
->eval();
|
||||
|
||||
EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
|
||||
EXPECT_EQ(tensorOutput->vector(), std::vector<float>({ 0, 4, 12 }));
|
||||
}
|
||||
|
||||
TEST(TestManager, TestMultipleSequences)
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorLHS = mgr.tensor({ 0, 1, 2 });
|
||||
std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
|
||||
std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorLHS = mgr.tensor({ 0, 1, 2 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorRHS = mgr.tensor({ 2, 4, 6 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorOutput = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
|
||||
tensorRHS,
|
||||
|
|
@ -60,5 +60,5 @@ TEST(TestManager, TestMultipleSequences)
|
|||
mgr.sequence()->eval<kp::OpMult>(params, mgr.algorithm());
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>(params);
|
||||
|
||||
EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
|
||||
EXPECT_EQ(tensorOutput->vector(), std::vector<float>({ 0, 4, 12 }));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,10 +8,12 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
// Default tensor constructor simplifies creation of float values
|
||||
auto tensorInA = mgr.tensor({ 2., 2., 2. });
|
||||
auto tensorInB = mgr.tensor({ 1., 2., 3. });
|
||||
auto tensorOutA = mgr.tensor({ 0., 0., 0. });
|
||||
auto tensorOutB = mgr.tensor({ 0., 0., 0. });
|
||||
// Explicit type constructor supports uint32, int32, double, float and bool
|
||||
auto tensorOutA = mgr.tensorT<uint32_t>({ 0, 0, 0 });
|
||||
auto tensorOutB = mgr.tensorT<uint32_t>({ 0, 0, 0 });
|
||||
|
||||
std::string shader = (R"(
|
||||
#version 450
|
||||
|
|
@ -21,8 +23,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
|
|||
// The input tensors bind index is relative to index in parameter passed
|
||||
layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
|
||||
layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
|
||||
layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
|
||||
layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
|
||||
layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
|
||||
layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
|
||||
|
||||
// Kompute supports push constants updated on dispatch
|
||||
layout(push_constant) uniform PushConstants {
|
||||
|
|
@ -34,8 +36,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
|
|||
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
out_a[index] += in_a[index] * in_b[index];
|
||||
out_b[index] += const_one * push_const.val;
|
||||
out_a[index] += uint( in_a[index] * in_b[index] );
|
||||
out_b[index] += uint( const_one * push_const.val );
|
||||
}
|
||||
)");
|
||||
|
||||
|
|
@ -49,7 +51,7 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
|
|||
kp::Constants pushConstsB({ 3.0 });
|
||||
|
||||
auto algorithm = mgr.algorithm(
|
||||
params, kp::Shader::compile_source(shader), workgroup, specConsts, pushConstsA);
|
||||
params, kp::Shader::compileSource(shader), workgroup, specConsts, pushConstsA);
|
||||
|
||||
// 3. Run operation with string shader synchronously
|
||||
mgr.sequence()
|
||||
|
|
@ -64,8 +66,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
|
|||
|
||||
sq->evalAwait();
|
||||
|
||||
EXPECT_EQ(tensorOutA->data(), std::vector<float>({ 4, 8, 12 }));
|
||||
EXPECT_EQ(tensorOutB->data(), std::vector<float>({ 10, 10, 10 }));
|
||||
EXPECT_EQ(tensorOutA->vector(), std::vector<uint32_t>({ 4, 8, 12 }));
|
||||
EXPECT_EQ(tensorOutB->vector(), std::vector<uint32_t>({ 10, 10, 10 }));
|
||||
}
|
||||
|
||||
TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
|
||||
|
|
@ -73,7 +75,7 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -84,7 +86,7 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
{
|
||||
mgr.sequence()
|
||||
|
|
@ -96,14 +98,14 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
|
|||
->eval();
|
||||
}
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
|
||||
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
||||
TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -114,7 +116,7 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm({ tensorA }, spirv);
|
||||
|
|
@ -131,7 +133,7 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
|
|||
|
||||
mgr.sequence()->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
|
||||
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
||||
TEST(TestMultipleAlgoExecutions, MultipleSequences)
|
||||
|
|
@ -139,7 +141,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -150,7 +152,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm({ tensorA }, spirv);
|
||||
|
|
@ -167,14 +169,14 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
|
|||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
|
||||
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
||||
TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -185,7 +187,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm({ tensorA }, spirv);
|
||||
|
|
@ -198,43 +200,6 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
|
|||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
|
||||
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
||||
TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm({ tensorA }, spirv);
|
||||
|
||||
sq = mgr.sequence();
|
||||
|
||||
sq->record<kp::OpTensorSyncDevice>({ tensorA })->eval();
|
||||
|
||||
sq->record<kp::OpAlgoDispatch>(algorithm)->eval()->eval()->eval();
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,8 +9,8 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 3, 4, 5 });
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 3, 4, 5 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -27,7 +27,7 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor)
|
|||
}
|
||||
)");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA, tensorB };
|
||||
|
||||
|
|
@ -36,16 +36,16 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor)
|
|||
->eval<kp::OpAlgoDispatch>(mgr.algorithm(params, spirv))
|
||||
->eval<kp::OpTensorSyncLocal>(params);
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
|
||||
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 0, 1, 2 }));
|
||||
EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 4, 5 }));
|
||||
}
|
||||
|
||||
TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 3, 4, 5 });
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 3, 4, 5 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::vector<uint32_t> spirv = std::vector<uint32_t>(
|
||||
(uint32_t*)
|
||||
|
|
@ -62,8 +62,8 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
|
|||
->eval<kp::OpAlgoDispatch>(mgr.algorithm(params, spirv))
|
||||
->eval<kp::OpTensorSyncLocal>(params);
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
|
||||
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 0, 1, 2 }));
|
||||
EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 4, 5 }));
|
||||
}
|
||||
|
||||
// TODO: Add support to read from file for shader
|
||||
|
|
@ -71,8 +71,8 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
|
|||
//{
|
||||
// kp::Manager mgr;
|
||||
//
|
||||
// std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
|
||||
// std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
// std::shared_ptr<kp::TensorT<float>> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
|
||||
// std::shared_ptr<kp::TensorT<float>> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
// mgr.rebuild({ tensorA, tensorB });
|
||||
//
|
||||
// mgr.evalOpDefault<kp::OpAlgoCreate>(
|
||||
|
|
@ -81,6 +81,6 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
|
|||
//
|
||||
// mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
//
|
||||
// EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
|
||||
// EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
|
||||
// EXPECT_EQ(tensorA->vector(), std::vector<float>({ 0, 1, 2 }));
|
||||
// EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 4, 5 }));
|
||||
//}
|
||||
|
|
|
|||
|
|
@ -11,8 +11,8 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor)
|
|||
std::vector<float> testVecA{ 1, 2, 3 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(testVecB);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
@ -22,8 +22,8 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor)
|
|||
->eval<kp::OpTensorCopy>({ tensorA, tensorB })
|
||||
->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
|
||||
// Making sure the GPU holds the same data
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
// Making sure the GPU holds the same vector
|
||||
EXPECT_EQ(tensorA->vector(), tensorB->vector());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
|
||||
|
|
@ -35,9 +35,9 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
|
|||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
std::vector<float> testVecC{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
|
||||
std::shared_ptr<kp::Tensor> tensorC = mgr.tensor(testVecC);
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(testVecB);
|
||||
std::shared_ptr<kp::TensorT<float>> tensorC = mgr.tensor(testVecC);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
@ -47,14 +47,14 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
|
|||
->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB, tensorC })
|
||||
->eval<kp::OpTensorCopy>({ tensorA, tensorB, tensorC });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
EXPECT_EQ(tensorA->data(), tensorC->data());
|
||||
EXPECT_EQ(tensorA->vector(), tensorB->vector());
|
||||
EXPECT_EQ(tensorA->vector(), tensorC->vector());
|
||||
|
||||
// Making sure the GPU holds the same data
|
||||
// Making sure the GPU holds the same vector
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB, tensorC });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
EXPECT_EQ(tensorA->data(), tensorC->data());
|
||||
EXPECT_EQ(tensorA->vector(), tensorB->vector());
|
||||
EXPECT_EQ(tensorA->vector(), tensorC->vector());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
|
||||
|
|
@ -65,8 +65,8 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
|
|||
std::vector<float> testVecA{ 3, 4, 5 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
|
||||
std::shared_ptr<kp::Tensor> tensorB =
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB =
|
||||
mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost);
|
||||
|
||||
// Only calling sync on device type tensor
|
||||
|
|
@ -77,11 +77,11 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
|
|||
|
||||
mgr.sequence()->eval<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
EXPECT_EQ(tensorA->vector(), tensorB->vector());
|
||||
|
||||
// Making sure the GPU holds the same data
|
||||
// Making sure the GPU holds the same vector
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
EXPECT_EQ(tensorA->vector(), tensorB->vector());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
|
||||
|
|
@ -92,9 +92,9 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
|
|||
std::vector<float> testVecA{ 4, 5, 6 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA =
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA =
|
||||
mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(testVecB);
|
||||
|
||||
// Only calling sync on device type tensor
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB });
|
||||
|
|
@ -104,11 +104,11 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
|
|||
|
||||
mgr.sequence()->eval<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
EXPECT_EQ(tensorA->vector(), tensorB->vector());
|
||||
|
||||
// Making sure the GPU holds the same data
|
||||
// Making sure the GPU holds the same vector
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
EXPECT_EQ(tensorA->vector(), tensorB->vector());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCopy, CopyHostToHostTensor)
|
||||
|
|
@ -119,9 +119,9 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
|
|||
std::vector<float> testVecA{ 5, 6, 7 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA =
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA =
|
||||
mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
|
||||
std::shared_ptr<kp::Tensor> tensorB =
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB =
|
||||
mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
|
|
@ -131,11 +131,11 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
|
|||
->eval<kp::OpTensorSyncDevice>({ tensorA })
|
||||
->eval<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
EXPECT_EQ(tensorA->vector(), tensorB->vector());
|
||||
|
||||
// Making sure the GPU holds the same data
|
||||
// Making sure the GPU holds the same vector
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
EXPECT_EQ(tensorA->vector(), tensorB->vector());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCopy, SingleTensorShouldFail)
|
||||
|
|
@ -145,7 +145,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail)
|
|||
|
||||
std::vector<float> testVecA{ 6, 7, 8 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA =
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA =
|
||||
mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
TEST(TestOpTensorCreate, CreateSingleTensorSingleOp)
|
||||
{
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
|
@ -15,7 +15,7 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp)
|
|||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
EXPECT_EQ(tensorA->vector(), testVecA);
|
||||
}
|
||||
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
|
|
@ -29,11 +29,11 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(testVecB);
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
EXPECT_EQ(tensorB->data(), testVecB);
|
||||
EXPECT_EQ(tensorA->vector(), testVecA);
|
||||
EXPECT_EQ(tensorB->vector(), testVecB);
|
||||
|
||||
tensorA->destroy();
|
||||
tensorB->destroy();
|
||||
|
|
@ -49,7 +49,7 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor)
|
|||
kp::Manager mgr;
|
||||
|
||||
try {
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
|
||||
} catch (const std::runtime_error& err) {
|
||||
// check exception
|
||||
ASSERT_TRUE(std::string(err.what()).find("zero-sized") !=
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor)
|
|||
std::vector<float> testVecPreA{ 0, 0, 0 };
|
||||
std::vector<float> testVecPostA{ 9, 8, 7 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecPreA);
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecPreA);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
|
||||
|
|
@ -21,7 +21,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor)
|
|||
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecPostA);
|
||||
EXPECT_EQ(tensorA->vector(), testVecPostA);
|
||||
}
|
||||
|
||||
TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
|
||||
|
|
@ -31,9 +31,9 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
|
|||
|
||||
std::vector<float> testVec{ 9, 8, 7 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> tensorC = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorC = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
@ -47,7 +47,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
|
|||
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB, tensorC });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVec);
|
||||
EXPECT_EQ(tensorB->data(), testVec);
|
||||
EXPECT_EQ(tensorC->data(), testVec);
|
||||
EXPECT_EQ(tensorA->vector(), testVec);
|
||||
EXPECT_EQ(tensorB->vector(), testVec);
|
||||
EXPECT_EQ(tensorC->vector(), testVec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,14 +22,14 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride)
|
|||
pa[2] += pcs.z;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensor = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0, 0.0, 0.0 });
|
||||
|
|
@ -42,7 +42,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride)
|
|||
sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
|
||||
sq->eval<kp::OpTensorSyncLocal>({ tensor });
|
||||
|
||||
EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 }));
|
||||
EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -65,14 +65,14 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride)
|
|||
pa[2] += pcs.z;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensor = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.1, 0.2, 0.3 });
|
||||
|
|
@ -85,7 +85,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride)
|
|||
sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
|
||||
sq->eval<kp::OpTensorSyncLocal>({ tensor });
|
||||
|
||||
EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 }));
|
||||
EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -108,14 +108,14 @@ TEST(TestPushConstants, TestConstantsWrongSize)
|
|||
pa[2] += pcs.z;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensor = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 });
|
||||
|
|
|
|||
|
|
@ -60,13 +60,13 @@ TEST(TestSequence, RerecordSequence)
|
|||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({1, 2, 3});
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({2, 2, 2});
|
||||
std::shared_ptr<kp::Tensor> tensorOut = mgr.tensor({0, 0, 0});
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({1, 2, 3});
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({2, 2, 2});
|
||||
std::shared_ptr<kp::TensorT<float>> tensorOut = mgr.tensor({0, 0, 0});
|
||||
|
||||
sq->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB, tensorOut });
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(R"(
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(R"(
|
||||
#version 450
|
||||
|
||||
layout (local_size_x = 1) in;
|
||||
|
|
@ -90,7 +90,7 @@ TEST(TestSequence, RerecordSequence)
|
|||
|
||||
sq->eval();
|
||||
|
||||
EXPECT_EQ(tensorOut->data(), std::vector<float>({2, 4, 6}));
|
||||
EXPECT_EQ(tensorOut->vector(), std::vector<float>({2, 4, 6}));
|
||||
|
||||
algo->rebuild({tensorOut, tensorA, tensorB}, spirv);
|
||||
|
||||
|
|
@ -98,7 +98,7 @@ TEST(TestSequence, RerecordSequence)
|
|||
sq->rerecord();
|
||||
sq->eval();
|
||||
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({2, 8, 18}));
|
||||
EXPECT_EQ(tensorB->vector(), std::vector<float>({2, 8, 18}));
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -117,7 +117,7 @@ TEST(TestSequence, SequenceTimestamps)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
auto seq = mgr.sequence(0, 100); //100 timestamps
|
||||
seq->record<kp::OpTensorSyncDevice>({ tensorA })
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ static const std::string shaderString = (R"(
|
|||
)");
|
||||
|
||||
void compileShaderWithGivenResources(const std::string shaderString, const TBuiltInResource resources) {
|
||||
kp::Shader::compile_source(shaderString, std::string("main"), std::vector<std::pair<std::string,std::string>>({}), resources);
|
||||
kp::Shader::compileSource(shaderString, std::string("main"), std::vector<std::pair<std::string,std::string>>({}), resources);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -18,15 +18,15 @@ TEST(TestSpecializationConstants, TestTwoConstants)
|
|||
pb[index] = cTwo;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
|
||||
tensorB };
|
||||
|
|
@ -42,8 +42,8 @@ TEST(TestSpecializationConstants, TestTwoConstants)
|
|||
->record<kp::OpTensorSyncLocal>(params)
|
||||
->eval();
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 5, 5, 5 }));
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({ 0.3, 0.3, 0.3 }));
|
||||
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 5, 5, 5 }));
|
||||
EXPECT_EQ(tensorB->vector(), std::vector<float>({ 0.3, 0.3, 0.3 }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ TEST(TestTensor, ConstructorData)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
std::vector<float> vec{ 0, 1, 2 };
|
||||
std::shared_ptr<kp::Tensor> tensor = mgr.tensor(vec);
|
||||
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor(vec);
|
||||
EXPECT_EQ(tensor->size(), vec.size());
|
||||
EXPECT_EQ(tensor->data(), vec);
|
||||
EXPECT_EQ(tensor->vector(), vec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@
|
|||
|
||||
TEST(TestWorkgroup, TestSimpleWorkgroup)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
std::shared_ptr<kp::Tensor> tensorB = nullptr;
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = nullptr;
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
|
|
@ -39,29 +39,29 @@ TEST(TestWorkgroup, TestSimpleWorkgroup)
|
|||
sq->record<kp::OpAlgoDispatch>(algorithm);
|
||||
sq->record<kp::OpTensorSyncLocal>(params);
|
||||
sq->eval();
|
||||
|
||||
std::vector<float> expectedA = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15
|
||||
};
|
||||
|
||||
std::vector<float> expectedB = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
|
||||
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
|
||||
4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1,
|
||||
2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
|
||||
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
|
||||
};
|
||||
|
||||
EXPECT_EQ(tensorA->vector(), expectedA);
|
||||
EXPECT_EQ(tensorB->vector(), expectedB);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<float> expectedA = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15
|
||||
};
|
||||
|
||||
std::vector<float> expectedB = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
|
||||
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
|
||||
4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1,
|
||||
2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
|
||||
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
|
||||
};
|
||||
|
||||
EXPECT_EQ(tensorA->data(), expectedA);
|
||||
EXPECT_EQ(tensorB->data(), expectedB);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue