diff --git a/README.md b/README.md index 41596cb00..7a7375a6a 100644 --- a/README.md +++ b/README.md @@ -55,10 +55,13 @@ void kompute(const std::string& shader) { kp::Manager mgr; // 2. Create and initialise Kompute Tensors through manager + + // Default tensor constructor simplifies creation of float values auto tensorInA = mgr.tensor({ 2., 2., 2. }); auto tensorInB = mgr.tensor({ 1., 2., 3. }); - auto tensorOutA = mgr.tensor({ 0., 0., 0. }); - auto tensorOutB = mgr.tensor({ 0., 0., 0. }); + // Explicit type constructor supports uint32, int32, double, float and bool + auto tensorOutA = mgr.tensorT({ 0, 0, 0 }); + auto tensorOutB = mgr.tensorT({ 0, 0, 0 }); std::vector> params = {tensorInA, tensorInB, tensorOutA, tensorOutB}; @@ -109,8 +112,8 @@ int main() { // The input tensors bind index is relative to index in parameter passed layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; }; layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; }; - layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; }; - layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; }; + layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; }; + layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; }; // Kompute supports push constants updated on dispatch layout(push_constant) uniform PushConstants { @@ -122,8 +125,8 @@ int main() { void main() { uint index = gl_GlobalInvocationID.x; - out_a[index] += in_a[index] * in_b[index]; - out_b[index] += const_one * push_const.val; + out_a[index] += uint( in_a[index] * in_b[index] ); + out_b[index] += uint( const_one * push_const.val ); } )"); @@ -144,10 +147,13 @@ def kompute(shader): mgr = kp.Manager() # 2. 
Create and initialise Kompute Tensors through manager + + # Default tensor constructor simplifies creation of float values tensor_in_a = mgr.tensor([2, 2, 2]) tensor_in_b = mgr.tensor([1, 2, 3]) - tensor_out_a = mgr.tensor([0, 0, 0]) - tensor_out_b = mgr.tensor([0, 0, 0]) + # Explicit type constructor supports uint32, int32, double, float and bool + tensor_out_a = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32)) + tensor_out_b = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32)) params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b] @@ -194,8 +200,8 @@ if __name__ == "__main__": // The input tensors bind index is relative to index in parameter passed layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; }; layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; }; - layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; }; - layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; }; + layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; }; + layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; }; // Kompute supports push constants updated on dispatch layout(push_constant) uniform PushConstants { @@ -207,8 +213,8 @@ if __name__ == "__main__": void main() { uint index = gl_GlobalInvocationID.x; - out_a[index] += in_a[index] * in_b[index]; - out_b[index] += const_one * push_const.val; + out_a[index] += uint( in_a[index] * in_b[index] ); + out_b[index] += uint( const_one * push_const.val ); } """ diff --git a/examples/array_multiplication/CMakeLists.txt b/examples/array_multiplication/CMakeLists.txt index 0b648382e..bfc4c1c79 100644 --- a/examples/array_multiplication/CMakeLists.txt +++ b/examples/array_multiplication/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.17.0) +cmake_minimum_required(VERSION 3.4.1) project(kompute_array_mult VERSION 0.1.0) set(CMAKE_CXX_STANDARD 14) @@ -23,10 +23,6 @@ endif() find_package(Vulkan REQUIRED) -if(KOMPUTE_OPT_ENABLE_SPDLOG) - find_package(spdlog REQUIRED) 
-endif() - add_executable(kompute_array_mult src/Main.cpp) diff --git a/examples/array_multiplication/README.md b/examples/array_multiplication/README.md index 931c7d639..d4082c713 100644 --- a/examples/array_multiplication/README.md +++ b/examples/array_multiplication/README.md @@ -15,8 +15,11 @@ This project has the option to either import the Kompute dependency relative to To build you just need to run the cmake command in this folder as follows: ``` -cmake \ - -Bbuild +cmake -Bbuild/ \ + -DCMAKE_BUILD_TYPE=Debug \ + -DKOMPUTE_OPT_INSTALL=0 \ + -DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \ + -DKOMPUTE_OPT_ENABLE_SPDLOG=1 ``` You can pass the following optional parameters based on your desired configuration: diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp index fd823bca8..95e0781ad 100755 --- a/examples/array_multiplication/src/Main.cpp +++ b/examples/array_multiplication/src/Main.cpp @@ -39,16 +39,17 @@ int main() std::vector> params = { tensorInA, tensorInB, tensorOut }; - std::shared_ptr algo = mgr.algorithm(params, kp::Shader::compile_source(shader)); + std::shared_ptr algo = mgr.algorithm(params, kp::Shader::compileSource(shader)); mgr.sequence() ->record(params) ->record(algo) - ->record(params); + ->record(params) + ->eval(); // prints "Output { 0 4 12 }" std::cout<< "Output: { "; - for (const float& elem : tensorOut->data()) { + for (const float& elem : tensorOut->vector()) { std::cout << elem << " "; } std::cout << "}" << std::endl; diff --git a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp index f50c56d5c..e901ef816 100644 --- a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp +++ b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp @@ -54,7 +54,7 @@ void KomputeSummatorNode::_init() { std::shared_ptr algo = mgr.algorithm( { 
this->mPrimaryTensor, this->mSecondaryTensor }, - kp::Shader::compile_source(shader)); + kp::Shader::compileSource(shader)); // First we ensure secondary tensor loads to GPU diff --git a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp index ece095c8e..99aabb338 100644 --- a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp +++ b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp @@ -58,7 +58,7 @@ void KomputeSummator::_init() { // Then we run the operation with both tensors this->mSequence->record( { this->mPrimaryTensor, this->mSecondaryTensor }, - kp::Shader::compile_source(shader)); + kp::Shader::compileSource(shader)); // We map the result back to local this->mSequence->record( diff --git a/examples/logistic_regression/CMakeLists.txt b/examples/logistic_regression/CMakeLists.txt index f918bbf21..8c8e0eb8f 100644 --- a/examples/logistic_regression/CMakeLists.txt +++ b/examples/logistic_regression/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.17.0) +cmake_minimum_required(VERSION 3.4.1) project(kompute_linear_reg VERSION 0.1.0) set(CMAKE_CXX_STANDARD 14) @@ -23,10 +23,6 @@ endif() find_package(Vulkan REQUIRED) -if(KOMPUTE_OPT_ENABLE_SPDLOG) - find_package(spdlog REQUIRED) -endif() - add_executable(kompute_linear_reg src/Main.cpp) @@ -39,7 +35,7 @@ include_directories( ../../single_include/) if(KOMPUTE_OPT_ENABLE_SPDLOG) - target_link_libraries(kompute_array_mult + target_link_libraries(kompute_linear_reg spdlog::spdlog) endif() diff --git a/examples/logistic_regression/README.md b/examples/logistic_regression/README.md index 0de7ee30a..342bbfca1 100644 --- a/examples/logistic_regression/README.md +++ b/examples/logistic_regression/README.md @@ -15,8 +15,11 @@ This project has the option to either import the Kompute dependency relative to To build you just need to run the cmake command in this folder as follows: ``` -cmake \ - -Bbuild 
+cmake -Bbuild/ \ + -DCMAKE_BUILD_TYPE=Debug \ + -DKOMPUTE_OPT_INSTALL=0 \ + -DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \ + -DKOMPUTE_OPT_ENABLE_SPDLOG=1 ``` You can pass the following optional parameters based on your desired configuration: diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp index c435575e2..c7cc827ba 100755 --- a/examples/logistic_regression/src/Main.cpp +++ b/examples/logistic_regression/src/Main.cpp @@ -17,19 +17,19 @@ int main() kp::Manager mgr; - std::shared_ptr xI = mgr.tensor({ 0, 1, 1, 1, 1 }); - std::shared_ptr xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); + auto xI = mgr.tensor({ 0, 1, 1, 1, 1 }); + auto xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr y = mgr.tensor({ 0, 0, 0, 1, 1 }); + auto y = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr wIn = mgr.tensor({ 0.001, 0.001 }); - std::shared_ptr wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); + auto wIn = mgr.tensor({ 0.001, 0.001 }); + auto wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); + auto wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr bIn = mgr.tensor({ 0 }); - std::shared_ptr bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + auto bIn = mgr.tensor({ 0 }); + auto bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + auto lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); std::vector> params = { xI, xJ, y, wIn, wOutI, wOutJ, @@ -40,7 +40,8 @@ int main() (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); - std::shared_ptr algo = mgr.algorithm(params, spirv); + std::shared_ptr algo = mgr.algorithm( + params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 })); mgr.sequence()->eval(params); diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp index bf98e6581..d4593edb8 100644 --- a/python/src/docstrings.hpp +++ b/python/src/docstrings.hpp @@ -247,10 +247,16 @@ static const char 
*__doc_kp_Manager_sequence = R"doc(Create a managed sequence that will be destroyed by this manager if it hasn't been destroyed by its reference count going to zero. -@param queueIndex The queue to use from the available queues @returns -Shared pointer with initialised sequence)doc"; +@param queueIndex The queue to use from the available queues @param +nrOfTimestamps The maximum number of timestamps to allocate. If zero +(default), disables latching of timestamps. @returns Shared pointer +with initialised sequence)doc"; -static const char *__doc_kp_Manager_tensor = +static const char *__doc_kp_Manager_tensor = R"doc()doc"; + +static const char *__doc_kp_Manager_tensor_2 = R"doc()doc"; + +static const char *__doc_kp_Manager_tensorT = R"doc(Create a managed tensor that will be destroyed by this manager if it hasn't been destroyed by its reference count going to zero. @@ -264,18 +270,26 @@ of algorithm and parameter components which can be used with shaders. By default it enables the user to provide a dynamic number of tensors which are then passed as inputs.)doc"; -static const char *__doc_kp_OpAlgoDispatch_OpAlgoDispatch = R"doc()doc"; +static const char *__doc_kp_OpAlgoDispatch_OpAlgoDispatch = +R"doc(Constructor that stores the algorithm to use as well as the relevant +push constants to override when recording. + +@param algorithm The algorithm object to use for dispatch @param +pushConstants The push constants to use for override)doc"; static const char *__doc_kp_OpAlgoDispatch_mAlgorithm = R"doc()doc"; static const char *__doc_kp_OpAlgoDispatch_mPushConstants = R"doc()doc"; static const char *__doc_kp_OpAlgoDispatch_postEval = -R"doc(Executes after the recorded commands are submitted, and performs a -copy of the GPU Device memory into the staging buffer so the output -data can be retrieved.)doc"; +R"doc(Does not perform any postEval commands. 
-static const char *__doc_kp_OpAlgoDispatch_preEval = R"doc(Does not perform any preEval commands.)doc"; +@param commandBuffer The command buffer to record the command into.)doc"; + +static const char *__doc_kp_OpAlgoDispatch_preEval = +R"doc(Does not perform any preEval commands. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpAlgoDispatch_record = R"doc(This records the commands that are to be sent to the GPU. This @@ -283,7 +297,9 @@ includes the barriers that ensure the memory has been copied before going in and out of the shader, as well as the dispatch operation that sends the shader processing to the gpu. This function also records the GPU memory copy of the output data for the staging buffer so it can be -read by the host.)doc"; +read by the host. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpBase = R"doc(Base Operation which provides the high level interface that Kompute @@ -299,7 +315,9 @@ the commands to the GPU for processing, and can be used to perform any tear-down steps required as the computation iteration finishes. It's worth noting that there are situations where eval can be called multiple times, so the resources that are destroyed should not require -a re-init unless explicitly provided by the user.)doc"; +a re-init unless explicitly provided by the user. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpBase_preEval = R"doc(Pre eval is called before the Sequence has called eval and submitted @@ -307,12 +325,16 @@ the commands to the GPU for processing, and can be used to perform any per-eval setup steps required as the computation iteration begins. 
It's worth noting that there are situations where eval can be called multiple times, so the resources that are created should be idempotent -in case it's called multiple times in a row.)doc"; +in case it's called multiple times in a row. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpBase_record = R"doc(The record function is intended to only send a record command or run commands that are expected to record operations that are to be -submitted as a batch into the GPU.)doc"; +submitted as a batch into the GPU. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpMult = R"doc(Operation that performs multiplication on two tensors and outpus on @@ -323,12 +345,9 @@ R"doc(Default constructor with parameters that provides the bare minimum requirements for the operations to be able to create and manage their sub-components. -@param physicalDevice Vulkan physical device used to find device -queues @param device Vulkan logical device for passing to Algorithm -@param commandBuffer Vulkan Command Buffer to record commands into @param tensors Tensors that are to be used in this operation @param -komputeWorkgroup Optional parameter to specify the layout for -processing)doc"; +algorithm An algorithm that will be overridden with the OpMult shader +data and the tensors provided which are expected to be 3)doc"; static const char *__doc_kp_OpTensorCopy = R"doc(Operation that copies the data from the first tensor to the rest of @@ -340,84 +359,95 @@ static const char *__doc_kp_OpTensorCopy_OpTensorCopy = R"doc(Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. 
-@param physicalDevice Vulkan physical device used to find device -queues @param device Vulkan logical device for passing to Algorithm -@param commandBuffer Vulkan Command Buffer to record commands into @param tensors Tensors that will be used to create in operation.)doc"; static const char *__doc_kp_OpTensorCopy_mTensors = R"doc()doc"; static const char *__doc_kp_OpTensorCopy_postEval = R"doc(Copies the local vectors for all the tensors to sync the data with the -gpu.)doc"; +gpu. -static const char *__doc_kp_OpTensorCopy_preEval = R"doc(Does not perform any preEval commands.)doc"; +@param commandBuffer The command buffer to record the command into.)doc"; + +static const char *__doc_kp_OpTensorCopy_preEval = +R"doc(Does not perform any preEval commands. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorCopy_record = R"doc(Records the copy commands from the first tensor into all the other -tensors provided. Also optionally records a barrier.)doc"; +tensors provided. Also optionally records a barrier. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorSyncDevice = R"doc(Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For -TensorTypes::eStaging it will only map the data into host memory which +TensorTypes::eHost it will only map the data into host memory which will happen during preEval before the recorded commands are -dispatched. This operation won't have any effect on -TensorTypes::eStaging.)doc"; +dispatched.)doc"; static const char *__doc_kp_OpTensorSyncDevice_OpTensorSyncDevice = R"doc(Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. 
The tensos provided cannot be of type TensorTypes::eStorage. -@param physicalDevice Vulkan physical device used to find device -queues @param device Vulkan logical device for passing to Algorithm -@param commandBuffer Vulkan Command Buffer to record commands into @param tensors Tensors that will be used to create in operation.)doc"; static const char *__doc_kp_OpTensorSyncDevice_mTensors = R"doc()doc"; -static const char *__doc_kp_OpTensorSyncDevice_postEval = R"doc(Does not perform any postEval commands.)doc"; +static const char *__doc_kp_OpTensorSyncDevice_postEval = +R"doc(Does not perform any postEval commands. -static const char *__doc_kp_OpTensorSyncDevice_preEval = R"doc(Does not perform any preEval commands.)doc"; +@param commandBuffer The command buffer to record the command into.)doc"; + +static const char *__doc_kp_OpTensorSyncDevice_preEval = +R"doc(Does not perform any preEval commands. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorSyncDevice_record = R"doc(For device tensors, it records the copy command for the tensor to copy -the data from its staging to device memory.)doc"; +the data from its staging to device memory. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorSyncLocal = R"doc(Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For -TensorTypes::eStaging it will only map the data into host memory which +TensorTypes::eHost it will only map the data into host memory which will happen during preEval before the recorded commands are -dispatched. 
This operation won't have any effect on -TensorTypes::eStaging.)doc"; +dispatched.)doc"; static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal = R"doc(Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage. -@param physicalDevice Vulkan physical device used to find device -queues @param device Vulkan logical device for passing to Algorithm -@param commandBuffer Vulkan Command Buffer to record commands into @param tensors Tensors that will be used to create in operation.)doc"; static const char *__doc_kp_OpTensorSyncLocal_mTensors = R"doc()doc"; static const char *__doc_kp_OpTensorSyncLocal_postEval = R"doc(For host tensors it performs the map command from the host memory into -local memory.)doc"; +local memory. -static const char *__doc_kp_OpTensorSyncLocal_preEval = R"doc(Does not perform any preEval commands.)doc"; +@param commandBuffer The command buffer to record the command into.)doc"; + +static const char *__doc_kp_OpTensorSyncLocal_preEval = +R"doc(Does not perform any preEval commands. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorSyncLocal_record = R"doc(For device tensors, it records the copy command for the tensor to copy -the data from its device to staging memory.)doc"; +the data from its device to staging memory. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_Sequence = R"doc(Container of operations that can be sent to GPU as batch)doc"; @@ -427,7 +457,8 @@ generate all dependent resources. 
@param physicalDevice Vulkan physical device @param device Vulkan logical device @param computeQueue Vulkan compute queue @param -queueIndex Vulkan compute queue index in device)doc"; +queueIndex Vulkan compute queue index in device @param totalTimestamps +Maximum number of timestamps to allocate)doc"; static const char *__doc_kp_Sequence_begin = R"doc(Begins recording commands for commands to be submitted into the @@ -443,6 +474,8 @@ static const char *__doc_kp_Sequence_createCommandBuffer = R"doc()doc"; static const char *__doc_kp_Sequence_createCommandPool = R"doc()doc"; +static const char *__doc_kp_Sequence_createTimestampQueryPool = R"doc()doc"; + static const char *__doc_kp_Sequence_destroy = R"doc(Destroys and frees the GPU resources which include the buffer and memory and sets the sequence as init=False.)doc"; @@ -528,6 +561,10 @@ finishes, it runs the postEval of all operations. @param waitFor Number of milliseconds to wait before timing out. @return shared_ptr of the Sequence class itself)doc"; +static const char *__doc_kp_Sequence_getTimestamps = +R"doc(Return the timestamps that were latched at the beginning and after +each operation during the last eval() call.)doc"; + static const char *__doc_kp_Sequence_isInit = R"doc(Returns true if the sequence has been initialised, and it's based on the GPU resources being refrenced. @@ -607,9 +644,11 @@ R"doc(Clears command buffer and triggers re-record of all the current operations saved, which is useful if the underlying kp::Tensors or kp::Algorithms are modified and need to be re-recorded.)doc"; +static const char *__doc_kp_Sequence_timestampQueryPool = R"doc()doc"; + static const char *__doc_kp_Shader = R"doc(Shader utily class with functions to compile and process glsl files.)doc"; -static const char *__doc_kp_Shader_compile_source = +static const char *__doc_kp_Shader_compileSource = R"doc(Compile a single glslang source from string value. 
Currently this function uses the glslang C++ interface which is not thread safe so this funciton should not be called from multiple threads concurrently. @@ -622,7 +661,7 @@ List of pairs containing key value definitions @param resourcesLimit A list that contains the resource limits for the GLSL compiler @return The compiled SPIR-V binary in unsigned int32 format)doc"; -static const char *__doc_kp_Shader_compile_sources = +static const char *__doc_kp_Shader_compileSources = R"doc(Compile multiple sources with optional filenames. Currently this function uses the glslang C++ interface which is not thread safe so this funciton should not be called from multiple threads concurrently. @@ -644,14 +683,42 @@ across GPUs. Each tensor would have a respective Vulkan memory and buffer, which would be used to store their respective data. The tensors can be used for GPU data storage or transfer.)doc"; -static const char *__doc_kp_Tensor_Tensor = -R"doc(Default constructor with data provided which would be used to create -the respective vulkan buffer and memory. +static const char *__doc_kp_TensorT = R"doc()doc"; +static const char *__doc_kp_TensorT_TensorT = R"doc()doc"; + +static const char *__doc_kp_TensorT_data = R"doc()doc"; + +static const char *__doc_kp_TensorT_dataType = R"doc()doc"; + +static const char *__doc_kp_TensorT_operator_array = R"doc()doc"; + +static const char *__doc_kp_TensorT_setData = R"doc()doc"; + +static const char *__doc_kp_TensorT_vector = R"doc()doc"; + +static const char *__doc_kp_Tensor_Tensor = +R"doc(Constructor with data provided which would be used to create the +respective vulkan buffer and memory. 
+ +@param physicalDevice The physical device to use to fetch properties +@param device The device to use to create the buffer and memory from @param data Non-zero-sized vector of data that will be used by the -tensor @param tensorType Type for the tensor which is of type +tensor @param tensorTypes Type for the tensor which is of type TensorTypes)doc"; +static const char *__doc_kp_Tensor_TensorDataTypes = R"doc()doc"; + +static const char *__doc_kp_Tensor_TensorDataTypes_eBool = R"doc()doc"; + +static const char *__doc_kp_Tensor_TensorDataTypes_eDouble = R"doc()doc"; + +static const char *__doc_kp_Tensor_TensorDataTypes_eFloat = R"doc()doc"; + +static const char *__doc_kp_Tensor_TensorDataTypes_eInt = R"doc()doc"; + +static const char *__doc_kp_Tensor_TensorDataTypes_eUnsignedInt = R"doc()doc"; + static const char *__doc_kp_Tensor_TensorTypes = R"doc(Type for tensors created: Device allows memory to be transferred from staging buffers. Staging are host memory visible. Storage are device @@ -677,13 +744,14 @@ without exposing it. static const char *__doc_kp_Tensor_createBuffer = R"doc()doc"; -static const char *__doc_kp_Tensor_data = -R"doc(Returns the vector of data currently contained by the Tensor. It is -important to ensure that there is no out-of-sync data with the GPU -memory. 
+static const char *__doc_kp_Tensor_data = R"doc()doc"; -@return Reference to vector of elements representing the data in the -tensor.)doc"; +static const char *__doc_kp_Tensor_dataType = +R"doc(Retrieve the underlying data type of the Tensor + +@return Data type of tensor of type kp::Tensor::TensorDataTypes)doc"; + +static const char *__doc_kp_Tensor_dataTypeMemorySize = R"doc()doc"; static const char *__doc_kp_Tensor_destroy = R"doc(Destroys and frees the GPU resources which include the buffer and @@ -697,9 +765,15 @@ static const char *__doc_kp_Tensor_getStagingBufferUsageFlags = R"doc()doc"; static const char *__doc_kp_Tensor_getStagingMemoryPropertyFlags = R"doc()doc"; -static const char *__doc_kp_Tensor_isInit = R"doc()doc"; +static const char *__doc_kp_Tensor_isInit = +R"doc(Check whether tensor is initialized based on the created gpu +resources. -static const char *__doc_kp_Tensor_mData = R"doc()doc"; +@returns Boolean stating whether tensor is initialized)doc"; + +static const char *__doc_kp_Tensor_mDataType = R"doc()doc"; + +static const char *__doc_kp_Tensor_mDataTypeMemorySize = R"doc()doc"; static const char *__doc_kp_Tensor_mDevice = R"doc()doc"; @@ -717,36 +791,28 @@ static const char *__doc_kp_Tensor_mPrimaryBuffer = R"doc()doc"; static const char *__doc_kp_Tensor_mPrimaryMemory = R"doc()doc"; +static const char *__doc_kp_Tensor_mRawData = R"doc()doc"; + +static const char *__doc_kp_Tensor_mSize = R"doc()doc"; + static const char *__doc_kp_Tensor_mStagingBuffer = R"doc()doc"; static const char *__doc_kp_Tensor_mStagingMemory = R"doc()doc"; static const char *__doc_kp_Tensor_mTensorType = R"doc()doc"; -static const char *__doc_kp_Tensor_mapDataFromHostMemory = -R"doc(Maps data from the Host Visible GPU memory into the data vector. It -requires the Tensor to be of staging type for it to work.)doc"; - -static const char *__doc_kp_Tensor_mapDataIntoHostMemory = -R"doc(Maps data from the data vector into the Host Visible GPU memory. 
It -requires the tensor to be of staging type for it to work.)doc"; +static const char *__doc_kp_Tensor_mapRawData = R"doc()doc"; static const char *__doc_kp_Tensor_memorySize = R"doc()doc"; -static const char *__doc_kp_Tensor_operator_array = -R"doc(Overrides the subscript operator to expose the underlying data's -subscript operator which in this case would be its underlying -vector's. - -@param i The index where the element will be returned from. @return -Returns the element in the position requested.)doc"; +static const char *__doc_kp_Tensor_rawData = R"doc()doc"; static const char *__doc_kp_Tensor_rebuild = -R"doc(Initialiser which calls the initialisation for all the respective -tensors as well as creates the respective staging tensors. The staging -tensors would only be created for the tensors of type -TensorType::eDevice as otherwise there is no need to copy from host -memory.)doc"; +R"doc(Function to trigger reinitialisation of the tensor buffer and memory +with new data as well as new potential device type. + +@param data Vector of data to use to initialise vector from @param +tensorType The type to use for the tensor)doc"; static const char *__doc_kp_Tensor_recordBufferMemoryBarrier = R"doc(Records the buffer memory barrier into the command buffer which @@ -788,7 +854,7 @@ would only be relevant for kp::Tensors of type eDevice. @param createBarrier Whether to create a barrier that ensures the data is copied before further operations. Default is true.)doc"; -static const char *__doc_kp_Tensor_setData = +static const char *__doc_kp_Tensor_setRawData = R"doc(Sets / resets the vector data of the tensor. 
This function does not perform any copies into GPU memory and is only performed on the host.)doc"; @@ -803,6 +869,10 @@ R"doc(Retrieve the tensor type of the Tensor @return Tensor type of tensor)doc"; +static const char *__doc_kp_Tensor_unmapRawData = R"doc()doc"; + +static const char *__doc_kp_Tensor_vector = R"doc()doc"; + #if defined(__GNUG__) #pragma GCC diagnostic pop #endif diff --git a/python/src/main.cpp b/python/src/main.cpp index 7165d41e7..9e065c213 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -26,9 +26,9 @@ PYBIND11_MODULE(kp, m) { py::module_ np = py::module_::import("numpy"); py::enum_(m, "TensorTypes") - .value("device", kp::Tensor::TensorTypes::eDevice, "Tensor holding data in GPU memory.") - .value("host", kp::Tensor::TensorTypes::eHost, "Tensor used for CPU visible GPU data.") - .value("storage", kp::Tensor::TensorTypes::eStorage, "Tensor with host visible gpu memory.") + .value("device", kp::Tensor::TensorTypes::eDevice, DOC(kp, Tensor, TensorTypes, eDevice)) + .value("host", kp::Tensor::TensorTypes::eHost, DOC(kp, Tensor, TensorTypes, eHost)) + .value("storage", kp::Tensor::TensorTypes::eStorage, DOC(kp, Tensor, TensorTypes, eStorage)) .export_values(); #if !defined(KOMPUTE_DISABLE_SHADER_UTILS) || !KOMPUTE_DISABLE_SHADER_UTILS @@ -37,119 +37,168 @@ PYBIND11_MODULE(kp, m) { const std::string& source, const std::string& entryPoint, const std::vector>& definitions) { - std::vector spirv = kp::Shader::compile_source(source, entryPoint, definitions); + std::vector spirv = kp::Shader::compileSource(source, entryPoint, definitions); return py::bytes((const char*)spirv.data(), spirv.size() * sizeof(uint32_t)); }, - "Compiles string source provided and returns the value in bytes", - py::arg("source"), py::arg("entryPoint") = "main", py::arg("definitions") = std::vector>() ) + DOC(kp, Shader, compileSource), + py::arg("source"), + py::arg("entryPoint") = "main", + py::arg("definitions") = std::vector>() ) .def_static("compile_sources", 
[]( const std::vector& source, const std::vector& files, const std::string& entryPoint, const std::vector>& definitions) { - std::vector spirv = kp::Shader::compile_sources(source, files, entryPoint, definitions); + std::vector spirv = kp::Shader::compileSources(source, files, entryPoint, definitions); return py::bytes((const char*)spirv.data(), spirv.size() * sizeof(uint32_t)); }, - "Compiles sources provided with file names and returns the value in bytes", - py::arg("sources"), py::arg("files") = std::vector(), py::arg("entryPoint") = "main", py::arg("definitions") = std::vector>() ); + DOC(kp, Shader, compileSources), + py::arg("sources"), + py::arg("files") = std::vector(), + py::arg("entryPoint") = "main", + py::arg("definitions") = std::vector>() ); #endif // KOMPUTE_DISABLE_SHADER_UTILS - py::class_>(m, "OpBase"); + py::class_>(m, "OpBase", DOC(kp, OpBase)); - py::class_>(m, "OpTensorSyncDevice", py::base()) - .def(py::init>&>()); + py::class_>( + m, "OpTensorSyncDevice", py::base(), DOC(kp, OpTensorSyncDevice)) + .def(py::init>&>(), DOC(kp, OpTensorSyncDevice, OpTensorSyncDevice)); - py::class_>(m, "OpTensorSyncLocal", py::base()) - .def(py::init>&>()); + py::class_>( + m, "OpTensorSyncLocal", py::base(), DOC(kp, OpTensorSyncLocal)) + .def(py::init>&>(), DOC(kp, OpTensorSyncLocal, OpTensorSyncLocal)); - py::class_>(m, "OpTensorCopy", py::base()) - .def(py::init>&>()); + py::class_>( + m, "OpTensorCopy", py::base(), DOC(kp, OpTensorCopy)) + .def(py::init>&>(), DOC(kp, OpTensorCopy, OpTensorCopy)); - py::class_>(m, "OpAlgoDispatch", py::base()) + py::class_>( + m, "OpAlgoDispatch", py::base(), DOC(kp, OpAlgoDispatch)) .def(py::init&,const kp::Constants&>(), + DOC(kp, OpAlgoDispatch, OpAlgoDispatch), py::arg("algorithm"), py::arg("push_consts") = kp::Constants()); - py::class_>(m, "OpMult", py::base()) - .def(py::init>&,const std::shared_ptr&>()); + py::class_>( + m, "OpMult", py::base(), DOC(kp, OpMult)) + .def(py::init>&,const std::shared_ptr&>(), + DOC(kp, 
OpMult, OpMult)); - py::class_>(m, "Algorithm") - .def("get_tensors", &kp::Algorithm::getTensors) - .def("destroy", &kp::Algorithm::destroy) - .def("get_spec_consts", &kp::Algorithm::getSpecializationConstants) - .def("is_init", &kp::Algorithm::isInit); + py::class_>(m, "Algorithm", DOC(kp, Algorithm, Algorithm)) + .def("get_tensors", &kp::Algorithm::getTensors, DOC(kp, Algorithm, getTensors)) + .def("destroy", &kp::Algorithm::destroy, DOC(kp, Algorithm, destroy)) + .def("get_spec_consts", &kp::Algorithm::getSpecializationConstants, DOC(kp, Algorithm, getSpecializationConstants)) + .def("is_init", &kp::Algorithm::isInit, DOC(kp, Algorithm, isInit)); py::class_>(m, "Tensor", DOC(kp, Tensor)) .def("data", [](kp::Tensor& self) { - return py::array(self.data().size(), self.data().data()); - }, "Returns stored data as a new numpy array.") - .def("__getitem__", [](kp::Tensor &self, size_t index) -> float { return self.data()[index]; }, - "When only an index is necessary") - .def("__setitem__", [](kp::Tensor &self, size_t index, float value) { - self.data()[index] = value; }) - .def("set_data", [np](kp::Tensor &self, const py::array_t data){ - const py::array_t flatdata = np.attr("ravel")(data); - const py::buffer_info info = flatdata.request(); - const float* ptr = (float*) info.ptr; - self.setData(std::vector(ptr, ptr+flatdata.size())); - }, "Overrides the data in the local Tensor memory.") - .def("__iter__", [](kp::Tensor &self) { - return py::make_iterator(self.data().begin(), self.data().end()); - }, py::keep_alive<0, 1>(), // Required to keep alive iterator while exists - "Iterator to enable looping within data structure as required.") - .def("__contains__", [](kp::Tensor &self, float v) { - for (size_t i = 0; i < self.data().size(); ++i) { - if (v == self.data()[i]) { - return true; - } - } - return false; - }) - .def("__reversed__", [](kp::Tensor &self) { - size_t size = self.data().size(); - std::vector reversed(size); - for (size_t i = 0; i < size; i++) { - 
reversed[size - i - 1] = self.data()[i]; + // Non-owning container exposing the underlying pointer + py::str dummyDataOwner; // Explicitly request data to not be owned by np + switch (self.dataType()) { + case kp::Tensor::TensorDataTypes::eFloat: + return py::array(self.size(), self.data(), dummyDataOwner); + case kp::Tensor::TensorDataTypes::eUnsignedInt: + return py::array(self.size(), self.data(), dummyDataOwner); + case kp::Tensor::TensorDataTypes::eInt: + return py::array(self.size(), self.data(), dummyDataOwner); + case kp::Tensor::TensorDataTypes::eDouble: + return py::array(self.size(), self.data(), dummyDataOwner); + case kp::Tensor::TensorDataTypes::eBool: + return py::array(self.size(), self.data(), dummyDataOwner); + default: + throw std::runtime_error("Kompute Python data type not supported"); } - return reversed; - }) - .def("size", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.") - .def("__len__", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.") - .def("tensor_type", &kp::Tensor::tensorType, "Retreves the memory type of the tensor.") - .def("is_init", &kp::Tensor::isInit, "Checks whether the tensor GPU memory has been initialised.") - .def("destroy", &kp::Tensor::destroy, "Destroy tensor GPU resources."); + }, DOC(kp, Tensor, data)) + .def("size", &kp::Tensor::size, DOC(kp, Tensor, size)) + .def("__len__", &kp::Tensor::size, DOC(kp, Tensor, size)) + .def("tensor_type", &kp::Tensor::tensorType, DOC(kp, Tensor, tensorType)) + .def("data_type", &kp::Tensor::dataType, DOC(kp, Tensor, dataType)) + .def("is_init", &kp::Tensor::isInit, DOC(kp, Tensor, isInit)) + .def("destroy", &kp::Tensor::destroy, DOC(kp, Tensor, destroy)); py::class_>(m, "Sequence") - .def("record", [](kp::Sequence& self, std::shared_ptr op) { return self.record(op); }) - .def("eval", [](kp::Sequence& self) { return self.eval(); }) - .def("eval", [](kp::Sequence& self, std::shared_ptr op) { return 
self.eval(op); }) - .def("eval_async", [](kp::Sequence& self) { return self.eval(); }) - .def("eval_async", [](kp::Sequence& self, std::shared_ptr op) { return self.evalAsync(op); }) - .def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); }) - .def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); }) - .def("is_recording", &kp::Sequence::isRecording) - .def("is_running", &kp::Sequence::isRunning) - .def("is_init", &kp::Sequence::isInit) - .def("get_timestamps", &kp::Sequence::getTimestamps) - .def("clear", &kp::Sequence::clear) - .def("destroy", &kp::Sequence::destroy); + .def("record", [](kp::Sequence& self, std::shared_ptr op) { return self.record(op); }, + DOC(kp, Sequence, record)) + .def("eval", [](kp::Sequence& self) { return self.eval(); }, + DOC(kp, Sequence, eval)) + .def("eval", [](kp::Sequence& self, std::shared_ptr op) { return self.eval(op); }, + DOC(kp, Sequence, eval_2)) + .def("eval_async", [](kp::Sequence& self) { return self.eval(); }, + DOC(kp, Sequence, evalAwait)) + .def("eval_async", [](kp::Sequence& self, std::shared_ptr op) { return self.evalAsync(op); }, + DOC(kp, Sequence, evalAsync)) + .def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); }, + DOC(kp, Sequence, evalAwait)) + .def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); }, + DOC(kp, Sequence, evalAwait)) + .def("is_recording", &kp::Sequence::isRecording, + DOC(kp, Sequence, isRecording)) + .def("is_running", &kp::Sequence::isRunning, + DOC(kp, Sequence, isRunning)) + .def("is_init", &kp::Sequence::isInit, + DOC(kp, Sequence, isInit)) + .def("clear", &kp::Sequence::clear, + DOC(kp, Sequence, clear)) + .def("rerecord", &kp::Sequence::rerecord, + DOC(kp, Sequence, rerecord)) + .def("get_timestamps", &kp::Sequence::getTimestamps, + DOC(kp, Sequence, getTimestamps)) + .def("destroy", &kp::Sequence::destroy, + DOC(kp, Sequence, destroy)); - py::class_>(m, "Manager") - .def(py::init()) - 
.def(py::init()) + py::class_>(m, "Manager", DOC(kp, Manager)) + .def(py::init(), DOC(kp, Manager, Manager)) + .def(py::init(), DOC(kp, Manager, Manager_2)) .def(py::init&,const std::vector&>(), + DOC(kp, Manager, Manager_2), py::arg("device") = 0, py::arg("family_queue_indices") = std::vector(), py::arg("desired_extensions") = std::vector()) - .def("sequence", &kp::Manager::sequence, py::arg("queue_index") = 0, py::arg("total_timestamps") = 0) + .def("sequence", &kp::Manager::sequence, DOC(kp, Manager, sequence), + py::arg("queue_index") = 0, py::arg("total_timestamps") = 0) .def("tensor", [np](kp::Manager& self, - const py::array_t data, + const py::array_t& data, kp::Tensor::TensorTypes tensor_type) { - const py::array_t flatdata = np.attr("ravel")(data); + const py::array_t& flatdata = np.attr("ravel")(data); const py::buffer_info info = flatdata.request(); - const float* ptr = (float*) info.ptr; - return self.tensor(std::vector(ptr, ptr+flatdata.size()), tensor_type); + KP_LOG_DEBUG("Kompute Python Manager tensor() creating tensor float with data size {}", flatdata.size()); + return self.tensor( + info.ptr, + flatdata.size(), + sizeof(float), + kp::Tensor::TensorDataTypes::eFloat, + tensor_type); }, - "Tensor initialisation function with data and tensor type", + DOC(kp, Manager, tensor), + py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice) + .def("tensor_t", [np](kp::Manager& self, + const py::array& data, + kp::Tensor::TensorTypes tensor_type) { + // TODO: Support strides in numpy format + const py::array& flatdata = np.attr("ravel")(data); + const py::buffer_info info = flatdata.request(); + KP_LOG_DEBUG("Kompute Python Manager creating tensor_T with data size {} dtype {}", + flatdata.size(), std::string(py::str(flatdata.dtype()))); + if (flatdata.dtype() == py::dtype::of()) { + return self.tensor( + info.ptr, flatdata.size(), sizeof(float), kp::Tensor::TensorDataTypes::eFloat, tensor_type); + } else if (flatdata.dtype() ==
py::dtype::of()) { + return self.tensor( + info.ptr, flatdata.size(), sizeof(uint32_t), kp::Tensor::TensorDataTypes::eUnsignedInt, tensor_type); + } else if (flatdata.dtype() == py::dtype::of()) { + return self.tensor( + info.ptr, flatdata.size(), sizeof(int32_t), kp::Tensor::TensorDataTypes::eInt, tensor_type); + } else if (flatdata.dtype() == py::dtype::of()) { + return self.tensor( + info.ptr, flatdata.size(), sizeof(double), kp::Tensor::TensorDataTypes::eDouble, tensor_type); + } else if (flatdata.dtype() == py::dtype::of()) { + return self.tensor( + info.ptr, flatdata.size(), sizeof(bool), kp::Tensor::TensorDataTypes::eBool, tensor_type); + } else { + throw std::runtime_error("Kompute Python no valid dtype supported"); + } + }, + DOC(kp, Manager, tensorT), py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice) .def("algorithm", [](kp::Manager& self, const std::vector>& tensors, @@ -163,8 +212,12 @@ PYBIND11_MODULE(kp, m) { std::vector spirvVec((uint32_t*)data, (uint32_t*)(data + length)); return self.algorithm(tensors, spirvVec, workgroup, spec_consts, push_consts); }, - "Algorithm initialisation function", - py::arg("tensors"), py::arg("spirv"), py::arg("workgroup") = kp::Workgroup(), py::arg("spec_consts") = kp::Constants(), py::arg("push_consts") = kp::Constants()); + DOC(kp, Manager, algorithm), + py::arg("tensors"), + py::arg("spirv"), + py::arg("workgroup") = kp::Workgroup(), + py::arg("spec_consts") = kp::Constants(), + py::arg("push_consts") = kp::Constants()); #ifdef VERSION_INFO m.attr("__version__") = VERSION_INFO; diff --git a/python/test/test_array_multiplication.py b/python/test/test_array_multiplication.py index 0dab581c6..e8de68328 100644 --- a/python/test/test_array_multiplication.py +++ b/python/test/test_array_multiplication.py @@ -9,9 +9,9 @@ def test_array_multiplication(): mgr = kp.Manager() # 2. 
Create Kompute Tensors to hold data - tensor_in_a = mgr.tensor([2, 2, 2]) - tensor_in_b = mgr.tensor([1, 2, 3]) - tensor_out = mgr.tensor([0, 0, 0]) + tensor_in_a = mgr.tensor(np.array([2, 2, 2])) + tensor_in_b = mgr.tensor(np.array([1, 2, 3])) + tensor_out = mgr.tensor(np.array([0, 0, 0])) params = [tensor_in_a, tensor_in_b, tensor_out] diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index 47887930a..e1bcee940 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -9,35 +9,15 @@ DIRNAME = os.path.dirname(os.path.abspath(__file__)) kp_log = logging.getLogger("kp") -# TODO: Add example with file -#def test_opalgobase_file(): -# """ -# Test basic OpMult operation -# """ -# -# tensor_in_a = kp.Tensor([2, 2, 2]) -# tensor_in_b = kp.Tensor([1, 2, 3]) -# tensor_out = kp.Tensor([0, 0, 0]) -# -# mgr = kp.Manager() -# mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) -# -# shader_path = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp.spv") -# -# mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shader_path) -# -# mgr.eval_tensor_sync_local_def([tensor_out]) -# -# assert tensor_out.data() == [2.0, 4.0, 6.0] - def test_end_to_end(): mgr = kp.Manager() tensor_in_a = mgr.tensor([2, 2, 2]) tensor_in_b = mgr.tensor([1, 2, 3]) - tensor_out_a = mgr.tensor([0, 0, 0]) - tensor_out_b = mgr.tensor([0, 0, 0]) + # Explicit type constructor supports uint32, int32, double, float and bool + tensor_out_a = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32)) + tensor_out_b = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32)) params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b] @@ -49,8 +29,8 @@ def test_end_to_end(): // The input tensors bind index is relative to index in parameter passed layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; }; layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; }; - layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; }; - layout(set = 0, binding = 3) buffer
buf_out_b { float out_b[]; }; + layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; }; + layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; }; // Kompute supports push constants updated on dispatch layout(push_constant) uniform PushConstants { @@ -62,8 +42,8 @@ def test_end_to_end(): void main() { uint index = gl_GlobalInvocationID.x; - out_a[index] += in_a[index] * in_b[index]; - out_b[index] += const_one * push_const.val; + out_a[index] += uint( in_a[index] * in_b[index] ); + out_b[index] += uint( const_one * push_const.val ); } """ diff --git a/python/test/test_logistic_regression.py b/python/test/test_logistic_regression.py index 4bd0c28fa..862758413 100644 --- a/python/test/test_logistic_regression.py +++ b/python/test/test_logistic_regression.py @@ -1,4 +1,5 @@ import pyshader as ps +import numpy as np import kp def test_logistic_regression(): @@ -46,21 +47,21 @@ def test_logistic_regression(): mgr = kp.Manager(0) # First we create input and ouput tensors for shader - tensor_x_i = mgr.tensor([0.0, 1.0, 1.0, 1.0, 1.0]) - tensor_x_j = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0]) + tensor_x_i = mgr.tensor(np.array([0.0, 1.0, 1.0, 1.0, 1.0])) + tensor_x_j = mgr.tensor(np.array([0.0, 0.0, 0.0, 1.0, 1.0])) - tensor_y = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0]) + tensor_y = mgr.tensor(np.array([0.0, 0.0, 0.0, 1.0, 1.0])) - tensor_w_in = mgr.tensor([0.001, 0.001]) - tensor_w_out_i = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0]) - tensor_w_out_j = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + tensor_w_in = mgr.tensor(np.array([0.001, 0.001])) + tensor_w_out_i = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0])) + tensor_w_out_j = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0])) - tensor_b_in = mgr.tensor([0.0]) - tensor_b_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + tensor_b_in = mgr.tensor(np.array([0.0])) + tensor_b_out = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0])) - tensor_l_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + tensor_l_out = mgr.tensor(np.array([0.0, 0.0, 
0.0, 0.0, 0.0])) - tensor_m = mgr.tensor([ tensor_y.size() ]) + tensor_m = mgr.tensor(np.array([ tensor_y.size() ])) # We store them in an array for easier interaction params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i, @@ -91,9 +92,9 @@ def test_logistic_regression(): # Calculate the parameters based on the respective derivatives calculated for j_iter in range(tensor_b_out.size()): - tensor_w_in[0] -= learning_rate * tensor_w_out_i.data()[j_iter] - tensor_w_in[1] -= learning_rate * tensor_w_out_j.data()[j_iter] - tensor_b_in[0] -= learning_rate * tensor_b_out.data()[j_iter] + tensor_w_in.data()[0] -= learning_rate * tensor_w_out_i.data()[j_iter] + tensor_w_in.data()[1] -= learning_rate * tensor_w_out_j.data()[j_iter] + tensor_b_in.data()[0] -= learning_rate * tensor_b_out.data()[j_iter] assert tensor_w_in.data()[0] < 0.01 assert tensor_w_in.data()[0] > 0.0 diff --git a/python/test/test_tensor_types.py b/python/test/test_tensor_types.py new file mode 100644 index 000000000..b1d90fe03 --- /dev/null +++ b/python/test/test_tensor_types.py @@ -0,0 +1,206 @@ +import pyshader as ps +import os +import pytest +import kp +import numpy as np + + +def test_type_float(): + + shader = """ + #version 450 + layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];}; + layout(set = 0, binding = 1) buffer tensorRhs {float valuesRhs[];}; + layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];}; + layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + + void main() + { + uint index = gl_GlobalInvocationID.x; + valuesOutput[index] = valuesLhs[index] * valuesRhs[index]; + } + """ + + spirv = kp.Shader.compile_source(shader) + + arr_in_a = np.array([123., 153., 231.], dtype=np.float32) + arr_in_b = np.array([9482, 1208, 1238], dtype=np.float32) + arr_out = np.array([0, 0, 0], dtype=np.float32) + + mgr = kp.Manager() + + tensor_in_a = mgr.tensor(arr_in_a) + tensor_in_b = mgr.tensor(arr_in_b) + tensor_out = mgr.tensor(arr_out) + 
+ params = [tensor_in_a, tensor_in_b, tensor_out] + + (mgr.sequence() + .record(kp.OpTensorSyncDevice(params)) + .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv))) + .record(kp.OpTensorSyncLocal([tensor_out])) + .eval()) + + assert np.all(tensor_out.data() == arr_in_a * arr_in_b) + + +def test_type_float_double_incorrect(): + + shader = """ + #version 450 + layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];}; + layout(set = 0, binding = 1) buffer tensorRhs {float valuesRhs[];}; + layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];}; + layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + + void main() + { + uint index = gl_GlobalInvocationID.x; + valuesOutput[index] = valuesLhs[index] * valuesRhs[index]; + } + """ + + spirv = kp.Shader.compile_source(shader) + + arr_in_a = np.array([123., 153., 231.], dtype=np.float32) + arr_in_b = np.array([9482, 1208, 1238], dtype=np.uint32) + arr_out = np.array([0, 0, 0], dtype=np.float32) + + mgr = kp.Manager() + + tensor_in_a = mgr.tensor_t(arr_in_a) + tensor_in_b = mgr.tensor_t(arr_in_b) + tensor_out = mgr.tensor_t(arr_out) + + params = [tensor_in_a, tensor_in_b, tensor_out] + + (mgr.sequence() + .record(kp.OpTensorSyncDevice(params)) + .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv))) + .record(kp.OpTensorSyncLocal([tensor_out])) + .eval()) + + assert np.all(tensor_out.data() != arr_in_a * arr_in_b) + +@pytest.mark.skipif("swiftshader" in os.environ.get("VK_ICD_FILENAMES"), + reason="Swiftshader doesn't support double") +def test_type_double(): + + shader = """ + #version 450 + layout(set = 0, binding = 0) buffer tensorLhs { double valuesLhs[]; }; + layout(set = 0, binding = 1) buffer tensorRhs { double valuesRhs[]; }; + layout(set = 0, binding = 2) buffer tensorOutput { double valuesOutput[]; }; + layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + + void main() + { + uint index = gl_GlobalInvocationID.x; + valuesOutput[index] = valuesLhs[index] 
* valuesRhs[index]; + } + """ + + spirv = kp.Shader.compile_source(shader) + + arr_in_a = np.array([123., 153., 231.], dtype=np.float64) + arr_in_b = np.array([9482, 1208, 1238], dtype=np.float64) + arr_out = np.array([0, 0, 0], dtype=np.float64) + + mgr = kp.Manager() + + tensor_in_a = mgr.tensor_t(arr_in_a) + tensor_in_b = mgr.tensor_t(arr_in_b) + tensor_out = mgr.tensor_t(arr_out) + + params = [tensor_in_a, tensor_in_b, tensor_out] + + (mgr.sequence() + .record(kp.OpTensorSyncDevice(params)) + .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv))) + .record(kp.OpTensorSyncLocal([tensor_out])) + .eval()) + + print(f"Dtype value {tensor_out.data().dtype}") + + assert np.all(tensor_out.data() == arr_in_a * arr_in_b) + +def test_type_int(): + + shader = """ + #version 450 + layout(set = 0, binding = 0) buffer tensorLhs { int valuesLhs[]; }; + layout(set = 0, binding = 1) buffer tensorRhs { int valuesRhs[]; }; + layout(set = 0, binding = 2) buffer tensorOutput { int valuesOutput[]; }; + layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + + void main() + { + uint index = gl_GlobalInvocationID.x; + valuesOutput[index] = valuesLhs[index] * valuesRhs[index]; + } + """ + + spirv = kp.Shader.compile_source(shader) + + arr_in_a = np.array([123, 153, 231], dtype=np.int32) + arr_in_b = np.array([9482, 1208, 1238], dtype=np.int32) + arr_out = np.array([0, 0, 0], dtype=np.int32) + + mgr = kp.Manager() + + tensor_in_a = mgr.tensor_t(arr_in_a) + tensor_in_b = mgr.tensor_t(arr_in_b) + tensor_out = mgr.tensor_t(arr_out) + + params = [tensor_in_a, tensor_in_b, tensor_out] + + (mgr.sequence() + .record(kp.OpTensorSyncDevice(params)) + .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv))) + .record(kp.OpTensorSyncLocal([tensor_out])) + .eval()) + + print(f"Dtype value {tensor_out.data().dtype}") + + assert np.all(tensor_out.data() == arr_in_a * arr_in_b) + +def test_type_unsigned_int(): + + shader = """ + #version 450 + layout(set = 0, binding = 0) buffer 
tensorLhs { uint valuesLhs[]; }; + layout(set = 0, binding = 1) buffer tensorRhs { uint valuesRhs[]; }; + layout(set = 0, binding = 2) buffer tensorOutput { uint valuesOutput[]; }; + layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + + void main() + { + uint index = gl_GlobalInvocationID.x; + valuesOutput[index] = valuesLhs[index] * valuesRhs[index]; + } + """ + + spirv = kp.Shader.compile_source(shader) + + arr_in_a = np.array([123, 153, 231], dtype=np.uint32) + arr_in_b = np.array([9482, 1208, 1238], dtype=np.uint32) + arr_out = np.array([0, 0, 0], dtype=np.uint32) + + mgr = kp.Manager() + + tensor_in_a = mgr.tensor_t(arr_in_a) + tensor_in_b = mgr.tensor_t(arr_in_b) + tensor_out = mgr.tensor_t(arr_out) + + params = [tensor_in_a, tensor_in_b, tensor_out] + + (mgr.sequence() + .record(kp.OpTensorSyncDevice(params)) + .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv))) + .record(kp.OpTensorSyncLocal([tensor_out])) + .eval()) + + print(f"Dtype value {tensor_out.data().dtype}") + + assert np.all(tensor_out.data() == arr_in_a * arr_in_b) + diff --git a/setup.py b/setup.py index ee3521064..733c4c185 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ class CMakeBuild(build_ext): else: cmake_args += ['-DKOMPUTE_EXTRA_CXX_FLAGS="-fPIC"'] cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] - build_args += ['--', '-j2'] + build_args += ['--', '-j'] env = os.environ.copy() env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 38213bb6e..9b41e1ead 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -762,7 +762,7 @@ class Shader * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ - static std::vector compile_sources( + static std::vector compileSources( const std::vector& sources, const std::vector& files = {}, const std::string& entryPoint = "main", @@ -783,7 +783,7 @@ class 
Shader * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ - static std::vector compile_source( + static std::vector compileSource( const std::string& source, const std::string& entryPoint = "main", std::vector> definitions = {}, @@ -818,6 +818,14 @@ class Tensor eHost = 1, ///< Type is host memory, source and destination eStorage = 2, ///< Type is Device memory (only) }; + enum class TensorDataTypes + { + eBool = 0, + eInt = 1, + eUnsignedInt = 2, + eFloat = 3, + eDouble = 4, + }; /** * Constructor with data provided which would be used to create the @@ -831,14 +839,17 @@ class Tensor */ Tensor(std::shared_ptr physicalDevice, std::shared_ptr device, - const std::vector& data, + void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const TensorDataTypes& dataType, const TensorTypes& tensorType = TensorTypes::eDevice); /** * Destructor which is in charge of freeing vulkan resources unless they * have been provided externally. */ - ~Tensor(); + virtual ~Tensor(); /** * Function to trigger reinitialisation of the tensor buffer and memory with @@ -847,8 +858,9 @@ class Tensor * @param data Vector of data to use to initialise vector from * @param tensorType The type to use for the tensor */ - void rebuild(const std::vector& data, - TensorTypes tensorType = TensorTypes::eDevice); + void rebuild(void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize); /** * Destroys and frees the GPU resources which include the buffer and memory. @@ -862,32 +874,6 @@ class Tensor */ bool isInit(); - /** - * Returns the vector of data currently contained by the Tensor. It is - * important to ensure that there is no out-of-sync data with the GPU - * memory. - * - * @return Reference to vector of elements representing the data in the - * tensor. - */ - std::vector& data(); - /** - * Overrides the subscript operator to expose the underlying data's - * subscript operator which in this case would be its underlying - * vector's. 
- * - * @param i The index where the element will be returned from. - * @return Returns the element in the position requested. - */ - float& operator[](int index); - /** - * Returns the size/magnitude of the Tensor, which will be the total number - * of elements across all dimensions - * - * @return Unsigned integer representing the total number of elements - */ - uint32_t size(); - /** * Retrieve the tensor type of the Tensor * @@ -895,12 +881,6 @@ class Tensor */ TensorTypes tensorType(); - /** - * Sets / resets the vector data of the tensor. This function does not - * perform any copies into GPU memory and is only performed on the host. - */ - void setData(const std::vector& data); - /** * Records a copy from the memory of the tensor provided to the current * thensor. This is intended to pass memory into a processing, to perform @@ -963,18 +943,118 @@ class Tensor * @return Descriptor buffer info with own buffer */ vk::DescriptorBufferInfo constructDescriptorBufferInfo(); + /** - * Maps data from the Host Visible GPU memory into the data vector. It - * requires the Tensor to be of staging type for it to work. + * Returns the size/magnitude of the Tensor, which will be the total number + * of elements across all dimensions + * + * @return Unsigned integer representing the total number of elements */ - void mapDataFromHostMemory(); + // TODO: move to cpp + uint32_t size() { + return this->mSize; + } + + // TODO: move to cpp + uint32_t dataTypeMemorySize() { + return this->mDataTypeMemorySize; + } + + // TODO: move to cpp + uint32_t memorySize() { + return this->mSize * this->mDataTypeMemorySize; + } + /** - * Maps data from the data vector into the Host Visible GPU memory. It - * requires the tensor to be of staging type for it to work. 
+ * Retrieve the underlying data type of the Tensor + * + * @return Data type of tensor of type kp::Tensor::TensorDataTypes */ - void mapDataIntoHostMemory(); + TensorDataTypes dataType() { + return this->mDataType; + } + + void* rawData() { + return this->mRawData; + } + + // TODO: move to cpp + template + T* data() { + return (T*)this->mRawData; + } + + template + std::vector vector() { + return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; + } + + /** + * Sets / resets the vector data of the tensor. This function does not + * perform any copies into GPU memory and is only performed on the host. + */ + void setRawData(const void* data) + { + // Copy data + memcpy(this->mRawData, data, this->memorySize()); + } + + protected: + // -------------- ALWAYS OWNED RESOURCES + TensorTypes mTensorType; + TensorDataTypes mDataType; + uint32_t mSize; + uint32_t mDataTypeMemorySize; + void* mRawData; private: + void mapRawData() { + + KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); + + std::shared_ptr hostVisibleMemory = nullptr; + + if (this->mTensorType == TensorTypes::eHost) { + hostVisibleMemory = this->mPrimaryMemory; + } else if (this->mTensorType == TensorTypes::eDevice) { + hostVisibleMemory = this->mStagingMemory; + } else { + KP_LOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); + return; + } + + vk::DeviceSize bufferSize = this->memorySize(); + + // Given we request coherent host memory we don't need to invalidate / flush + this->mRawData = this->mDevice->mapMemory( + *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); + + vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize); + } + + void unmapRawData() { + + KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); + + std::shared_ptr hostVisibleMemory = nullptr; + + if (this->mTensorType == TensorTypes::eHost) { + hostVisibleMemory = this->mPrimaryMemory; + } else if (this->mTensorType == TensorTypes::eDevice) { + 
hostVisibleMemory = this->mStagingMemory; + } else { + KP_LOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); + return; + } + + vk::DeviceSize bufferSize = this->memorySize(); + vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); + this->mDevice->flushMappedMemoryRanges(1, &mappedRange); + this->mDevice->unmapMemory(*hostVisibleMemory); + } + // -------------- NEVER OWNED RESOURCES std::shared_ptr mPhysicalDevice; std::shared_ptr mDevice; @@ -989,11 +1069,6 @@ class Tensor std::shared_ptr mStagingMemory; bool mFreeStagingMemory = false; - // -------------- ALWAYS OWNED RESOURCES - std::vector mData; - - TensorTypes mTensorType = TensorTypes::eDevice; - void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer void createBuffer(std::shared_ptr buffer, vk::BufferUsageFlags bufferUsageFlags); @@ -1012,7 +1087,60 @@ class Tensor vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags(); vk::BufferUsageFlags getStagingBufferUsageFlags(); vk::MemoryPropertyFlags getStagingMemoryPropertyFlags(); - uint64_t memorySize(); + +}; + +// TODO: Limit T to be only float, bool, double, etc +template +class TensorT: public Tensor +{ + + public: + TensorT(std::shared_ptr physicalDevice, + std::shared_ptr device, + const std::vector& data, + const TensorTypes& tensorType = TensorTypes::eDevice) + : Tensor(physicalDevice, + device, + (void*)data.data(), + data.size(), + sizeof(T), + this->dataType(), + tensorType) + { + KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size()); + } + + ~TensorT() { + KP_LOG_DEBUG("Kompute TensorT destructor"); + } + + T* data() { + return (T*)this->mRawData; + } + + std::vector vector() { + return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; + } + + T& operator[](int index) { + return *(((T*)this->mRawData) + index); + } + + void setData(const std::vector& data) { + + KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", data.size()); + + if (data.size() != 
this->mSize) { + throw std::runtime_error( + "Kompute TensorT Cannot set data of different sizes"); + } + + Tensor::setRawData(data.data()); + } + + TensorDataTypes dataType(); + }; } // End namespace kp @@ -1873,7 +2001,7 @@ class Manager * If zero (default), disables latching of timestamps. * @returns Shared pointer with initialised sequence */ - std::shared_ptr sequence(uint32_t queueIndex = 0, uint32_t nrOfTimestamps = 0); + std::shared_ptr sequence(uint32_t queueIndex = 0, uint32_t totalTimestamps = 0); /** * Create a managed tensor that will be destroyed by this manager @@ -1883,9 +2011,46 @@ class Manager * @param tensorType The type of tensor to initialize * @returns Shared pointer with initialised tensor */ - std::shared_ptr tensor( + template + std::shared_ptr> tensorT( + const std::vector& data, + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); + + std::shared_ptr> tensor{ new kp::TensorT( + this->mPhysicalDevice, this->mDevice, data, tensorType) }; + + if (this->mManageResources) { + this->mManagedTensors.push_back(tensor); + } + + return tensor; + } + + std::shared_ptr> tensor( const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice); + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + return this->tensorT(data, tensorType); + } + + std::shared_ptr tensor( + void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const Tensor::TensorDataTypes& dataType, + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + std::shared_ptr tensor{ new kp::Tensor( + this->mPhysicalDevice, this->mDevice, data, elementTotalCount, elementMemorySize, dataType, tensorType) }; + + if (this->mManageResources) { + this->mManagedTensors.push_back(tensor); + } + + return tensor; + } /** * Create a managed algorithm that will be destroyed by this manager diff --git a/src/Manager.cpp b/src/Manager.cpp index 
e3bdbb2d9..5d6bf4cd4 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -395,21 +395,6 @@ Manager::createDevice(const std::vector& familyQueueIndices, KP_LOG_DEBUG("Kompute Manager compute queue obtained"); } -std::shared_ptr -Manager::tensor(const std::vector& data, Tensor::TensorTypes tensorType) -{ - KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); - - std::shared_ptr tensor{ new kp::Tensor( - this->mPhysicalDevice, this->mDevice, data, tensorType) }; - - if (this->mManageResources) { - this->mManagedTensors.push_back(tensor); - } - - return tensor; -} - std::shared_ptr Manager::algorithm(const std::vector>& tensors, const std::vector& spirv, diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp index 6950a4cd2..c93830902 100644 --- a/src/OpTensorCopy.cpp +++ b/src/OpTensorCopy.cpp @@ -13,6 +13,20 @@ OpTensorCopy::OpTensorCopy(const std::vector>& tensors) throw std::runtime_error( "Kompute OpTensorCopy called with less than 2 tensor"); } + + kp::Tensor::TensorDataTypes dataType = this->mTensors[0]->dataType(); + uint32_t size = this->mTensors[0]->size(); + for (const std::shared_ptr& tensor : tensors) { + if (tensor->dataType() != dataType) { + throw std::runtime_error(fmt::format("Attempting to copy tensors of different types from {} to {}", + dataType, tensor->dataType())); + } + if (tensor->size() != size) { + throw std::runtime_error(fmt::format("Attempting to copy tensors of different sizes from {} to {}", + size, tensor->size())); + + } + } } OpTensorCopy::~OpTensorCopy() @@ -43,9 +57,15 @@ OpTensorCopy::postEval(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute OpTensorCopy postEval called"); + // TODO: Simplify with a copyRawData + uint32_t size = this->mTensors[0]->size(); + uint32_t dataTypeMemSize = this->mTensors[0]->dataTypeMemorySize(); + uint32_t memSize = size * dataTypeMemSize; + void* data = this->mTensors[0]->rawData(); + // Copy the data from the first tensor into all the tensors for (size_t i = 1; i < 
this->mTensors.size(); i++) { - this->mTensors[i]->setData(this->mTensors[0]->data()); + this->mTensors[i]->setRawData(data); } } diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp index 85cefde77..4dbfaec83 100644 --- a/src/OpTensorSyncDevice.cpp +++ b/src/OpTensorSyncDevice.cpp @@ -41,12 +41,6 @@ OpTensorSyncDevice::preEval(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute OpTensorSyncDevice preEval called"); - // Performing sync of data as eval can be called multiple times with same op - for (size_t i = 0; i < this->mTensors.size(); i++) { - if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) { - this->mTensors[i]->mapDataIntoHostMemory(); - } - } } void diff --git a/src/OpTensorSyncLocal.cpp b/src/OpTensorSyncLocal.cpp index 092490d15..f7e15ffd5 100644 --- a/src/OpTensorSyncLocal.cpp +++ b/src/OpTensorSyncLocal.cpp @@ -48,11 +48,6 @@ OpTensorSyncLocal::postEval(const vk::CommandBuffer& commandBuffer) KP_LOG_DEBUG("Kompute OpTensorSyncLocal postEval called"); KP_LOG_DEBUG("Kompute OpTensorSyncLocal mapping data into tensor local"); - for (size_t i = 0; i < this->mTensors.size(); i++) { - if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) { - this->mTensors[i]->mapDataFromHostMemory(); - } - } } } diff --git a/src/Shader.cpp b/src/Shader.cpp index 968e53234..bedac0165 100644 --- a/src/Shader.cpp +++ b/src/Shader.cpp @@ -5,7 +5,7 @@ namespace kp { std::vector -Shader::compile_sources( +Shader::compileSources( const std::vector& sources, const std::vector& files, const std::string& entryPoint, @@ -92,13 +92,13 @@ Shader::compile_sources( } std::vector -Shader::compile_source( +Shader::compileSource( const std::string& source, const std::string& entryPoint, std::vector> definitions, const TBuiltInResource& resource) { - return compile_sources({ source }, + return compileSources({ source }, std::vector({}), entryPoint, definitions, diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 
f584c07bd..947714693 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -5,17 +5,22 @@ namespace kp { Tensor::Tensor(std::shared_ptr physicalDevice, std::shared_ptr device, - const std::vector& data, + void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const TensorDataTypes& dataType, const TensorTypes& tensorType) { KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}", - data.size(), + elementTotalCount, tensorType); this->mPhysicalDevice = physicalDevice; this->mDevice = device; + this->mDataType = dataType; + this->mTensorType = tensorType; - this->rebuild(data, tensorType); + this->rebuild(data, elementTotalCount, elementMemorySize); } Tensor::~Tensor() @@ -29,12 +34,14 @@ Tensor::~Tensor() } void -Tensor::rebuild(const std::vector& data, TensorTypes tensorType) +Tensor::rebuild(void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize) { - KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", data.size()); + KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", elementTotalCount); - this->mData = data; - this->mTensorType = tensorType; + this->mSize = elementTotalCount; + this->mDataTypeMemorySize = elementMemorySize; if (this->mPrimaryBuffer || this->mPrimaryMemory) { KP_LOG_DEBUG( @@ -43,30 +50,9 @@ Tensor::rebuild(const std::vector& data, TensorTypes tensorType) } this->allocateMemoryCreateGPUResources(); -} + this->mapRawData(); -std::vector& -Tensor::data() -{ - return this->mData; -} - -float& -Tensor::operator[](int index) -{ - return this->mData[index]; -} - -uint64_t -Tensor::memorySize() -{ - return this->size() * sizeof(float); -} - -uint32_t -Tensor::size() -{ - return static_cast(this->mData.size()); + memcpy(this->mRawData, data, this->memorySize()); } Tensor::TensorTypes @@ -78,18 +64,12 @@ Tensor::tensorType() bool Tensor::isInit() { - return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory; + return this->mDevice + && this->mPrimaryBuffer + && this->mPrimaryMemory + && 
this->mRawData; } -void -Tensor::setData(const std::vector& data) -{ - if (data.size() != this->mData.size()) { - throw std::runtime_error( - "Kompute Tensor Cannot set data of different sizes"); - } - this->mData = data; -} void Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer, @@ -195,66 +175,13 @@ Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, vk::DescriptorBufferInfo Tensor::constructDescriptorBufferInfo() { + KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}", this->memorySize()); vk::DeviceSize bufferSize = this->memorySize(); return vk::DescriptorBufferInfo(*this->mPrimaryBuffer, 0, // offset bufferSize); } -void -Tensor::mapDataFromHostMemory() -{ - KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); - - std::shared_ptr hostVisibleMemory = nullptr; - - if (this->mTensorType == TensorTypes::eHost) { - hostVisibleMemory = this->mPrimaryMemory; - } else if (this->mTensorType == TensorTypes::eDevice) { - hostVisibleMemory = this->mStagingMemory; - } else { - KP_LOG_WARN( - "Kompute Tensor mapping data not supported on storage tensor"); - return; - } - - vk::DeviceSize bufferSize = this->memorySize(); - void* mapped = this->mDevice->mapMemory( - *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); - vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize); - this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange); - memcpy(this->mData.data(), mapped, bufferSize); - this->mDevice->unmapMemory(*hostVisibleMemory); -} - -void -Tensor::mapDataIntoHostMemory() -{ - - KP_LOG_DEBUG("Kompute Tensor local mapping tensor data to host buffer"); - - std::shared_ptr hostVisibleMemory = nullptr; - - if (this->mTensorType == TensorTypes::eHost) { - hostVisibleMemory = this->mPrimaryMemory; - } else if (this->mTensorType == TensorTypes::eDevice) { - hostVisibleMemory = this->mStagingMemory; - } else { - KP_LOG_WARN( - "Kompute Tensor mapping data not supported on storage 
tensor"); - return; - } - - vk::DeviceSize bufferSize = this->memorySize(); - - void* mapped = this->mDevice->mapMemory( - *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); - memcpy(mapped, this->mData.data(), bufferSize); - vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); - this->mDevice->flushMappedMemoryRanges(1, &mappedRange); - this->mDevice->unmapMemory(*hostVisibleMemory); -} - vk::BufferUsageFlags Tensor::getPrimaryBufferUsageFlags() { @@ -285,7 +212,8 @@ Tensor::getPrimaryMemoryPropertyFlags() return vk::MemoryPropertyFlagBits::eDeviceLocal; break; case TensorTypes::eHost: - return vk::MemoryPropertyFlagBits::eHostVisible; + return vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent; break; case TensorTypes::eStorage: return vk::MemoryPropertyFlagBits::eDeviceLocal; @@ -435,12 +363,20 @@ Tensor::destroy() { KP_LOG_DEBUG("Kompute Tensor started destroy()"); + // Setting raw data to null regardless whether device is available to invalidate Tensor + this->mRawData = nullptr; + this->mSize = 0; + this->mDataTypeMemorySize = 0; + if (!this->mDevice) { KP_LOG_WARN( "Kompute Tensor destructor reached with null Device pointer"); return; } + // Unmap the current memory data + this->unmapRawData(); + if (this->mFreePrimaryBuffer) { if (!this->mPrimaryBuffer) { KP_LOG_WARN("Kompose Tensor expected to destroy primary buffer " @@ -504,4 +440,34 @@ Tensor::destroy() KP_LOG_DEBUG("Kompute Tensor successful destroy()"); } +template<> +Tensor::TensorDataTypes +TensorT::dataType() { + return Tensor::TensorDataTypes::eBool; +} + +template<> +Tensor::TensorDataTypes +TensorT::dataType() { + return Tensor::TensorDataTypes::eInt; +} + +template<> +Tensor::TensorDataTypes +TensorT::dataType() { + return Tensor::TensorDataTypes::eUnsignedInt; +} + +template<> +Tensor::TensorDataTypes +TensorT::dataType() { + return Tensor::TensorDataTypes::eFloat; +} + +template<> +Tensor::TensorDataTypes +TensorT::dataType() { + 
return Tensor::TensorDataTypes::eDouble; +} + } diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index d9c6ddf3e..6eb2042eb 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -74,9 +74,46 @@ class Manager * @param tensorType The type of tensor to initialize * @returns Shared pointer with initialised tensor */ - std::shared_ptr tensor( + template + std::shared_ptr> tensorT( + const std::vector& data, + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); + + std::shared_ptr> tensor{ new kp::TensorT( + this->mPhysicalDevice, this->mDevice, data, tensorType) }; + + if (this->mManageResources) { + this->mManagedTensors.push_back(tensor); + } + + return tensor; + } + + std::shared_ptr> tensor( const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice); + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + return this->tensorT(data, tensorType); + } + + std::shared_ptr tensor( + void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const Tensor::TensorDataTypes& dataType, + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + std::shared_ptr tensor{ new kp::Tensor( + this->mPhysicalDevice, this->mDevice, data, elementTotalCount, elementMemorySize, dataType, tensorType) }; + + if (this->mManageResources) { + this->mManagedTensors.push_back(tensor); + } + + return tensor; + } /** * Create a managed algorithm that will be destroyed by this manager diff --git a/src/include/kompute/Shader.hpp b/src/include/kompute/Shader.hpp index 9fd1709be..9ecab24cd 100644 --- a/src/include/kompute/Shader.hpp +++ b/src/include/kompute/Shader.hpp @@ -39,7 +39,7 @@ class Shader * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ - static std::vector compile_sources( + static std::vector compileSources( const std::vector& sources, const 
std::vector& files = {}, const std::string& entryPoint = "main", @@ -60,7 +60,7 @@ class Shader * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ - static std::vector compile_source( + static std::vector compileSource( const std::string& source, const std::string& entryPoint = "main", std::vector> definitions = {}, diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index 195af44f4..0194e208f 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -27,6 +27,14 @@ class Tensor eHost = 1, ///< Type is host memory, source and destination eStorage = 2, ///< Type is Device memory (only) }; + enum class TensorDataTypes + { + eBool = 0, + eInt = 1, + eUnsignedInt = 2, + eFloat = 3, + eDouble = 4, + }; /** * Constructor with data provided which would be used to create the @@ -40,14 +48,17 @@ class Tensor */ Tensor(std::shared_ptr physicalDevice, std::shared_ptr device, - const std::vector& data, + void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const TensorDataTypes& dataType, const TensorTypes& tensorType = TensorTypes::eDevice); /** * Destructor which is in charge of freeing vulkan resources unless they * have been provided externally. */ - ~Tensor(); + virtual ~Tensor(); /** * Function to trigger reinitialisation of the tensor buffer and memory with @@ -56,8 +67,9 @@ class Tensor * @param data Vector of data to use to initialise vector from * @param tensorType The type to use for the tensor */ - void rebuild(const std::vector& data, - TensorTypes tensorType = TensorTypes::eDevice); + void rebuild(void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize); /** * Destroys and frees the GPU resources which include the buffer and memory. @@ -71,32 +83,6 @@ class Tensor */ bool isInit(); - /** - * Returns the vector of data currently contained by the Tensor. It is - * important to ensure that there is no out-of-sync data with the GPU - * memory. 
- * - * @return Reference to vector of elements representing the data in the - * tensor. - */ - std::vector& data(); - /** - * Overrides the subscript operator to expose the underlying data's - * subscript operator which in this case would be its underlying - * vector's. - * - * @param i The index where the element will be returned from. - * @return Returns the element in the position requested. - */ - float& operator[](int index); - /** - * Returns the size/magnitude of the Tensor, which will be the total number - * of elements across all dimensions - * - * @return Unsigned integer representing the total number of elements - */ - uint32_t size(); - /** * Retrieve the tensor type of the Tensor * @@ -104,12 +90,6 @@ class Tensor */ TensorTypes tensorType(); - /** - * Sets / resets the vector data of the tensor. This function does not - * perform any copies into GPU memory and is only performed on the host. - */ - void setData(const std::vector& data); - /** * Records a copy from the memory of the tensor provided to the current * thensor. This is intended to pass memory into a processing, to perform @@ -172,18 +152,118 @@ class Tensor * @return Descriptor buffer info with own buffer */ vk::DescriptorBufferInfo constructDescriptorBufferInfo(); + /** - * Maps data from the Host Visible GPU memory into the data vector. It - * requires the Tensor to be of staging type for it to work. + * Returns the size/magnitude of the Tensor, which will be the total number + * of elements across all dimensions + * + * @return Unsigned integer representing the total number of elements */ - void mapDataFromHostMemory(); + // TODO: move to cpp + uint32_t size() { + return this->mSize; + } + + // TODO: move to cpp + uint32_t dataTypeMemorySize() { + return this->mDataTypeMemorySize; + } + + // TODO: move to cpp + uint32_t memorySize() { + return this->mSize * this->mDataTypeMemorySize; + } + /** - * Maps data from the data vector into the Host Visible GPU memory. 
It - * requires the tensor to be of staging type for it to work. + * Retrieve the underlying data type of the Tensor + * + * @return Data type of tensor of type kp::Tensor::TensorDataTypes */ - void mapDataIntoHostMemory(); + TensorDataTypes dataType() { + return this->mDataType; + } + + void* rawData() { + return this->mRawData; + } + + // TODO: move to cpp + template + T* data() { + return (T*)this->mRawData; + } + + template + std::vector vector() { + return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; + } + + /** + * Sets / resets the vector data of the tensor. This function does not + * perform any copies into GPU memory and is only performed on the host. + */ + void setRawData(const void* data) + { + // Copy data + memcpy(this->mRawData, data, this->memorySize()); + } + + protected: + // -------------- ALWAYS OWNED RESOURCES + TensorTypes mTensorType; + TensorDataTypes mDataType; + uint32_t mSize; + uint32_t mDataTypeMemorySize; + void* mRawData; private: + void mapRawData() { + + KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); + + std::shared_ptr hostVisibleMemory = nullptr; + + if (this->mTensorType == TensorTypes::eHost) { + hostVisibleMemory = this->mPrimaryMemory; + } else if (this->mTensorType == TensorTypes::eDevice) { + hostVisibleMemory = this->mStagingMemory; + } else { + KP_LOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); + return; + } + + vk::DeviceSize bufferSize = this->memorySize(); + + // Given we request coherent host memory we don't need to invalidate / flush + this->mRawData = this->mDevice->mapMemory( + *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); + + vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize); + } + + void unmapRawData() { + + KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); + + std::shared_ptr hostVisibleMemory = nullptr; + + if (this->mTensorType == TensorTypes::eHost) { + hostVisibleMemory = this->mPrimaryMemory; + } else 
if (this->mTensorType == TensorTypes::eDevice) { + hostVisibleMemory = this->mStagingMemory; + } else { + KP_LOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); + return; + } + + vk::DeviceSize bufferSize = this->memorySize(); + vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); + this->mDevice->flushMappedMemoryRanges(1, &mappedRange); + this->mDevice->unmapMemory(*hostVisibleMemory); + } + // -------------- NEVER OWNED RESOURCES std::shared_ptr mPhysicalDevice; std::shared_ptr mDevice; @@ -198,11 +278,6 @@ class Tensor std::shared_ptr mStagingMemory; bool mFreeStagingMemory = false; - // -------------- ALWAYS OWNED RESOURCES - std::vector mData; - - TensorTypes mTensorType = TensorTypes::eDevice; - void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer void createBuffer(std::shared_ptr buffer, vk::BufferUsageFlags bufferUsageFlags); @@ -221,7 +296,60 @@ class Tensor vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags(); vk::BufferUsageFlags getStagingBufferUsageFlags(); vk::MemoryPropertyFlags getStagingMemoryPropertyFlags(); - uint64_t memorySize(); + +}; + +// TODO: Limit T to be only float, bool, double, etc +template +class TensorT: public Tensor +{ + + public: + TensorT(std::shared_ptr physicalDevice, + std::shared_ptr device, + const std::vector& data, + const TensorTypes& tensorType = TensorTypes::eDevice) + : Tensor(physicalDevice, + device, + (void*)data.data(), + data.size(), + sizeof(T), + this->dataType(), + tensorType) + { + KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size()); + } + + ~TensorT() { + KP_LOG_DEBUG("Kompute TensorT destructor"); + } + + T* data() { + return (T*)this->mRawData; + } + + std::vector vector() { + return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; + } + + T& operator[](int index) { + return *(((T*)this->mRawData) + index); + } + + void setData(const std::vector& data) { + + KP_LOG_DEBUG("Kompute TensorT setting data with data 
size {}", data.size()); + + if (data.size() != this->mSize) { + throw std::runtime_error( + "Kompute TensorT Cannot set data of different sizes"); + } + + Tensor::setRawData(data.data()); + } + + TensorDataTypes dataType(); + }; } // End namespace kp diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index b1919ce52..7feaaa30e 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -37,7 +37,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) } )"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::vector data(size, 0.0); std::vector resultSync(size, 100000000); @@ -73,7 +73,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) sq->eval(inputsSyncB); for (uint32_t i = 0; i < numParallel; i++) { - EXPECT_EQ(inputsSyncB[i]->data(), resultSync); + EXPECT_EQ(inputsSyncB[i]->vector(), resultSync); } kp::Manager mgrAsync(0, { 0, 2 }); @@ -111,7 +111,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) sq->eval({ inputsAsyncB }); for (uint32_t i = 0; i < numParallel; i++) { - EXPECT_EQ(inputsAsyncB[i]->data(), resultAsync); + EXPECT_EQ((inputsAsyncB[i]->vector()), resultAsync); } // The speedup should be at least 40% @@ -145,15 +145,15 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) } )"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::vector data(size, 0.0); std::vector resultAsync(size, 100000000); kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor(data); - std::shared_ptr tensorB = mgr.tensor(data); + std::shared_ptr> tensorA = mgr.tensor(data); + std::shared_ptr> tensorB = mgr.tensor(data); std::shared_ptr sq1 = mgr.sequence(); std::shared_ptr sq2 = mgr.sequence(); @@ -172,6 +172,6 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) sq1->evalAsync({ tensorA, tensorB }); sq1->evalAwait(); - EXPECT_EQ(tensorA->data(), resultAsync); - 
EXPECT_EQ(tensorB->data(), resultAsync); + EXPECT_EQ(tensorA->vector(), resultAsync); + EXPECT_EQ(tensorB->vector(), resultAsync); } diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp index fee3854c4..defd40998 100644 --- a/test/TestDestroy.cpp +++ b/test/TestDestroy.cpp @@ -5,9 +5,9 @@ TEST(TestDestroy, TestDestroyTensorSingle) { - std::shared_ptr tensorA = nullptr; + std::shared_ptr> tensorA = nullptr; - std::string shader(R"( + std::string shader(R"( #version 450 layout (local_size_x = 1) in; layout(set = 0, binding = 0) buffer a { float pa[]; }; @@ -16,7 +16,7 @@ TEST(TestDestroy, TestDestroyTensorSingle) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); { std::shared_ptr sq = nullptr; @@ -34,18 +34,19 @@ TEST(TestDestroy, TestDestroyTensorSingle) ->eval() ->eval(algo->getTensors()); + EXPECT_EQ(tensorA->vector(), std::vector({ 1, 1, 1 })); + tensorA->destroy(); EXPECT_FALSE(tensorA->isInit()); } EXPECT_FALSE(tensorA->isInit()); } - EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); } TEST(TestDestroy, TestDestroyTensorVector) { - std::shared_ptr tensorA = nullptr; - std::shared_ptr tensorB = nullptr; + std::shared_ptr> tensorA = nullptr; + std::shared_ptr> tensorB = nullptr; std::string shader(R"( #version 450 @@ -57,7 +58,7 @@ TEST(TestDestroy, TestDestroyTensorVector) pa[index] = pa[index] + 1; pb[index] = pb[index] + 2; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); { std::shared_ptr sq = nullptr; @@ -77,6 +78,9 @@ TEST(TestDestroy, TestDestroyTensorVector) ->record(algo->getTensors()) ->eval(); + EXPECT_EQ(tensorA->vector(), std::vector({ 2, 2, 2 })); + EXPECT_EQ(tensorB->vector(), std::vector({ 3, 3, 3 })); + tensorA->destroy(); tensorB->destroy(); @@ -84,13 +88,11 @@ TEST(TestDestroy, TestDestroyTensorVector) EXPECT_FALSE(tensorB->isInit()); } } - EXPECT_EQ(tensorA->data(), 
std::vector({ 2, 2, 2 })); - EXPECT_EQ(tensorB->data(), std::vector({ 3, 3, 3 })); } TEST(TestDestroy, TestDestroySequenceSingle) { - std::shared_ptr tensorA = nullptr; + std::shared_ptr> tensorA = nullptr; std::string shader(R"( #version 450 @@ -101,7 +103,7 @@ TEST(TestDestroy, TestDestroySequenceSingle) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); { std::shared_ptr sq = nullptr; @@ -121,7 +123,8 @@ TEST(TestDestroy, TestDestroySequenceSingle) sq->destroy(); EXPECT_FALSE(sq->isInit()); + + EXPECT_EQ(tensorA->vector(), std::vector({ 1, 1, 1 })); } } - EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); } diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index 980273246..a4402637f 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -14,19 +14,19 @@ TEST(TestLogisticRegression, TestMainLogisticRegression) { kp::Manager mgr; - std::shared_ptr xI = mgr.tensor({ 0, 1, 1, 1, 1 }); - std::shared_ptr xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); + std::shared_ptr> xI = mgr.tensor({ 0, 1, 1, 1, 1 }); + std::shared_ptr> xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr y = mgr.tensor({ 0, 0, 0, 1, 1 }); + std::shared_ptr> y = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr wIn = mgr.tensor({ 0.001, 0.001 }); - std::shared_ptr wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> wIn = mgr.tensor({ 0.001, 0.001 }); + std::shared_ptr> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr bIn = mgr.tensor({ 0 }); - std::shared_ptr bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> bIn = mgr.tensor({ 0 }); + std::shared_ptr> bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); std::vector> params 
= { xI, xJ, y, wIn, wOutI, wOutJ, @@ -88,21 +88,21 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) { kp::Manager mgr; - std::shared_ptr xI = mgr.tensor({ 0, 1, 1, 1, 1 }); - std::shared_ptr xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); + std::shared_ptr> xI = mgr.tensor({ 0, 1, 1, 1, 1 }); + std::shared_ptr> xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr y = mgr.tensor({ 0, 0, 0, 1, 1 }); + std::shared_ptr> y = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr wIn = + std::shared_ptr> wIn = mgr.tensor({ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost); - std::shared_ptr wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr bIn = + std::shared_ptr> bIn = mgr.tensor({ 0 }, kp::Tensor::TensorTypes::eHost); - std::shared_ptr bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); std::vector> params = { xI, xJ, y, wIn, wOutI, wOutJ, @@ -136,8 +136,6 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) wIn->data()[1] -= learningRate * wOutJ->data()[j]; bIn->data()[0] -= learningRate * bOut->data()[j]; } - wIn->mapDataIntoHostMemory(); - bIn->mapDataIntoHostMemory(); } // Based on the inputs the outputs should be at least: diff --git a/test/TestManager.cpp b/test/TestManager.cpp index ce055ff8c..f759208aa 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -7,9 +7,9 @@ TEST(TestManager, EndToEndOpMultEvalFlow) { kp::Manager mgr; - std::shared_ptr tensorLHS = mgr.tensor({ 0, 1, 2 }); - std::shared_ptr tensorRHS = mgr.tensor({ 2, 4, 6 }); - std::shared_ptr tensorOutput = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorLHS = mgr.tensor({ 0, 1, 2 }); + std::shared_ptr> tensorRHS = mgr.tensor({ 2, 4, 6 }); 
+ std::shared_ptr> tensorOutput = mgr.tensor({ 0, 0, 0 }); std::vector> params = { tensorLHS, tensorRHS, @@ -20,16 +20,16 @@ TEST(TestManager, EndToEndOpMultEvalFlow) ->eval(params, mgr.algorithm()) ->eval(params); - EXPECT_EQ(tensorOutput->data(), std::vector({ 0, 4, 12 })); + EXPECT_EQ(tensorOutput->vector(), std::vector({ 0, 4, 12 })); } TEST(TestManager, EndToEndOpMultSeqFlow) { kp::Manager mgr; - std::shared_ptr tensorLHS = mgr.tensor({ 0, 1, 2 }); - std::shared_ptr tensorRHS = mgr.tensor({ 2, 4, 6 }); - std::shared_ptr tensorOutput = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorLHS = mgr.tensor({ 0, 1, 2 }); + std::shared_ptr> tensorRHS = mgr.tensor({ 2, 4, 6 }); + std::shared_ptr> tensorOutput = mgr.tensor({ 0, 0, 0 }); std::vector> params = { tensorLHS, tensorRHS, @@ -41,16 +41,16 @@ TEST(TestManager, EndToEndOpMultSeqFlow) ->record(params) ->eval(); - EXPECT_EQ(tensorOutput->data(), std::vector({ 0, 4, 12 })); + EXPECT_EQ(tensorOutput->vector(), std::vector({ 0, 4, 12 })); } TEST(TestManager, TestMultipleSequences) { kp::Manager mgr; - std::shared_ptr tensorLHS = mgr.tensor({ 0, 1, 2 }); - std::shared_ptr tensorRHS = mgr.tensor({ 2, 4, 6 }); - std::shared_ptr tensorOutput = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorLHS = mgr.tensor({ 0, 1, 2 }); + std::shared_ptr> tensorRHS = mgr.tensor({ 2, 4, 6 }); + std::shared_ptr> tensorOutput = mgr.tensor({ 0, 0, 0 }); std::vector> params = { tensorLHS, tensorRHS, @@ -60,5 +60,5 @@ TEST(TestManager, TestMultipleSequences) mgr.sequence()->eval(params, mgr.algorithm()); mgr.sequence()->eval(params); - EXPECT_EQ(tensorOutput->data(), std::vector({ 0, 4, 12 })); + EXPECT_EQ(tensorOutput->vector(), std::vector({ 0, 4, 12 })); } diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index b94591308..f9e066f47 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -8,10 +8,12 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) 
kp::Manager mgr; + // Default tensor constructor simplifies creation of float values auto tensorInA = mgr.tensor({ 2., 2., 2. }); auto tensorInB = mgr.tensor({ 1., 2., 3. }); - auto tensorOutA = mgr.tensor({ 0., 0., 0. }); - auto tensorOutB = mgr.tensor({ 0., 0., 0. }); + // Explicit type constructor supports int, in32, double, float and int + auto tensorOutA = mgr.tensorT({ 0, 0, 0 }); + auto tensorOutB = mgr.tensorT({ 0, 0, 0 }); std::string shader = (R"( #version 450 @@ -21,8 +23,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) // The input tensors bind index is relative to index in parameter passed layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; }; layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; }; - layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; }; - layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; }; + layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; }; + layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; }; // Kompute supports push constants updated on dispatch layout(push_constant) uniform PushConstants { @@ -34,8 +36,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) void main() { uint index = gl_GlobalInvocationID.x; - out_a[index] += in_a[index] * in_b[index]; - out_b[index] += const_one * push_const.val; + out_a[index] += uint( in_a[index] * in_b[index] ); + out_b[index] += uint( const_one * push_const.val ); } )"); @@ -49,7 +51,7 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) kp::Constants pushConstsB({ 3.0 }); auto algorithm = mgr.algorithm( - params, kp::Shader::compile_source(shader), workgroup, specConsts, pushConstsA); + params, kp::Shader::compileSource(shader), workgroup, specConsts, pushConstsA); // 3. 
Run operation with string shader synchronously mgr.sequence() @@ -64,8 +66,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) sq->evalAwait(); - EXPECT_EQ(tensorOutA->data(), std::vector({ 4, 8, 12 })); - EXPECT_EQ(tensorOutB->data(), std::vector({ 10, 10, 10 })); + EXPECT_EQ(tensorOutA->vector(), std::vector({ 4, 8, 12 })); + EXPECT_EQ(tensorOutB->vector(), std::vector({ 10, 10, 10 })); } TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) @@ -73,7 +75,7 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 0, 0, 0 }); std::string shader(R"( #version 450 @@ -84,7 +86,7 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); { mgr.sequence() @@ -96,14 +98,14 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) ->eval(); } - EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); } TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) { kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 0, 0, 0 }); std::string shader(R"( #version 450 @@ -114,7 +116,7 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr algorithm = mgr.algorithm({ tensorA }, spirv); @@ -131,7 +133,7 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) mgr.sequence()->record({ tensorA })->eval(); - EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); } TEST(TestMultipleAlgoExecutions, MultipleSequences) @@ -139,7 +141,7 @@ TEST(TestMultipleAlgoExecutions, 
MultipleSequences) kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 0, 0, 0 }); std::string shader(R"( #version 450 @@ -150,7 +152,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr algorithm = mgr.algorithm({ tensorA }, spirv); @@ -167,14 +169,14 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) sq->record({ tensorA })->eval(); - EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); } TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) { kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 0, 0, 0 }); std::string shader(R"( #version 450 @@ -185,7 +187,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr algorithm = mgr.algorithm({ tensorA }, spirv); @@ -198,43 +200,6 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) sq->record({ tensorA })->eval(); - EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); } -TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope) -{ - std::shared_ptr tensorA = nullptr; - - { - std::shared_ptr sq = nullptr; - { - kp::Manager mgr; - - tensorA = mgr.tensor({ 0, 0, 0 }); - - std::string shader(R"( - #version 450 - layout (local_size_x = 1) in; - layout(set = 0, binding = 0) buffer a { float pa[]; }; - void main() { - uint index = gl_GlobalInvocationID.x; - pa[index] = pa[index] + 1; - })"); - - std::vector spirv = kp::Shader::compile_source(shader); - - std::shared_ptr algorithm = - mgr.algorithm({ tensorA }, spirv); - - sq 
= mgr.sequence(); - - sq->record({ tensorA })->eval(); - - sq->record(algorithm)->eval()->eval()->eval(); - - sq->record({ tensorA })->eval(); - } - } - - EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); -} diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index 3e6856a21..a1f8eda99 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -9,8 +9,8 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor) { kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 3, 4, 5 }); - std::shared_ptr tensorB = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 3, 4, 5 }); + std::shared_ptr> tensorB = mgr.tensor({ 0, 0, 0 }); std::string shader(R"( #version 450 @@ -27,7 +27,7 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor) } )"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::vector> params = { tensorA, tensorB }; @@ -36,16 +36,16 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor) ->eval(mgr.algorithm(params, spirv)) ->eval(params); - EXPECT_EQ(tensorA->data(), std::vector({ 0, 1, 2 })); - EXPECT_EQ(tensorB->data(), std::vector({ 3, 4, 5 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 0, 1, 2 })); + EXPECT_EQ(tensorB->vector(), std::vector({ 3, 4, 5 })); } TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor) { kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 3, 4, 5 }); - std::shared_ptr tensorB = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 3, 4, 5 }); + std::shared_ptr> tensorB = mgr.tensor({ 0, 0, 0 }); std::vector spirv = std::vector( (uint32_t*) @@ -62,8 +62,8 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor) ->eval(mgr.algorithm(params, spirv)) ->eval(params); - EXPECT_EQ(tensorA->data(), std::vector({ 0, 1, 2 })); - EXPECT_EQ(tensorB->data(), std::vector({ 3, 4, 5 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 0, 1, 2 })); 
+ EXPECT_EQ(tensorB->vector(), std::vector({ 3, 4, 5 })); } // TODO: Add support to read from file for shader @@ -71,8 +71,8 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor) //{ // kp::Manager mgr; // -// std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; -// std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; +// std::shared_ptr> tensorA{ new kp::Tensor({ 3, 4, 5 }) }; +// std::shared_ptr> tensorB{ new kp::Tensor({ 0, 0, 0 }) }; // mgr.rebuild({ tensorA, tensorB }); // // mgr.evalOpDefault( @@ -81,6 +81,6 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor) // // mgr.evalOpDefault({ tensorA, tensorB }); // -// EXPECT_EQ(tensorA->data(), std::vector({ 0, 1, 2 })); -// EXPECT_EQ(tensorB->data(), std::vector({ 3, 4, 5 })); +// EXPECT_EQ(tensorA->vector(), std::vector({ 0, 1, 2 })); +// EXPECT_EQ(tensorB->vector(), std::vector({ 3, 4, 5 })); //} diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp index 85e0b545b..6978eeeea 100644 --- a/test/TestOpTensorCopy.cpp +++ b/test/TestOpTensorCopy.cpp @@ -11,8 +11,8 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) std::vector testVecA{ 1, 2, 3 }; std::vector testVecB{ 0, 0, 0 }; - std::shared_ptr tensorA = mgr.tensor(testVecA); - std::shared_ptr tensorB = mgr.tensor(testVecB); + std::shared_ptr> tensorA = mgr.tensor(testVecA); + std::shared_ptr> tensorB = mgr.tensor(testVecB); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -22,8 +22,8 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) ->eval({ tensorA, tensorB }) ->eval({ tensorA, tensorB }); - // Making sure the GPU holds the same data - EXPECT_EQ(tensorA->data(), tensorB->data()); + // Making sure the GPU holds the same vector + EXPECT_EQ(tensorA->vector(), tensorB->vector()); } TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) @@ -35,9 +35,9 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) std::vector testVecB{ 0, 0, 0 }; std::vector testVecC{ 0, 0, 0 }; - std::shared_ptr tensorA = 
mgr.tensor(testVecA); - std::shared_ptr tensorB = mgr.tensor(testVecB); - std::shared_ptr tensorC = mgr.tensor(testVecC); + std::shared_ptr> tensorA = mgr.tensor(testVecA); + std::shared_ptr> tensorB = mgr.tensor(testVecB); + std::shared_ptr> tensorC = mgr.tensor(testVecC); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -47,14 +47,14 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) ->eval({ tensorA, tensorB, tensorC }) ->eval({ tensorA, tensorB, tensorC }); - EXPECT_EQ(tensorA->data(), tensorB->data()); - EXPECT_EQ(tensorA->data(), tensorC->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); + EXPECT_EQ(tensorA->vector(), tensorC->vector()); - // Making sure the GPU holds the same data + // Making sure the GPU holds the same vector mgr.sequence()->eval({ tensorB, tensorC }); - EXPECT_EQ(tensorA->data(), tensorB->data()); - EXPECT_EQ(tensorA->data(), tensorC->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); + EXPECT_EQ(tensorA->vector(), tensorC->vector()); } TEST(TestOpTensorCopy, CopyDeviceToHostTensor) @@ -65,8 +65,8 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) std::vector testVecA{ 3, 4, 5 }; std::vector testVecB{ 0, 0, 0 }; - std::shared_ptr tensorA = mgr.tensor(testVecA); - std::shared_ptr tensorB = + std::shared_ptr> tensorA = mgr.tensor(testVecA); + std::shared_ptr> tensorB = mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost); // Only calling sync on device type tensor @@ -77,11 +77,11 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) mgr.sequence()->eval({ tensorA, tensorB }); - EXPECT_EQ(tensorA->data(), tensorB->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); - // Making sure the GPU holds the same data + // Making sure the GPU holds the same vector mgr.sequence()->eval({ tensorB }); - EXPECT_EQ(tensorA->data(), tensorB->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); } TEST(TestOpTensorCopy, CopyHostToDeviceTensor) @@ -92,9 +92,9 @@ TEST(TestOpTensorCopy, 
CopyHostToDeviceTensor) std::vector testVecA{ 4, 5, 6 }; std::vector testVecB{ 0, 0, 0 }; - std::shared_ptr tensorA = + std::shared_ptr> tensorA = mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost); - std::shared_ptr tensorB = mgr.tensor(testVecB); + std::shared_ptr> tensorB = mgr.tensor(testVecB); // Only calling sync on device type tensor mgr.sequence()->eval({ tensorA, tensorB }); @@ -104,11 +104,11 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) mgr.sequence()->eval({ tensorA, tensorB }); - EXPECT_EQ(tensorA->data(), tensorB->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); - // Making sure the GPU holds the same data + // Making sure the GPU holds the same vector mgr.sequence()->eval({ tensorB }); - EXPECT_EQ(tensorA->data(), tensorB->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); } TEST(TestOpTensorCopy, CopyHostToHostTensor) @@ -119,9 +119,9 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) std::vector testVecA{ 5, 6, 7 }; std::vector testVecB{ 0, 0, 0 }; - std::shared_ptr tensorA = + std::shared_ptr> tensorA = mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost); - std::shared_ptr tensorB = + std::shared_ptr> tensorB = mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost); EXPECT_TRUE(tensorA->isInit()); @@ -131,11 +131,11 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) ->eval({ tensorA }) ->eval({ tensorA, tensorB }); - EXPECT_EQ(tensorA->data(), tensorB->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); - // Making sure the GPU holds the same data + // Making sure the GPU holds the same vector mgr.sequence()->eval({ tensorB }); - EXPECT_EQ(tensorA->data(), tensorB->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); } TEST(TestOpTensorCopy, SingleTensorShouldFail) @@ -145,7 +145,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail) std::vector testVecA{ 6, 7, 8 }; - std::shared_ptr tensorA = + std::shared_ptr> tensorA = mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost); EXPECT_TRUE(tensorA->isInit()); diff --git 
a/test/TestOpTensorCreate.cpp b/test/TestOpTensorCreate.cpp index 14153427e..7ba1be615 100644 --- a/test/TestOpTensorCreate.cpp +++ b/test/TestOpTensorCreate.cpp @@ -6,7 +6,7 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp) { std::vector testVecA{ 9, 8, 7 }; - std::shared_ptr tensorA = nullptr; + std::shared_ptr> tensorA = nullptr; { kp::Manager mgr; @@ -15,7 +15,7 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp) EXPECT_TRUE(tensorA->isInit()); - EXPECT_EQ(tensorA->data(), testVecA); + EXPECT_EQ(tensorA->vector(), testVecA); } EXPECT_FALSE(tensorA->isInit()); @@ -29,11 +29,11 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore) kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor(testVecA); - std::shared_ptr tensorB = mgr.tensor(testVecB); + std::shared_ptr> tensorA = mgr.tensor(testVecA); + std::shared_ptr> tensorB = mgr.tensor(testVecB); - EXPECT_EQ(tensorA->data(), testVecA); - EXPECT_EQ(tensorB->data(), testVecB); + EXPECT_EQ(tensorA->vector(), testVecA); + EXPECT_EQ(tensorB->vector(), testVecB); tensorA->destroy(); tensorB->destroy(); @@ -49,7 +49,7 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor) kp::Manager mgr; try { - std::shared_ptr tensorA = mgr.tensor(testVecA); + std::shared_ptr> tensorA = mgr.tensor(testVecA); } catch (const std::runtime_error& err) { // check exception ASSERT_TRUE(std::string(err.what()).find("zero-sized") != diff --git a/test/TestOpTensorSync.cpp b/test/TestOpTensorSync.cpp index 55e02ad13..02271c618 100644 --- a/test/TestOpTensorSync.cpp +++ b/test/TestOpTensorSync.cpp @@ -11,7 +11,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor) std::vector testVecPreA{ 0, 0, 0 }; std::vector testVecPostA{ 9, 8, 7 }; - std::shared_ptr tensorA = mgr.tensor(testVecPreA); + std::shared_ptr> tensorA = mgr.tensor(testVecPreA); EXPECT_TRUE(tensorA->isInit()); @@ -21,7 +21,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor) mgr.sequence()->eval({ tensorA }); - EXPECT_EQ(tensorA->data(), testVecPostA); + 
EXPECT_EQ(tensorA->vector(), testVecPostA); } TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor) @@ -31,9 +31,9 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor) std::vector testVec{ 9, 8, 7 }; - std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); - std::shared_ptr tensorB = mgr.tensor({ 0, 0, 0 }); - std::shared_ptr tensorC = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorB = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorC = mgr.tensor({ 0, 0, 0 }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -47,7 +47,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor) mgr.sequence()->eval({ tensorA, tensorB, tensorC }); - EXPECT_EQ(tensorA->data(), testVec); - EXPECT_EQ(tensorB->data(), testVec); - EXPECT_EQ(tensorC->data(), testVec); + EXPECT_EQ(tensorA->vector(), testVec); + EXPECT_EQ(tensorB->vector(), testVec); + EXPECT_EQ(tensorC->vector(), testVec); } diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp index f51f8cc42..9599596ed 100644 --- a/test/TestPushConstant.cpp +++ b/test/TestPushConstant.cpp @@ -22,14 +22,14 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride) pa[2] += pcs.z; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr sq = nullptr; { kp::Manager mgr; - std::shared_ptr tensor = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensor = mgr.tensor({ 0, 0, 0 }); std::shared_ptr algo = mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0, 0.0, 0.0 }); @@ -42,7 +42,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride) sq->eval(algo, kp::Constants{ 0.3, 0.2, 0.1 }); sq->eval({ tensor }); - EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 })); + EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 })); } } } @@ -65,14 +65,14 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride) pa[2] += pcs.z; })"); - std::vector spirv = 
kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr sq = nullptr; { kp::Manager mgr; - std::shared_ptr tensor = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensor = mgr.tensor({ 0, 0, 0 }); std::shared_ptr algo = mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.1, 0.2, 0.3 }); @@ -85,7 +85,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride) sq->eval(algo, kp::Constants{ 0.3, 0.2, 0.1 }); sq->eval({ tensor }); - EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 })); + EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 })); } } } @@ -108,14 +108,14 @@ TEST(TestPushConstants, TestConstantsWrongSize) pa[2] += pcs.z; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr sq = nullptr; { kp::Manager mgr; - std::shared_ptr tensor = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensor = mgr.tensor({ 0, 0, 0 }); std::shared_ptr algo = mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 }); diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp index b8afd1ad6..ca3b9a485 100644 --- a/test/TestSequence.cpp +++ b/test/TestSequence.cpp @@ -60,13 +60,13 @@ TEST(TestSequence, RerecordSequence) std::shared_ptr sq = mgr.sequence(); - std::shared_ptr tensorA = mgr.tensor({1, 2, 3}); - std::shared_ptr tensorB = mgr.tensor({2, 2, 2}); - std::shared_ptr tensorOut = mgr.tensor({0, 0, 0}); + std::shared_ptr> tensorA = mgr.tensor({1, 2, 3}); + std::shared_ptr> tensorB = mgr.tensor({2, 2, 2}); + std::shared_ptr> tensorOut = mgr.tensor({0, 0, 0}); sq->eval({ tensorA, tensorB, tensorOut }); - std::vector spirv = kp::Shader::compile_source(R"( + std::vector spirv = kp::Shader::compileSource(R"( #version 450 layout (local_size_x = 1) in; @@ -90,7 +90,7 @@ TEST(TestSequence, RerecordSequence) sq->eval(); - EXPECT_EQ(tensorOut->data(), std::vector({2, 4, 6})); + EXPECT_EQ(tensorOut->vector(), 
std::vector({2, 4, 6})); algo->rebuild({tensorOut, tensorA, tensorB}, spirv); @@ -98,7 +98,7 @@ TEST(TestSequence, RerecordSequence) sq->rerecord(); sq->eval(); - EXPECT_EQ(tensorB->data(), std::vector({2, 8, 18})); + EXPECT_EQ(tensorB->vector(), std::vector({2, 8, 18})); } @@ -117,7 +117,7 @@ TEST(TestSequence, SequenceTimestamps) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); auto seq = mgr.sequence(0, 100); //100 timestamps seq->record({ tensorA }) diff --git a/test/TestShaderResources.cpp b/test/TestShaderResources.cpp index b0013ef80..536f4ca0c 100644 --- a/test/TestShaderResources.cpp +++ b/test/TestShaderResources.cpp @@ -25,7 +25,7 @@ static const std::string shaderString = (R"( )"); void compileShaderWithGivenResources(const std::string shaderString, const TBuiltInResource resources) { - kp::Shader::compile_source(shaderString, std::string("main"), std::vector>({}), resources); + kp::Shader::compileSource(shaderString, std::string("main"), std::vector>({}), resources); } diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp index e66f9d52e..fe40fb5ea 100644 --- a/test/TestSpecializationConstant.cpp +++ b/test/TestSpecializationConstant.cpp @@ -18,15 +18,15 @@ TEST(TestSpecializationConstants, TestTwoConstants) pb[index] = cTwo; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr sq = nullptr; { kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); - std::shared_ptr tensorB = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorB = mgr.tensor({ 0, 0, 0 }); std::vector> params = { tensorA, tensorB }; @@ -42,8 +42,8 @@ TEST(TestSpecializationConstants, TestTwoConstants) ->record(params) ->eval(); - EXPECT_EQ(tensorA->data(), std::vector({ 5, 5, 5 })); - 
EXPECT_EQ(tensorB->data(), std::vector({ 0.3, 0.3, 0.3 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 5, 5, 5 })); + EXPECT_EQ(tensorB->vector(), std::vector({ 0.3, 0.3, 0.3 })); } } } diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp index d33367722..c267024db 100644 --- a/test/TestTensor.cpp +++ b/test/TestTensor.cpp @@ -7,7 +7,7 @@ TEST(TestTensor, ConstructorData) { kp::Manager mgr; std::vector vec{ 0, 1, 2 }; - std::shared_ptr tensor = mgr.tensor(vec); + std::shared_ptr> tensor = mgr.tensor(vec); EXPECT_EQ(tensor->size(), vec.size()); - EXPECT_EQ(tensor->data(), vec); + EXPECT_EQ(tensor->vector(), vec); } diff --git a/test/TestWorkgroup.cpp b/test/TestWorkgroup.cpp index 3eb9147a1..8836840a6 100644 --- a/test/TestWorkgroup.cpp +++ b/test/TestWorkgroup.cpp @@ -7,8 +7,8 @@ TEST(TestWorkgroup, TestSimpleWorkgroup) { - std::shared_ptr tensorA = nullptr; - std::shared_ptr tensorB = nullptr; + std::shared_ptr> tensorA = nullptr; + std::shared_ptr> tensorB = nullptr; { std::shared_ptr sq = nullptr; @@ -39,29 +39,29 @@ TEST(TestWorkgroup, TestSimpleWorkgroup) sq->record(algorithm); sq->record(params); sq->eval(); + + std::vector expectedA = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15 + }; + + std::vector expectedB = { + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, + 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, + 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, + 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, + 6, 7, 0, 
1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 + }; + + EXPECT_EQ(tensorA->vector(), expectedA); + EXPECT_EQ(tensorB->vector(), expectedB); } } - - std::vector expectedA = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, - 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, - 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, - 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15 - }; - - std::vector expectedB = { - 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, - 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, - 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, - 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, - 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, - 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 - }; - - EXPECT_EQ(tensorA->data(), expectedA); - EXPECT_EQ(tensorB->data(), expectedB); }