From fa5dc43b443f41c6438766036fc8b58aa3bfadcc Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 6 Mar 2021 12:02:08 +0000 Subject: [PATCH 01/16] Updated compile_shader to compileShader --- examples/array_multiplication/src/Main.cpp | 2 +- .../kompute_summator/KomputeSummatorNode.cpp | 2 +- .../gdnative_shared/src/KomputeSummator.cpp | 2 +- single_include/kompute/Kompute.hpp | 4 ++-- src/Shader.cpp | 6 +++--- src/include/kompute/Shader.hpp | 4 ++-- test/TestAsyncOperations.cpp | 4 ++-- test/TestDestroy.cpp | 6 +++--- test/TestMultipleAlgoExecutions.cpp | 12 ++++++------ test/TestOpShadersFromStringAndFile.cpp | 2 +- test/TestPushConstant.cpp | 6 +++--- test/TestSequence.cpp | 2 +- test/TestShaderResources.cpp | 2 +- test/TestSpecializationConstant.cpp | 2 +- 14 files changed, 28 insertions(+), 28 deletions(-) diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp index fd823bca8..acb76898c 100755 --- a/examples/array_multiplication/src/Main.cpp +++ b/examples/array_multiplication/src/Main.cpp @@ -39,7 +39,7 @@ int main() std::vector> params = { tensorInA, tensorInB, tensorOut }; - std::shared_ptr algo = mgr.algorithm(params, kp::Shader::compile_source(shader)); + std::shared_ptr algo = mgr.algorithm(params, kp::Shader::compileSource(shader)); mgr.sequence() ->record(params) diff --git a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp index f50c56d5c..e901ef816 100644 --- a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp +++ b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp @@ -54,7 +54,7 @@ void KomputeSummatorNode::_init() { std::shared_ptr algo = mgr.algorithm( { this->mPrimaryTensor, this->mSecondaryTensor }, - kp::Shader::compile_source(shader)); + kp::Shader::compileSource(shader)); // First we ensure secondary tensor loads to GPU diff 
--git a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp index ece095c8e..99aabb338 100644 --- a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp +++ b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp @@ -58,7 +58,7 @@ void KomputeSummator::_init() { // Then we run the operation with both tensors this->mSequence->record( { this->mPrimaryTensor, this->mSecondaryTensor }, - kp::Shader::compile_source(shader)); + kp::Shader::compileSource(shader)); // We map the result back to local this->mSequence->record( diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 38213bb6e..593390dbe 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -762,7 +762,7 @@ class Shader * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ - static std::vector compile_sources( + static std::vector compileSources( const std::vector& sources, const std::vector& files = {}, const std::string& entryPoint = "main", @@ -783,7 +783,7 @@ class Shader * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ - static std::vector compile_source( + static std::vector compileSource( const std::string& source, const std::string& entryPoint = "main", std::vector> definitions = {}, diff --git a/src/Shader.cpp b/src/Shader.cpp index 968e53234..bedac0165 100644 --- a/src/Shader.cpp +++ b/src/Shader.cpp @@ -5,7 +5,7 @@ namespace kp { std::vector -Shader::compile_sources( +Shader::compileSources( const std::vector& sources, const std::vector& files, const std::string& entryPoint, @@ -92,13 +92,13 @@ Shader::compile_sources( } std::vector -Shader::compile_source( +Shader::compileSource( const std::string& source, const std::string& entryPoint, std::vector> definitions, const TBuiltInResource& resource) { - return compile_sources({ source }, + return compileSources({ source }, 
std::vector({}), entryPoint, definitions, diff --git a/src/include/kompute/Shader.hpp b/src/include/kompute/Shader.hpp index 9fd1709be..9ecab24cd 100644 --- a/src/include/kompute/Shader.hpp +++ b/src/include/kompute/Shader.hpp @@ -39,7 +39,7 @@ class Shader * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ - static std::vector compile_sources( + static std::vector compileSources( const std::vector& sources, const std::vector& files = {}, const std::string& entryPoint = "main", @@ -60,7 +60,7 @@ class Shader * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ - static std::vector compile_source( + static std::vector compileSource( const std::string& source, const std::string& entryPoint = "main", std::vector> definitions = {}, diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index b1919ce52..2f8c7d819 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -37,7 +37,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) } )"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::vector data(size, 0.0); std::vector resultSync(size, 100000000); @@ -145,7 +145,7 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) } )"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::vector data(size, 0.0); std::vector resultAsync(size, 100000000); diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp index fee3854c4..0b948d64f 100644 --- a/test/TestDestroy.cpp +++ b/test/TestDestroy.cpp @@ -16,7 +16,7 @@ TEST(TestDestroy, TestDestroyTensorSingle) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); { std::shared_ptr sq = nullptr; @@ -57,7 +57,7 @@ TEST(TestDestroy, TestDestroyTensorVector) pa[index] = pa[index] + 1; pb[index] = 
pb[index] + 2; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); { std::shared_ptr sq = nullptr; @@ -101,7 +101,7 @@ TEST(TestDestroy, TestDestroySequenceSingle) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); { std::shared_ptr sq = nullptr; diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index b94591308..63dd5f7fe 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -49,7 +49,7 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) kp::Constants pushConstsB({ 3.0 }); auto algorithm = mgr.algorithm( - params, kp::Shader::compile_source(shader), workgroup, specConsts, pushConstsA); + params, kp::Shader::compileSource(shader), workgroup, specConsts, pushConstsA); // 3. Run operation with string shader synchronously mgr.sequence() @@ -84,7 +84,7 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); { mgr.sequence() @@ -114,7 +114,7 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr algorithm = mgr.algorithm({ tensorA }, spirv); @@ -150,7 +150,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr algorithm = mgr.algorithm({ tensorA }, spirv); @@ -185,7 +185,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = 
kp::Shader::compileSource(shader); std::shared_ptr algorithm = mgr.algorithm({ tensorA }, spirv); @@ -221,7 +221,7 @@ TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr algorithm = mgr.algorithm({ tensorA }, spirv); diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index 3e6856a21..e766c8efb 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -27,7 +27,7 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor) } )"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::vector> params = { tensorA, tensorB }; diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp index f51f8cc42..b37fe4d72 100644 --- a/test/TestPushConstant.cpp +++ b/test/TestPushConstant.cpp @@ -22,7 +22,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride) pa[2] += pcs.z; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr sq = nullptr; @@ -65,7 +65,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride) pa[2] += pcs.z; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr sq = nullptr; @@ -108,7 +108,7 @@ TEST(TestPushConstants, TestConstantsWrongSize) pa[2] += pcs.z; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr sq = nullptr; diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp index b8afd1ad6..7d70a477b 100644 --- a/test/TestSequence.cpp +++ b/test/TestSequence.cpp @@ -66,7 +66,7 @@ TEST(TestSequence, RerecordSequence) sq->eval({ tensorA, tensorB, tensorOut }); - 
std::vector spirv = kp::Shader::compile_source(R"( + std::vector spirv = kp::Shader::compileSource(R"( #version 450 layout (local_size_x = 1) in; diff --git a/test/TestShaderResources.cpp b/test/TestShaderResources.cpp index b0013ef80..536f4ca0c 100644 --- a/test/TestShaderResources.cpp +++ b/test/TestShaderResources.cpp @@ -25,7 +25,7 @@ static const std::string shaderString = (R"( )"); void compileShaderWithGivenResources(const std::string shaderString, const TBuiltInResource resources) { - kp::Shader::compile_source(shaderString, std::string("main"), std::vector>({}), resources); + kp::Shader::compileSource(shaderString, std::string("main"), std::vector>({}), resources); } diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp index e66f9d52e..2c6e284d2 100644 --- a/test/TestSpecializationConstant.cpp +++ b/test/TestSpecializationConstant.cpp @@ -18,7 +18,7 @@ TEST(TestSpecializationConstants, TestTwoConstants) pb[index] = cTwo; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr sq = nullptr; From f569bae998a8805f23f8ca7d53ec2c4c9b142dce Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 6 Mar 2021 12:02:26 +0000 Subject: [PATCH 02/16] Updated python docstrings --- python/src/docstrings.hpp | 145 ++++++++++++++++++++++++-------------- python/src/main.cpp | 137 +++++++++++++++++++++-------------- 2 files changed, 178 insertions(+), 104 deletions(-) diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp index bf98e6581..a5bda0a4d 100644 --- a/python/src/docstrings.hpp +++ b/python/src/docstrings.hpp @@ -247,8 +247,10 @@ static const char *__doc_kp_Manager_sequence = R"doc(Create a managed sequence that will be destroyed by this manager if it hasn't been destroyed by its reference count going to zero. 
-@param queueIndex The queue to use from the available queues @returns -Shared pointer with initialised sequence)doc"; +@param queueIndex The queue to use from the available queues @param +nrOfTimestamps The maximum number of timestamps to allocate. If zero +(default), disables latching of timestamps. @returns Shared pointer +with initialised sequence)doc"; static const char *__doc_kp_Manager_tensor = R"doc(Create a managed tensor that will be destroyed by this manager if it @@ -264,18 +266,26 @@ of algorithm and parameter components which can be used with shaders. By default it enables the user to provide a dynamic number of tensors which are then passed as inputs.)doc"; -static const char *__doc_kp_OpAlgoDispatch_OpAlgoDispatch = R"doc()doc"; +static const char *__doc_kp_OpAlgoDispatch_OpAlgoDispatch = +R"doc(Constructor that stores the algorithm to use as well as the relevant +push constants to override when recording. + +@param algorithm The algorithm object to use for dispatch @param +pushConstants The push constants to use for override)doc"; static const char *__doc_kp_OpAlgoDispatch_mAlgorithm = R"doc()doc"; static const char *__doc_kp_OpAlgoDispatch_mPushConstants = R"doc()doc"; static const char *__doc_kp_OpAlgoDispatch_postEval = -R"doc(Executes after the recorded commands are submitted, and performs a -copy of the GPU Device memory into the staging buffer so the output -data can be retrieved.)doc"; +R"doc(Does not perform any postEval commands. -static const char *__doc_kp_OpAlgoDispatch_preEval = R"doc(Does not perform any preEval commands.)doc"; +@param commandBuffer The command buffer to record the command into.)doc"; + +static const char *__doc_kp_OpAlgoDispatch_preEval = +R"doc(Does not perform any preEval commands. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpAlgoDispatch_record = R"doc(This records the commands that are to be sent to the GPU. 
This @@ -283,7 +293,9 @@ includes the barriers that ensure the memory has been copied before going in and out of the shader, as well as the dispatch operation that sends the shader processing to the gpu. This function also records the GPU memory copy of the output data for the staging buffer so it can be -read by the host.)doc"; +read by the host. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpBase = R"doc(Base Operation which provides the high level interface that Kompute @@ -299,7 +311,9 @@ the commands to the GPU for processing, and can be used to perform any tear-down steps required as the computation iteration finishes. It's worth noting that there are situations where eval can be called multiple times, so the resources that are destroyed should not require -a re-init unless explicitly provided by the user.)doc"; +a re-init unless explicitly provided by the user. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpBase_preEval = R"doc(Pre eval is called before the Sequence has called eval and submitted @@ -307,12 +321,16 @@ the commands to the GPU for processing, and can be used to perform any per-eval setup steps required as the computation iteration begins. It's worth noting that there are situations where eval can be called multiple times, so the resources that are created should be idempotent -in case it's called multiple times in a row.)doc"; +in case it's called multiple times in a row. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpBase_record = R"doc(The record function is intended to only send a record command or run commands that are expected to record operations that are to be -submitted as a batch into the GPU.)doc"; +submitted as a batch into the GPU. 
+ +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpMult = R"doc(Operation that performs multiplication on two tensors and outpus on @@ -323,12 +341,9 @@ R"doc(Default constructor with parameters that provides the bare minimum requirements for the operations to be able to create and manage their sub-components. -@param physicalDevice Vulkan physical device used to find device -queues @param device Vulkan logical device for passing to Algorithm -@param commandBuffer Vulkan Command Buffer to record commands into @param tensors Tensors that are to be used in this operation @param -komputeWorkgroup Optional parameter to specify the layout for -processing)doc"; +algorithm An algorithm that will be overridden with the OpMult shader +data and the tensors provided which are expected to be 3)doc"; static const char *__doc_kp_OpTensorCopy = R"doc(Operation that copies the data from the first tensor to the rest of @@ -340,84 +355,95 @@ static const char *__doc_kp_OpTensorCopy_OpTensorCopy = R"doc(Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. -@param physicalDevice Vulkan physical device used to find device -queues @param device Vulkan logical device for passing to Algorithm -@param commandBuffer Vulkan Command Buffer to record commands into @param tensors Tensors that will be used to create in operation.)doc"; static const char *__doc_kp_OpTensorCopy_mTensors = R"doc()doc"; static const char *__doc_kp_OpTensorCopy_postEval = R"doc(Copies the local vectors for all the tensors to sync the data with the -gpu.)doc"; +gpu. -static const char *__doc_kp_OpTensorCopy_preEval = R"doc(Does not perform any preEval commands.)doc"; +@param commandBuffer The command buffer to record the command into.)doc"; + +static const char *__doc_kp_OpTensorCopy_preEval = +R"doc(Does not perform any preEval commands. 
+ +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorCopy_record = R"doc(Records the copy commands from the first tensor into all the other -tensors provided. Also optionally records a barrier.)doc"; +tensors provided. Also optionally records a barrier. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorSyncDevice = R"doc(Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For -TensorTypes::eStaging it will only map the data into host memory which +TensorTypes::eHost it will only map the data into host memory which will happen during preEval before the recorded commands are -dispatched. This operation won't have any effect on -TensorTypes::eStaging.)doc"; +dispatched.)doc"; static const char *__doc_kp_OpTensorSyncDevice_OpTensorSyncDevice = R"doc(Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage. -@param physicalDevice Vulkan physical device used to find device -queues @param device Vulkan logical device for passing to Algorithm -@param commandBuffer Vulkan Command Buffer to record commands into @param tensors Tensors that will be used to create in operation.)doc"; static const char *__doc_kp_OpTensorSyncDevice_mTensors = R"doc()doc"; -static const char *__doc_kp_OpTensorSyncDevice_postEval = R"doc(Does not perform any postEval commands.)doc"; +static const char *__doc_kp_OpTensorSyncDevice_postEval = +R"doc(Does not perform any postEval commands. 
-static const char *__doc_kp_OpTensorSyncDevice_preEval = R"doc(Does not perform any preEval commands.)doc"; +@param commandBuffer The command buffer to record the command into.)doc"; + +static const char *__doc_kp_OpTensorSyncDevice_preEval = +R"doc(Does not perform any preEval commands. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorSyncDevice_record = R"doc(For device tensors, it records the copy command for the tensor to copy -the data from its staging to device memory.)doc"; +the data from its staging to device memory. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorSyncLocal = R"doc(Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For -TensorTypes::eStaging it will only map the data into host memory which +TensorTypes::eHost it will only map the data into host memory which will happen during preEval before the recorded commands are -dispatched. This operation won't have any effect on -TensorTypes::eStaging.)doc"; +dispatched.)doc"; static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal = R"doc(Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage. 
-@param physicalDevice Vulkan physical device used to find device -queues @param device Vulkan logical device for passing to Algorithm -@param commandBuffer Vulkan Command Buffer to record commands into @param tensors Tensors that will be used to create in operation.)doc"; static const char *__doc_kp_OpTensorSyncLocal_mTensors = R"doc()doc"; static const char *__doc_kp_OpTensorSyncLocal_postEval = R"doc(For host tensors it performs the map command from the host memory into -local memory.)doc"; +local memory. -static const char *__doc_kp_OpTensorSyncLocal_preEval = R"doc(Does not perform any preEval commands.)doc"; +@param commandBuffer The command buffer to record the command into.)doc"; + +static const char *__doc_kp_OpTensorSyncLocal_preEval = +R"doc(Does not perform any preEval commands. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorSyncLocal_record = R"doc(For device tensors, it records the copy command for the tensor to copy -the data from its device to staging memory.)doc"; +the data from its device to staging memory. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_Sequence = R"doc(Container of operations that can be sent to GPU as batch)doc"; @@ -427,7 +453,8 @@ generate all dependent resources. 
@param physicalDevice Vulkan physical device @param device Vulkan logical device @param computeQueue Vulkan compute queue @param -queueIndex Vulkan compute queue index in device)doc"; +queueIndex Vulkan compute queue index in device @param totalTimestamps +Maximum number of timestamps to allocate)doc"; static const char *__doc_kp_Sequence_begin = R"doc(Begins recording commands for commands to be submitted into the @@ -443,6 +470,8 @@ static const char *__doc_kp_Sequence_createCommandBuffer = R"doc()doc"; static const char *__doc_kp_Sequence_createCommandPool = R"doc()doc"; +static const char *__doc_kp_Sequence_createTimestampQueryPool = R"doc()doc"; + static const char *__doc_kp_Sequence_destroy = R"doc(Destroys and frees the GPU resources which include the buffer and memory and sets the sequence as init=False.)doc"; @@ -528,6 +557,10 @@ finishes, it runs the postEval of all operations. @param waitFor Number of milliseconds to wait before timing out. @return shared_ptr of the Sequence class itself)doc"; +static const char *__doc_kp_Sequence_getTimestamps = +R"doc(Return the timestamps that were latched at the beginning and after +each operation during the last eval() call.)doc"; + static const char *__doc_kp_Sequence_isInit = R"doc(Returns true if the sequence has been initialised, and it's based on the GPU resources being refrenced. @@ -607,9 +640,11 @@ R"doc(Clears command buffer and triggers re-record of all the current operations saved, which is useful if the underlying kp::Tensors or kp::Algorithms are modified and need to be re-recorded.)doc"; +static const char *__doc_kp_Sequence_timestampQueryPool = R"doc()doc"; + static const char *__doc_kp_Shader = R"doc(Shader utily class with functions to compile and process glsl files.)doc"; -static const char *__doc_kp_Shader_compile_source = +static const char *__doc_kp_Shader_compileSource = R"doc(Compile a single glslang source from string value. 
Currently this function uses the glslang C++ interface which is not thread safe so this funciton should not be called from multiple threads concurrently. @@ -622,7 +657,7 @@ List of pairs containing key value definitions @param resourcesLimit A list that contains the resource limits for the GLSL compiler @return The compiled SPIR-V binary in unsigned int32 format)doc"; -static const char *__doc_kp_Shader_compile_sources = +static const char *__doc_kp_Shader_compileSources = R"doc(Compile multiple sources with optional filenames. Currently this function uses the glslang C++ interface which is not thread safe so this funciton should not be called from multiple threads concurrently. @@ -645,11 +680,13 @@ buffer, which would be used to store their respective data. The tensors can be used for GPU data storage or transfer.)doc"; static const char *__doc_kp_Tensor_Tensor = -R"doc(Default constructor with data provided which would be used to create -the respective vulkan buffer and memory. +R"doc(Constructor with data provided which would be used to create the +respective vulkan buffer and memory. +@param physicalDevice The physical device to use to fetch properties +@param device The device to use to create the buffer and memory from @param data Non-zero-sized vector of data that will be used by the -tensor @param tensorType Type for the tensor which is of type +tensor @param tensorTypes Type for the tensor which is of type TensorTypes)doc"; static const char *__doc_kp_Tensor_TensorTypes = @@ -697,7 +734,11 @@ static const char *__doc_kp_Tensor_getStagingBufferUsageFlags = R"doc()doc"; static const char *__doc_kp_Tensor_getStagingMemoryPropertyFlags = R"doc()doc"; -static const char *__doc_kp_Tensor_isInit = R"doc()doc"; +static const char *__doc_kp_Tensor_isInit = +R"doc(Check whether tensor is initialized based on the created gpu +resources. 
+ +@returns Boolean stating whether tensor is initialized)doc"; static const char *__doc_kp_Tensor_mData = R"doc()doc"; @@ -742,11 +783,11 @@ vector's. Returns the element in the position requested.)doc"; static const char *__doc_kp_Tensor_rebuild = -R"doc(Initialiser which calls the initialisation for all the respective -tensors as well as creates the respective staging tensors. The staging -tensors would only be created for the tensors of type -TensorType::eDevice as otherwise there is no need to copy from host -memory.)doc"; +R"doc(Function to trigger reinitialisation of the tensor buffer and memory +with new data as well as new potential device type. + +@param data Vector of data to use to initialise vector from @param +tensorType The type to use for the tensor)doc"; static const char *__doc_kp_Tensor_recordBufferMemoryBarrier = R"doc(Records the buffer memory barrier into the command buffer which diff --git a/python/src/main.cpp b/python/src/main.cpp index 7165d41e7..d4b0f2084 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -26,9 +26,9 @@ PYBIND11_MODULE(kp, m) { py::module_ np = py::module_::import("numpy"); py::enum_(m, "TensorTypes") - .value("device", kp::Tensor::TensorTypes::eDevice, "Tensor holding data in GPU memory.") - .value("host", kp::Tensor::TensorTypes::eHost, "Tensor used for CPU visible GPU data.") - .value("storage", kp::Tensor::TensorTypes::eStorage, "Tensor with host visible gpu memory.") + .value("device", kp::Tensor::TensorTypes::eDevice, DOC(kp, Tensor, TensorTypes, eDevice)) + .value("host", kp::Tensor::TensorTypes::eHost, DOC(kp, Tensor, TensorTypes, eHost)) + .value("storage", kp::Tensor::TensorTypes::eStorage, DOC(kp, Tensor, TensorTypes, eStorage)) .export_values(); #if !defined(KOMPUTE_DISABLE_SHADER_UTILS) || !KOMPUTE_DISABLE_SHADER_UTILS @@ -37,51 +37,63 @@ PYBIND11_MODULE(kp, m) { const std::string& source, const std::string& entryPoint, const std::vector>& definitions) { - std::vector spirv = 
kp::Shader::compile_source(source, entryPoint, definitions); + std::vector spirv = kp::Shader::compileSource(source, entryPoint, definitions); return py::bytes((const char*)spirv.data(), spirv.size() * sizeof(uint32_t)); }, - "Compiles string source provided and returns the value in bytes", - py::arg("source"), py::arg("entryPoint") = "main", py::arg("definitions") = std::vector>() ) + DOC(kp, Shader, compileSource), + py::arg("source"), + py::arg("entryPoint") = "main", + py::arg("definitions") = std::vector>() ) .def_static("compile_sources", []( const std::vector& source, const std::vector& files, const std::string& entryPoint, const std::vector>& definitions) { - std::vector spirv = kp::Shader::compile_sources(source, files, entryPoint, definitions); + std::vector spirv = kp::Shader::compileSources(source, files, entryPoint, definitions); return py::bytes((const char*)spirv.data(), spirv.size() * sizeof(uint32_t)); }, - "Compiles sources provided with file names and returns the value in bytes", - py::arg("sources"), py::arg("files") = std::vector(), py::arg("entryPoint") = "main", py::arg("definitions") = std::vector>() ); + DOC(kp, Shader, compileSources), + py::arg("sources"), + py::arg("files") = std::vector(), + py::arg("entryPoint") = "main", + py::arg("definitions") = std::vector>() ); #endif // KOMPUTE_DISABLE_SHADER_UTILS - py::class_>(m, "OpBase"); + py::class_>(m, "OpBase", DOC(kp, OpBase)); - py::class_>(m, "OpTensorSyncDevice", py::base()) - .def(py::init>&>()); + py::class_>( + m, "OpTensorSyncDevice", py::base(), DOC(kp, OpTensorSyncDevice)) + .def(py::init>&>(), DOC(kp, OpTensorSyncDevice, OpTensorSyncDevice)); - py::class_>(m, "OpTensorSyncLocal", py::base()) - .def(py::init>&>()); + py::class_>( + m, "OpTensorSyncLocal", py::base(), DOC(kp, OpTensorSyncLocal)) + .def(py::init>&>(), DOC(kp, OpTensorSyncLocal, OpTensorSyncLocal)); - py::class_>(m, "OpTensorCopy", py::base()) - .def(py::init>&>()); + py::class_>( + m, "OpTensorCopy", py::base(), 
DOC(kp, OpTensorCopy)) + .def(py::init>&>(), DOC(kp, OpTensorCopy, OpTensorCopy)); - py::class_>(m, "OpAlgoDispatch", py::base()) + py::class_>( + m, "OpAlgoDispatch", py::base(), DOC(kp, OpAlgoDispatch)) .def(py::init&,const kp::Constants&>(), + DOC(kp, OpAlgoDispatch, OpAlgoDispatch), py::arg("algorithm"), py::arg("push_consts") = kp::Constants()); - py::class_>(m, "OpMult", py::base()) - .def(py::init>&,const std::shared_ptr&>()); + py::class_>( + m, "OpMult", py::base(), DOC(kp, OpMult)) + .def(py::init>&,const std::shared_ptr&>(), + DOC(kp, OpMult, OpMult)); - py::class_>(m, "Algorithm") - .def("get_tensors", &kp::Algorithm::getTensors) - .def("destroy", &kp::Algorithm::destroy) - .def("get_spec_consts", &kp::Algorithm::getSpecializationConstants) - .def("is_init", &kp::Algorithm::isInit); + py::class_>(m, "Algorithm", DOC(kp, Algorithm, Algorithm)) + .def("get_tensors", &kp::Algorithm::getTensors, DOC(kp, Algorithm, getTensors)) + .def("destroy", &kp::Algorithm::destroy, DOC(kp, Algorithm, destroy)) + .def("get_spec_consts", &kp::Algorithm::getSpecializationConstants, DOC(kp, Algorithm, getSpecializationConstants)) + .def("is_init", &kp::Algorithm::isInit, DOC(kp, Algorithm, isInit)); py::class_>(m, "Tensor", DOC(kp, Tensor)) .def("data", [](kp::Tensor& self) { return py::array(self.data().size(), self.data().data()); - }, "Returns stored data as a new numpy array.") + }, DOC(kp, Tensor, data)) .def("__getitem__", [](kp::Tensor &self, size_t index) -> float { return self.data()[index]; }, "When only an index is necessary") .def("__setitem__", [](kp::Tensor &self, size_t index, float value) { @@ -91,7 +103,7 @@ PYBIND11_MODULE(kp, m) { const py::buffer_info info = flatdata.request(); const float* ptr = (float*) info.ptr; self.setData(std::vector(ptr, ptr+flatdata.size())); - }, "Overrides the data in the local Tensor memory.") + }, DOC(kp, Tensor, setData)) .def("__iter__", [](kp::Tensor &self) { return py::make_iterator(self.data().begin(), 
self.data().end()); }, py::keep_alive<0, 1>(), // Required to keep alive iterator while exists @@ -112,35 +124,52 @@ PYBIND11_MODULE(kp, m) { } return reversed; }) - .def("size", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.") - .def("__len__", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.") - .def("tensor_type", &kp::Tensor::tensorType, "Retreves the memory type of the tensor.") - .def("is_init", &kp::Tensor::isInit, "Checks whether the tensor GPU memory has been initialised.") - .def("destroy", &kp::Tensor::destroy, "Destroy tensor GPU resources."); + .def("size", &kp::Tensor::size, DOC(kp, Tensor, size)) + .def("__len__", &kp::Tensor::size, DOC(kp, Tensor, size)) + .def("tensor_type", &kp::Tensor::tensorType, DOC(kp, Tensor, tensorType)) + .def("is_init", &kp::Tensor::isInit, DOC(kp, Tensor, isInit)) + .def("destroy", &kp::Tensor::destroy, DOC(kp, Tensor, destroy)); - py::class_>(m, "Sequence") - .def("record", [](kp::Sequence& self, std::shared_ptr op) { return self.record(op); }) - .def("eval", [](kp::Sequence& self) { return self.eval(); }) - .def("eval", [](kp::Sequence& self, std::shared_ptr op) { return self.eval(op); }) - .def("eval_async", [](kp::Sequence& self) { return self.eval(); }) - .def("eval_async", [](kp::Sequence& self, std::shared_ptr op) { return self.evalAsync(op); }) - .def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); }) - .def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); }) - .def("is_recording", &kp::Sequence::isRecording) - .def("is_running", &kp::Sequence::isRunning) - .def("is_init", &kp::Sequence::isInit) - .def("get_timestamps", &kp::Sequence::getTimestamps) - .def("clear", &kp::Sequence::clear) - .def("destroy", &kp::Sequence::destroy); + py::class_>(m, "Sequence", DOC(kp, Sequence)) + .def("record", [](kp::Sequence& self, std::shared_ptr op) { return self.record(op); }, + DOC(kp, Sequence, 
record)) + .def("eval", [](kp::Sequence& self) { return self.eval(); }, + DOC(kp, Sequence, eval)) + .def("eval", [](kp::Sequence& self, std::shared_ptr op) { return self.eval(op); }, + DOC(kp, Sequence, eval)) + .def("eval_async", [](kp::Sequence& self) { return self.eval(); }, + DOC(kp, Sequence, evalAsync)) + .def("eval_async", [](kp::Sequence& self, std::shared_ptr op) { return self.evalAsync(op); }, + DOC(kp, Sequence, evalAsync)) + .def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); }, + DOC(kp, Sequence, evalAwait)) + .def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); }, + DOC(kp, Sequence, evalAwait)) + .def("is_recording", &kp::Sequence::isRecording, + DOC(kp, Sequence, isRecording)) + .def("is_running", &kp::Sequence::isRunning, + DOC(kp, Sequence, isRunning)) + .def("is_init", &kp::Sequence::isInit, + DOC(kp, Sequence, isInit)) + .def("clear", &kp::Sequence::clear, + DOC(kp, Sequence, clear)) + .def("rerecord", &kp::Sequence::rerecord, + DOC(kp, Sequence, rerecord)) + .def("get_timestamps", &kp::Sequence::getTimestamps, + DOC(kp, Sequence, getTimestamps)) + .def("destroy", &kp::Sequence::destroy, + DOC(kp, Sequence, destroy)); - py::class_>(m, "Manager") - .def(py::init()) - .def(py::init()) + py::class_>(m, "Manager", DOC(kp, Manager)) + .def(py::init(), DOC(kp, Manager, Manager)) + .def(py::init(), DOC(kp, Manager, Manager_2)) .def(py::init&,const std::vector&>(), + DOC(kp, Manager, Manager_2), py::arg("device") = 0, py::arg("family_queue_indices") = std::vector(), py::arg("desired_extensions") = std::vector()) - .def("sequence", &kp::Manager::sequence, py::arg("queue_index") = 0, py::arg("total_timestamps") = 0) + .def("sequence", &kp::Manager::sequence, DOC(kp, Manager, sequence), + py::arg("queue_index") = 0, py::arg("total_timestamps") = 0) .def("tensor", [np](kp::Manager& self, const py::array_t data, kp::Tensor::TensorTypes tensor_type) { @@ -149,7 +178,7 @@ PYBIND11_MODULE(kp, m) { const 
float* ptr = (float*) info.ptr; return self.tensor(std::vector(ptr, ptr+flatdata.size()), tensor_type); }, - "Tensor initialisation function with data and tensor type", + DOC(kp, Manager, tensor), py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice) .def("algorithm", [](kp::Manager& self, const std::vector>& tensors, @@ -163,8 +192,12 @@ PYBIND11_MODULE(kp, m) { std::vector spirvVec((uint32_t*)data, (uint32_t*)(data + length)); return self.algorithm(tensors, spirvVec, workgroup, spec_consts, push_consts); }, - "Algorithm initialisation function", - py::arg("tensors"), py::arg("spirv"), py::arg("workgroup") = kp::Workgroup(), py::arg("spec_consts") = kp::Constants(), py::arg("push_consts") = kp::Constants()); + DOC(kp, Manager, algorithm), + py::arg("tensors"), + py::arg("spirv"), + py::arg("workgroup") = kp::Workgroup(), + py::arg("spec_consts") = kp::Constants(), + py::arg("push_consts") = kp::Constants()); #ifdef VERSION_INFO m.attr("__version__") = VERSION_INFO; From b81896a78062fb53b56ecadadb66936e434879bd Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 6 Mar 2021 16:15:03 +0000 Subject: [PATCH 03/16] Innitial iteration of multiple type tensor --- src/Tensor.cpp | 101 +++---------- src/include/kompute/Tensor.hpp | 268 +++++++++++++++++++++++++++------ 2 files changed, 243 insertions(+), 126 deletions(-) diff --git a/src/Tensor.cpp b/src/Tensor.cpp index f584c07bd..dc254fe83 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -5,17 +5,20 @@ namespace kp { Tensor::Tensor(std::shared_ptr physicalDevice, std::shared_ptr device, - const std::vector& data, + void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const TensorDataTypes& dataType, const TensorTypes& tensorType) { KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}", - data.size(), + elementTotalCount, tensorType); this->mPhysicalDevice = physicalDevice; this->mDevice = device; - this->rebuild(data, tensorType); + 
this->rebuild(data, elementTotalCount, elementMemorySize, dataType, tensorType); } Tensor::~Tensor() @@ -29,11 +32,17 @@ Tensor::~Tensor() } void -Tensor::rebuild(const std::vector& data, TensorTypes tensorType) +Tensor::rebuild(void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const TensorDataTypes& dataType, + TensorTypes tensorType) { - KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", data.size()); + KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", elementTotalCount); - this->mData = data; + this->mSize = elementTotalCount; + this->mElementMemorySize = elementMemorySize; + this->mDataType = dataType; this->mTensorType = tensorType; if (this->mPrimaryBuffer || this->mPrimaryMemory) { @@ -43,30 +52,7 @@ Tensor::rebuild(const std::vector& data, TensorTypes tensorType) } this->allocateMemoryCreateGPUResources(); -} - -std::vector& -Tensor::data() -{ - return this->mData; -} - -float& -Tensor::operator[](int index) -{ - return this->mData[index]; -} - -uint64_t -Tensor::memorySize() -{ - return this->size() * sizeof(float); -} - -uint32_t -Tensor::size() -{ - return static_cast(this->mData.size()); + this->rawMapDataIntoHostMemory(data); } Tensor::TensorTypes @@ -81,15 +67,6 @@ Tensor::isInit() return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory; } -void -Tensor::setData(const std::vector& data) -{ - if (data.size() != this->mData.size()) { - throw std::runtime_error( - "Kompute Tensor Cannot set data of different sizes"); - } - this->mData = data; -} void Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer, @@ -204,55 +181,13 @@ Tensor::constructDescriptorBufferInfo() void Tensor::mapDataFromHostMemory() { - KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); - - std::shared_ptr hostVisibleMemory = nullptr; - - if (this->mTensorType == TensorTypes::eHost) { - hostVisibleMemory = this->mPrimaryMemory; - } else if (this->mTensorType == TensorTypes::eDevice) { - hostVisibleMemory = 
this->mStagingMemory; - } else { - KP_LOG_WARN( - "Kompute Tensor mapping data not supported on storage tensor"); - return; - } - - vk::DeviceSize bufferSize = this->memorySize(); - void* mapped = this->mDevice->mapMemory( - *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); - vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize); - this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange); - memcpy(this->mData.data(), mapped, bufferSize); - this->mDevice->unmapMemory(*hostVisibleMemory); + KP_LOG_DEBUG("Kompute Tensor mapDataFromHostMemory - SKIPPING"); } void Tensor::mapDataIntoHostMemory() { - - KP_LOG_DEBUG("Kompute Tensor local mapping tensor data to host buffer"); - - std::shared_ptr hostVisibleMemory = nullptr; - - if (this->mTensorType == TensorTypes::eHost) { - hostVisibleMemory = this->mPrimaryMemory; - } else if (this->mTensorType == TensorTypes::eDevice) { - hostVisibleMemory = this->mStagingMemory; - } else { - KP_LOG_WARN( - "Kompute Tensor mapping data not supported on storage tensor"); - return; - } - - vk::DeviceSize bufferSize = this->memorySize(); - - void* mapped = this->mDevice->mapMemory( - *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); - memcpy(mapped, this->mData.data(), bufferSize); - vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); - this->mDevice->flushMappedMemoryRanges(1, &mappedRange); - this->mDevice->unmapMemory(*hostVisibleMemory); + KP_LOG_DEBUG("Kompute Tensor mapDataIntoHostMemory - SKIPPING"); } vk::BufferUsageFlags diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index 195af44f4..f2583708d 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -27,6 +27,14 @@ class Tensor eHost = 1, ///< Type is host memory, source and destination eStorage = 2, ///< Type is Device memory (only) }; + enum class TensorDataTypes + { + eBool = 0, + eInt = 1, + eUnsignedInt = 2, + eFloat = 3, + eDouble = 4, + }; /** * Constructor 
with data provided which would be used to create the @@ -40,7 +48,10 @@ class Tensor */ Tensor(std::shared_ptr physicalDevice, std::shared_ptr device, - const std::vector& data, + void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const TensorDataTypes& dataType = TensorDataTypes::eFloat, const TensorTypes& tensorType = TensorTypes::eDevice); /** @@ -49,6 +60,48 @@ class Tensor */ ~Tensor(); + /** + * Returns the size/magnitude of the Tensor, which will be the total number + * of elements across all dimensions + * + * @return Unsigned integer representing the total number of elements + */ + // TODO: move to cpp + virtual uint32_t size() { + return this->mElementMemorySize; + } + + // TODO: move to cpp + virtual uint32_t memorySize() { + return this->mSize * this->mElementMemorySize; + } + + /** + * Retrieve the underlying data type of the Tensor + * + * @return Data type of tensor of type kp::Tensor::TensorDataTypes + */ + virtual TensorDataTypes dataType() { + return this->mDataType; + } + + /** + * Maps data from the Host Visible GPU memory into the data vector. It + * requires the Tensor to be of staging type for it to work. + */ + virtual void mapDataFromHostMemory(); + /** + * Maps data from the data vector into the Host Visible GPU memory. It + * requires the tensor to be of staging type for it to work. + */ + virtual void mapDataIntoHostMemory(); + + // TODO: Decide whether this is one we prefer to have also overriden in the underlying tensorView + // TODO: move to cpp + void getRawData(void* data) { + this->rawMapDataFromHostMemory(data); + } + /** * Function to trigger reinitialisation of the tensor buffer and memory with * new data as well as new potential device type. 
@@ -56,7 +109,10 @@ class Tensor * @param data Vector of data to use to initialise vector from * @param tensorType The type to use for the tensor */ - void rebuild(const std::vector& data, + void rebuild(void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const TensorDataTypes& dataType = TensorDataTypes::eFloat, TensorTypes tensorType = TensorTypes::eDevice); /** @@ -71,32 +127,6 @@ class Tensor */ bool isInit(); - /** - * Returns the vector of data currently contained by the Tensor. It is - * important to ensure that there is no out-of-sync data with the GPU - * memory. - * - * @return Reference to vector of elements representing the data in the - * tensor. - */ - std::vector& data(); - /** - * Overrides the subscript operator to expose the underlying data's - * subscript operator which in this case would be its underlying - * vector's. - * - * @param i The index where the element will be returned from. - * @return Returns the element in the position requested. - */ - float& operator[](int index); - /** - * Returns the size/magnitude of the Tensor, which will be the total number - * of elements across all dimensions - * - * @return Unsigned integer representing the total number of elements - */ - uint32_t size(); - /** * Retrieve the tensor type of the Tensor * @@ -108,7 +138,15 @@ class Tensor * Sets / resets the vector data of the tensor. This function does not * perform any copies into GPU memory and is only performed on the host. 
*/ - void setData(const std::vector& data); + void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) { + if (elementTotalCount * elementMemorySize != this->memorySize()) { + throw std::runtime_error( + "Kompute Tensor Cannot set data of different sizes"); + } + this->mSize = elementTotalCount; + this->mElementMemorySize = elementMemorySize; + this->rawMapDataIntoHostMemory(data); + } /** * Records a copy from the memory of the tensor provided to the current @@ -172,16 +210,6 @@ class Tensor * @return Descriptor buffer info with own buffer */ vk::DescriptorBufferInfo constructDescriptorBufferInfo(); - /** - * Maps data from the Host Visible GPU memory into the data vector. It - * requires the Tensor to be of staging type for it to work. - */ - void mapDataFromHostMemory(); - /** - * Maps data from the data vector into the Host Visible GPU memory. It - * requires the tensor to be of staging type for it to work. - */ - void mapDataIntoHostMemory(); private: // -------------- NEVER OWNED RESOURCES @@ -199,9 +227,10 @@ class Tensor bool mFreeStagingMemory = false; // -------------- ALWAYS OWNED RESOURCES - std::vector mData; - - TensorTypes mTensorType = TensorTypes::eDevice; + TensorTypes mTensorType; + TensorDataTypes mDataType; + uint32_t mSize; + uint32_t mElementMemorySize; void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer void createBuffer(std::shared_ptr buffer, @@ -221,7 +250,160 @@ class Tensor vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags(); vk::BufferUsageFlags getStagingBufferUsageFlags(); vk::MemoryPropertyFlags getStagingMemoryPropertyFlags(); - uint64_t memorySize(); + + void rawMapDataFromHostMemory(void* data) { + + KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); + + std::shared_ptr hostVisibleMemory = nullptr; + + if (this->mTensorType == TensorTypes::eHost) { + hostVisibleMemory = this->mPrimaryMemory; + } else if (this->mTensorType == TensorTypes::eDevice) { + hostVisibleMemory = 
this->mStagingMemory; + } else { + KP_LOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); + return; + } + + vk::DeviceSize bufferSize = this->memorySize(); + void* mapped = this->mDevice->mapMemory( + *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); + vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize); + this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange); + memcpy(data, mapped, bufferSize); + this->mDevice->unmapMemory(*hostVisibleMemory); + } + + void rawMapDataIntoHostMemory(void* data) { + KP_LOG_DEBUG("Kompute Tensor mapping data into host buffer"); + + std::shared_ptr hostVisibleMemory = nullptr; + + if (this->mTensorType == TensorTypes::eHost) { + hostVisibleMemory = this->mPrimaryMemory; + } else if (this->mTensorType == TensorTypes::eDevice) { + hostVisibleMemory = this->mStagingMemory; + } else { + KP_LOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); + return; + } + + vk::DeviceSize bufferSize = this->memorySize(); + + void* mapped = this->mDevice->mapMemory( + *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); + memcpy(mapped, data, bufferSize); + vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); + this->mDevice->flushMappedMemoryRanges(1, &mappedRange); + this->mDevice->unmapMemory(*hostVisibleMemory); + } }; +// TODO: Limit T to be only float, bool, double, etc +template +class TensorView: public Tensor +{ + public: + TensorView(std::shared_ptr physicalDevice, + std::shared_ptr device, + const std::vector& data, + const TensorTypes& tensorType = TensorTypes::eDevice); + + ~TensorView(); + + void rebuild(const std::vector& data, + TensorTypes tensorType = TensorTypes::eDevice) { + + this->mData = data; + Tensor::rebuild(data.data(), data.size(), sizeof(T), this->dataType(), tensorType); + } + + std::vector& data() { + return this->mData; + } + + T& operator[](int index) { + return this->mData[index]; + } + + void setData(const 
std::vector& data) { + + if (data.size() != this->mData.size()) { + throw std::runtime_error( + "Kompute TensorView Cannot set data of different sizes"); + } + + this->mData = data; + + this->setRawData(this->mData.data(), this->mData.size(), sizeof(T), this->dataType()); + } + + TensorDataTypes dataType() override; + + uint32_t size() override { + return this->mData->size(); + } + + uint32_t memorySize() override { + return this->mData->size() * sizeof(T); + } + + /** + * Maps data from the Host Visible GPU memory into the data vector. It + * requires the Tensor to be of staging type for it to work. + */ + void mapDataFromHostMemory() override { + KP_LOG_DEBUG("Kompute TensorView mapDataFromHostMemory copying data"); + + this->rawMapDataFromHostMemory(this->mData.data()); + } + /** + * Maps data from the data vector into the Host Visible GPU memory. It + * requires the tensor to be of staging type for it to work. + */ + void mapDataIntoHostMemory() override { + KP_LOG_DEBUG("Kompute TensorView mapDataIntoHostMemory copying data"); + + this->rawMapDataIntoHostMemory(this->mData.data()); + } + + private: + // -------------- ALWAYS OWNED RESOURCES + std::vector mData; + +}; + +template<> +Tensor::TensorDataTypes +TensorView::dataType() { + return Tensor::TensorDataTypes::eBool; +} + +template<> +Tensor::TensorDataTypes +TensorView::dataType() { + return Tensor::TensorDataTypes::eInt; +} + +template<> +Tensor::TensorDataTypes +TensorView::dataType() { + return Tensor::TensorDataTypes::eUnsignedInt; +} + +template<> +Tensor::TensorDataTypes +TensorView::dataType() { + return Tensor::TensorDataTypes::eFloat; +} + +template<> +Tensor::TensorDataTypes +TensorView::dataType() { + return Tensor::TensorDataTypes::eDouble; +} + } // End namespace kp From ad18c2e54698e1496347727ce61d46a8e9562e7b Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 6 Mar 2021 17:25:35 +0000 Subject: [PATCH 04/16] Initial implementation of tensor working compiling --- 
examples/array_multiplication/CMakeLists.txt | 6 +- examples/array_multiplication/README.md | 7 +- examples/array_multiplication/src/Main.cpp | 11 +- examples/logistic_regression/CMakeLists.txt | 8 +- examples/logistic_regression/README.md | 7 +- examples/logistic_regression/src/Main.cpp | 18 +- single_include/kompute/Kompute.hpp | 294 +++++++++++++++---- src/Manager.cpp | 15 - src/OpTensorCopy.cpp | 17 +- src/Tensor.cpp | 42 ++- src/include/kompute/Manager.hpp | 19 +- src/include/kompute/Tensor.hpp | 183 ++++++------ 12 files changed, 417 insertions(+), 210 deletions(-) diff --git a/examples/array_multiplication/CMakeLists.txt b/examples/array_multiplication/CMakeLists.txt index 0b648382e..bfc4c1c79 100644 --- a/examples/array_multiplication/CMakeLists.txt +++ b/examples/array_multiplication/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.17.0) +cmake_minimum_required(VERSION 3.4.1) project(kompute_array_mult VERSION 0.1.0) set(CMAKE_CXX_STANDARD 14) @@ -23,10 +23,6 @@ endif() find_package(Vulkan REQUIRED) -if(KOMPUTE_OPT_ENABLE_SPDLOG) - find_package(spdlog REQUIRED) -endif() - add_executable(kompute_array_mult src/Main.cpp) diff --git a/examples/array_multiplication/README.md b/examples/array_multiplication/README.md index 931c7d639..d4082c713 100644 --- a/examples/array_multiplication/README.md +++ b/examples/array_multiplication/README.md @@ -15,8 +15,11 @@ This project has the option to either import the Kompute dependency relative to To build you just need to run the cmake command in this folder as follows: ``` -cmake \ - -Bbuild +cmake -Bbuild/ \ + -DCMAKE_BUILD_TYPE=Debug \ + -DKOMPUTE_OPT_INSTALL=0 \ + -DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \ + -DKOMPUTE_OPT_ENABLE_SPDLOG=1 ``` You can pass the following optional parameters based on your desired configuration: diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp index acb76898c..dacc67f89 100755 --- a/examples/array_multiplication/src/Main.cpp 
+++ b/examples/array_multiplication/src/Main.cpp @@ -7,16 +7,11 @@ int main() { -#if KOMPUTE_ENABLE_SPDLOG - spdlog::set_level( - static_cast(SPDLOG_ACTIVE_LEVEL)); -#endif - kp::Manager mgr; - auto tensorInA = mgr.tensor({ 2.0, 4.0, 6.0 }); - auto tensorInB = mgr.tensor({ 0.0, 1.0, 2.0 }); - auto tensorOut = mgr.tensor({ 0.0, 0.0, 0.0 }); + auto tensorInA = mgr.tensor({ 2.0, 4.0, 6.0 }); + auto tensorInB = mgr.tensor({ 0.0, 1.0, 2.0 }); + auto tensorOut = mgr.tensor({ 0.0, 0.0, 0.0 }); std::string shader(R"( // The version to use diff --git a/examples/logistic_regression/CMakeLists.txt b/examples/logistic_regression/CMakeLists.txt index f918bbf21..8c8e0eb8f 100644 --- a/examples/logistic_regression/CMakeLists.txt +++ b/examples/logistic_regression/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.17.0) +cmake_minimum_required(VERSION 3.4.1) project(kompute_linear_reg VERSION 0.1.0) set(CMAKE_CXX_STANDARD 14) @@ -23,10 +23,6 @@ endif() find_package(Vulkan REQUIRED) -if(KOMPUTE_OPT_ENABLE_SPDLOG) - find_package(spdlog REQUIRED) -endif() - add_executable(kompute_linear_reg src/Main.cpp) @@ -39,7 +35,7 @@ include_directories( ../../single_include/) if(KOMPUTE_OPT_ENABLE_SPDLOG) - target_link_libraries(kompute_array_mult + target_link_libraries(kompute_linear_reg spdlog::spdlog) endif() diff --git a/examples/logistic_regression/README.md b/examples/logistic_regression/README.md index 0de7ee30a..342bbfca1 100644 --- a/examples/logistic_regression/README.md +++ b/examples/logistic_regression/README.md @@ -15,8 +15,11 @@ This project has the option to either import the Kompute dependency relative to To build you just need to run the cmake command in this folder as follows: ``` -cmake \ - -Bbuild +cmake -Bbuild/ \ + -DCMAKE_BUILD_TYPE=Debug \ + -DKOMPUTE_OPT_INSTALL=0 \ + -DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \ + -DKOMPUTE_OPT_ENABLE_SPDLOG=1 ``` You can pass the following optional parameters based on your desired configuration: diff --git 
a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp index c435575e2..3b6ec11e1 100755 --- a/examples/logistic_regression/src/Main.cpp +++ b/examples/logistic_regression/src/Main.cpp @@ -17,19 +17,19 @@ int main() kp::Manager mgr; - std::shared_ptr xI = mgr.tensor({ 0, 1, 1, 1, 1 }); - std::shared_ptr xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); + auto xI = mgr.tensor({ 0, 1, 1, 1, 1 }); + auto xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr y = mgr.tensor({ 0, 0, 0, 1, 1 }); + auto y = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr wIn = mgr.tensor({ 0.001, 0.001 }); - std::shared_ptr wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); + auto wIn = mgr.tensor({ 0.001, 0.001 }); + auto wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); + auto wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr bIn = mgr.tensor({ 0 }); - std::shared_ptr bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + auto bIn = mgr.tensor({ 0 }); + auto bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + auto lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); std::vector> params = { xI, xJ, y, wIn, wOutI, wOutJ, diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 593390dbe..41e9434f8 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -762,7 +762,7 @@ class Shader * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ - static std::vector compileSources( + static std::vector compile_sources( const std::vector& sources, const std::vector& files = {}, const std::string& entryPoint = "main", @@ -783,7 +783,7 @@ class Shader * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ - static std::vector compileSource( + static std::vector compile_source( const std::string& source, const std::string& entryPoint = "main", std::vector> definitions = {}, @@ -818,6 +818,14 @@ class Tensor 
eHost = 1, ///< Type is host memory, source and destination eStorage = 2, ///< Type is Device memory (only) }; + enum class TensorDataTypes + { + eBool = 0, + eInt = 1, + eUnsignedInt = 2, + eFloat = 3, + eDouble = 4, + }; /** * Constructor with data provided which would be used to create the @@ -831,14 +839,78 @@ class Tensor */ Tensor(std::shared_ptr physicalDevice, std::shared_ptr device, - const std::vector& data, + void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const TensorDataTypes& dataType, const TensorTypes& tensorType = TensorTypes::eDevice); /** * Destructor which is in charge of freeing vulkan resources unless they * have been provided externally. */ - ~Tensor(); + virtual ~Tensor(); + + /** + * Returns the size/magnitude of the Tensor, which will be the total number + * of elements across all dimensions + * + * @return Unsigned integer representing the total number of elements + */ + // TODO: move to cpp + virtual uint32_t size() { + return this->mSize; + } + + // TODO: move to cpp + virtual uint32_t dataTypeMemorySize() { + return this->mDataTypeMemorySize; + } + + // TODO: move to cpp + virtual uint32_t memorySize() { + return this->mSize * this->mDataTypeMemorySize; + } + + /** + * Retrieve the underlying data type of the Tensor + * + * @return Data type of tensor of type kp::Tensor::TensorDataTypes + */ + virtual TensorDataTypes dataType() { + return this->mDataType; + } + + /** + * Maps data from the Host Visible GPU memory into the data vector. It + * requires the Tensor to be of staging type for it to work. + */ + virtual void mapDataFromHostMemory(); + /** + * Maps data from the data vector into the Host Visible GPU memory. It + * requires the tensor to be of staging type for it to work. 
+ */ + virtual void mapDataIntoHostMemory(); + + // TODO: Decide whether this is one we prefer to have also overriden in the underlying tensorView + // TODO: move to cpp + virtual void getRawData(void* data) { + this->rawMapDataFromHostMemory(data); + } + + /** + * Sets / resets the vector data of the tensor. This function does not + * perform any copies into GPU memory and is only performed on the host. + */ + virtual void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) { + if (elementTotalCount * elementMemorySize != this->memorySize()) { + throw std::runtime_error( + "Kompute Tensor Cannot set data of different sizes"); + } + this->mSize = elementTotalCount; + this->mDataTypeMemorySize = elementMemorySize; + this->rawMapDataIntoHostMemory(data); + } /** * Function to trigger reinitialisation of the tensor buffer and memory with @@ -847,8 +919,9 @@ class Tensor * @param data Vector of data to use to initialise vector from * @param tensorType The type to use for the tensor */ - void rebuild(const std::vector& data, - TensorTypes tensorType = TensorTypes::eDevice); + void rebuild(void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize); /** * Destroys and frees the GPU resources which include the buffer and memory. @@ -862,32 +935,6 @@ class Tensor */ bool isInit(); - /** - * Returns the vector of data currently contained by the Tensor. It is - * important to ensure that there is no out-of-sync data with the GPU - * memory. - * - * @return Reference to vector of elements representing the data in the - * tensor. - */ - std::vector& data(); - /** - * Overrides the subscript operator to expose the underlying data's - * subscript operator which in this case would be its underlying - * vector's. - * - * @param i The index where the element will be returned from. - * @return Returns the element in the position requested. 
- */ - float& operator[](int index); - /** - * Returns the size/magnitude of the Tensor, which will be the total number - * of elements across all dimensions - * - * @return Unsigned integer representing the total number of elements - */ - uint32_t size(); - /** * Retrieve the tensor type of the Tensor * @@ -895,12 +942,6 @@ class Tensor */ TensorTypes tensorType(); - /** - * Sets / resets the vector data of the tensor. This function does not - * perform any copies into GPU memory and is only performed on the host. - */ - void setData(const std::vector& data); - /** * Records a copy from the memory of the tensor provided to the current * thensor. This is intended to pass memory into a processing, to perform @@ -963,17 +1004,57 @@ class Tensor * @return Descriptor buffer info with own buffer */ vk::DescriptorBufferInfo constructDescriptorBufferInfo(); - /** - * Maps data from the Host Visible GPU memory into the data vector. It - * requires the Tensor to be of staging type for it to work. - */ - void mapDataFromHostMemory(); - /** - * Maps data from the data vector into the Host Visible GPU memory. It - * requires the tensor to be of staging type for it to work. 
- */ - void mapDataIntoHostMemory(); + protected: + void rawMapDataFromHostMemory(void* data) { + + KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); + + std::shared_ptr hostVisibleMemory = nullptr; + + if (this->mTensorType == TensorTypes::eHost) { + hostVisibleMemory = this->mPrimaryMemory; + } else if (this->mTensorType == TensorTypes::eDevice) { + hostVisibleMemory = this->mStagingMemory; + } else { + KP_LOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); + return; + } + + vk::DeviceSize bufferSize = this->memorySize(); + void* mapped = this->mDevice->mapMemory( + *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); + vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize); + this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange); + memcpy(data, mapped, bufferSize); + this->mDevice->unmapMemory(*hostVisibleMemory); + } + + void rawMapDataIntoHostMemory(void* data) { + KP_LOG_DEBUG("Kompute Tensor mapping data into host buffer"); + + std::shared_ptr hostVisibleMemory = nullptr; + + if (this->mTensorType == TensorTypes::eHost) { + hostVisibleMemory = this->mPrimaryMemory; + } else if (this->mTensorType == TensorTypes::eDevice) { + hostVisibleMemory = this->mStagingMemory; + } else { + KP_LOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); + return; + } + + vk::DeviceSize bufferSize = this->memorySize(); + + void* mapped = this->mDevice->mapMemory( + *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); + memcpy(mapped, data, bufferSize); + vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); + this->mDevice->flushMappedMemoryRanges(1, &mappedRange); + this->mDevice->unmapMemory(*hostVisibleMemory); + } private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mPhysicalDevice; @@ -990,9 +1071,10 @@ class Tensor bool mFreeStagingMemory = false; // -------------- ALWAYS OWNED RESOURCES - std::vector mData; - - TensorTypes mTensorType = 
TensorTypes::eDevice; + TensorTypes mTensorType; + TensorDataTypes mDataType; + uint32_t mSize; + uint32_t mDataTypeMemorySize; void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer void createBuffer(std::shared_ptr buffer, @@ -1012,9 +1094,98 @@ class Tensor vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags(); vk::BufferUsageFlags getStagingBufferUsageFlags(); vk::MemoryPropertyFlags getStagingMemoryPropertyFlags(); - uint64_t memorySize(); + }; +// TODO: Limit T to be only float, bool, double, etc +template +class TensorView: public Tensor +{ + public: + TensorView(std::shared_ptr physicalDevice, + std::shared_ptr device, + const std::vector& data, + const TensorTypes& tensorType = TensorTypes::eDevice) + : Tensor(physicalDevice, device, (void*)data.data(), data.size(), sizeof(T), this->dataType()) + { + + } + + ~TensorView() { + + } + + void rebuild(const std::vector& data, + TensorTypes tensorType = TensorTypes::eDevice) { + + this->mData = data; + Tensor::rebuild(data.data(), data.size(), sizeof(T)); + } + + std::vector& data() { + return this->mData; + } + + T& operator[](int index) { + return this->mData[index]; + } + + void setData(const std::vector& data) { + + if (data.size() != this->mData.size()) { + throw std::runtime_error( + "Kompute TensorView Cannot set data of different sizes"); + } + + this->mData = data; + + Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T)); + } + + void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) override + { + assert(elementMemorySize == sizeof(T)); + + this->mData = { (T*)data, ((T*)data) + elementTotalCount }; + Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T)); + } + + TensorDataTypes dataType() override; + + uint32_t size() override { + return this->mData.size(); + } + + uint32_t memorySize() override { + return this->mData.size() * sizeof(T); + } + + /** + * Maps data from the Host Visible GPU memory into the data vector. 
It + * requires the Tensor to be of staging type for it to work. + */ + void mapDataFromHostMemory() override { + KP_LOG_DEBUG("Kompute TensorView mapDataFromHostMemory copying data"); + + this->rawMapDataFromHostMemory(this->mData.data()); + } + /** + * Maps data from the data vector into the Host Visible GPU memory. It + * requires the tensor to be of staging type for it to work. + */ + void mapDataIntoHostMemory() override { + KP_LOG_DEBUG("Kompute TensorView mapDataIntoHostMemory copying data"); + + this->rawMapDataIntoHostMemory(this->mData.data()); + } + + private: + // -------------- ALWAYS OWNED RESOURCES + std::vector mData; + +}; + + } // End namespace kp namespace kp { @@ -1883,9 +2054,22 @@ class Manager * @param tensorType The type of tensor to initialize * @returns Shared pointer with initialised tensor */ - std::shared_ptr tensor( - const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice); + template + std::shared_ptr> tensor( + const std::vector& data, + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); + + std::shared_ptr> tensor{ new kp::TensorView( + this->mPhysicalDevice, this->mDevice, data, tensorType) }; + + if (this->mManageResources) { + this->mManagedTensors.push_back(tensor); + } + + return tensor; + } /** * Create a managed algorithm that will be destroyed by this manager diff --git a/src/Manager.cpp b/src/Manager.cpp index e3bdbb2d9..5d6bf4cd4 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -395,21 +395,6 @@ Manager::createDevice(const std::vector& familyQueueIndices, KP_LOG_DEBUG("Kompute Manager compute queue obtained"); } -std::shared_ptr -Manager::tensor(const std::vector& data, Tensor::TensorTypes tensorType) -{ - KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); - - std::shared_ptr tensor{ new kp::Tensor( - this->mPhysicalDevice, this->mDevice, data, tensorType) }; - - if (this->mManageResources) { - 
this->mManagedTensors.push_back(tensor); - } - - return tensor; -} - std::shared_ptr Manager::algorithm(const std::vector>& tensors, const std::vector& spirv, diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp index 6950a4cd2..16e3017e9 100644 --- a/src/OpTensorCopy.cpp +++ b/src/OpTensorCopy.cpp @@ -13,6 +13,14 @@ OpTensorCopy::OpTensorCopy(const std::vector>& tensors) throw std::runtime_error( "Kompute OpTensorCopy called with less than 2 tensor"); } + + kp::Tensor::TensorDataTypes dataType = this->mTensors[0]->dataType(); + for (const std::shared_ptr& tensor : tensors) { + if (tensor->dataType() != dataType) { + throw std::runtime_error(fmt::format("Attempting to copy tensors of different types from {} to {}", + dataType, tensor->dataType())); + } + } } OpTensorCopy::~OpTensorCopy() @@ -43,9 +51,16 @@ OpTensorCopy::postEval(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute OpTensorCopy postEval called"); + // TODO: Simplify with a copyRawData + uint32_t size = this->mTensors[0]->size(); + uint32_t dataTypeMemSize = this->mTensors[0]->dataTypeMemorySize(); + uint32_t memSize = size * dataTypeMemSize; + void* data = operator new(memSize); + this->mTensors[0]->getRawData(data); + // Copy the data from the first tensor into all the tensors for (size_t i = 1; i < this->mTensors.size(); i++) { - this->mTensors[i]->setData(this->mTensors[0]->data()); + this->mTensors[i]->setRawData(data, size, dataTypeMemSize); } } diff --git a/src/Tensor.cpp b/src/Tensor.cpp index dc254fe83..4f188d5af 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -17,8 +17,10 @@ Tensor::Tensor(std::shared_ptr physicalDevice, this->mPhysicalDevice = physicalDevice; this->mDevice = device; + this->mDataType = dataType; + this->mTensorType = tensorType; - this->rebuild(data, elementTotalCount, elementMemorySize, dataType, tensorType); + this->rebuild(data, elementTotalCount, elementMemorySize); } Tensor::~Tensor() @@ -34,16 +36,12 @@ Tensor::~Tensor() void Tensor::rebuild(void* 
data, uint32_t elementTotalCount, - uint32_t elementMemorySize, - const TensorDataTypes& dataType, - TensorTypes tensorType) + uint32_t elementMemorySize) { KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", elementTotalCount); this->mSize = elementTotalCount; - this->mElementMemorySize = elementMemorySize; - this->mDataType = dataType; - this->mTensorType = tensorType; + this->mDataTypeMemorySize = elementMemorySize; if (this->mPrimaryBuffer || this->mPrimaryMemory) { KP_LOG_DEBUG( @@ -439,4 +437,34 @@ Tensor::destroy() KP_LOG_DEBUG("Kompute Tensor successful destroy()"); } +template<> +Tensor::TensorDataTypes +TensorView::dataType() { + return Tensor::TensorDataTypes::eBool; +} + +template<> +Tensor::TensorDataTypes +TensorView::dataType() { + return Tensor::TensorDataTypes::eInt; +} + +template<> +Tensor::TensorDataTypes +TensorView::dataType() { + return Tensor::TensorDataTypes::eUnsignedInt; +} + +template<> +Tensor::TensorDataTypes +TensorView::dataType() { + return Tensor::TensorDataTypes::eFloat; +} + +template<> +Tensor::TensorDataTypes +TensorView::dataType() { + return Tensor::TensorDataTypes::eDouble; +} + } diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index d9c6ddf3e..d27bccacc 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -74,9 +74,22 @@ class Manager * @param tensorType The type of tensor to initialize * @returns Shared pointer with initialised tensor */ - std::shared_ptr tensor( - const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice); + template + std::shared_ptr> tensor( + const std::vector& data, + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); + + std::shared_ptr> tensor{ new kp::TensorView( + this->mPhysicalDevice, this->mDevice, data, tensorType) }; + + if (this->mManageResources) { + this->mManagedTensors.push_back(tensor); + } + + return tensor; + } 
/** * Create a managed algorithm that will be destroyed by this manager diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index f2583708d..03e52d43d 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -51,14 +51,14 @@ class Tensor void* data, uint32_t elementTotalCount, uint32_t elementMemorySize, - const TensorDataTypes& dataType = TensorDataTypes::eFloat, + const TensorDataTypes& dataType, const TensorTypes& tensorType = TensorTypes::eDevice); /** * Destructor which is in charge of freeing vulkan resources unless they * have been provided externally. */ - ~Tensor(); + virtual ~Tensor(); /** * Returns the size/magnitude of the Tensor, which will be the total number @@ -68,12 +68,17 @@ class Tensor */ // TODO: move to cpp virtual uint32_t size() { - return this->mElementMemorySize; + return this->mSize; + } + + // TODO: move to cpp + virtual uint32_t dataTypeMemorySize() { + return this->mDataTypeMemorySize; } // TODO: move to cpp virtual uint32_t memorySize() { - return this->mSize * this->mElementMemorySize; + return this->mSize * this->mDataTypeMemorySize; } /** @@ -98,10 +103,24 @@ class Tensor // TODO: Decide whether this is one we prefer to have also overriden in the underlying tensorView // TODO: move to cpp - void getRawData(void* data) { + virtual void getRawData(void* data) { this->rawMapDataFromHostMemory(data); } + /** + * Sets / resets the vector data of the tensor. This function does not + * perform any copies into GPU memory and is only performed on the host. 
+ */ + virtual void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) { + if (elementTotalCount * elementMemorySize != this->memorySize()) { + throw std::runtime_error( + "Kompute Tensor Cannot set data of different sizes"); + } + this->mSize = elementTotalCount; + this->mDataTypeMemorySize = elementMemorySize; + this->rawMapDataIntoHostMemory(data); + } + /** * Function to trigger reinitialisation of the tensor buffer and memory with * new data as well as new potential device type. @@ -111,9 +130,7 @@ class Tensor */ void rebuild(void* data, uint32_t elementTotalCount, - uint32_t elementMemorySize, - const TensorDataTypes& dataType = TensorDataTypes::eFloat, - TensorTypes tensorType = TensorTypes::eDevice); + uint32_t elementMemorySize); /** * Destroys and frees the GPU resources which include the buffer and memory. @@ -134,19 +151,6 @@ class Tensor */ TensorTypes tensorType(); - /** - * Sets / resets the vector data of the tensor. This function does not - * perform any copies into GPU memory and is only performed on the host. 
- */ - void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) { - if (elementTotalCount * elementMemorySize != this->memorySize()) { - throw std::runtime_error( - "Kompute Tensor Cannot set data of different sizes"); - } - this->mSize = elementTotalCount; - this->mElementMemorySize = elementMemorySize; - this->rawMapDataIntoHostMemory(data); - } /** * Records a copy from the memory of the tensor provided to the current @@ -211,46 +215,7 @@ class Tensor */ vk::DescriptorBufferInfo constructDescriptorBufferInfo(); - private: - // -------------- NEVER OWNED RESOURCES - std::shared_ptr mPhysicalDevice; - std::shared_ptr mDevice; - - // -------------- OPTIONALLY OWNED RESOURCES - std::shared_ptr mPrimaryBuffer; - bool mFreePrimaryBuffer = false; - std::shared_ptr mStagingBuffer; - bool mFreeStagingBuffer = false; - std::shared_ptr mPrimaryMemory; - bool mFreePrimaryMemory = false; - std::shared_ptr mStagingMemory; - bool mFreeStagingMemory = false; - - // -------------- ALWAYS OWNED RESOURCES - TensorTypes mTensorType; - TensorDataTypes mDataType; - uint32_t mSize; - uint32_t mElementMemorySize; - - void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer - void createBuffer(std::shared_ptr buffer, - vk::BufferUsageFlags bufferUsageFlags); - void allocateBindMemory(std::shared_ptr buffer, - std::shared_ptr memory, - vk::MemoryPropertyFlags memoryPropertyFlags); - void recordCopyBuffer(const vk::CommandBuffer& commandBuffer, - std::shared_ptr bufferFrom, - std::shared_ptr bufferTo, - vk::DeviceSize bufferSize, - vk::BufferCopy copyRegion, - bool createBarrier); - - // Private util functions - vk::BufferUsageFlags getPrimaryBufferUsageFlags(); - vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags(); - vk::BufferUsageFlags getStagingBufferUsageFlags(); - vk::MemoryPropertyFlags getStagingMemoryPropertyFlags(); - + protected: void rawMapDataFromHostMemory(void* data) { KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); 
@@ -300,6 +265,46 @@ class Tensor this->mDevice->flushMappedMemoryRanges(1, &mappedRange); this->mDevice->unmapMemory(*hostVisibleMemory); } + private: + // -------------- NEVER OWNED RESOURCES + std::shared_ptr mPhysicalDevice; + std::shared_ptr mDevice; + + // -------------- OPTIONALLY OWNED RESOURCES + std::shared_ptr mPrimaryBuffer; + bool mFreePrimaryBuffer = false; + std::shared_ptr mStagingBuffer; + bool mFreeStagingBuffer = false; + std::shared_ptr mPrimaryMemory; + bool mFreePrimaryMemory = false; + std::shared_ptr mStagingMemory; + bool mFreeStagingMemory = false; + + // -------------- ALWAYS OWNED RESOURCES + TensorTypes mTensorType; + TensorDataTypes mDataType; + uint32_t mSize; + uint32_t mDataTypeMemorySize; + + void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer + void createBuffer(std::shared_ptr buffer, + vk::BufferUsageFlags bufferUsageFlags); + void allocateBindMemory(std::shared_ptr buffer, + std::shared_ptr memory, + vk::MemoryPropertyFlags memoryPropertyFlags); + void recordCopyBuffer(const vk::CommandBuffer& commandBuffer, + std::shared_ptr bufferFrom, + std::shared_ptr bufferTo, + vk::DeviceSize bufferSize, + vk::BufferCopy copyRegion, + bool createBarrier); + + // Private util functions + vk::BufferUsageFlags getPrimaryBufferUsageFlags(); + vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags(); + vk::BufferUsageFlags getStagingBufferUsageFlags(); + vk::MemoryPropertyFlags getStagingMemoryPropertyFlags(); + }; // TODO: Limit T to be only float, bool, double, etc @@ -310,15 +315,21 @@ class TensorView: public Tensor TensorView(std::shared_ptr physicalDevice, std::shared_ptr device, const std::vector& data, - const TensorTypes& tensorType = TensorTypes::eDevice); + const TensorTypes& tensorType = TensorTypes::eDevice) + : Tensor(physicalDevice, device, (void*)data.data(), data.size(), sizeof(T), this->dataType()) + { - ~TensorView(); + } + + ~TensorView() { + + } void rebuild(const std::vector& data, TensorTypes tensorType 
= TensorTypes::eDevice) { this->mData = data; - Tensor::rebuild(data.data(), data.size(), sizeof(T), this->dataType(), tensorType); + Tensor::rebuild(data.data(), data.size(), sizeof(T)); } std::vector& data() { @@ -338,17 +349,25 @@ class TensorView: public Tensor this->mData = data; - this->setRawData(this->mData.data(), this->mData.size(), sizeof(T), this->dataType()); + Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T)); + } + + void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) override + { + assert(elementMemorySize == sizeof(T)); + + this->mData = { (T*)data, ((T*)data) + elementTotalCount }; + Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T)); } TensorDataTypes dataType() override; uint32_t size() override { - return this->mData->size(); + return this->mData.size(); } uint32_t memorySize() override { - return this->mData->size() * sizeof(T); + return this->mData.size() * sizeof(T); } /** @@ -376,34 +395,4 @@ class TensorView: public Tensor }; -template<> -Tensor::TensorDataTypes -TensorView::dataType() { - return Tensor::TensorDataTypes::eBool; -} - -template<> -Tensor::TensorDataTypes -TensorView::dataType() { - return Tensor::TensorDataTypes::eInt; -} - -template<> -Tensor::TensorDataTypes -TensorView::dataType() { - return Tensor::TensorDataTypes::eUnsignedInt; -} - -template<> -Tensor::TensorDataTypes -TensorView::dataType() { - return Tensor::TensorDataTypes::eFloat; -} - -template<> -Tensor::TensorDataTypes -TensorView::dataType() { - return Tensor::TensorDataTypes::eDouble; -} - } // End namespace kp From 956883e0cdee22541b284892ff3f53efcf562cbf Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 6 Mar 2021 17:44:17 +0000 Subject: [PATCH 05/16] Working iteration of kompute tensor with multiple types --- examples/array_multiplication/src/Main.cpp | 8 +++++++- single_include/kompute/Kompute.hpp | 15 +++++++++++---- src/Tensor.cpp | 1 + src/include/kompute/Tensor.hpp | 14
+++++++++++--- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp index dacc67f89..812a5039f 100755 --- a/examples/array_multiplication/src/Main.cpp +++ b/examples/array_multiplication/src/Main.cpp @@ -7,6 +7,11 @@ int main() { +#if KOMPUTE_ENABLE_SPDLOG + spdlog::set_level( + static_cast(SPDLOG_ACTIVE_LEVEL)); +#endif + kp::Manager mgr; auto tensorInA = mgr.tensor({ 2.0, 4.0, 6.0 }); @@ -39,7 +44,8 @@ int main() mgr.sequence() ->record(params) ->record(algo) - ->record(params); + ->record(params) + ->eval(); // prints "Output { 0 4 12 }" std::cout<< "Output: { "; diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 41e9434f8..989f58c20 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1108,16 +1108,17 @@ class TensorView: public Tensor const TensorTypes& tensorType = TensorTypes::eDevice) : Tensor(physicalDevice, device, (void*)data.data(), data.size(), sizeof(T), this->dataType()) { - + KP_LOG_DEBUG("Kompute TensorView constructor with data size {}", data.size()); + this->mData = data; } ~TensorView() { - + KP_LOG_DEBUG("Kompute TensorView destructor"); } void rebuild(const std::vector& data, TensorTypes tensorType = TensorTypes::eDevice) { - + KP_LOG_DEBUG("Kompute TensorView creating with data size {}", data.size()); this->mData = data; Tensor::rebuild(data.data(), data.size(), sizeof(T)); } @@ -1131,6 +1132,7 @@ class TensorView: public Tensor } void setData(const std::vector& data) { + KP_LOG_DEBUG("Kompute TensorView setting data with data size {}", data.size()); if (data.size() != this->mData.size()) { throw std::runtime_error( @@ -1144,6 +1146,8 @@ class TensorView: public Tensor void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) override { + KP_LOG_DEBUG("Kompute TensorView setRawData with data size {}", elementTotalCount); + 
assert(elementMemorySize == sizeof(T)); this->mData = { (T*)data, ((T*)data) + elementTotalCount }; @@ -1153,10 +1157,14 @@ class TensorView: public Tensor TensorDataTypes dataType() override; uint32_t size() override { + KP_LOG_DEBUG("Kompute TensorView retrieving size: {}", this->mData.size()); + return this->mData.size(); } uint32_t memorySize() override { + KP_LOG_DEBUG("Kompute TensorView retrieving memory size: {}", this->mData.size() * sizeof(T)); + return this->mData.size() * sizeof(T); } @@ -1185,7 +1193,6 @@ class TensorView: public Tensor }; - } // End namespace kp namespace kp { diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 4f188d5af..d3225987e 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -170,6 +170,7 @@ Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, vk::DescriptorBufferInfo Tensor::constructDescriptorBufferInfo() { + KP_LOG_WARN("Kompute Tensor construct descriptor buffer info size {}", this->memorySize()); vk::DeviceSize bufferSize = this->memorySize(); return vk::DescriptorBufferInfo(*this->mPrimaryBuffer, 0, // offset diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index 03e52d43d..6af4682d6 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -318,16 +318,17 @@ class TensorView: public Tensor const TensorTypes& tensorType = TensorTypes::eDevice) : Tensor(physicalDevice, device, (void*)data.data(), data.size(), sizeof(T), this->dataType()) { - + KP_LOG_DEBUG("Kompute TensorView constructor with data size {}", data.size()); + this->mData = data; } ~TensorView() { - + KP_LOG_DEBUG("Kompute TensorView destructor"); } void rebuild(const std::vector& data, TensorTypes tensorType = TensorTypes::eDevice) { - + KP_LOG_DEBUG("Kompute TensorView creating with data size {}", data.size()); this->mData = data; Tensor::rebuild(data.data(), data.size(), sizeof(T)); } @@ -341,6 +342,7 @@ class TensorView: public Tensor } void setData(const std::vector& data) { + 
KP_LOG_DEBUG("Kompute TensorView setting data with data size {}", data.size()); if (data.size() != this->mData.size()) { throw std::runtime_error( @@ -354,6 +356,8 @@ class TensorView: public Tensor void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) override { + KP_LOG_DEBUG("Kompute TensorView setRawData with data size {}", elementTotalCount); + assert(elementMemorySize == sizeof(T)); this->mData = { (T*)data, ((T*)data) + elementTotalCount }; @@ -363,10 +367,14 @@ class TensorView: public Tensor TensorDataTypes dataType() override; uint32_t size() override { + KP_LOG_DEBUG("Kompute TensorView retrieving size: {}", this->mData.size()); + return this->mData.size(); } uint32_t memorySize() override { + KP_LOG_DEBUG("Kompute TensorView retrieving memory size: {}", this->mData.size() * sizeof(T)); + return this->mData.size() * sizeof(T); } From cf7d46cd23a0e76cbc181eb58dbb059a73f22ee2 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 6 Mar 2021 19:42:41 +0000 Subject: [PATCH 06/16] Initial simplification of interface implementation --- single_include/kompute/Kompute.hpp | 215 ++++++++-------------------- src/OpTensorCopy.cpp | 11 +- src/OpTensorSyncDevice.cpp | 6 - src/OpTensorSyncLocal.cpp | 5 - src/Tensor.cpp | 49 +------ src/include/kompute/Tensor.hpp | 216 ++++++++--------------------- 6 files changed, 135 insertions(+), 367 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 989f58c20..496e6f198 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -851,67 +851,6 @@ class Tensor */ virtual ~Tensor(); - /** - * Returns the size/magnitude of the Tensor, which will be the total number - * of elements across all dimensions - * - * @return Unsigned integer representing the total number of elements - */ - // TODO: move to cpp - virtual uint32_t size() { - return this->mSize; - } - - // TODO: move to cpp - virtual uint32_t
dataTypeMemorySize() { - return this->mDataTypeMemorySize; - } - - // TODO: move to cpp - virtual uint32_t memorySize() { - return this->mSize * this->mDataTypeMemorySize; - } - - /** - * Retrieve the underlying data type of the Tensor - * - * @return Data type of tensor of type kp::Tensor::TensorDataTypes - */ - virtual TensorDataTypes dataType() { - return this->mDataType; - } - - /** - * Maps data from the Host Visible GPU memory into the data vector. It - * requires the Tensor to be of staging type for it to work. - */ - virtual void mapDataFromHostMemory(); - /** - * Maps data from the data vector into the Host Visible GPU memory. It - * requires the tensor to be of staging type for it to work. - */ - virtual void mapDataIntoHostMemory(); - - // TODO: Decide whether this is one we prefer to have also overriden in the underlying tensorView - // TODO: move to cpp - virtual void getRawData(void* data) { - this->rawMapDataFromHostMemory(data); - } - - /** - * Sets / resets the vector data of the tensor. This function does not - * perform any copies into GPU memory and is only performed on the host. - */ - virtual void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) { - if (elementTotalCount * elementMemorySize != this->memorySize()) { - throw std::runtime_error( - "Kompute Tensor Cannot set data of different sizes"); - } - this->mSize = elementTotalCount; - this->mDataTypeMemorySize = elementMemorySize; - this->rawMapDataIntoHostMemory(data); - } - /** * Function to trigger reinitialisation of the tensor buffer and memory with * new data as well as new potential device type. 
@@ -1005,8 +944,53 @@ class Tensor */ vk::DescriptorBufferInfo constructDescriptorBufferInfo(); - protected: - void rawMapDataFromHostMemory(void* data) { + /** + * Returns the size/magnitude of the Tensor, which will be the total number + * of elements across all dimensions + * + * @return Unsigned integer representing the total number of elements + */ + // TODO: move to cpp + uint32_t size() { + return this->mSize; + } + + // TODO: move to cpp + uint32_t dataTypeMemorySize() { + return this->mDataTypeMemorySize; + } + + // TODO: move to cpp + uint32_t memorySize() { + return this->mSize * this->mDataTypeMemorySize; + } + + /** + * Retrieve the underlying data type of the Tensor + * + * @return Data type of tensor of type kp::Tensor::TensorDataTypes + */ + TensorDataTypes dataType() { + return this->mDataType; + } + + // TODO: move to cpp + const void* getRawData() { + return this->mRawData; + } + + /** + * Sets / resets the vector data of the tensor. This function does not + * perform any copies into GPU memory and is only performed on the host. 
+ */ + void setRawData(const void* data) + { + // Copy data + memcpy(this->mRawData, data, this->memorySize()); + } + + private: + void rawMapData() { KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); @@ -1023,39 +1007,12 @@ class Tensor } vk::DeviceSize bufferSize = this->memorySize(); - void* mapped = this->mDevice->mapMemory( + // Given we request coherent host memory we don't need to invalidate / flush + this->mRawData = this->mDevice->mapMemory( *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize); - this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange); - memcpy(data, mapped, bufferSize); - this->mDevice->unmapMemory(*hostVisibleMemory); } - void rawMapDataIntoHostMemory(void* data) { - KP_LOG_DEBUG("Kompute Tensor mapping data into host buffer"); - - std::shared_ptr hostVisibleMemory = nullptr; - - if (this->mTensorType == TensorTypes::eHost) { - hostVisibleMemory = this->mPrimaryMemory; - } else if (this->mTensorType == TensorTypes::eDevice) { - hostVisibleMemory = this->mStagingMemory; - } else { - KP_LOG_WARN( - "Kompute Tensor mapping data not supported on storage tensor"); - return; - } - - vk::DeviceSize bufferSize = this->memorySize(); - - void* mapped = this->mDevice->mapMemory( - *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); - memcpy(mapped, data, bufferSize); - vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); - this->mDevice->flushMappedMemoryRanges(1, &mappedRange); - this->mDevice->unmapMemory(*hostVisibleMemory); - } - private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mPhysicalDevice; std::shared_ptr mDevice; @@ -1075,6 +1032,7 @@ class Tensor TensorDataTypes mDataType; uint32_t mSize; uint32_t mDataTypeMemorySize; + void* mRawData; void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer void createBuffer(std::shared_ptr buffer, @@ -1106,91 +1064,40 @@ class TensorView: public Tensor 
std::shared_ptr device, const std::vector& data, const TensorTypes& tensorType = TensorTypes::eDevice) - : Tensor(physicalDevice, device, (void*)data.data(), data.size(), sizeof(T), this->dataType()) + : Tensor(physicalDevice, + device, + (void*)data.data(), + data.size(), + sizeof(T), + this->dataType()) { KP_LOG_DEBUG("Kompute TensorView constructor with data size {}", data.size()); - this->mData = data; } ~TensorView() { KP_LOG_DEBUG("Kompute TensorView destructor"); } - void rebuild(const std::vector& data, - TensorTypes tensorType = TensorTypes::eDevice) { - KP_LOG_DEBUG("Kompute TensorView creating with data size {}", data.size()); - this->mData = data; - Tensor::rebuild(data.data(), data.size(), sizeof(T)); - } - - std::vector& data() { - return this->mData; + std::vector data() { + return { (T*)this->getRawData(), ((T*)this->getRawData()) + this->size() }; } T& operator[](int index) { - return this->mData[index]; + return ((T*)this->mRawData)[index]; } void setData(const std::vector& data) { + KP_LOG_DEBUG("Kompute TensorView setting data with data size {}", data.size()); - if (data.size() != this->mData.size()) { + if (data.size() != this->mSize) { throw std::runtime_error( "Kompute TensorView Cannot set data of different sizes"); } - this->mData = data; - Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T)); } - void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) override - { - KP_LOG_DEBUG("Kompute TensorView setRawData with data size {}", elementTotalCount); - - assert(elementMemorySize == sizeof(T)); - - this->mData = { (T*)data, ((T*)data) + elementTotalCount }; - Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T)); - } - - TensorDataTypes dataType() override; - - uint32_t size() override { - KP_LOG_DEBUG("Kompute TensorView retrieving size: {}", this->mData.size()); - - return this->mData.size(); - } - - uint32_t memorySize() override { - KP_LOG_DEBUG("Kompute TensorView retrieving 
memory size: {}", this->mData.size() * sizeof(T)); - - return this->mData.size() * sizeof(T); - } - - /** - * Maps data from the Host Visible GPU memory into the data vector. It - * requires the Tensor to be of staging type for it to work. - */ - void mapDataFromHostMemory() override { - KP_LOG_DEBUG("Kompute TensorView mapDataFromHostMemory copying data"); - - this->rawMapDataFromHostMemory(this->mData.data()); - } - /** - * Maps data from the data vector into the Host Visible GPU memory. It - * requires the tensor to be of staging type for it to work. - */ - void mapDataIntoHostMemory() override { - KP_LOG_DEBUG("Kompute TensorView mapDataIntoHostMemory copying data"); - - this->rawMapDataIntoHostMemory(this->mData.data()); - } - - private: - // -------------- ALWAYS OWNED RESOURCES - std::vector mData; - }; } // End namespace kp diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp index 16e3017e9..ce53455a3 100644 --- a/src/OpTensorCopy.cpp +++ b/src/OpTensorCopy.cpp @@ -15,11 +15,17 @@ OpTensorCopy::OpTensorCopy(const std::vector>& tensors) } kp::Tensor::TensorDataTypes dataType = this->mTensors[0]->dataType(); + uint32_t size = this->mTensors[0]->size(); for (const std::shared_ptr& tensor : tensors) { if (tensor->dataType() != dataType) { throw std::runtime_error(fmt::format("Attempting to copy tensors of different types from {} to {}", dataType, tensor->dataType())); } + if (tensor->size() != size) { + throw std::runtime_error(fmt::format("Attempting to copy tensors of different sizes from {} to {}", + size, tensor->size())); + + } } } @@ -55,12 +61,11 @@ OpTensorCopy::postEval(const vk::CommandBuffer& commandBuffer) uint32_t size = this->mTensors[0]->size(); uint32_t dataTypeMemSize = this->mTensors[0]->dataTypeMemorySize(); uint32_t memSize = size * dataTypeMemSize; - void* data = operator new(memSize); - this->mTensors[0]->getRawData(data); + const void* data = this->mTensors[0]->getRawData(); // Copy the data from the first tensor into all the tensors 
for (size_t i = 1; i < this->mTensors.size(); i++) { - this->mTensors[i]->setRawData(data, size, dataTypeMemSize); + this->mTensors[i]->setRawData(data); } } diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp index 85cefde77..4dbfaec83 100644 --- a/src/OpTensorSyncDevice.cpp +++ b/src/OpTensorSyncDevice.cpp @@ -41,12 +41,6 @@ OpTensorSyncDevice::preEval(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute OpTensorSyncDevice preEval called"); - // Performing sync of data as eval can be called multiple times with same op - for (size_t i = 0; i < this->mTensors.size(); i++) { - if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) { - this->mTensors[i]->mapDataIntoHostMemory(); - } - } } void diff --git a/src/OpTensorSyncLocal.cpp b/src/OpTensorSyncLocal.cpp index 092490d15..f7e15ffd5 100644 --- a/src/OpTensorSyncLocal.cpp +++ b/src/OpTensorSyncLocal.cpp @@ -48,11 +48,6 @@ OpTensorSyncLocal::postEval(const vk::CommandBuffer& commandBuffer) KP_LOG_DEBUG("Kompute OpTensorSyncLocal postEval called"); KP_LOG_DEBUG("Kompute OpTensorSyncLocal mapping data into tensor local"); - for (size_t i = 0; i < this->mTensors.size(); i++) { - if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) { - this->mTensors[i]->mapDataFromHostMemory(); - } - } } } diff --git a/src/Tensor.cpp b/src/Tensor.cpp index d3225987e..4d7dcd2db 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -50,7 +50,9 @@ Tensor::rebuild(void* data, } this->allocateMemoryCreateGPUResources(); - this->rawMapDataIntoHostMemory(data); + this->rawMapData(); + + memcpy(this->mRawData, data, this->memorySize()); } Tensor::TensorTypes @@ -177,18 +179,6 @@ Tensor::constructDescriptorBufferInfo() bufferSize); } -void -Tensor::mapDataFromHostMemory() -{ - KP_LOG_DEBUG("Kompute Tensor mapDataFromHostMemory - SKIPPING"); -} - -void -Tensor::mapDataIntoHostMemory() -{ - KP_LOG_DEBUG("Kompute Tensor mapDataIntoHostMemory - SKIPPING"); -} - vk::BufferUsageFlags 
Tensor::getPrimaryBufferUsageFlags() { @@ -219,7 +209,8 @@ Tensor::getPrimaryMemoryPropertyFlags() return vk::MemoryPropertyFlagBits::eDeviceLocal; break; case TensorTypes::eHost: - return vk::MemoryPropertyFlagBits::eHostVisible; + return vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent; break; case TensorTypes::eStorage: return vk::MemoryPropertyFlagBits::eDeviceLocal; @@ -438,34 +429,4 @@ Tensor::destroy() KP_LOG_DEBUG("Kompute Tensor successful destroy()"); } -template<> -Tensor::TensorDataTypes -TensorView::dataType() { - return Tensor::TensorDataTypes::eBool; -} - -template<> -Tensor::TensorDataTypes -TensorView::dataType() { - return Tensor::TensorDataTypes::eInt; -} - -template<> -Tensor::TensorDataTypes -TensorView::dataType() { - return Tensor::TensorDataTypes::eUnsignedInt; -} - -template<> -Tensor::TensorDataTypes -TensorView::dataType() { - return Tensor::TensorDataTypes::eFloat; -} - -template<> -Tensor::TensorDataTypes -TensorView::dataType() { - return Tensor::TensorDataTypes::eDouble; -} - } diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index 6af4682d6..f041d57e3 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -60,67 +60,6 @@ class Tensor */ virtual ~Tensor(); - /** - * Returns the size/magnitude of the Tensor, which will be the total number - * of elements across all dimensions - * - * @return Unsigned integer representing the total number of elements - */ - // TODO: move to cpp - virtual uint32_t size() { - return this->mSize; - } - - // TODO: move to cpp - virtual uint32_t dataTypeMemorySize() { - return this->mDataTypeMemorySize; - } - - // TODO: move to cpp - virtual uint32_t memorySize() { - return this->mSize * this->mDataTypeMemorySize; - } - - /** - * Retrieve the underlying data type of the Tensor - * - * @return Data type of tensor of type kp::Tensor::TensorDataTypes - */ - virtual TensorDataTypes dataType() { - return 
this->mDataType; - } - - /** - * Maps data from the Host Visible GPU memory into the data vector. It - * requires the Tensor to be of staging type for it to work. - */ - virtual void mapDataFromHostMemory(); - /** - * Maps data from the data vector into the Host Visible GPU memory. It - * requires the tensor to be of staging type for it to work. - */ - virtual void mapDataIntoHostMemory(); - - // TODO: Decide whether this is one we prefer to have also overriden in the underlying tensorView - // TODO: move to cpp - virtual void getRawData(void* data) { - this->rawMapDataFromHostMemory(data); - } - - /** - * Sets / resets the vector data of the tensor. This function does not - * perform any copies into GPU memory and is only performed on the host. - */ - virtual void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) { - if (elementTotalCount * elementMemorySize != this->memorySize()) { - throw std::runtime_error( - "Kompute Tensor Cannot set data of different sizes"); - } - this->mSize = elementTotalCount; - this->mDataTypeMemorySize = elementMemorySize; - this->rawMapDataIntoHostMemory(data); - } - /** * Function to trigger reinitialisation of the tensor buffer and memory with * new data as well as new potential device type. @@ -151,7 +90,6 @@ class Tensor */ TensorTypes tensorType(); - /** * Records a copy from the memory of the tensor provided to the current * thensor. 
This is intended to pass memory into a processing, to perform @@ -215,8 +153,53 @@ class Tensor */ vk::DescriptorBufferInfo constructDescriptorBufferInfo(); - protected: - void rawMapDataFromHostMemory(void* data) { + /** + * Returns the size/magnitude of the Tensor, which will be the total number + * of elements across all dimensions + * + * @return Unsigned integer representing the total number of elements + */ + // TODO: move to cpp + uint32_t size() { + return this->mSize; + } + + // TODO: move to cpp + uint32_t dataTypeMemorySize() { + return this->mDataTypeMemorySize; + } + + // TODO: move to cpp + uint32_t memorySize() { + return this->mSize * this->mDataTypeMemorySize; + } + + /** + * Retrieve the underlying data type of the Tensor + * + * @return Data type of tensor of type kp::Tensor::TensorDataTypes + */ + TensorDataTypes dataType() { + return this->mDataType; + } + + // TODO: move to cpp + const void* getRawData() { + return this->mRawData; + } + + /** + * Sets / resets the vector data of the tensor. This function does not + * perform any copies into GPU memory and is only performed on the host. 
+ */ + void setRawData(const void* data) + { + // Copy data + memcpy(this->mRawData, data, this->memorySize()); + } + + private: + void rawMapData() { KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); @@ -233,39 +216,12 @@ class Tensor } vk::DeviceSize bufferSize = this->memorySize(); - void* mapped = this->mDevice->mapMemory( + // Given we request coherent host memory we don't need to invalidate / flush + this->mRawData = this->mDevice->mapMemory( *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize); - this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange); - memcpy(data, mapped, bufferSize); - this->mDevice->unmapMemory(*hostVisibleMemory); } - void rawMapDataIntoHostMemory(void* data) { - KP_LOG_DEBUG("Kompute Tensor mapping data into host buffer"); - - std::shared_ptr hostVisibleMemory = nullptr; - - if (this->mTensorType == TensorTypes::eHost) { - hostVisibleMemory = this->mPrimaryMemory; - } else if (this->mTensorType == TensorTypes::eDevice) { - hostVisibleMemory = this->mStagingMemory; - } else { - KP_LOG_WARN( - "Kompute Tensor mapping data not supported on storage tensor"); - return; - } - - vk::DeviceSize bufferSize = this->memorySize(); - - void* mapped = this->mDevice->mapMemory( - *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); - memcpy(mapped, data, bufferSize); - vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); - this->mDevice->flushMappedMemoryRanges(1, &mappedRange); - this->mDevice->unmapMemory(*hostVisibleMemory); - } - private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mPhysicalDevice; std::shared_ptr mDevice; @@ -285,6 +241,7 @@ class Tensor TensorDataTypes mDataType; uint32_t mSize; uint32_t mDataTypeMemorySize; + void* mRawData; void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer void createBuffer(std::shared_ptr buffer, @@ -316,91 +273,40 @@ class TensorView: public Tensor 
std::shared_ptr device, const std::vector& data, const TensorTypes& tensorType = TensorTypes::eDevice) - : Tensor(physicalDevice, device, (void*)data.data(), data.size(), sizeof(T), this->dataType()) + : Tensor(physicalDevice, + device, + (void*)data.data(), + data.size(), + sizeof(T), + this->dataType()) { KP_LOG_DEBUG("Kompute TensorView constructor with data size {}", data.size()); - this->mData = data; } ~TensorView() { KP_LOG_DEBUG("Kompute TensorView destructor"); } - void rebuild(const std::vector& data, - TensorTypes tensorType = TensorTypes::eDevice) { - KP_LOG_DEBUG("Kompute TensorView creating with data size {}", data.size()); - this->mData = data; - Tensor::rebuild(data.data(), data.size(), sizeof(T)); - } - - std::vector& data() { - return this->mData; + std::vector data() { + return { (T*)this->getRawData(), ((T*)this->getRawData()) + this->size() }; } T& operator[](int index) { - return this->mData[index]; + return ((T*)this->mRawData)[index]; } void setData(const std::vector& data) { + KP_LOG_DEBUG("Kompute TensorView setting data with data size {}", data.size()); - if (data.size() != this->mData.size()) { + if (data.size() != this->mSize) { throw std::runtime_error( "Kompute TensorView Cannot set data of different sizes"); } - this->mData = data; - Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T)); } - void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) override - { - KP_LOG_DEBUG("Kompute TensorView setRawData with data size {}", elementTotalCount); - - assert(elementMemorySize == sizeof(T)); - - this->mData = { (T*)data, ((T*)data) + elementTotalCount }; - Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T)); - } - - TensorDataTypes dataType() override; - - uint32_t size() override { - KP_LOG_DEBUG("Kompute TensorView retrieving size: {}", this->mData.size()); - - return this->mData.size(); - } - - uint32_t memorySize() override { - KP_LOG_DEBUG("Kompute TensorView retrieving 
memory size: {}", this->mData.size() * sizeof(T)); - - return this->mData.size() * sizeof(T); - } - - /** - * Maps data from the Host Visible GPU memory into the data vector. It - * requires the Tensor to be of staging type for it to work. - */ - void mapDataFromHostMemory() override { - KP_LOG_DEBUG("Kompute TensorView mapDataFromHostMemory copying data"); - - this->rawMapDataFromHostMemory(this->mData.data()); - } - /** - * Maps data from the data vector into the Host Visible GPU memory. It - * requires the tensor to be of staging type for it to work. - */ - void mapDataIntoHostMemory() override { - KP_LOG_DEBUG("Kompute TensorView mapDataIntoHostMemory copying data"); - - this->rawMapDataIntoHostMemory(this->mData.data()); - } - - private: - // -------------- ALWAYS OWNED RESOURCES - std::vector mData; - }; } // End namespace kp From f02b9d6915c9e05b71b426c5f609178c5946aa04 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 7 Mar 2021 08:00:19 +0000 Subject: [PATCH 07/16] Working implementation with tests --- examples/array_multiplication/src/Main.cpp | 8 +-- examples/logistic_regression/src/Main.cpp | 21 ++++--- single_include/kompute/Kompute.hpp | 72 +++++++++++++++------- src/OpTensorCopy.cpp | 2 +- src/Tensor.cpp | 30 +++++++++ src/include/kompute/Manager.hpp | 11 +++- src/include/kompute/Tensor.hpp | 61 +++++++++++------- test/TestAsyncOperations.cpp | 12 ++-- test/TestDestroy.cpp | 18 +++--- test/TestLogisticRegression.cpp | 38 ++++++------ test/TestManager.cpp | 24 ++++---- test/TestMultipleAlgoExecutions.cpp | 24 ++++---- test/TestOpShadersFromStringAndFile.cpp | 24 ++++---- test/TestOpTensorCopy.cpp | 56 ++++++++--------- test/TestOpTensorCreate.cpp | 14 ++--- test/TestOpTensorSync.cpp | 16 ++--- test/TestPushConstant.cpp | 10 +-- test/TestSequence.cpp | 10 +-- test/TestSpecializationConstant.cpp | 8 +-- test/TestTensor.cpp | 4 +- test/TestWorkgroup.cpp | 50 +++++++-------- 21 files changed, 297 insertions(+), 216 deletions(-) diff --git 
a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp index 812a5039f..95e0781ad 100755 --- a/examples/array_multiplication/src/Main.cpp +++ b/examples/array_multiplication/src/Main.cpp @@ -14,9 +14,9 @@ int main() kp::Manager mgr; - auto tensorInA = mgr.tensor({ 2.0, 4.0, 6.0 }); - auto tensorInB = mgr.tensor({ 0.0, 1.0, 2.0 }); - auto tensorOut = mgr.tensor({ 0.0, 0.0, 0.0 }); + auto tensorInA = mgr.tensor({ 2.0, 4.0, 6.0 }); + auto tensorInB = mgr.tensor({ 0.0, 1.0, 2.0 }); + auto tensorOut = mgr.tensor({ 0.0, 0.0, 0.0 }); std::string shader(R"( // The version to use @@ -49,7 +49,7 @@ int main() // prints "Output { 0 4 12 }" std::cout<< "Output: { "; - for (const float& elem : tensorOut->data()) { + for (const float& elem : tensorOut->vector()) { std::cout << elem << " "; } std::cout << "}" << std::endl; diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp index 3b6ec11e1..c7cc827ba 100755 --- a/examples/logistic_regression/src/Main.cpp +++ b/examples/logistic_regression/src/Main.cpp @@ -17,19 +17,19 @@ int main() kp::Manager mgr; - auto xI = mgr.tensor({ 0, 1, 1, 1, 1 }); - auto xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); + auto xI = mgr.tensor({ 0, 1, 1, 1, 1 }); + auto xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); - auto y = mgr.tensor({ 0, 0, 0, 1, 1 }); + auto y = mgr.tensor({ 0, 0, 0, 1, 1 }); - auto wIn = mgr.tensor({ 0.001, 0.001 }); - auto wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); - auto wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); + auto wIn = mgr.tensor({ 0.001, 0.001 }); + auto wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); + auto wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); - auto bIn = mgr.tensor({ 0 }); - auto bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + auto bIn = mgr.tensor({ 0 }); + auto bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); - auto lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + auto lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); std::vector> params = { xI, xJ, y, wIn, wOutI, wOutJ, @@ -40,7 +40,8 @@ int main() 
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); - std::shared_ptr algo = mgr.algorithm(params, spirv); + std::shared_ptr algo = mgr.algorithm( + params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 })); mgr.sequence()->eval(params); diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 496e6f198..df9549aab 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -974,11 +974,21 @@ class Tensor return this->mDataType; } - // TODO: move to cpp - const void* getRawData() { + void* rawData() { return this->mRawData; } + // TODO: move to cpp + template + T* data() { + return this->mRawData; + } + + template + std::vector vector() { + return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; + } + /** * Sets / resets the vector data of the tensor. This function does not * perform any copies into GPU memory and is only performed on the host. 
@@ -989,6 +999,14 @@ class Tensor memcpy(this->mRawData, data, this->memorySize()); } + protected: + // -------------- ALWAYS OWNED RESOURCES + TensorTypes mTensorType; + TensorDataTypes mDataType; + uint32_t mSize; + uint32_t mDataTypeMemorySize; + void* mRawData; + private: void rawMapData() { @@ -1027,13 +1045,6 @@ class Tensor std::shared_ptr mStagingMemory; bool mFreeStagingMemory = false; - // -------------- ALWAYS OWNED RESOURCES - TensorTypes mTensorType; - TensorDataTypes mDataType; - uint32_t mSize; - uint32_t mDataTypeMemorySize; - void* mRawData; - void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer void createBuffer(std::shared_ptr buffer, vk::BufferUsageFlags bufferUsageFlags); @@ -1057,10 +1068,11 @@ class Tensor // TODO: Limit T to be only float, bool, double, etc template -class TensorView: public Tensor +class TensorT: public Tensor { + public: - TensorView(std::shared_ptr physicalDevice, + TensorT(std::shared_ptr physicalDevice, std::shared_ptr device, const std::vector& data, const TensorTypes& tensorType = TensorTypes::eDevice) @@ -1069,35 +1081,42 @@ class TensorView: public Tensor (void*)data.data(), data.size(), sizeof(T), - this->dataType()) + this->dataType(), + tensorType) { - KP_LOG_DEBUG("Kompute TensorView constructor with data size {}", data.size()); + KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size()); } - ~TensorView() { - KP_LOG_DEBUG("Kompute TensorView destructor"); + ~TensorT() { + KP_LOG_DEBUG("Kompute TensorT destructor"); } - std::vector data() { - return { (T*)this->getRawData(), ((T*)this->getRawData()) + this->size() }; + T* data() { + return (T*)this->mRawData; + } + + std::vector vector() { + return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; } T& operator[](int index) { - return ((T*)this->mRawData)[index]; + return *(((T*)this->mRawData) + index); } void setData(const std::vector& data) { - KP_LOG_DEBUG("Kompute TensorView setting data with data size {}", 
data.size()); + KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", data.size()); if (data.size() != this->mSize) { throw std::runtime_error( - "Kompute TensorView Cannot set data of different sizes"); + "Kompute TensorT Cannot set data of different sizes"); } - Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T)); + Tensor::setRawData(data.data()); } + TensorDataTypes dataType(); + }; } // End namespace kp @@ -1969,13 +1988,13 @@ class Manager * @returns Shared pointer with initialised tensor */ template - std::shared_ptr> tensor( + std::shared_ptr> tensorT( const std::vector& data, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) { KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); - std::shared_ptr> tensor{ new kp::TensorView( + std::shared_ptr> tensor{ new kp::TensorT( this->mPhysicalDevice, this->mDevice, data, tensorType) }; if (this->mManageResources) { @@ -1985,6 +2004,13 @@ class Manager return tensor; } + std::shared_ptr> tensor( + const std::vector& data, + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + return this->tensorT(data, tensorType); + } + /** * Create a managed algorithm that will be destroyed by this manager * if it hasn't been destroyed by its reference count going to zero. 
diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp index ce53455a3..c93830902 100644 --- a/src/OpTensorCopy.cpp +++ b/src/OpTensorCopy.cpp @@ -61,7 +61,7 @@ OpTensorCopy::postEval(const vk::CommandBuffer& commandBuffer) uint32_t size = this->mTensors[0]->size(); uint32_t dataTypeMemSize = this->mTensors[0]->dataTypeMemorySize(); uint32_t memSize = size * dataTypeMemSize; - const void* data = this->mTensors[0]->getRawData(); + void* data = this->mTensors[0]->rawData(); // Copy the data from the first tensor into all the tensors for (size_t i = 1; i < this->mTensors.size(); i++) { diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 4d7dcd2db..335e48959 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -429,4 +429,34 @@ Tensor::destroy() KP_LOG_DEBUG("Kompute Tensor successful destroy()"); } +template<> +Tensor::TensorDataTypes +TensorT::dataType() { + return Tensor::TensorDataTypes::eBool; +} + +template<> +Tensor::TensorDataTypes +TensorT::dataType() { + return Tensor::TensorDataTypes::eInt; +} + +template<> +Tensor::TensorDataTypes +TensorT::dataType() { + return Tensor::TensorDataTypes::eUnsignedInt; +} + +template<> +Tensor::TensorDataTypes +TensorT::dataType() { + return Tensor::TensorDataTypes::eFloat; +} + +template<> +Tensor::TensorDataTypes +TensorT::dataType() { + return Tensor::TensorDataTypes::eDouble; +} + } diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index d27bccacc..c39f5d6b5 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -75,13 +75,13 @@ class Manager * @returns Shared pointer with initialised tensor */ template - std::shared_ptr> tensor( + std::shared_ptr> tensorT( const std::vector& data, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) { KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); - std::shared_ptr> tensor{ new kp::TensorView( + std::shared_ptr> tensor{ new kp::TensorT( this->mPhysicalDevice, this->mDevice, data, tensorType) }; if 
(this->mManageResources) { @@ -91,6 +91,13 @@ class Manager return tensor; } + std::shared_ptr> tensor( + const std::vector& data, + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + return this->tensorT(data, tensorType); + } + /** * Create a managed algorithm that will be destroyed by this manager * if it hasn't been destroyed by its reference count going to zero. diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index f041d57e3..898a2df08 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -183,11 +183,21 @@ class Tensor return this->mDataType; } - // TODO: move to cpp - const void* getRawData() { + void* rawData() { return this->mRawData; } + // TODO: move to cpp + template + T* data() { + return this->mRawData; + } + + template + std::vector vector() { + return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; + } + /** * Sets / resets the vector data of the tensor. This function does not * perform any copies into GPU memory and is only performed on the host. 
@@ -198,6 +208,14 @@ class Tensor memcpy(this->mRawData, data, this->memorySize()); } + protected: + // -------------- ALWAYS OWNED RESOURCES + TensorTypes mTensorType; + TensorDataTypes mDataType; + uint32_t mSize; + uint32_t mDataTypeMemorySize; + void* mRawData; + private: void rawMapData() { @@ -236,13 +254,6 @@ class Tensor std::shared_ptr mStagingMemory; bool mFreeStagingMemory = false; - // -------------- ALWAYS OWNED RESOURCES - TensorTypes mTensorType; - TensorDataTypes mDataType; - uint32_t mSize; - uint32_t mDataTypeMemorySize; - void* mRawData; - void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer void createBuffer(std::shared_ptr buffer, vk::BufferUsageFlags bufferUsageFlags); @@ -266,10 +277,11 @@ class Tensor // TODO: Limit T to be only float, bool, double, etc template -class TensorView: public Tensor +class TensorT: public Tensor { + public: - TensorView(std::shared_ptr physicalDevice, + TensorT(std::shared_ptr physicalDevice, std::shared_ptr device, const std::vector& data, const TensorTypes& tensorType = TensorTypes::eDevice) @@ -278,35 +290,42 @@ class TensorView: public Tensor (void*)data.data(), data.size(), sizeof(T), - this->dataType()) + this->dataType(), + tensorType) { - KP_LOG_DEBUG("Kompute TensorView constructor with data size {}", data.size()); + KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size()); } - ~TensorView() { - KP_LOG_DEBUG("Kompute TensorView destructor"); + ~TensorT() { + KP_LOG_DEBUG("Kompute TensorT destructor"); } - std::vector data() { - return { (T*)this->getRawData(), ((T*)this->getRawData()) + this->size() }; + T* data() { + return (T*)this->mRawData; + } + + std::vector vector() { + return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; } T& operator[](int index) { - return ((T*)this->mRawData)[index]; + return *(((T*)this->mRawData) + index); } void setData(const std::vector& data) { - KP_LOG_DEBUG("Kompute TensorView setting data with data size {}", 
data.size()); + KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", data.size()); if (data.size() != this->mSize) { throw std::runtime_error( - "Kompute TensorView Cannot set data of different sizes"); + "Kompute TensorT Cannot set data of different sizes"); } - Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T)); + Tensor::setRawData(data.data()); } + TensorDataTypes dataType(); + }; } // End namespace kp diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index 2f8c7d819..7feaaa30e 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -73,7 +73,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) sq->eval(inputsSyncB); for (uint32_t i = 0; i < numParallel; i++) { - EXPECT_EQ(inputsSyncB[i]->data(), resultSync); + EXPECT_EQ(inputsSyncB[i]->vector(), resultSync); } kp::Manager mgrAsync(0, { 0, 2 }); @@ -111,7 +111,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) sq->eval({ inputsAsyncB }); for (uint32_t i = 0; i < numParallel; i++) { - EXPECT_EQ(inputsAsyncB[i]->data(), resultAsync); + EXPECT_EQ((inputsAsyncB[i]->vector()), resultAsync); } // The speedup should be at least 40% @@ -152,8 +152,8 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor(data); - std::shared_ptr tensorB = mgr.tensor(data); + std::shared_ptr> tensorA = mgr.tensor(data); + std::shared_ptr> tensorB = mgr.tensor(data); std::shared_ptr sq1 = mgr.sequence(); std::shared_ptr sq2 = mgr.sequence(); @@ -172,6 +172,6 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) sq1->evalAsync({ tensorA, tensorB }); sq1->evalAwait(); - EXPECT_EQ(tensorA->data(), resultAsync); - EXPECT_EQ(tensorB->data(), resultAsync); + EXPECT_EQ(tensorA->vector(), resultAsync); + EXPECT_EQ(tensorB->vector(), resultAsync); } diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp index 0b948d64f..0ccfdb0f8 100644 --- a/test/TestDestroy.cpp +++ b/test/TestDestroy.cpp @@ 
-5,9 +5,9 @@ TEST(TestDestroy, TestDestroyTensorSingle) { - std::shared_ptr tensorA = nullptr; + std::shared_ptr> tensorA = nullptr; - std::string shader(R"( + std::string shader(R"( #version 450 layout (local_size_x = 1) in; layout(set = 0, binding = 0) buffer a { float pa[]; }; @@ -39,13 +39,13 @@ TEST(TestDestroy, TestDestroyTensorSingle) } EXPECT_FALSE(tensorA->isInit()); } - EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 1, 1, 1 })); } TEST(TestDestroy, TestDestroyTensorVector) { - std::shared_ptr tensorA = nullptr; - std::shared_ptr tensorB = nullptr; + std::shared_ptr> tensorA = nullptr; + std::shared_ptr> tensorB = nullptr; std::string shader(R"( #version 450 @@ -84,13 +84,13 @@ TEST(TestDestroy, TestDestroyTensorVector) EXPECT_FALSE(tensorB->isInit()); } } - EXPECT_EQ(tensorA->data(), std::vector({ 2, 2, 2 })); - EXPECT_EQ(tensorB->data(), std::vector({ 3, 3, 3 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 2, 2, 2 })); + EXPECT_EQ(tensorB->vector(), std::vector({ 3, 3, 3 })); } TEST(TestDestroy, TestDestroySequenceSingle) { - std::shared_ptr tensorA = nullptr; + std::shared_ptr> tensorA = nullptr; std::string shader(R"( #version 450 @@ -123,5 +123,5 @@ TEST(TestDestroy, TestDestroySequenceSingle) EXPECT_FALSE(sq->isInit()); } } - EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 1, 1, 1 })); } diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index 980273246..a4402637f 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -14,19 +14,19 @@ TEST(TestLogisticRegression, TestMainLogisticRegression) { kp::Manager mgr; - std::shared_ptr xI = mgr.tensor({ 0, 1, 1, 1, 1 }); - std::shared_ptr xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); + std::shared_ptr> xI = mgr.tensor({ 0, 1, 1, 1, 1 }); + std::shared_ptr> xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr y = mgr.tensor({ 0, 0, 0, 1, 1 }); + 
std::shared_ptr> y = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr wIn = mgr.tensor({ 0.001, 0.001 }); - std::shared_ptr wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> wIn = mgr.tensor({ 0.001, 0.001 }); + std::shared_ptr> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr bIn = mgr.tensor({ 0 }); - std::shared_ptr bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> bIn = mgr.tensor({ 0 }); + std::shared_ptr> bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); std::vector> params = { xI, xJ, y, wIn, wOutI, wOutJ, @@ -88,21 +88,21 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) { kp::Manager mgr; - std::shared_ptr xI = mgr.tensor({ 0, 1, 1, 1, 1 }); - std::shared_ptr xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); + std::shared_ptr> xI = mgr.tensor({ 0, 1, 1, 1, 1 }); + std::shared_ptr> xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr y = mgr.tensor({ 0, 0, 0, 1, 1 }); + std::shared_ptr> y = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr wIn = + std::shared_ptr> wIn = mgr.tensor({ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost); - std::shared_ptr wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr bIn = + std::shared_ptr> bIn = mgr.tensor({ 0 }, kp::Tensor::TensorTypes::eHost); - std::shared_ptr bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); std::vector> params = { xI, xJ, y, wIn, wOutI, wOutJ, @@ -136,8 +136,6 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) 
wIn->data()[1] -= learningRate * wOutJ->data()[j]; bIn->data()[0] -= learningRate * bOut->data()[j]; } - wIn->mapDataIntoHostMemory(); - bIn->mapDataIntoHostMemory(); } // Based on the inputs the outputs should be at least: diff --git a/test/TestManager.cpp b/test/TestManager.cpp index ce055ff8c..f759208aa 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -7,9 +7,9 @@ TEST(TestManager, EndToEndOpMultEvalFlow) { kp::Manager mgr; - std::shared_ptr tensorLHS = mgr.tensor({ 0, 1, 2 }); - std::shared_ptr tensorRHS = mgr.tensor({ 2, 4, 6 }); - std::shared_ptr tensorOutput = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorLHS = mgr.tensor({ 0, 1, 2 }); + std::shared_ptr> tensorRHS = mgr.tensor({ 2, 4, 6 }); + std::shared_ptr> tensorOutput = mgr.tensor({ 0, 0, 0 }); std::vector> params = { tensorLHS, tensorRHS, @@ -20,16 +20,16 @@ TEST(TestManager, EndToEndOpMultEvalFlow) ->eval(params, mgr.algorithm()) ->eval(params); - EXPECT_EQ(tensorOutput->data(), std::vector({ 0, 4, 12 })); + EXPECT_EQ(tensorOutput->vector(), std::vector({ 0, 4, 12 })); } TEST(TestManager, EndToEndOpMultSeqFlow) { kp::Manager mgr; - std::shared_ptr tensorLHS = mgr.tensor({ 0, 1, 2 }); - std::shared_ptr tensorRHS = mgr.tensor({ 2, 4, 6 }); - std::shared_ptr tensorOutput = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorLHS = mgr.tensor({ 0, 1, 2 }); + std::shared_ptr> tensorRHS = mgr.tensor({ 2, 4, 6 }); + std::shared_ptr> tensorOutput = mgr.tensor({ 0, 0, 0 }); std::vector> params = { tensorLHS, tensorRHS, @@ -41,16 +41,16 @@ TEST(TestManager, EndToEndOpMultSeqFlow) ->record(params) ->eval(); - EXPECT_EQ(tensorOutput->data(), std::vector({ 0, 4, 12 })); + EXPECT_EQ(tensorOutput->vector(), std::vector({ 0, 4, 12 })); } TEST(TestManager, TestMultipleSequences) { kp::Manager mgr; - std::shared_ptr tensorLHS = mgr.tensor({ 0, 1, 2 }); - std::shared_ptr tensorRHS = mgr.tensor({ 2, 4, 6 }); - std::shared_ptr tensorOutput = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorLHS = 
mgr.tensor({ 0, 1, 2 }); + std::shared_ptr> tensorRHS = mgr.tensor({ 2, 4, 6 }); + std::shared_ptr> tensorOutput = mgr.tensor({ 0, 0, 0 }); std::vector> params = { tensorLHS, tensorRHS, @@ -60,5 +60,5 @@ TEST(TestManager, TestMultipleSequences) mgr.sequence()->eval(params, mgr.algorithm()); mgr.sequence()->eval(params); - EXPECT_EQ(tensorOutput->data(), std::vector({ 0, 4, 12 })); + EXPECT_EQ(tensorOutput->vector(), std::vector({ 0, 4, 12 })); } diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 63dd5f7fe..b934f7e83 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -64,8 +64,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) sq->evalAwait(); - EXPECT_EQ(tensorOutA->data(), std::vector({ 4, 8, 12 })); - EXPECT_EQ(tensorOutB->data(), std::vector({ 10, 10, 10 })); + EXPECT_EQ(tensorOutA->vector(), std::vector({ 4, 8, 12 })); + EXPECT_EQ(tensorOutB->vector(), std::vector({ 10, 10, 10 })); } TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) @@ -73,7 +73,7 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 0, 0, 0 }); std::string shader(R"( #version 450 @@ -96,14 +96,14 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) ->eval(); } - EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); } TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) { kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 0, 0, 0 }); std::string shader(R"( #version 450 @@ -131,7 +131,7 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) mgr.sequence()->record({ tensorA })->eval(); - EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); } TEST(TestMultipleAlgoExecutions, MultipleSequences) 
@@ -139,7 +139,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 0, 0, 0 }); std::string shader(R"( #version 450 @@ -167,14 +167,14 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) sq->record({ tensorA })->eval(); - EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); } TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) { kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 0, 0, 0 }); std::string shader(R"( #version 450 @@ -198,12 +198,12 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) sq->record({ tensorA })->eval(); - EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); } TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope) { - std::shared_ptr tensorA = nullptr; + std::shared_ptr> tensorA = nullptr; { std::shared_ptr sq = nullptr; @@ -236,5 +236,5 @@ TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope) } } - EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); } diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index e766c8efb..a1f8eda99 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -9,8 +9,8 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor) { kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 3, 4, 5 }); - std::shared_ptr tensorB = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 3, 4, 5 }); + std::shared_ptr> tensorB = mgr.tensor({ 0, 0, 0 }); std::string shader(R"( #version 450 @@ -36,16 +36,16 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor) ->eval(mgr.algorithm(params, spirv)) ->eval(params); - 
EXPECT_EQ(tensorA->data(), std::vector({ 0, 1, 2 })); - EXPECT_EQ(tensorB->data(), std::vector({ 3, 4, 5 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 0, 1, 2 })); + EXPECT_EQ(tensorB->vector(), std::vector({ 3, 4, 5 })); } TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor) { kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 3, 4, 5 }); - std::shared_ptr tensorB = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 3, 4, 5 }); + std::shared_ptr> tensorB = mgr.tensor({ 0, 0, 0 }); std::vector spirv = std::vector( (uint32_t*) @@ -62,8 +62,8 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor) ->eval(mgr.algorithm(params, spirv)) ->eval(params); - EXPECT_EQ(tensorA->data(), std::vector({ 0, 1, 2 })); - EXPECT_EQ(tensorB->data(), std::vector({ 3, 4, 5 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 0, 1, 2 })); + EXPECT_EQ(tensorB->vector(), std::vector({ 3, 4, 5 })); } // TODO: Add support to read from file for shader @@ -71,8 +71,8 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor) //{ // kp::Manager mgr; // -// std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; -// std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; +// std::shared_ptr> tensorA{ new kp::Tensor({ 3, 4, 5 }) }; +// std::shared_ptr> tensorB{ new kp::Tensor({ 0, 0, 0 }) }; // mgr.rebuild({ tensorA, tensorB }); // // mgr.evalOpDefault( @@ -81,6 +81,6 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor) // // mgr.evalOpDefault({ tensorA, tensorB }); // -// EXPECT_EQ(tensorA->data(), std::vector({ 0, 1, 2 })); -// EXPECT_EQ(tensorB->data(), std::vector({ 3, 4, 5 })); +// EXPECT_EQ(tensorA->vector(), std::vector({ 0, 1, 2 })); +// EXPECT_EQ(tensorB->vector(), std::vector({ 3, 4, 5 })); //} diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp index 85e0b545b..6978eeeea 100644 --- a/test/TestOpTensorCopy.cpp +++ b/test/TestOpTensorCopy.cpp @@ -11,8 +11,8 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) std::vector 
testVecA{ 1, 2, 3 }; std::vector testVecB{ 0, 0, 0 }; - std::shared_ptr tensorA = mgr.tensor(testVecA); - std::shared_ptr tensorB = mgr.tensor(testVecB); + std::shared_ptr> tensorA = mgr.tensor(testVecA); + std::shared_ptr> tensorB = mgr.tensor(testVecB); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -22,8 +22,8 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) ->eval({ tensorA, tensorB }) ->eval({ tensorA, tensorB }); - // Making sure the GPU holds the same data - EXPECT_EQ(tensorA->data(), tensorB->data()); + // Making sure the GPU holds the same vector + EXPECT_EQ(tensorA->vector(), tensorB->vector()); } TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) @@ -35,9 +35,9 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) std::vector testVecB{ 0, 0, 0 }; std::vector testVecC{ 0, 0, 0 }; - std::shared_ptr tensorA = mgr.tensor(testVecA); - std::shared_ptr tensorB = mgr.tensor(testVecB); - std::shared_ptr tensorC = mgr.tensor(testVecC); + std::shared_ptr> tensorA = mgr.tensor(testVecA); + std::shared_ptr> tensorB = mgr.tensor(testVecB); + std::shared_ptr> tensorC = mgr.tensor(testVecC); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -47,14 +47,14 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) ->eval({ tensorA, tensorB, tensorC }) ->eval({ tensorA, tensorB, tensorC }); - EXPECT_EQ(tensorA->data(), tensorB->data()); - EXPECT_EQ(tensorA->data(), tensorC->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); + EXPECT_EQ(tensorA->vector(), tensorC->vector()); - // Making sure the GPU holds the same data + // Making sure the GPU holds the same vector mgr.sequence()->eval({ tensorB, tensorC }); - EXPECT_EQ(tensorA->data(), tensorB->data()); - EXPECT_EQ(tensorA->data(), tensorC->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); + EXPECT_EQ(tensorA->vector(), tensorC->vector()); } TEST(TestOpTensorCopy, CopyDeviceToHostTensor) @@ -65,8 +65,8 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) 
std::vector testVecA{ 3, 4, 5 }; std::vector testVecB{ 0, 0, 0 }; - std::shared_ptr tensorA = mgr.tensor(testVecA); - std::shared_ptr tensorB = + std::shared_ptr> tensorA = mgr.tensor(testVecA); + std::shared_ptr> tensorB = mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost); // Only calling sync on device type tensor @@ -77,11 +77,11 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) mgr.sequence()->eval({ tensorA, tensorB }); - EXPECT_EQ(tensorA->data(), tensorB->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); - // Making sure the GPU holds the same data + // Making sure the GPU holds the same vector mgr.sequence()->eval({ tensorB }); - EXPECT_EQ(tensorA->data(), tensorB->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); } TEST(TestOpTensorCopy, CopyHostToDeviceTensor) @@ -92,9 +92,9 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) std::vector testVecA{ 4, 5, 6 }; std::vector testVecB{ 0, 0, 0 }; - std::shared_ptr tensorA = + std::shared_ptr> tensorA = mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost); - std::shared_ptr tensorB = mgr.tensor(testVecB); + std::shared_ptr> tensorB = mgr.tensor(testVecB); // Only calling sync on device type tensor mgr.sequence()->eval({ tensorA, tensorB }); @@ -104,11 +104,11 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) mgr.sequence()->eval({ tensorA, tensorB }); - EXPECT_EQ(tensorA->data(), tensorB->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); - // Making sure the GPU holds the same data + // Making sure the GPU holds the same vector mgr.sequence()->eval({ tensorB }); - EXPECT_EQ(tensorA->data(), tensorB->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); } TEST(TestOpTensorCopy, CopyHostToHostTensor) @@ -119,9 +119,9 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) std::vector testVecA{ 5, 6, 7 }; std::vector testVecB{ 0, 0, 0 }; - std::shared_ptr tensorA = + std::shared_ptr> tensorA = mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost); - std::shared_ptr tensorB = + 
std::shared_ptr> tensorB = mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost); EXPECT_TRUE(tensorA->isInit()); @@ -131,11 +131,11 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) ->eval({ tensorA }) ->eval({ tensorA, tensorB }); - EXPECT_EQ(tensorA->data(), tensorB->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); - // Making sure the GPU holds the same data + // Making sure the GPU holds the same vector mgr.sequence()->eval({ tensorB }); - EXPECT_EQ(tensorA->data(), tensorB->data()); + EXPECT_EQ(tensorA->vector(), tensorB->vector()); } TEST(TestOpTensorCopy, SingleTensorShouldFail) @@ -145,7 +145,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail) std::vector testVecA{ 6, 7, 8 }; - std::shared_ptr tensorA = + std::shared_ptr> tensorA = mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost); EXPECT_TRUE(tensorA->isInit()); diff --git a/test/TestOpTensorCreate.cpp b/test/TestOpTensorCreate.cpp index 14153427e..7ba1be615 100644 --- a/test/TestOpTensorCreate.cpp +++ b/test/TestOpTensorCreate.cpp @@ -6,7 +6,7 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp) { std::vector testVecA{ 9, 8, 7 }; - std::shared_ptr tensorA = nullptr; + std::shared_ptr> tensorA = nullptr; { kp::Manager mgr; @@ -15,7 +15,7 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp) EXPECT_TRUE(tensorA->isInit()); - EXPECT_EQ(tensorA->data(), testVecA); + EXPECT_EQ(tensorA->vector(), testVecA); } EXPECT_FALSE(tensorA->isInit()); @@ -29,11 +29,11 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore) kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor(testVecA); - std::shared_ptr tensorB = mgr.tensor(testVecB); + std::shared_ptr> tensorA = mgr.tensor(testVecA); + std::shared_ptr> tensorB = mgr.tensor(testVecB); - EXPECT_EQ(tensorA->data(), testVecA); - EXPECT_EQ(tensorB->data(), testVecB); + EXPECT_EQ(tensorA->vector(), testVecA); + EXPECT_EQ(tensorB->vector(), testVecB); tensorA->destroy(); tensorB->destroy(); @@ -49,7 +49,7 @@ TEST(TestOpTensorCreate, 
ExceptionOnZeroSizeTensor) kp::Manager mgr; try { - std::shared_ptr tensorA = mgr.tensor(testVecA); + std::shared_ptr> tensorA = mgr.tensor(testVecA); } catch (const std::runtime_error& err) { // check exception ASSERT_TRUE(std::string(err.what()).find("zero-sized") != diff --git a/test/TestOpTensorSync.cpp b/test/TestOpTensorSync.cpp index 55e02ad13..02271c618 100644 --- a/test/TestOpTensorSync.cpp +++ b/test/TestOpTensorSync.cpp @@ -11,7 +11,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor) std::vector testVecPreA{ 0, 0, 0 }; std::vector testVecPostA{ 9, 8, 7 }; - std::shared_ptr tensorA = mgr.tensor(testVecPreA); + std::shared_ptr> tensorA = mgr.tensor(testVecPreA); EXPECT_TRUE(tensorA->isInit()); @@ -21,7 +21,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor) mgr.sequence()->eval({ tensorA }); - EXPECT_EQ(tensorA->data(), testVecPostA); + EXPECT_EQ(tensorA->vector(), testVecPostA); } TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor) @@ -31,9 +31,9 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor) std::vector testVec{ 9, 8, 7 }; - std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); - std::shared_ptr tensorB = mgr.tensor({ 0, 0, 0 }); - std::shared_ptr tensorC = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorB = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorC = mgr.tensor({ 0, 0, 0 }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -47,7 +47,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor) mgr.sequence()->eval({ tensorA, tensorB, tensorC }); - EXPECT_EQ(tensorA->data(), testVec); - EXPECT_EQ(tensorB->data(), testVec); - EXPECT_EQ(tensorC->data(), testVec); + EXPECT_EQ(tensorA->vector(), testVec); + EXPECT_EQ(tensorB->vector(), testVec); + EXPECT_EQ(tensorC->vector(), testVec); } diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp index b37fe4d72..9599596ed 100644 --- a/test/TestPushConstant.cpp +++ b/test/TestPushConstant.cpp @@ 
-29,7 +29,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride) { kp::Manager mgr; - std::shared_ptr tensor = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensor = mgr.tensor({ 0, 0, 0 }); std::shared_ptr algo = mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0, 0.0, 0.0 }); @@ -42,7 +42,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride) sq->eval(algo, kp::Constants{ 0.3, 0.2, 0.1 }); sq->eval({ tensor }); - EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 })); + EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 })); } } } @@ -72,7 +72,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride) { kp::Manager mgr; - std::shared_ptr tensor = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensor = mgr.tensor({ 0, 0, 0 }); std::shared_ptr algo = mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.1, 0.2, 0.3 }); @@ -85,7 +85,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride) sq->eval(algo, kp::Constants{ 0.3, 0.2, 0.1 }); sq->eval({ tensor }); - EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 })); + EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 })); } } } @@ -115,7 +115,7 @@ TEST(TestPushConstants, TestConstantsWrongSize) { kp::Manager mgr; - std::shared_ptr tensor = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensor = mgr.tensor({ 0, 0, 0 }); std::shared_ptr algo = mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 }); diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp index 7d70a477b..090a6317b 100644 --- a/test/TestSequence.cpp +++ b/test/TestSequence.cpp @@ -60,9 +60,9 @@ TEST(TestSequence, RerecordSequence) std::shared_ptr sq = mgr.sequence(); - std::shared_ptr tensorA = mgr.tensor({1, 2, 3}); - std::shared_ptr tensorB = mgr.tensor({2, 2, 2}); - std::shared_ptr tensorOut = mgr.tensor({0, 0, 0}); + std::shared_ptr> tensorA = mgr.tensor({1, 2, 3}); + std::shared_ptr> tensorB = mgr.tensor({2, 2, 2}); + std::shared_ptr> tensorOut = mgr.tensor({0, 
0, 0}); sq->eval({ tensorA, tensorB, tensorOut }); @@ -90,7 +90,7 @@ TEST(TestSequence, RerecordSequence) sq->eval(); - EXPECT_EQ(tensorOut->data(), std::vector({2, 4, 6})); + EXPECT_EQ(tensorOut->vector(), std::vector({2, 4, 6})); algo->rebuild({tensorOut, tensorA, tensorB}, spirv); @@ -98,7 +98,7 @@ TEST(TestSequence, RerecordSequence) sq->rerecord(); sq->eval(); - EXPECT_EQ(tensorB->data(), std::vector({2, 8, 18})); + EXPECT_EQ(tensorB->vector(), std::vector({2, 8, 18})); } diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp index 2c6e284d2..fe40fb5ea 100644 --- a/test/TestSpecializationConstant.cpp +++ b/test/TestSpecializationConstant.cpp @@ -25,8 +25,8 @@ TEST(TestSpecializationConstants, TestTwoConstants) { kp::Manager mgr; - std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); - std::shared_ptr tensorB = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorB = mgr.tensor({ 0, 0, 0 }); std::vector> params = { tensorA, tensorB }; @@ -42,8 +42,8 @@ TEST(TestSpecializationConstants, TestTwoConstants) ->record(params) ->eval(); - EXPECT_EQ(tensorA->data(), std::vector({ 5, 5, 5 })); - EXPECT_EQ(tensorB->data(), std::vector({ 0.3, 0.3, 0.3 })); + EXPECT_EQ(tensorA->vector(), std::vector({ 5, 5, 5 })); + EXPECT_EQ(tensorB->vector(), std::vector({ 0.3, 0.3, 0.3 })); } } } diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp index d33367722..c267024db 100644 --- a/test/TestTensor.cpp +++ b/test/TestTensor.cpp @@ -7,7 +7,7 @@ TEST(TestTensor, ConstructorData) { kp::Manager mgr; std::vector vec{ 0, 1, 2 }; - std::shared_ptr tensor = mgr.tensor(vec); + std::shared_ptr> tensor = mgr.tensor(vec); EXPECT_EQ(tensor->size(), vec.size()); - EXPECT_EQ(tensor->data(), vec); + EXPECT_EQ(tensor->vector(), vec); } diff --git a/test/TestWorkgroup.cpp b/test/TestWorkgroup.cpp index 3eb9147a1..8836840a6 100644 --- a/test/TestWorkgroup.cpp +++ b/test/TestWorkgroup.cpp @@ -7,8 +7,8 @@ 
TEST(TestWorkgroup, TestSimpleWorkgroup) { - std::shared_ptr tensorA = nullptr; - std::shared_ptr tensorB = nullptr; + std::shared_ptr> tensorA = nullptr; + std::shared_ptr> tensorB = nullptr; { std::shared_ptr sq = nullptr; @@ -39,29 +39,29 @@ TEST(TestWorkgroup, TestSimpleWorkgroup) sq->record(algorithm); sq->record(params); sq->eval(); + + std::vector expectedA = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15 + }; + + std::vector expectedB = { + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, + 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, + 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, + 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, + 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 + }; + + EXPECT_EQ(tensorA->vector(), expectedA); + EXPECT_EQ(tensorB->vector(), expectedB); } } - - std::vector expectedA = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, - 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, - 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, - 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15 - }; - - std::vector expectedB = { - 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, - 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, - 4, 
5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, - 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, - 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, - 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 - }; - - EXPECT_EQ(tensorA->data(), expectedA); - EXPECT_EQ(tensorB->data(), expectedB); } From 1cc369cb191db337d30d588e6f8ebeabb813e0ec Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 7 Mar 2021 08:02:30 +0000 Subject: [PATCH 08/16] Mark pointer invalid after destroy tensor --- src/Tensor.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 335e48959..aaf6ba388 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -360,6 +360,10 @@ Tensor::destroy() { KP_LOG_DEBUG("Kompute Tensor started destroy()"); + this->mRawData = nullptr; + this->mSize = 0; + this->mDataTypeMemorySize = 0; + if (!this->mDevice) { KP_LOG_WARN( "Kompute Tensor destructor reached with null Device pointer"); From bb64b2b37c44038c01a27bd1e985cfc4c92d00fe Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 7 Mar 2021 08:10:42 +0000 Subject: [PATCH 09/16] Updated destroy and amended tests to ensure they test tensor in scope --- src/Tensor.cpp | 11 +++++++++-- src/include/kompute/Tensor.hpp | 26 +++++++++++++++++++++++++- test/TestDestroy.cpp | 11 +++++++---- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/Tensor.cpp b/src/Tensor.cpp index aaf6ba388..8b96be163 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -50,7 +50,7 @@ Tensor::rebuild(void* data, } this->allocateMemoryCreateGPUResources(); - this->rawMapData(); + this->mapRawData(); memcpy(this->mRawData, data, this->memorySize()); } @@ -64,7 +64,10 @@ Tensor::tensorType() bool Tensor::isInit() { - return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory; + return this->mDevice + && this->mPrimaryBuffer + && this->mPrimaryMemory + && this->mRawData; } @@ -360,6 +363,7 @@ Tensor::destroy() { 
KP_LOG_DEBUG("Kompute Tensor started destroy()"); + // Setting raw data to null regardless whether device is available to invalidate Tensor this->mRawData = nullptr; this->mSize = 0; this->mDataTypeMemorySize = 0; @@ -370,6 +374,9 @@ Tensor::destroy() return; } + // Unmap the current memory data + this->unmapRawData(); + if (this->mFreePrimaryBuffer) { if (!this->mPrimaryBuffer) { KP_LOG_WARN("Kompose Tensor expected to destroy primary buffer " diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index 898a2df08..efc3cda18 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -217,7 +217,7 @@ class Tensor void* mRawData; private: - void rawMapData() { + void mapRawData() { KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); @@ -234,12 +234,36 @@ class Tensor } vk::DeviceSize bufferSize = this->memorySize(); + // Given we request coherent host memory we don't need to invalidate / flush this->mRawData = this->mDevice->mapMemory( *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); + vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize); } + void unmapRawData() { + + KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); + + std::shared_ptr hostVisibleMemory = nullptr; + + if (this->mTensorType == TensorTypes::eHost) { + hostVisibleMemory = this->mPrimaryMemory; + } else if (this->mTensorType == TensorTypes::eDevice) { + hostVisibleMemory = this->mStagingMemory; + } else { + KP_LOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); + return; + } + + vk::DeviceSize bufferSize = this->memorySize(); + vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); + this->mDevice->flushMappedMemoryRanges(1, &mappedRange); + this->mDevice->unmapMemory(*hostVisibleMemory); + } + // -------------- NEVER OWNED RESOURCES std::shared_ptr mPhysicalDevice; std::shared_ptr mDevice; diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp index 
0ccfdb0f8..72eeaf72b 100644 --- a/test/TestDestroy.cpp +++ b/test/TestDestroy.cpp @@ -34,12 +34,13 @@ TEST(TestDestroy, TestDestroyTensorSingle) ->eval() ->eval(algo->getTensors()); + EXPECT_EQ(tensorA->vector(), std::vector({ 1, 1, 1 })); + tensorA->destroy(); EXPECT_FALSE(tensorA->isInit()); } EXPECT_FALSE(tensorA->isInit()); } - EXPECT_EQ(tensorA->vector(), std::vector({ 1, 1, 1 })); } TEST(TestDestroy, TestDestroyTensorVector) @@ -82,10 +83,11 @@ TEST(TestDestroy, TestDestroyTensorVector) EXPECT_FALSE(tensorA->isInit()); EXPECT_FALSE(tensorB->isInit()); + + EXPECT_EQ(tensorA->vector(), std::vector({ 2, 2, 2 })); + EXPECT_EQ(tensorB->vector(), std::vector({ 3, 3, 3 })); } } - EXPECT_EQ(tensorA->vector(), std::vector({ 2, 2, 2 })); - EXPECT_EQ(tensorB->vector(), std::vector({ 3, 3, 3 })); } TEST(TestDestroy, TestDestroySequenceSingle) @@ -121,7 +123,8 @@ TEST(TestDestroy, TestDestroySequenceSingle) sq->destroy(); EXPECT_FALSE(sq->isInit()); + + EXPECT_EQ(tensorA->vector(), std::vector({ 1, 1, 1 })); } } - EXPECT_EQ(tensorA->vector(), std::vector({ 1, 1, 1 })); } From a2ee928f4c3503127bf773ad8348f37e6db191cd Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 7 Mar 2021 10:39:30 +0000 Subject: [PATCH 10/16] Updated tests and rebased --- python/src/main.cpp | 105 +++++++++++++++-------- python/test/test_array_multiplication.py | 6 +- python/test/test_logistic_regression.py | 27 +++--- setup.py | 2 +- single_include/kompute/Kompute.hpp | 49 ++++++++++- src/include/kompute/Manager.hpp | 17 ++++ src/include/kompute/Tensor.hpp | 2 +- test/TestDestroy.cpp | 6 +- test/TestMultipleAlgoExecutions.cpp | 37 -------- 9 files changed, 151 insertions(+), 100 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index d4b0f2084..eab8e5ef4 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -92,59 +92,46 @@ PYBIND11_MODULE(kp, m) { py::class_>(m, "Tensor", DOC(kp, Tensor)) .def("data", [](kp::Tensor& self) { - return py::array(self.data().size(), 
self.data().data()); - }, DOC(kp, Tensor, data)) - .def("__getitem__", [](kp::Tensor &self, size_t index) -> float { return self.data()[index]; }, - "When only an index is necessary") - .def("__setitem__", [](kp::Tensor &self, size_t index, float value) { - self.data()[index] = value; }) - .def("set_data", [np](kp::Tensor &self, const py::array_t data){ - const py::array_t flatdata = np.attr("ravel")(data); - const py::buffer_info info = flatdata.request(); - const float* ptr = (float*) info.ptr; - self.setData(std::vector(ptr, ptr+flatdata.size())); - }, DOC(kp, Tensor, setData)) - .def("__iter__", [](kp::Tensor &self) { - return py::make_iterator(self.data().begin(), self.data().end()); - }, py::keep_alive<0, 1>(), // Required to keep alive iterator while exists - "Iterator to enable looping within data structure as required.") - .def("__contains__", [](kp::Tensor &self, float v) { - for (size_t i = 0; i < self.data().size(); ++i) { - if (v == self.data()[i]) { - return true; - } - } - return false; - }) - .def("__reversed__", [](kp::Tensor &self) { - size_t size = self.data().size(); - std::vector reversed(size); - for (size_t i = 0; i < size; i++) { - reversed[size - i - 1] = self.data()[i]; + // Non-owning container exposing the underlying pointer + py::str dummyDataOwner; // Explicitly request data to not be owned by np + switch (self.dataType()) { + case kp::Tensor::TensorDataTypes::eFloat: + return py::array(self.size(), self.data(), dummyDataOwner); + case kp::Tensor::TensorDataTypes::eUnsignedInt: + return py::array(self.size(), self.data(), dummyDataOwner); + case kp::Tensor::TensorDataTypes::eInt: + return py::array(self.size(), self.data(), dummyDataOwner); + case kp::Tensor::TensorDataTypes::eDouble: + return py::array(self.size(), self.data(), dummyDataOwner); + case kp::Tensor::TensorDataTypes::eBool: + return py::array(self.size(), self.data(), dummyDataOwner); + default: + throw std::runtime_error("Kompute Python data type not supported"); } - 
return reversed; - }) + }, DOC(kp, Tensor, data)) .def("size", &kp::Tensor::size, DOC(kp, Tensor, size)) .def("__len__", &kp::Tensor::size, DOC(kp, Tensor, size)) .def("tensor_type", &kp::Tensor::tensorType, DOC(kp, Tensor, tensorType)) + .def("data_type", &kp::Tensor::dataType, DOC(kp, Tensor, dataType)) .def("is_init", &kp::Tensor::isInit, DOC(kp, Tensor, isInit)) .def("destroy", &kp::Tensor::destroy, DOC(kp, Tensor, destroy)); - py::class_>(m, "Sequence", DOC(kp, Sequence)) + py::class_>(m, "Sequence") .def("record", [](kp::Sequence& self, std::shared_ptr op) { return self.record(op); }, DOC(kp, Sequence, record)) .def("eval", [](kp::Sequence& self) { return self.eval(); }, DOC(kp, Sequence, eval)) .def("eval", [](kp::Sequence& self, std::shared_ptr op) { return self.eval(op); }, - DOC(kp, Sequence, eval)) + DOC(kp, Sequence, eval_2)) .def("eval_async", [](kp::Sequence& self) { return self.eval(); }, - DOC(kp, Sequence, evalAsync)) + DOC(kp, Sequence, evalAwait)) .def("eval_async", [](kp::Sequence& self, std::shared_ptr op) { return self.evalAsync(op); }, DOC(kp, Sequence, evalAsync)) .def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); }, DOC(kp, Sequence, evalAwait)) .def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); }, DOC(kp, Sequence, evalAwait)) +<<<<<<< HEAD .def("is_recording", &kp::Sequence::isRecording, DOC(kp, Sequence, isRecording)) .def("is_running", &kp::Sequence::isRunning, @@ -163,6 +150,17 @@ PYBIND11_MODULE(kp, m) { py::class_>(m, "Manager", DOC(kp, Manager)) .def(py::init(), DOC(kp, Manager, Manager)) .def(py::init(), DOC(kp, Manager, Manager_2)) +======= + .def("is_recording", &kp::Sequence::isRecording, DOC(kp, Sequence, isRecording)) + .def("is_running", &kp::Sequence::isRunning, DOC(kp, Sequence, isRunning)) + .def("is_init", &kp::Sequence::isInit, DOC(kp, Sequence, isInit)) + .def("clear", &kp::Sequence::clear, DOC(kp, Sequence, clear)) + .def("destroy", &kp::Sequence::destroy, DOC(kp, 
Sequence, destroy)); + + py::class_>(m, "Manager") + .def(py::init()) + .def(py::init()) +>>>>>>> cc1a6cc (Updated tests and rebased) .def(py::init&,const std::vector&>(), DOC(kp, Manager, Manager_2), py::arg("device") = 0, @@ -173,13 +171,44 @@ PYBIND11_MODULE(kp, m) { .def("tensor", [np](kp::Manager& self, const py::array_t data, kp::Tensor::TensorTypes tensor_type) { - const py::array_t flatdata = np.attr("ravel")(data); - const py::buffer_info info = flatdata.request(); - const float* ptr = (float*) info.ptr; - return self.tensor(std::vector(ptr, ptr+flatdata.size()), tensor_type); + const py::buffer_info info = data.request(); + return self.tensor( + info.ptr, + data.size(), + sizeof(float), + kp::Tensor::TensorDataTypes::eFloat, + tensor_type); }, DOC(kp, Manager, tensor), py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice) + .def("tensor_t", [np](kp::Manager& self, + const py::array data, + kp::Tensor::TensorTypes tensor_type) { + // TODO: confirm if ravel is required as numpy data is always flat + //const py::array_t flatdata = np.attr("ravel")(data); + //const py::buffer_info info = flatdata.request(); + const py::buffer_info info = data.request(); + if (data.dtype() == py::dtype::of()) { + return self.tensor( + info.ptr, data.size(), sizeof(float), kp::Tensor::TensorDataTypes::eFloat, tensor_type); + } else if (data.dtype() == py::dtype::of()) { + return self.tensor( + info.ptr, data.size(), sizeof(uint32_t), kp::Tensor::TensorDataTypes::eUnsignedInt, tensor_type); + } else if (data.dtype() == py::dtype::of()) { + return self.tensor( + info.ptr, data.size(), sizeof(int32_t), kp::Tensor::TensorDataTypes::eInt, tensor_type); + } else if (data.dtype() == py::dtype::of()) { + return self.tensor( + info.ptr, data.size(), sizeof(double), kp::Tensor::TensorDataTypes::eDouble, tensor_type); + } else if (data.dtype() == py::dtype::of()) { + return self.tensor( + info.ptr, data.size(), sizeof(bool), kp::Tensor::TensorDataTypes::eBool, 
tensor_type); + } else { + throw std::runtime_error("Kompute Python no valid dtype supported"); + } + }, + DOC(kp, Manager, tensorT), + py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice) .def("algorithm", [](kp::Manager& self, const std::vector>& tensors, const py::bytes& spirv, diff --git a/python/test/test_array_multiplication.py b/python/test/test_array_multiplication.py index 0dab581c6..e8de68328 100644 --- a/python/test/test_array_multiplication.py +++ b/python/test/test_array_multiplication.py @@ -9,9 +9,9 @@ def test_array_multiplication(): mgr = kp.Manager() # 2. Create Kompute Tensors to hold data - tensor_in_a = mgr.tensor([2, 2, 2]) - tensor_in_b = mgr.tensor([1, 2, 3]) - tensor_out = mgr.tensor([0, 0, 0]) + tensor_in_a = mgr.tensor(np.array([2, 2, 2])) + tensor_in_b = mgr.tensor(np.array([1, 2, 3])) + tensor_out = mgr.tensor(np.array([0, 0, 0])) params = [tensor_in_a, tensor_in_b, tensor_out] diff --git a/python/test/test_logistic_regression.py b/python/test/test_logistic_regression.py index 4bd0c28fa..862758413 100644 --- a/python/test/test_logistic_regression.py +++ b/python/test/test_logistic_regression.py @@ -1,4 +1,5 @@ import pyshader as ps +import numpy as np import kp def test_logistic_regression(): @@ -46,21 +47,21 @@ def test_logistic_regression(): mgr = kp.Manager(0) # First we create input and ouput tensors for shader - tensor_x_i = mgr.tensor([0.0, 1.0, 1.0, 1.0, 1.0]) - tensor_x_j = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0]) + tensor_x_i = mgr.tensor(np.array([0.0, 1.0, 1.0, 1.0, 1.0])) + tensor_x_j = mgr.tensor(np.array([0.0, 0.0, 0.0, 1.0, 1.0])) - tensor_y = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0]) + tensor_y = mgr.tensor(np.array([0.0, 0.0, 0.0, 1.0, 1.0])) - tensor_w_in = mgr.tensor([0.001, 0.001]) - tensor_w_out_i = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0]) - tensor_w_out_j = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + tensor_w_in = mgr.tensor(np.array([0.001, 0.001])) + tensor_w_out_i = mgr.tensor(np.array([0.0, 0.0, 0.0, 
0.0, 0.0])) + tensor_w_out_j = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0])) - tensor_b_in = mgr.tensor([0.0]) - tensor_b_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + tensor_b_in = mgr.tensor(np.array([0.0])) + tensor_b_out = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0])) - tensor_l_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + tensor_l_out = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0])) - tensor_m = mgr.tensor([ tensor_y.size() ]) + tensor_m = mgr.tensor(np.array([ tensor_y.size() ])) # We store them in an array for easier interaction params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i, @@ -91,9 +92,9 @@ def test_logistic_regression(): # Calculate the parameters based on the respective derivatives calculated for j_iter in range(tensor_b_out.size()): - tensor_w_in[0] -= learning_rate * tensor_w_out_i.data()[j_iter] - tensor_w_in[1] -= learning_rate * tensor_w_out_j.data()[j_iter] - tensor_b_in[0] -= learning_rate * tensor_b_out.data()[j_iter] + tensor_w_in.data()[0] -= learning_rate * tensor_w_out_i.data()[j_iter] + tensor_w_in.data()[1] -= learning_rate * tensor_w_out_j.data()[j_iter] + tensor_b_in.data()[0] -= learning_rate * tensor_b_out.data()[j_iter] assert tensor_w_in.data()[0] < 0.01 assert tensor_w_in.data()[0] > 0.0 diff --git a/setup.py b/setup.py index ee3521064..733c4c185 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ class CMakeBuild(build_ext): else: cmake_args += ['-DKOMPUTE_EXTRA_CXX_FLAGS="-fPIC"'] cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] - build_args += ['--', '-j2'] + build_args += ['--', '-j'] env = os.environ.copy() env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index df9549aab..572f0e4da 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -762,7 +762,7 @@ class Shader * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ - 
static std::vector compile_sources( + static std::vector compileSources( const std::vector& sources, const std::vector& files = {}, const std::string& entryPoint = "main", @@ -783,7 +783,7 @@ class Shader * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ - static std::vector compile_source( + static std::vector compileSource( const std::string& source, const std::string& entryPoint = "main", std::vector> definitions = {}, @@ -981,7 +981,7 @@ class Tensor // TODO: move to cpp template T* data() { - return this->mRawData; + return (T*)this->mRawData; } template @@ -1008,7 +1008,7 @@ class Tensor void* mRawData; private: - void rawMapData() { + void mapRawData() { KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); @@ -1025,12 +1025,36 @@ class Tensor } vk::DeviceSize bufferSize = this->memorySize(); + // Given we request coherent host memory we don't need to invalidate / flush this->mRawData = this->mDevice->mapMemory( *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); + vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize); } + void unmapRawData() { + + KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); + + std::shared_ptr hostVisibleMemory = nullptr; + + if (this->mTensorType == TensorTypes::eHost) { + hostVisibleMemory = this->mPrimaryMemory; + } else if (this->mTensorType == TensorTypes::eDevice) { + hostVisibleMemory = this->mStagingMemory; + } else { + KP_LOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); + return; + } + + vk::DeviceSize bufferSize = this->memorySize(); + vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); + this->mDevice->flushMappedMemoryRanges(1, &mappedRange); + this->mDevice->unmapMemory(*hostVisibleMemory); + } + // -------------- NEVER OWNED RESOURCES std::shared_ptr mPhysicalDevice; std::shared_ptr mDevice; @@ -2011,6 +2035,23 @@ class Manager return this->tensorT(data, tensorType); } + std::shared_ptr tensor( + 
void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const Tensor::TensorDataTypes& dataType, + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + std::shared_ptr tensor{ new kp::Tensor( + this->mPhysicalDevice, this->mDevice, data, elementTotalCount, elementMemorySize, dataType, tensorType) }; + + if (this->mManageResources) { + this->mManagedTensors.push_back(tensor); + } + + return tensor; + } + /** * Create a managed algorithm that will be destroyed by this manager * if it hasn't been destroyed by its reference count going to zero. diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index c39f5d6b5..6eb2042eb 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -98,6 +98,23 @@ class Manager return this->tensorT(data, tensorType); } + std::shared_ptr tensor( + void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const Tensor::TensorDataTypes& dataType, + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + std::shared_ptr tensor{ new kp::Tensor( + this->mPhysicalDevice, this->mDevice, data, elementTotalCount, elementMemorySize, dataType, tensorType) }; + + if (this->mManageResources) { + this->mManagedTensors.push_back(tensor); + } + + return tensor; + } + /** * Create a managed algorithm that will be destroyed by this manager * if it hasn't been destroyed by its reference count going to zero. 
diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index efc3cda18..0194e208f 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -190,7 +190,7 @@ class Tensor // TODO: move to cpp template T* data() { - return this->mRawData; + return (T*)this->mRawData; } template diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp index 72eeaf72b..defd40998 100644 --- a/test/TestDestroy.cpp +++ b/test/TestDestroy.cpp @@ -78,14 +78,14 @@ TEST(TestDestroy, TestDestroyTensorVector) ->record(algo->getTensors()) ->eval(); + EXPECT_EQ(tensorA->vector(), std::vector({ 2, 2, 2 })); + EXPECT_EQ(tensorB->vector(), std::vector({ 3, 3, 3 })); + tensorA->destroy(); tensorB->destroy(); EXPECT_FALSE(tensorA->isInit()); EXPECT_FALSE(tensorB->isInit()); - - EXPECT_EQ(tensorA->vector(), std::vector({ 2, 2, 2 })); - EXPECT_EQ(tensorB->vector(), std::vector({ 3, 3, 3 })); } } } diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index b934f7e83..effc75227 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -201,40 +201,3 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); } -TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope) -{ - std::shared_ptr> tensorA = nullptr; - - { - std::shared_ptr sq = nullptr; - { - kp::Manager mgr; - - tensorA = mgr.tensor({ 0, 0, 0 }); - - std::string shader(R"( - #version 450 - layout (local_size_x = 1) in; - layout(set = 0, binding = 0) buffer a { float pa[]; }; - void main() { - uint index = gl_GlobalInvocationID.x; - pa[index] = pa[index] + 1; - })"); - - std::vector spirv = kp::Shader::compileSource(shader); - - std::shared_ptr algorithm = - mgr.algorithm({ tensorA }, spirv); - - sq = mgr.sequence(); - - sq->record({ tensorA })->eval(); - - sq->record(algorithm)->eval()->eval()->eval(); - - sq->record({ tensorA })->eval(); - } - } - - 
EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); -} From 6a7f410675a8b7416f48dcfc458267a3c03e73e7 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 7 Mar 2021 11:12:01 +0000 Subject: [PATCH 11/16] Updated to use flatdata on the python --- python/src/main.cpp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index eab8e5ef4..a82cd160d 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -169,12 +169,13 @@ PYBIND11_MODULE(kp, m) { .def("sequence", &kp::Manager::sequence, DOC(kp, Manager, sequence), py::arg("queue_index") = 0, py::arg("total_timestamps") = 0) .def("tensor", [np](kp::Manager& self, - const py::array_t data, + const py::array_t& data, kp::Tensor::TensorTypes tensor_type) { - const py::buffer_info info = data.request(); + const py::array_t& flatdata = np.attr("ravel")(data); + const py::buffer_info info = flatdata.request(); return self.tensor( info.ptr, - data.size(), + flatdata.size(), sizeof(float), kp::Tensor::TensorDataTypes::eFloat, tensor_type); @@ -182,27 +183,26 @@ PYBIND11_MODULE(kp, m) { DOC(kp, Manager, tensor), py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice) .def("tensor_t", [np](kp::Manager& self, - const py::array data, + const py::array& data, kp::Tensor::TensorTypes tensor_type) { - // TODO: confirm if ravel is required as numpy data is always flat - //const py::array_t flatdata = np.attr("ravel")(data); - //const py::buffer_info info = flatdata.request(); - const py::buffer_info info = data.request(); - if (data.dtype() == py::dtype::of()) { + // TODO: Suppport strides in numpy format + const py::array_t& flatdata = np.attr("ravel")(data); + const py::buffer_info info = flatdata.request(); + if (flatdata.dtype() == py::dtype::of()) { return self.tensor( - info.ptr, data.size(), sizeof(float), kp::Tensor::TensorDataTypes::eFloat, tensor_type); - } else if (data.dtype() == py::dtype::of()) 
{ + info.ptr, flatdata.size(), sizeof(float), kp::Tensor::TensorDataTypes::eFloat, tensor_type); + } else if (flatdata.dtype() == py::dtype::of()) { return self.tensor( - info.ptr, data.size(), sizeof(uint32_t), kp::Tensor::TensorDataTypes::eUnsignedInt, tensor_type); - } else if (data.dtype() == py::dtype::of()) { + info.ptr, flatdata.size(), sizeof(uint32_t), kp::Tensor::TensorDataTypes::eUnsignedInt, tensor_type); + } else if (flatdata.dtype() == py::dtype::of()) { return self.tensor( - info.ptr, data.size(), sizeof(int32_t), kp::Tensor::TensorDataTypes::eInt, tensor_type); - } else if (data.dtype() == py::dtype::of()) { + info.ptr, flatdata.size(), sizeof(int32_t), kp::Tensor::TensorDataTypes::eInt, tensor_type); + } else if (flatdata.dtype() == py::dtype::of()) { return self.tensor( - info.ptr, data.size(), sizeof(double), kp::Tensor::TensorDataTypes::eDouble, tensor_type); - } else if (data.dtype() == py::dtype::of()) { + info.ptr, flatdata.size(), sizeof(double), kp::Tensor::TensorDataTypes::eDouble, tensor_type); + } else if (flatdata.dtype() == py::dtype::of()) { return self.tensor( - info.ptr, data.size(), sizeof(bool), kp::Tensor::TensorDataTypes::eBool, tensor_type); + info.ptr, flatdata.size(), sizeof(bool), kp::Tensor::TensorDataTypes::eBool, tensor_type); } else { throw std::runtime_error("Kompute Python no valid dtype supported"); } From 8abb2313d0d08010226a83100c3fdda5bcb2a89f Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 7 Mar 2021 12:16:25 +0000 Subject: [PATCH 12/16] Updated python and cpp end to end test and readme to show support for different types on tensor --- README.md | 30 +++++++++++++++++------------ python/src/main.cpp | 5 ++++- python/test/test_kompute.py | 13 +++++++------ test/TestMultipleAlgoExecutions.cpp | 18 +++++++++-------- 4 files changed, 39 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 41596cb00..7a7375a6a 100644 --- a/README.md +++ b/README.md @@ -55,10 +55,13 @@ void kompute(const 
std::string& shader) { kp::Manager mgr; // 2. Create and initialise Kompute Tensors through manager + + // Default tensor constructor simplifies creation of float values auto tensorInA = mgr.tensor({ 2., 2., 2. }); auto tensorInB = mgr.tensor({ 1., 2., 3. }); - auto tensorOutA = mgr.tensor({ 0., 0., 0. }); - auto tensorOutB = mgr.tensor({ 0., 0., 0. }); + // Explicit type constructor supports uint32, int32, double, float and bool + auto tensorOutA = mgr.tensorT({ 0, 0, 0 }); + auto tensorOutB = mgr.tensorT({ 0, 0, 0 }); std::vector> params = {tensorInA, tensorInB, tensorOutA, tensorOutB}; @@ -109,8 +112,8 @@ int main() { // The input tensors bind index is relative to index in parameter passed layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; }; layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; }; - layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; }; - layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; }; + layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; }; + layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; }; // Kompute supports push constants updated on dispatch layout(push_constant) uniform PushConstants { @@ -122,8 +125,8 @@ int main() { void main() { uint index = gl_GlobalInvocationID.x; - out_a[index] += in_a[index] * in_b[index]; - out_b[index] += const_one * push_const.val; + out_a[index] += uint( in_a[index] * in_b[index] ); + out_b[index] += uint( const_one * push_const.val ); } )"); @@ -144,10 +147,13 @@ def kompute(shader): mgr = kp.Manager() # 2. 
Create and initialise Kompute Tensors through manager + + # Default tensor constructor simplifies creation of float values tensor_in_a = mgr.tensor([2, 2, 2]) tensor_in_b = mgr.tensor([1, 2, 3]) - tensor_out_a = mgr.tensor([0, 0, 0]) - tensor_out_b = mgr.tensor([0, 0, 0]) + # Explicit type constructor supports uint32, int32, double, float and bool + tensor_out_a = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32)) + tensor_out_b = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32)) params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b] @@ -194,8 +200,8 @@ if __name__ == "__main__": // The input tensors bind index is relative to index in parameter passed layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; }; layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; }; - layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; }; - layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; }; + layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; }; + layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; }; // Kompute supports push constants updated on dispatch layout(push_constant) uniform PushConstants { @@ -207,8 +213,8 @@ if __name__ == "__main__": void main() { uint index = gl_GlobalInvocationID.x; - out_a[index] += in_a[index] * in_b[index]; - out_b[index] += const_one * push_const.val; + out_a[index] += uint( in_a[index] * in_b[index] ); + out_b[index] += uint( const_one * push_const.val ); } """ diff --git a/python/src/main.cpp b/python/src/main.cpp index a82cd160d..495d0ed0c 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -173,6 +173,7 @@ PYBIND11_MODULE(kp, m) { kp::Tensor::TensorTypes tensor_type) { const py::array_t& flatdata = np.attr("ravel")(data); const py::buffer_info info = flatdata.request(); + KP_LOG_DEBUG("Kompute Python Manager tensor() creating tensor float with data size {}", flatdata.size()); return self.tensor( info.ptr, flatdata.size(), @@ -186,8 +187,10 @@ PYBIND11_MODULE(kp, m) 
{ const py::array& data, kp::Tensor::TensorTypes tensor_type) { // TODO: Suppport strides in numpy format - const py::array_t& flatdata = np.attr("ravel")(data); + const py::array& flatdata = np.attr("ravel")(data); const py::buffer_info info = flatdata.request(); + KP_LOG_DEBUG("Kompute Python Manager creating tensor_T with data size {} dtype {}", + flatdata.size(), std::string(py::str(flatdata.dtype()))); if (flatdata.dtype() == py::dtype::of()) { return self.tensor( info.ptr, flatdata.size(), sizeof(float), kp::Tensor::TensorDataTypes::eFloat, tensor_type); diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index 47887930a..736768053 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -36,8 +36,9 @@ def test_end_to_end(): tensor_in_a = mgr.tensor([2, 2, 2]) tensor_in_b = mgr.tensor([1, 2, 3]) - tensor_out_a = mgr.tensor([0, 0, 0]) - tensor_out_b = mgr.tensor([0, 0, 0]) + # Explicit type constructor supports int, in32, double, float and int + tensor_out_a = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32)) + tensor_out_b = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32)) params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b] @@ -49,8 +50,8 @@ def test_end_to_end(): // The input tensors bind index is relative to index in parameter passed layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; }; layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; }; - layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; }; - layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; }; + layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; }; + layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; }; // Kompute supports push constants updated on dispatch layout(push_constant) uniform PushConstants { @@ -62,8 +63,8 @@ def test_end_to_end(): void main() { uint index = gl_GlobalInvocationID.x; - out_a[index] += in_a[index] * in_b[index]; - out_b[index] += const_one * push_const.val; 
+ out_a[index] += uint( in_a[index] * in_b[index] ); + out_b[index] += uint( const_one * push_const.val ); } """ diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index effc75227..f9e066f47 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -8,10 +8,12 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) kp::Manager mgr; + // Default tensor constructor simplifies creation of float values auto tensorInA = mgr.tensor({ 2., 2., 2. }); auto tensorInB = mgr.tensor({ 1., 2., 3. }); - auto tensorOutA = mgr.tensor({ 0., 0., 0. }); - auto tensorOutB = mgr.tensor({ 0., 0., 0. }); + // Explicit type constructor supports int, in32, double, float and int + auto tensorOutA = mgr.tensorT({ 0, 0, 0 }); + auto tensorOutB = mgr.tensorT({ 0, 0, 0 }); std::string shader = (R"( #version 450 @@ -21,8 +23,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) // The input tensors bind index is relative to index in parameter passed layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; }; layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; }; - layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; }; - layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; }; + layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; }; + layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; }; // Kompute supports push constants updated on dispatch layout(push_constant) uniform PushConstants { @@ -34,8 +36,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) void main() { uint index = gl_GlobalInvocationID.x; - out_a[index] += in_a[index] * in_b[index]; - out_b[index] += const_one * push_const.val; + out_a[index] += uint( in_a[index] * in_b[index] ); + out_b[index] += uint( const_one * push_const.val ); } )"); @@ -64,8 +66,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) sq->evalAwait(); - EXPECT_EQ(tensorOutA->vector(), std::vector({ 
4, 8, 12 })); - EXPECT_EQ(tensorOutB->vector(), std::vector({ 10, 10, 10 })); + EXPECT_EQ(tensorOutA->vector(), std::vector({ 4, 8, 12 })); + EXPECT_EQ(tensorOutB->vector(), std::vector({ 10, 10, 10 })); } TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) From df0dfd351f41f93884baa166f922c4b77d10a42b Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 7 Mar 2021 13:37:54 +0000 Subject: [PATCH 13/16] Added types tests --- python/test/test_kompute.py | 21 ---- python/test/test_tensor_types.py | 206 +++++++++++++++++++++++++++++++ src/Tensor.cpp | 2 +- 3 files changed, 207 insertions(+), 22 deletions(-) create mode 100644 python/test/test_tensor_types.py diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index 736768053..e1bcee940 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -9,27 +9,6 @@ DIRNAME = os.path.dirname(os.path.abspath(__file__)) kp_log = logging.getLogger("kp") -# TODO: Add example with file -#def test_opalgobase_file(): -# """ -# Test basic OpMult operation -# """ -# -# tensor_in_a = kp.Tensor([2, 2, 2]) -# tensor_in_b = kp.Tensor([1, 2, 3]) -# tensor_out = kp.Tensor([0, 0, 0]) -# -# mgr = kp.Manager() -# mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) -# -# shader_path = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp.spv") -# -# mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shader_path) -# -# mgr.eval_tensor_sync_local_def([tensor_out]) -# -# assert tensor_out.data() == [2.0, 4.0, 6.0] - def test_end_to_end(): mgr = kp.Manager() diff --git a/python/test/test_tensor_types.py b/python/test/test_tensor_types.py new file mode 100644 index 000000000..b1d90fe03 --- /dev/null +++ b/python/test/test_tensor_types.py @@ -0,0 +1,206 @@ +import pyshader as ps +import os +import pytest +import kp +import numpy as np + + +def test_type_float(): + + shader = """ + #version 450 + layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];}; + layout(set = 0, binding = 1) 
buffer tensorRhs {float valuesRhs[];}; + layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];}; + layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + + void main() + { + uint index = gl_GlobalInvocationID.x; + valuesOutput[index] = valuesLhs[index] * valuesRhs[index]; + } + """ + + spirv = kp.Shader.compile_source(shader) + + arr_in_a = np.array([123., 153., 231.], dtype=np.float32) + arr_in_b = np.array([9482, 1208, 1238], dtype=np.float32) + arr_out = np.array([0, 0, 0], dtype=np.float32) + + mgr = kp.Manager() + + tensor_in_a = mgr.tensor(arr_in_a) + tensor_in_b = mgr.tensor(arr_in_b) + tensor_out = mgr.tensor(arr_out) + + params = [tensor_in_a, tensor_in_b, tensor_out] + + (mgr.sequence() + .record(kp.OpTensorSyncDevice(params)) + .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv))) + .record(kp.OpTensorSyncLocal([tensor_out])) + .eval()) + + assert np.all(tensor_out.data() == arr_in_a * arr_in_b) + + +def test_type_float_double_incorrect(): + + shader = """ + #version 450 + layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];}; + layout(set = 0, binding = 1) buffer tensorRhs {float valuesRhs[];}; + layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];}; + layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + + void main() + { + uint index = gl_GlobalInvocationID.x; + valuesOutput[index] = valuesLhs[index] * valuesRhs[index]; + } + """ + + spirv = kp.Shader.compile_source(shader) + + arr_in_a = np.array([123., 153., 231.], dtype=np.float32) + arr_in_b = np.array([9482, 1208, 1238], dtype=np.uint32) + arr_out = np.array([0, 0, 0], dtype=np.float32) + + mgr = kp.Manager() + + tensor_in_a = mgr.tensor_t(arr_in_a) + tensor_in_b = mgr.tensor_t(arr_in_b) + tensor_out = mgr.tensor_t(arr_out) + + params = [tensor_in_a, tensor_in_b, tensor_out] + + (mgr.sequence() + .record(kp.OpTensorSyncDevice(params)) + .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv))) + 
.record(kp.OpTensorSyncLocal([tensor_out])) + .eval()) + + assert np.all(tensor_out.data() != arr_in_a * arr_in_b) + +@pytest.mark.skipif("swiftshader" in os.environ.get("VK_ICD_FILENAMES"), + reason="Swiftshader doesn't support double") +def test_type_double(): + + shader = """ + #version 450 + layout(set = 0, binding = 0) buffer tensorLhs { double valuesLhs[]; }; + layout(set = 0, binding = 1) buffer tensorRhs { double valuesRhs[]; }; + layout(set = 0, binding = 2) buffer tensorOutput { double valuesOutput[]; }; + layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + + void main() + { + uint index = gl_GlobalInvocationID.x; + valuesOutput[index] = valuesLhs[index] * valuesRhs[index]; + } + """ + + spirv = kp.Shader.compile_source(shader) + + arr_in_a = np.array([123., 153., 231.], dtype=np.float64) + arr_in_b = np.array([9482, 1208, 1238], dtype=np.float64) + arr_out = np.array([0, 0, 0], dtype=np.float64) + + mgr = kp.Manager() + + tensor_in_a = mgr.tensor_t(arr_in_a) + tensor_in_b = mgr.tensor_t(arr_in_b) + tensor_out = mgr.tensor_t(arr_out) + + params = [tensor_in_a, tensor_in_b, tensor_out] + + (mgr.sequence() + .record(kp.OpTensorSyncDevice(params)) + .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv))) + .record(kp.OpTensorSyncLocal([tensor_out])) + .eval()) + + print(f"Dtype value {tensor_out.data().dtype}") + + assert np.all(tensor_out.data() == arr_in_a * arr_in_b) + +def test_type_int(): + + shader = """ + #version 450 + layout(set = 0, binding = 0) buffer tensorLhs { int valuesLhs[]; }; + layout(set = 0, binding = 1) buffer tensorRhs { int valuesRhs[]; }; + layout(set = 0, binding = 2) buffer tensorOutput { int valuesOutput[]; }; + layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + + void main() + { + uint index = gl_GlobalInvocationID.x; + valuesOutput[index] = valuesLhs[index] * valuesRhs[index]; + } + """ + + spirv = kp.Shader.compile_source(shader) + + arr_in_a = np.array([123, 153, 231], dtype=np.int32) + 
arr_in_b = np.array([9482, 1208, 1238], dtype=np.int32) + arr_out = np.array([0, 0, 0], dtype=np.int32) + + mgr = kp.Manager() + + tensor_in_a = mgr.tensor_t(arr_in_a) + tensor_in_b = mgr.tensor_t(arr_in_b) + tensor_out = mgr.tensor_t(arr_out) + + params = [tensor_in_a, tensor_in_b, tensor_out] + + (mgr.sequence() + .record(kp.OpTensorSyncDevice(params)) + .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv))) + .record(kp.OpTensorSyncLocal([tensor_out])) + .eval()) + + print(f"Dtype value {tensor_out.data().dtype}") + + assert np.all(tensor_out.data() == arr_in_a * arr_in_b) + +def test_type_unsigned_int(): + + shader = """ + #version 450 + layout(set = 0, binding = 0) buffer tensorLhs { uint valuesLhs[]; }; + layout(set = 0, binding = 1) buffer tensorRhs { uint valuesRhs[]; }; + layout(set = 0, binding = 2) buffer tensorOutput { uint valuesOutput[]; }; + layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + + void main() + { + uint index = gl_GlobalInvocationID.x; + valuesOutput[index] = valuesLhs[index] * valuesRhs[index]; + } + """ + + spirv = kp.Shader.compile_source(shader) + + arr_in_a = np.array([123, 153, 231], dtype=np.uint32) + arr_in_b = np.array([9482, 1208, 1238], dtype=np.uint32) + arr_out = np.array([0, 0, 0], dtype=np.uint32) + + mgr = kp.Manager() + + tensor_in_a = mgr.tensor_t(arr_in_a) + tensor_in_b = mgr.tensor_t(arr_in_b) + tensor_out = mgr.tensor_t(arr_out) + + params = [tensor_in_a, tensor_in_b, tensor_out] + + (mgr.sequence() + .record(kp.OpTensorSyncDevice(params)) + .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv))) + .record(kp.OpTensorSyncLocal([tensor_out])) + .eval()) + + print(f"Dtype value {tensor_out.data().dtype}") + + assert np.all(tensor_out.data() == arr_in_a * arr_in_b) + diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 8b96be163..947714693 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -175,7 +175,7 @@ Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, 
vk::DescriptorBufferInfo Tensor::constructDescriptorBufferInfo() { - KP_LOG_WARN("Kompute Tensor construct descriptor buffer info size {}", this->memorySize()); + KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}", this->memorySize()); vk::DeviceSize bufferSize = this->memorySize(); return vk::DescriptorBufferInfo(*this->mPrimaryBuffer, 0, // offset From 5ff7b4aa7821c5d1142a5e78ba67fc4027ad311a Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 7 Mar 2021 14:10:38 +0000 Subject: [PATCH 14/16] Added single header --- single_include/kompute/Kompute.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 572f0e4da..9b41e1ead 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -2001,7 +2001,7 @@ class Manager * If zero (default), disables latching of timestamps. * @returns Shared pointer with initialised sequence */ - std::shared_ptr sequence(uint32_t queueIndex = 0, uint32_t nrOfTimestamps = 0); + std::shared_ptr sequence(uint32_t queueIndex = 0, uint32_t totalTimestamps = 0); /** * Create a managed tensor that will be destroyed by this manager From 6fd19b9d05fb2de7fbc545f4f7144266f98c98d1 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 7 Mar 2021 14:11:32 +0000 Subject: [PATCH 15/16] Fixed conflicts --- python/src/docstrings.hpp | 75 +++++++++++++++++++++++++++------------ python/src/main.cpp | 12 ------- 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp index a5bda0a4d..d4593edb8 100644 --- a/python/src/docstrings.hpp +++ b/python/src/docstrings.hpp @@ -252,7 +252,11 @@ nrOfTimestamps The maximum number of timestamps to allocate. If zero (default), disables latching of timestamps. 
@returns Shared pointer with initialised sequence)doc"; -static const char *__doc_kp_Manager_tensor = +static const char *__doc_kp_Manager_tensor = R"doc()doc"; + +static const char *__doc_kp_Manager_tensor_2 = R"doc()doc"; + +static const char *__doc_kp_Manager_tensorT = R"doc(Create a managed tensor that will be destroyed by this manager if it hasn't been destroyed by its reference count going to zero. @@ -679,6 +683,20 @@ across GPUs. Each tensor would have a respective Vulkan memory and buffer, which would be used to store their respective data. The tensors can be used for GPU data storage or transfer.)doc"; +static const char *__doc_kp_TensorT = R"doc()doc"; + +static const char *__doc_kp_TensorT_TensorT = R"doc()doc"; + +static const char *__doc_kp_TensorT_data = R"doc()doc"; + +static const char *__doc_kp_TensorT_dataType = R"doc()doc"; + +static const char *__doc_kp_TensorT_operator_array = R"doc()doc"; + +static const char *__doc_kp_TensorT_setData = R"doc()doc"; + +static const char *__doc_kp_TensorT_vector = R"doc()doc"; + static const char *__doc_kp_Tensor_Tensor = R"doc(Constructor with data provided which would be used to create the respective vulkan buffer and memory. @@ -689,6 +707,18 @@ respective vulkan buffer and memory. tensor @param tensorTypes Type for the tensor which is of type TensorTypes)doc"; +static const char *__doc_kp_Tensor_TensorDataTypes = R"doc()doc"; + +static const char *__doc_kp_Tensor_TensorDataTypes_eBool = R"doc()doc"; + +static const char *__doc_kp_Tensor_TensorDataTypes_eDouble = R"doc()doc"; + +static const char *__doc_kp_Tensor_TensorDataTypes_eFloat = R"doc()doc"; + +static const char *__doc_kp_Tensor_TensorDataTypes_eInt = R"doc()doc"; + +static const char *__doc_kp_Tensor_TensorDataTypes_eUnsignedInt = R"doc()doc"; + static const char *__doc_kp_Tensor_TensorTypes = R"doc(Type for tensors created: Device allows memory to be transferred from staging buffers. Staging are host memory visible. 
Storage are device @@ -714,13 +744,14 @@ without exposing it. static const char *__doc_kp_Tensor_createBuffer = R"doc()doc"; -static const char *__doc_kp_Tensor_data = -R"doc(Returns the vector of data currently contained by the Tensor. It is -important to ensure that there is no out-of-sync data with the GPU -memory. +static const char *__doc_kp_Tensor_data = R"doc()doc"; -@return Reference to vector of elements representing the data in the -tensor.)doc"; +static const char *__doc_kp_Tensor_dataType = +R"doc(Retrieve the underlying data type of the Tensor + +@return Data type of tensor of type kp::Tensor::TensorDataTypes)doc"; + +static const char *__doc_kp_Tensor_dataTypeMemorySize = R"doc()doc"; static const char *__doc_kp_Tensor_destroy = R"doc(Destroys and frees the GPU resources which include the buffer and @@ -740,7 +771,9 @@ resources. @returns Boolean stating whether tensor is initialized)doc"; -static const char *__doc_kp_Tensor_mData = R"doc()doc"; +static const char *__doc_kp_Tensor_mDataType = R"doc()doc"; + +static const char *__doc_kp_Tensor_mDataTypeMemorySize = R"doc()doc"; static const char *__doc_kp_Tensor_mDevice = R"doc()doc"; @@ -758,29 +791,21 @@ static const char *__doc_kp_Tensor_mPrimaryBuffer = R"doc()doc"; static const char *__doc_kp_Tensor_mPrimaryMemory = R"doc()doc"; +static const char *__doc_kp_Tensor_mRawData = R"doc()doc"; + +static const char *__doc_kp_Tensor_mSize = R"doc()doc"; + static const char *__doc_kp_Tensor_mStagingBuffer = R"doc()doc"; static const char *__doc_kp_Tensor_mStagingMemory = R"doc()doc"; static const char *__doc_kp_Tensor_mTensorType = R"doc()doc"; -static const char *__doc_kp_Tensor_mapDataFromHostMemory = -R"doc(Maps data from the Host Visible GPU memory into the data vector. It -requires the Tensor to be of staging type for it to work.)doc"; - -static const char *__doc_kp_Tensor_mapDataIntoHostMemory = -R"doc(Maps data from the data vector into the Host Visible GPU memory. 
It -requires the tensor to be of staging type for it to work.)doc"; +static const char *__doc_kp_Tensor_mapRawData = R"doc()doc"; static const char *__doc_kp_Tensor_memorySize = R"doc()doc"; -static const char *__doc_kp_Tensor_operator_array = -R"doc(Overrides the subscript operator to expose the underlying data's -subscript operator which in this case would be its underlying -vector's. - -@param i The index where the element will be returned from. @return -Returns the element in the position requested.)doc"; +static const char *__doc_kp_Tensor_rawData = R"doc()doc"; static const char *__doc_kp_Tensor_rebuild = R"doc(Function to trigger reinitialisation of the tensor buffer and memory @@ -829,7 +854,7 @@ would only be relevant for kp::Tensors of type eDevice. @param createBarrier Whether to create a barrier that ensures the data is copied before further operations. Default is true.)doc"; -static const char *__doc_kp_Tensor_setData = +static const char *__doc_kp_Tensor_setRawData = R"doc(Sets / resets the vector data of the tensor. 
This function does not perform any copies into GPU memory and is only performed on the host.)doc"; @@ -844,6 +869,10 @@ R"doc(Retrieve the tensor type of the Tensor @return Tensor type of tensor)doc"; +static const char *__doc_kp_Tensor_unmapRawData = R"doc()doc"; + +static const char *__doc_kp_Tensor_vector = R"doc()doc"; + #if defined(__GNUG__) #pragma GCC diagnostic pop #endif diff --git a/python/src/main.cpp b/python/src/main.cpp index 495d0ed0c..9e065c213 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -131,7 +131,6 @@ PYBIND11_MODULE(kp, m) { DOC(kp, Sequence, evalAwait)) .def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); }, DOC(kp, Sequence, evalAwait)) -<<<<<<< HEAD .def("is_recording", &kp::Sequence::isRecording, DOC(kp, Sequence, isRecording)) .def("is_running", &kp::Sequence::isRunning, @@ -150,17 +149,6 @@ PYBIND11_MODULE(kp, m) { py::class_>(m, "Manager", DOC(kp, Manager)) .def(py::init(), DOC(kp, Manager, Manager)) .def(py::init(), DOC(kp, Manager, Manager_2)) -======= - .def("is_recording", &kp::Sequence::isRecording, DOC(kp, Sequence, isRecording)) - .def("is_running", &kp::Sequence::isRunning, DOC(kp, Sequence, isRunning)) - .def("is_init", &kp::Sequence::isInit, DOC(kp, Sequence, isInit)) - .def("clear", &kp::Sequence::clear, DOC(kp, Sequence, clear)) - .def("destroy", &kp::Sequence::destroy, DOC(kp, Sequence, destroy)); - - py::class_>(m, "Manager") - .def(py::init()) - .def(py::init()) ->>>>>>> cc1a6cc (Updated tests and rebased) .def(py::init&,const std::vector&>(), DOC(kp, Manager, Manager_2), py::arg("device") = 0, From 2e1022410baf5c8971b9c4b97c33f53d2cc31181 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 7 Mar 2021 14:20:31 +0000 Subject: [PATCH 16/16] Updated compile_shader to compileShader --- test/TestSequence.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp index 090a6317b..ca3b9a485 100644 --- 
a/test/TestSequence.cpp +++ b/test/TestSequence.cpp @@ -117,7 +117,7 @@ TEST(TestSequence, SequenceTimestamps) pa[index] = pa[index] + 1; })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); auto seq = mgr.sequence(0, 100); //100 timestamps seq->record({ tensorA })