diff --git a/README.md b/README.md index 8adfd6c93..eb438a473 100644 --- a/README.md +++ b/README.md @@ -61,8 +61,11 @@ int main() { kp::Manager mgr; // Selects device 0 unless explicitly requested // Creates tensor an initializes GPU memory (below we show more granularity) - auto tensorA = mgr.buildTensor({ 3, 4, 5 }); - auto tensorB = mgr.buildTensor({ 0, 0, 0 }); + auto tensorA = std::make_shared(kp::Tensor({ 3., 4., 5. })); + auto tensorB = std::make_shared(kp::Tensor({ 0., 0., 0. })); + + // Create tensors data explicitly in GPU with an operation + mgr.evalOpDefault({ tensorA, tensorB }); // Define your shader as a string (using string literals for simplicity) // (You can also pass the raw compiled bytes, or even path to file) @@ -82,11 +85,13 @@ int main() { )"); // Run Kompute operation on the parameters provided with dispatch layout - mgr.evalOpDefault>( + mgr.evalOpDefault>( { tensorA, tensorB }, - true, // Whether to retrieve the output from GPU memory std::vector(shader.begin(), shader.end())); + // Sync the GPU memory back to the local tensor + mgr.evalOpDefault({ tensorA, tensorB }); + // Prints the output which is A: { 0, 1, 2 } B: { 3, 4, 5 } std::cout << fmt::format("A: {}, B: {}", tensorA.data(), tensorB.data()) << std::endl; @@ -107,7 +112,7 @@ class OpMyCustom : public OpAlgoBase std::shared_ptr device, std::shared_ptr commandBuffer, std::vector> tensors) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, true, "") + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "") { // Perform your custom steps such as reading from a shader file this->mShaderFilePath = "shaders/glsl/opmult.comp"; @@ -144,7 +149,7 @@ int main() { kp::Manager mgr; std::shared_ptr tensorLHS{ new kp::Tensor({ 1., 1., 1. }) }; - std::shared_ptr tensorRHS{ new kp::Tensor( { 2., 2., 2. }) }; + std::shared_ptr tensorRHS{ new kp::Tensor({ 2., 2., 2. }) }; std::shared_ptr tensorOutput{ new kp::Tensor({ 0., 0., 0. 
}) }; // Create all the tensors in memory @@ -159,17 +164,23 @@ int main() { sq.begin(); // Record batch commands to send to GPU - sq.record>({ tensorLHS, tensorRHS, tensorOutput }); - sq.record({tensorOutput, tensorLHS, tensorRHS}); + sq->record>({ tensorLHS, tensorRHS, tensorOutput }); + sq->record({tensorOutput, tensorLHS, tensorRHS}); // Stop recording - sq.end(); + sq->end(); // Submit multiple batch operations to GPU size_t ITERATIONS = 5; for (size_t i = 0; i < ITERATIONS; i++) { - sq.eval(); + sq->eval(); } + + // Sync GPU memory back to local tensor + sq->begin(); + sq->record({tensorOutput}); + sq->end(); + sq->eval(); } // Print the output which iterates through OpMult 5 times diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index afc873810..f4acff5af 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -971,11 +971,6 @@ namespace kp { * * All of these tensors are expected to be initlaised and this is checked with throw std exception in the init function. * - * It is possible to also choose if the user requires all of the tensors to be - * copied from device memory to their host data. This can be disabled by either - * passing the copyOutputData constructor parameter and/or by overriding the - * functions to carry out copy commands accordingly. - * * See OpLhsRhsOut for an example implementation on a more specific granularity on tensor parameters. 
* * The template parameters specify the processing GPU layout number of @@ -1000,14 +995,12 @@ class OpAlgoBase : public OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param copyOutputData Whether to map device data for all output tensors back to their host data vectors * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors, - bool copyOutputData); + std::vector>& tensors); /** * Constructor that enables a file to be passed to the operation with @@ -1018,14 +1011,12 @@ class OpAlgoBase : public OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param copyOutputData Whether to map device data for all output tensors back to their host data vectors * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - bool copyOutputData, std::string shaderFilePath); /** @@ -1036,14 +1027,12 @@ class OpAlgoBase : public OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param copyOutputData Whether to map device data for all output tensors back to their host data vectors * @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, 
std::vector>& tensors, - bool copyOutputData, const std::vector& shaderDataRaw); /** @@ -1090,8 +1079,6 @@ class OpAlgoBase : public OpBase bool mFreeAlgorithm = false; // -------------- ALWAYS OWNED RESOURCES - std::vector> mOutputStagingTensors; ///< Array of output staging tensors which will be expected to be the same size as the number of inputs. - bool mCopyOutputData; ///< Configuration parameter which states whether data will be copied back to all provided tensors for convenience. This can be deactivated by setting this flag and or overriding the functions provided. uint32_t mX; uint32_t mY; @@ -1121,11 +1108,10 @@ template OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors, - bool copyOutputData) + std::vector>& tensors) : OpBase(physicalDevice, device, commandBuffer, tensors, false) { - SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {} copyOutputData: {}, shaderFilePath: {}", tensors.size(), copyOutputData); + SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size()); // The dispatch size is set up based on either explicitly provided template // parameters or by default it would take the shape and size of the tensors @@ -1145,8 +1131,6 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr physicalD this->mY, this->mZ); - this->mCopyOutputData = copyOutputData; - this->mAlgorithm = std::make_shared(device, commandBuffer); } @@ -1155,9 +1139,8 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr physicalD std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - bool copyOutputData, std::string shaderFilePath) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, copyOutputData) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", shaderFilePath); @@ -1169,9 +1152,8 @@ 
OpAlgoBase::OpAlgoBase(std::shared_ptr physicalD std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - bool copyOutputData, const std::vector& shaderDataRaw) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, copyOutputData) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw data length: {}", shaderDataRaw.size()); @@ -1182,13 +1164,6 @@ template OpAlgoBase::~OpAlgoBase() { SPDLOG_DEBUG("Kompute OpAlgoBase destructor started"); - - if (this->mCopyOutputData) { - SPDLOG_DEBUG("Kompute OpAlgoBase destroying staging tensors"); - for (std::shared_ptr stagingTensor : this->mOutputStagingTensors) { - stagingTensor->freeMemoryDestroyGPUResources(); - } - } } template @@ -1208,18 +1183,6 @@ OpAlgoBase::init() } } - if (this->mCopyOutputData) { - SPDLOG_DEBUG("Kompute OpAlgoBase creating staging output tensors"); - - for (std::shared_ptr tensor : this->mTensors) { - std::shared_ptr stagingTensor = std::make_shared( - tensor->data(), Tensor::TensorTypes::eStaging); - stagingTensor->init( - this->mPhysicalDevice, this->mDevice); - this->mOutputStagingTensors.push_back(stagingTensor); - } - } - SPDLOG_DEBUG("Kompute OpAlgoBase fetching spirv data"); std::vector& shaderFileData = this->fetchSpirvBinaryData(); @@ -1246,27 +1209,6 @@ OpAlgoBase::record() } this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); - - if (this->mCopyOutputData) { - // Barrier to ensure the shader code is executed before buffer read - for (const std::shared_ptr& tensor : this->mTensors) { - tensor->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eShaderWrite, - vk::AccessFlagBits::eTransferRead, - vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer); - } - - // Record copy from and create barrier for STAGING tensors - // TODO: This only accounts for device tensors need to account for staging and storage - for 
(size_t i = 0; i < this->mTensors.size(); i++) { - this->mOutputStagingTensors[i]->recordCopyFrom( - this->mCommandBuffer, - this->mTensors[i], - true); - } - } } template @@ -1281,14 +1223,6 @@ void OpAlgoBase::postEval() { SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called"); - - if (this->mCopyOutputData) { - for (size_t i = 0; i < this->mTensors.size(); i++) { - this->mOutputStagingTensors[i]->mapDataFromHostMemory(); - - this->mTensors[i]->setData(this->mOutputStagingTensors[i]->data()); - } - } } template @@ -1429,7 +1363,7 @@ OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr // The inheritance is initialised with the copyOutputData to false given that // this depencendant class handles the transfer of data via staging buffers in // a granular way. - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, false) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params"); } @@ -1575,7 +1509,7 @@ class OpMult : public OpAlgoBase std::shared_ptr device, std::shared_ptr commandBuffer, std::vector> tensors) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, true, "") + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "") { SPDLOG_DEBUG("Kompute OpMult constructor with params"); diff --git a/src/include/kompute/operations/OpAlgoBase.hpp b/src/include/kompute/operations/OpAlgoBase.hpp index 417a05550..03109ec34 100644 --- a/src/include/kompute/operations/OpAlgoBase.hpp +++ b/src/include/kompute/operations/OpAlgoBase.hpp @@ -21,11 +21,6 @@ namespace kp { * * All of these tensors are expected to be initlaised and this is checked with throw std exception in the init function. * - * It is possible to also choose if the user requires all of the tensors to be - * copied from device memory to their host data. This can be disabled by either - * passing the copyOutputData constructor parameter and/or by overriding the - * functions to carry out copy commands accordingly. 
- * * See OpLhsRhsOut for an example implementation on a more specific granularity on tensor parameters. * * The template parameters specify the processing GPU layout number of @@ -50,14 +45,12 @@ class OpAlgoBase : public OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param copyOutputData Whether to map device data for all output tensors back to their host data vectors * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors, - bool copyOutputData); + std::vector>& tensors); /** * Constructor that enables a file to be passed to the operation with @@ -68,14 +61,12 @@ class OpAlgoBase : public OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param copyOutputData Whether to map device data for all output tensors back to their host data vectors * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - bool copyOutputData, std::string shaderFilePath); /** @@ -86,14 +77,12 @@ class OpAlgoBase : public OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param copyOutputData Whether to map device data for all output tensors back to their host data vectors * @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form */ 
OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - bool copyOutputData, const std::vector& shaderDataRaw); /** @@ -141,8 +130,6 @@ class OpAlgoBase : public OpBase bool mFreeAlgorithm = false; // -------------- ALWAYS OWNED RESOURCES - std::vector> mOutputStagingTensors; ///< Array of output staging tensors which will be expected to be the same size as the number of inputs. - bool mCopyOutputData; ///< Configuration parameter which states whether data will be copied back to all provided tensors for convenience. This can be deactivated by setting this flag and or overriding the functions provided. uint32_t mX; uint32_t mY; @@ -172,11 +159,10 @@ template OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors, - bool copyOutputData) + std::vector>& tensors) : OpBase(physicalDevice, device, commandBuffer, tensors, false) { - SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {} copyOutputData: {}, shaderFilePath: {}", tensors.size(), copyOutputData); + SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size()); // The dispatch size is set up based on either explicitly provided template // parameters or by default it would take the shape and size of the tensors @@ -196,8 +182,6 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr physicalD this->mY, this->mZ); - this->mCopyOutputData = copyOutputData; - this->mAlgorithm = std::make_shared(device, commandBuffer); } @@ -206,9 +190,8 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr physicalD std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - bool copyOutputData, std::string shaderFilePath) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, copyOutputData) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with 
shaderfile path: {}", shaderFilePath); @@ -220,9 +203,8 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr physicalD std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - bool copyOutputData, const std::vector& shaderDataRaw) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, copyOutputData) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw data length: {}", shaderDataRaw.size()); @@ -233,13 +215,6 @@ template OpAlgoBase::~OpAlgoBase() { SPDLOG_DEBUG("Kompute OpAlgoBase destructor started"); - - if (this->mCopyOutputData) { - SPDLOG_DEBUG("Kompute OpAlgoBase destroying staging tensors"); - for (std::shared_ptr stagingTensor : this->mOutputStagingTensors) { - stagingTensor->freeMemoryDestroyGPUResources(); - } - } } template @@ -259,18 +234,6 @@ OpAlgoBase::init() } } - if (this->mCopyOutputData) { - SPDLOG_DEBUG("Kompute OpAlgoBase creating staging output tensors"); - - for (std::shared_ptr tensor : this->mTensors) { - std::shared_ptr stagingTensor = std::make_shared( - tensor->data(), Tensor::TensorTypes::eStaging); - stagingTensor->init( - this->mPhysicalDevice, this->mDevice); - this->mOutputStagingTensors.push_back(stagingTensor); - } - } - SPDLOG_DEBUG("Kompute OpAlgoBase fetching spirv data"); std::vector& shaderFileData = this->fetchSpirvBinaryData(); @@ -297,27 +260,6 @@ OpAlgoBase::record() } this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); - - if (this->mCopyOutputData) { - // Barrier to ensure the shader code is executed before buffer read - for (const std::shared_ptr& tensor : this->mTensors) { - tensor->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eShaderWrite, - vk::AccessFlagBits::eTransferRead, - vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer); - } - - // Record copy from and create barrier for STAGING tensors - // TODO: This only accounts for device 
tensors need to account for staging and storage - for (size_t i = 0; i < this->mTensors.size(); i++) { - this->mOutputStagingTensors[i]->recordCopyFrom( - this->mCommandBuffer, - this->mTensors[i], - true); - } - } } template @@ -332,14 +274,6 @@ void OpAlgoBase::postEval() { SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called"); - - if (this->mCopyOutputData) { - for (size_t i = 0; i < this->mTensors.size(); i++) { - this->mOutputStagingTensors[i]->mapDataFromHostMemory(); - - this->mTensors[i]->setData(this->mOutputStagingTensors[i]->data()); - } - } } template diff --git a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp index 1f9605878..e513bb820 100644 --- a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp +++ b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp @@ -107,7 +107,7 @@ OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr // The inheritance is initialised with the copyOutputData to false given that // this depencendant class handles the transfer of data via staging buffers in // a granular way. 
- : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, false) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params"); } diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp index 5d51286a7..ba3cb21a0 100644 --- a/src/include/kompute/operations/OpMult.hpp +++ b/src/include/kompute/operations/OpMult.hpp @@ -47,7 +47,7 @@ class OpMult : public OpAlgoBase std::shared_ptr device, std::shared_ptr commandBuffer, std::vector> tensors) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, true, "") + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "") { SPDLOG_DEBUG("Kompute OpMult constructor with params"); diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index 603a49c7d..ae746c68f 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -48,7 +48,6 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) { sq->record>( params, - false, // Whether to copy output from device "test/shaders/glsl/test_logistic_regression.comp"); sq->record({wOutI, wOutJ, bOut}); @@ -125,9 +124,10 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) { sq->record>( params, - true, // Whether to copy output from device "test/shaders/glsl/test_logistic_regression.comp"); + sq->record({wOutI, wOutJ, bOut}); + sq->end(); // Iterate across all expected iterations diff --git a/test/TestManager.cpp b/test/TestManager.cpp index 666ff3978..6d4690ef7 100755 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -10,16 +10,17 @@ TEST(TestManager, EndToEndOpMultFlow) std::shared_ptr tensorLHS{ new kp::Tensor({ 0, 1, 2 }) }; mgr.evalOp({ tensorLHS }); - std::shared_ptr tensorRHS{ new kp::Tensor( - { 2, 4, 6 }) }; + std::shared_ptr tensorRHS{ new kp::Tensor( { 2, 4, 6 }) }; mgr.evalOp({ tensorRHS }); - std::shared_ptr tensorOutput{ new kp::Tensor( - { 0, 0, 0 }) }; 
+ std::shared_ptr tensorOutput{ new kp::Tensor( { 0, 0, 0 }) }; + mgr.evalOp({ tensorOutput }); mgr.evalOp>({ tensorLHS, tensorRHS, tensorOutput }); + mgr.evalOp({ tensorOutput }); + EXPECT_EQ(tensorOutput->data(), std::vector({0, 4, 12})); } @@ -46,6 +47,8 @@ TEST(TestManager, OpMultSequenceFlow) { sq->record>({ tensorLHS, tensorRHS, tensorOutput }); + sq->record({ tensorOutput }); + sq->end(); sq->eval(); } @@ -100,6 +103,8 @@ TEST(TestManager, TestMultipleTensorsAtOnce) { sq->record>({ tensorLHS, tensorRHS, tensorOutput }); + sq->record({ tensorOutput }); + sq->end(); sq->eval(); } diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index df381b97a..e6ab0c6fa 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -27,17 +27,16 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) { sq->record>( { tensorA }, - false, // Whether to copy output from device std::vector(shader.begin(), shader.end())); sq->record>( { tensorA }, - false, // Whether to copy output from device std::vector(shader.begin(), shader.end())); sq->record>( { tensorA }, - true, // Whether to copy output from device std::vector(shader.begin(), shader.end())); + sq->record({ tensorA }); + sq->end(); sq->eval(); } @@ -70,7 +69,6 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) { sq->record>( { tensorA }, - false, // Whether to copy output from device std::vector(shader.begin(), shader.end())); sq->end(); @@ -80,7 +78,6 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) { sq->record>( { tensorA }, - false, // Whether to copy output from device std::vector(shader.begin(), shader.end())); sq->end(); @@ -90,11 +87,18 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) { sq->record>( { tensorA }, - true, // Whether to copy output from device std::vector(shader.begin(), shader.end())); sq->end(); sq->eval(); + + sq->begin(); + + sq->record( + { tensorA }); + + sq->end(); + sq->eval(); } 
sqWeakPtr.reset(); @@ -126,7 +130,6 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) { sq->record>( { tensorA }, - true, // Whether to copy output from device std::vector(shader.begin(), shader.end())); sq->end(); @@ -134,12 +137,11 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) { } std::weak_ptr sqWeakPtr2 = mgr.getOrCreateManagedSequence("newSequence2"); - if (std::shared_ptr sq = sqWeakPtr.lock()) { + if (std::shared_ptr sq = sqWeakPtr2.lock()) { sq->begin(); sq->record>( { tensorA }, - true, // Whether to copy output from device std::vector(shader.begin(), shader.end())); sq->end(); @@ -148,18 +150,28 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) { std::weak_ptr sqWeakPtr3 = mgr.getOrCreateManagedSequence("newSequence3"); - if (std::shared_ptr sq = sqWeakPtr.lock()) { + if (std::shared_ptr sq = sqWeakPtr3.lock()) { sq->begin(); sq->record>( { tensorA }, - true, // Whether to copy output from device std::vector(shader.begin(), shader.end())); sq->end(); sq->eval(); } + std::weak_ptr sqWeakPtr4 = mgr.getOrCreateManagedSequence("newSequence5"); + if (std::shared_ptr sq = sqWeakPtr4.lock()) { + sq->begin(); + + sq->record( + { tensorA }); + + sq->end(); + sq->eval(); + } + EXPECT_EQ(tensorA->data(), std::vector({3, 3, 3})); } @@ -190,12 +202,11 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) { } std::weak_ptr sqWeakPtr2 = mgr.getOrCreateManagedSequence("newSequence2"); - if (std::shared_ptr sq = sqWeakPtr.lock()) { + if (std::shared_ptr sq = sqWeakPtr2.lock()) { sq->begin(); sq->record>( { tensorA }, - true, // Whether to copy output from device std::vector(shader.begin(), shader.end())); sq->end(); @@ -205,6 +216,20 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) { sq->eval(); } + std::weak_ptr sqWeakPtr3 = mgr.getOrCreateManagedSequence("newSequence3"); + if (std::shared_ptr sq = sqWeakPtr3.lock()) { + sq->begin(); + + sq->record( + { tensorA }); + + sq->end(); + + sq->eval(); + sq->eval(); + sq->eval(); + } + 
EXPECT_EQ(tensorA->data(), std::vector({3, 3, 3})); } diff --git a/test/TestOpAlgoLoopsPassingData.cpp b/test/TestOpAlgoLoopsPassingData.cpp index 218eddb9d..9370686f3 100644 --- a/test/TestOpAlgoLoopsPassingData.cpp +++ b/test/TestOpAlgoLoopsPassingData.cpp @@ -50,7 +50,6 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) { sq->record>( { tensorA, tensorB }, - true, // Whether to copy output from device std::vector(shader.begin(), shader.end())); sq->record({tensorB, tensorA}); diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index 92d2c50e0..f8d927b75 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -29,9 +29,10 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) { mgr.evalOpDefault>( { tensorA, tensorB }, - true, // Whether to copy output from device std::vector(shader.begin(), shader.end())); + mgr.evalOpDefault({tensorA, tensorB}); + EXPECT_EQ(tensorA->data(), std::vector({0, 1, 2})); EXPECT_EQ(tensorB->data(), std::vector({3, 4, 5})); } @@ -45,12 +46,13 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor) { mgr.evalOpDefault>( { tensorA, tensorB }, - true, // Whether to copy output from device std::vector( kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv, kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv + kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv_len)); + mgr.evalOpDefault({tensorA, tensorB}); + EXPECT_EQ(tensorA->data(), std::vector({0, 1, 2})); EXPECT_EQ(tensorB->data(), std::vector({3, 4, 5})); } @@ -64,9 +66,10 @@ TEST(TestOpAlgoBase, ShaderRawDataFromFile) { mgr.evalOpDefault>( { tensorA, tensorB }, - true, // Whether to copy output from device "test/shaders/glsl/test_op_custom_shader.comp"); + mgr.evalOpDefault({tensorA, tensorB}); + EXPECT_EQ(tensorA->data(), std::vector({0, 1, 2})); EXPECT_EQ(tensorB->data(), std::vector({3, 4, 5})); } @@ -80,9 +83,10 @@ 
TEST(TestOpAlgoBase, ShaderCompiledDataFromFile) { mgr.evalOpDefault>( { tensorA, tensorB }, - true, // Whether to copy output from device "test/shaders/glsl/test_op_custom_shader.comp.spv"); + mgr.evalOpDefault({tensorA, tensorB}); + EXPECT_EQ(tensorA->data(), std::vector({0, 1, 2})); EXPECT_EQ(tensorB->data(), std::vector({3, 4, 5})); }