From 9f74679dd5250e9a3b4b587a6b7a480501456a79 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 22 Aug 2020 20:13:30 +0100 Subject: [PATCH] Fully working end to end flow --- README.md | 27 --------------------------- shaders/glsl/opmult.comp | 2 ++ shaders/glsl/opmult.comp.spv | Bin 1308 -> 1500 bytes src/Algorithm.cpp | 26 ++++++++++++++++++++------ src/Manager.cpp | 6 ++++++ src/Manager.hpp | 3 ++- src/OpMult.cpp | 12 +++++++++++- src/Sequence.hpp | 4 ++-- src/Tensor.cpp | 4 ++++ src/main.cpp | 14 ++++++++++---- 10 files changed, 57 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index ddc03cd09..89556399f 100644 --- a/README.md +++ b/README.md @@ -88,33 +88,6 @@ int main() { } ``` -Use equations to group operations on memory and execution step - -```c++ -int main() { - kp::Manager kManager(); // Chooses device 0 - - kp::Sequence sq; - kManager.createSequence(&sq); - - sq.begin(); - - kp::Tensor inputOne; - sq.record(&inputOne, {0, 1, 2, 3}); // Mounts to device and binds to 0 - - kp::Tensor inputTwo; - sq.record(&inputTwo, {0, 1, 2, 3}); // Mounts to device and binds to 1 - - kp::Tensor output; - sq.record(&inputOne, &inputTwo, &output); - - sq.end(); - sq.eval(); - - std::cout << output << std::endl; -} -``` - ## Development diff --git a/shaders/glsl/opmult.comp b/shaders/glsl/opmult.comp index e72461f95..2da50470d 100644 --- a/shaders/glsl/opmult.comp +++ b/shaders/glsl/opmult.comp @@ -22,6 +22,8 @@ void main() //valuesOutput[index] = valuesLhs[index] * valuesRhs[index]; // FOR TESTING valuesOutput[index] = valuesLhs[index] + valuesRhs[index]; + valuesLhs[index] = 100 + index; + valuesRhs[index] = 100 + index; } diff --git a/shaders/glsl/opmult.comp.spv b/shaders/glsl/opmult.comp.spv index fb062d53bc82a761196ec0b193fa7f59e1a5dee0..f4d86116b9eeaf400e1858e67e94dfc3f574a21d 100755 GIT binary patch delta 266 zcmYk1O$x#=6ols`{#2z)5yX{l)V6BtMi&a6Mfc(f^dMeBdL6G)aHb?6;p1f{uOIY1 z{EkxZ7Ja~w@a#Y7)mc?P+}(;5fE&rsL65T}MslVBXB9g2W!?CRJx^hE$toc+uS0&J z+p02V>D6D@%s6v0C1axR&+u)h-RLQJ|MzY}4&EcSEF{)L6z_PS6Nx2|SeP?@+LDeYHluM%4d|-U;qg-Fz7I_F>nHDO&|t|f$?Tu O7I8+7zd!{RKnws>bO>& tensorParams) this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo, this->mDescriptorSet.get()); - std::vector descriptorBufferInfos; - for (size_t i = 0; i < tensorParams.size(); i++) { - descriptorBufferInfos.push_back(tensorParams[i]->constructDescriptorBufferInfo()); - } + ////std::vector descriptorBufferInfos; + ////for (size_t i = 0; i < tensorParams.size(); i++) { + //// descriptorBufferInfos.push_back(tensorParams[i]->constructDescriptorBufferInfo()); + ////} + ////std::vector computeWriteDescriptorSets; + + ////computeWriteDescriptorSets.push_back( + //// vk::WriteDescriptorSet(*this->mDescriptorSet, + //// 0, // Destination binding + //// 0, // Destination array element + //// 1, // Descriptor count + //// vk::DescriptorType::eStorageBuffer, + //// nullptr, // Descriptor image info + //// descriptorBufferInfos.data() + //// )); + // TODO: Explore design exposing the destination array element - std::vector computeWriteDescriptorSets; for (size_t i = 0; i < tensorParams.size(); i++) { + std::vector computeWriteDescriptorSets; vk::DescriptorBufferInfo descriptorBufferInfo = tensorParams[i]->constructDescriptorBufferInfo(); @@ -123,10 +135,12 @@ Algorithm::createParameters(std::vector>& tensorParams) vk::DescriptorType::eStorageBuffer, nullptr, // Descriptor image info &descriptorBufferInfo)); + + this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr); } SPDLOG_DEBUG("Kompute Algorithm updating descriptor sets"); - this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr); + //this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr); SPDLOG_DEBUG("Kompue Algorithm successfully run init"); } diff --git a/src/Manager.cpp b/src/Manager.cpp index ffcc98e62..0e85e658b 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -65,6 +65,12 @@ Manager::~Manager() } } +Sequence +Manager::constructSequence() { + SPDLOG_DEBUG("Kompute Manager creating Sequence object"); + return Sequence(this->mPhysicalDevice, this->mDevice, this->mComputeQueue, this->mComputeQueueFamilyIndex); +} + void Manager::createInstance() { diff --git a/src/Manager.hpp b/src/Manager.hpp index 8b94f3f82..4d2acf51f 100644 --- a/src/Manager.hpp +++ b/src/Manager.hpp @@ -26,7 +26,8 @@ class Manager ~Manager(); - // Evaluate actions + Sequence constructSequence(); + template void evalOp(std::vector> tensors) { diff --git a/src/OpMult.cpp b/src/OpMult.cpp index 65c8fbc80..fb80e39d6 100644 --- a/src/OpMult.cpp +++ b/src/OpMult.cpp @@ -123,6 +123,11 @@ OpMult::record() this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); // Barrier to ensure the shader code is executed before buffer read + this->mTensorLHS->recordBufferMemoryBarrier( + vk::AccessFlagBits::eShaderWrite, + vk::AccessFlagBits::eTransferRead, + vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eTransfer); this->mTensorOutput->recordBufferMemoryBarrier( vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eTransferRead, @@ -132,6 +137,11 @@ OpMult::record() this->mTensorOutputStaging->recordCopyFrom(this->mTensorOutput); // Buffer to ensure wait until data is copied to staging buffer + this->mTensorLHS->recordBufferMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, + vk::AccessFlagBits::eHostRead, + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eHost); this->mTensorOutput->recordBufferMemoryBarrier( vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eHostRead, @@ -143,7 +153,7 @@ template void OpMult::postSubmit() { - SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called"); + SPDLOG_DEBUG("Kompute OpMult postSubmit called"); this->mTensorOutputStaging->mapDataFromHostMemory(); diff --git a/src/Sequence.hpp b/src/Sequence.hpp index 47f051671..3d46eb9cd 100644 --- a/src/Sequence.hpp +++ b/src/Sequence.hpp @@ -31,7 +31,7 @@ class Sequence // TODO: Explore design without template using just top level class template - void record(TArgs&&... args) + void record(std::vector> tensors) { static_assert(std::is_base_of::value, "Template only valid with OpBase derived classes"); @@ -46,7 +46,7 @@ class Sequence std::unique_ptr baseOpPtr{ baseOp }; SPDLOG_DEBUG("Kompute Sequence running init on OpBase derived class instance"); - baseOpPtr->init(std::forward(args)...); + baseOpPtr->init(tensors); SPDLOG_DEBUG("Kompute Sequence running record on OpBase derived class instance"); baseOpPtr->record(); diff --git a/src/Tensor.cpp b/src/Tensor.cpp index d24826314..3c42e6cff 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -211,6 +211,10 @@ Tensor::mapDataIntoHostMemory() void* mapped = this->mDevice->mapMemory( *this->mMemory, 0, bufferSize, vk::MemoryMapFlags()); memcpy(mapped, this->mData.data(), bufferSize); + this->mDevice->unmapMemory(*this->mMemory); + + mapped = this->mDevice->mapMemory( + *this->mMemory, 0, bufferSize, vk::MemoryMapFlags()); vk::MappedMemoryRange mappedRange(*this->mMemory, 0, bufferSize); this->mDevice->flushMappedMemoryRanges(1, &mappedRange); this->mDevice->unmapMemory(*this->mMemory); diff --git a/src/main.cpp b/src/main.cpp index 07aebc90f..f576b6436 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -623,22 +623,25 @@ main() spdlog::info("Creating manager"); kp::Manager mgr; + kp::Sequence sq = mgr.constructSequence(); + sq.begin(); spdlog::info("Creating first tensor"); std::shared_ptr tensorLHS{ new kp::Tensor( { 0.0, 1.0, 2.0 }) }; - mgr.evalOp({ tensorLHS }); spdlog::info("Creating second tensor"); std::shared_ptr tensorRHS{ new kp::Tensor( { 2.0, 4.0, 6.0 }) }; - mgr.evalOp({ tensorRHS }); // TODO: Add capabilities for just output tensor types spdlog::info("Creating output tensor"); std::shared_ptr tensorOutput{ new kp::Tensor( { 0.0, 0.0, 0.0 }) }; - mgr.evalOp({ tensorOutput }); + + sq.record({ tensorLHS }); + sq.record({ tensorRHS }); + sq.record({ tensorOutput }); spdlog::info("OpCreateTensor success for tensors"); spdlog::info("Tensor one: {}", tensorLHS->data()); @@ -646,7 +649,10 @@ main() spdlog::info("Tensor output: {}", tensorOutput->data()); spdlog::info("Calling op mult"); - mgr.evalOp>({ tensorLHS, tensorRHS, tensorOutput }); + sq.record>({ tensorLHS, tensorRHS, tensorOutput }); + + sq.end(); + sq.eval(); spdlog::info("OpMult call success"); spdlog::info("Tensor output: {}", tensorOutput->data());