diff --git a/README.md b/README.md index ddc03cd09..89556399f 100644 --- a/README.md +++ b/README.md @@ -88,33 +88,6 @@ int main() { } ``` -Use equations to group operations on memory and execution step - -```c++ -int main() { - kp::Manager kManager(); // Chooses device 0 - - kp::Sequence sq; - kManager.createSequence(&sq); - - sq.begin(); - - kp::Tensor inputOne; - sq.record(&inputOne, {0, 1, 2, 3}); // Mounts to device and binds to 0 - - kp::Tensor inputTwo; - sq.record(&inputTwo, {0, 1, 2, 3}); // Mounts to device and binds to 1 - - kp::Tensor output; - sq.record(&inputOne, &inputTwo, &output); - - sq.end(); - sq.eval(); - - std::cout << output << std::endl; -} -``` - ## Development diff --git a/shaders/glsl/opmult.comp b/shaders/glsl/opmult.comp index e72461f95..2da50470d 100644 --- a/shaders/glsl/opmult.comp +++ b/shaders/glsl/opmult.comp @@ -22,6 +22,8 @@ void main() //valuesOutput[index] = valuesLhs[index] * valuesRhs[index]; // FOR TESTING valuesOutput[index] = valuesLhs[index] + valuesRhs[index]; + valuesLhs[index] = 100 + index; + valuesRhs[index] = 100 + index; } diff --git a/shaders/glsl/opmult.comp.spv b/shaders/glsl/opmult.comp.spv index fb062d53b..f4d86116b 100755 Binary files a/shaders/glsl/opmult.comp.spv and b/shaders/glsl/opmult.comp.spv differ diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 48c211a50..3920b08f6 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -103,13 +103,25 @@ Algorithm::createParameters(std::vector>& tensorParams) this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo, this->mDescriptorSet.get()); - std::vector descriptorBufferInfos; - for (size_t i = 0; i < tensorParams.size(); i++) { - descriptorBufferInfos.push_back(tensorParams[i]->constructDescriptorBufferInfo()); - } + ////std::vector descriptorBufferInfos; + ////for (size_t i = 0; i < tensorParams.size(); i++) { + //// descriptorBufferInfos.push_back(tensorParams[i]->constructDescriptorBufferInfo()); + ////} + ////std::vector computeWriteDescriptorSets; + + ////computeWriteDescriptorSets.push_back( + //// vk::WriteDescriptorSet(*this->mDescriptorSet, + //// 0, // Destination binding + //// 0, // Destination array element + //// 1, // Descriptor count + //// vk::DescriptorType::eStorageBuffer, + //// nullptr, // Descriptor image info + //// descriptorBufferInfos.data() + //// )); + // TODO: Explore design exposing the destination array element - std::vector computeWriteDescriptorSets; for (size_t i = 0; i < tensorParams.size(); i++) { + std::vector computeWriteDescriptorSets; vk::DescriptorBufferInfo descriptorBufferInfo = tensorParams[i]->constructDescriptorBufferInfo(); @@ -123,10 +135,12 @@ Algorithm::createParameters(std::vector>& tensorParams) vk::DescriptorType::eStorageBuffer, nullptr, // Descriptor image info &descriptorBufferInfo)); + + this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr); } SPDLOG_DEBUG("Kompute Algorithm updating descriptor sets"); - this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr); + //this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr); SPDLOG_DEBUG("Kompue Algorithm successfully run init"); } diff --git a/src/Manager.cpp b/src/Manager.cpp index ffcc98e62..0e85e658b 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -65,6 +65,12 @@ Manager::~Manager() } } +Sequence +Manager::constructSequence() { + SPDLOG_DEBUG("Kompute Manager creating Sequence object"); + return Sequence(this->mPhysicalDevice, this->mDevice, this->mComputeQueue, this->mComputeQueueFamilyIndex); +} + void Manager::createInstance() { diff --git a/src/Manager.hpp b/src/Manager.hpp index 8b94f3f82..4d2acf51f 100644 --- a/src/Manager.hpp +++ b/src/Manager.hpp @@ -26,7 +26,8 @@ class Manager ~Manager(); - // Evaluate actions + Sequence constructSequence(); + template void evalOp(std::vector> tensors) { diff --git a/src/OpMult.cpp b/src/OpMult.cpp index 65c8fbc80..fb80e39d6 100644 --- a/src/OpMult.cpp +++ b/src/OpMult.cpp @@ -123,6 +123,11 @@ OpMult::record() this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); // Barrier to ensure the shader code is executed before buffer read + this->mTensorLHS->recordBufferMemoryBarrier( + vk::AccessFlagBits::eShaderWrite, + vk::AccessFlagBits::eTransferRead, + vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eTransfer); this->mTensorOutput->recordBufferMemoryBarrier( vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eTransferRead, @@ -132,6 +137,11 @@ OpMult::record() this->mTensorOutputStaging->recordCopyFrom(this->mTensorOutput); // Buffer to ensure wait until data is copied to staging buffer + this->mTensorLHS->recordBufferMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, + vk::AccessFlagBits::eHostRead, + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eHost); this->mTensorOutput->recordBufferMemoryBarrier( vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eHostRead, @@ -143,7 +153,7 @@ template void OpMult::postSubmit() { - SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called"); + SPDLOG_DEBUG("Kompute OpMult postSubmit called"); this->mTensorOutputStaging->mapDataFromHostMemory(); diff --git a/src/Sequence.hpp b/src/Sequence.hpp index 47f051671..3d46eb9cd 100644 --- a/src/Sequence.hpp +++ b/src/Sequence.hpp @@ -31,7 +31,7 @@ class Sequence // TODO: Explore design without template using just top level class template - void record(TArgs&&... args) + void record(std::vector> tensors) { static_assert(std::is_base_of::value, "Template only valid with OpBase derived classes"); @@ -46,7 +46,7 @@ class Sequence std::unique_ptr baseOpPtr{ baseOp }; SPDLOG_DEBUG("Kompute Sequence running init on OpBase derived class instance"); - baseOpPtr->init(std::forward(args)...); + baseOpPtr->init(tensors); SPDLOG_DEBUG("Kompute Sequence running record on OpBase derived class instance"); baseOpPtr->record(); diff --git a/src/Tensor.cpp b/src/Tensor.cpp index d24826314..3c42e6cff 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -211,6 +211,10 @@ Tensor::mapDataIntoHostMemory() void* mapped = this->mDevice->mapMemory( *this->mMemory, 0, bufferSize, vk::MemoryMapFlags()); memcpy(mapped, this->mData.data(), bufferSize); + this->mDevice->unmapMemory(*this->mMemory); + + mapped = this->mDevice->mapMemory( + *this->mMemory, 0, bufferSize, vk::MemoryMapFlags()); vk::MappedMemoryRange mappedRange(*this->mMemory, 0, bufferSize); this->mDevice->flushMappedMemoryRanges(1, &mappedRange); this->mDevice->unmapMemory(*this->mMemory); diff --git a/src/main.cpp b/src/main.cpp index 07aebc90f..f576b6436 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -623,22 +623,25 @@ main() spdlog::info("Creating manager"); kp::Manager mgr; + kp::Sequence sq = mgr.constructSequence(); + sq.begin(); spdlog::info("Creating first tensor"); std::shared_ptr tensorLHS{ new kp::Tensor( { 0.0, 1.0, 2.0 }) }; - mgr.evalOp({ tensorLHS }); spdlog::info("Creating second tensor"); std::shared_ptr tensorRHS{ new kp::Tensor( { 2.0, 4.0, 6.0 }) }; - mgr.evalOp({ tensorRHS }); // TODO: Add capabilities for just output tensor types spdlog::info("Creating output tensor"); std::shared_ptr tensorOutput{ new kp::Tensor( { 0.0, 0.0, 0.0 }) }; - mgr.evalOp({ tensorOutput }); + + sq.record({ tensorLHS }); + sq.record({ tensorRHS }); + sq.record({ tensorOutput }); spdlog::info("OpCreateTensor success for tensors"); spdlog::info("Tensor one: {}", tensorLHS->data()); @@ -646,7 +649,10 @@ main() spdlog::info("Tensor output: {}", tensorOutput->data()); spdlog::info("Calling op mult"); - mgr.evalOp>({ tensorLHS, tensorRHS, tensorOutput }); + sq.record>({ tensorLHS, tensorRHS, tensorOutput }); + + sq.end(); + sq.eval(); spdlog::info("OpMult call success"); spdlog::info("Tensor output: {}", tensorOutput->data());