From e047aa3b43059b20f66845e868f5ef52060f854c Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 22 Aug 2020 18:08:56 +0100 Subject: [PATCH] Updated tensor to copy to memory explicitly, assessing why copy doesn't last through operations --- shaders/glsl/opmult.comp | 15 ++++-------- shaders/glsl/opmult.comp.spv | Bin 1692 -> 1308 bytes src/Algorithm.cpp | 4 ++++ src/Algorithm.hpp | 5 +++- src/OpCreateTensor.cpp | 11 ++++++--- src/OpMult.cpp | 45 ++++++++++------------------------- src/Sequence.cpp | 8 +++---- src/Sequence.hpp | 6 ++++- src/Tensor.cpp | 41 ++++++++++++++++--------------- src/Tensor.hpp | 9 ++++--- 10 files changed, 65 insertions(+), 79 deletions(-) diff --git a/shaders/glsl/opmult.comp b/shaders/glsl/opmult.comp index 109b48cdd..e72461f95 100644 --- a/shaders/glsl/opmult.comp +++ b/shaders/glsl/opmult.comp @@ -1,21 +1,17 @@ #version 450 -layout(binding = 0) buffer tensorLhs { +layout(set = 0, binding = 0) buffer tensorLhs { uint valuesLhs[ ]; }; -layout(binding = 1) buffer tensorRhs { +layout(set = 0, binding = 1) buffer tensorRhs { uint valuesRhs[ ]; }; -layout(binding = 2) buffer tensorOutput { +layout(set = 0, binding = 2) buffer tensorOutput { uint valuesOutput[ ]; }; -layout(binding = 3) buffer tensorInvalid { - uint valuesInvalid[ ]; -}; - // TODO: Explore how to make layout inside shader dynamic layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; @@ -25,10 +21,7 @@ void main() //valuesOutput[index] = valuesLhs[index] * valuesRhs[index]; // FOR TESTING - valuesOutput[index] = 100 + index; - valuesRhs[index] = 100 + index; - valuesLhs[index] = 100 + index; - valuesInvalid[index] = 100 + index; + valuesOutput[index] = valuesLhs[index] + valuesRhs[index]; } diff --git a/shaders/glsl/opmult.comp.spv b/shaders/glsl/opmult.comp.spv index 69ffb1c8ba52d2f3e09ae7142c04a77f7715686b..fb062d53bc82a761196ec0b193fa7f59e1a5dee0 100755 GIT binary patch literal 1308 zcmYk5Yikoh6o#jXn^vp6SZlqtZkl?R;s+H$6beEt1O)v75^c~xx)HM}e)PBbtNbGP zJTn>HSq|sC?>Xn4nX|j?R&P0krLe;DFr>8^S}-B3hEDYT!H2=&tUft>`s|4nYoQ$r zty!n0z@NiThvUk`Dz*VHw~1MotxazW{0pL>qS+^2Avdm0MnCWcNUblEkNwH?({OTJ zolQRv>+!TYeiQSZs9%q&+4TFndHro(L$mX8P@_THxeEDO5)V_{$HP*EA=JbzWqdpKKJuJ+AU1Hf$w0>;VsI}%hgMcns(%z zxb>dBsVDAY&eZ%4zNwe*($tg7dfut2Csxm;-fetSuQb7G>XqQEr&d$XSUp~M)O&#U z{@1ZBPTj(H>9*f^n?2pH#9Sqq4>X$m4){FY*dFs2@aoIITwwQ*djz)jBIdIG65gEr zE-LvQ>=8SEv2~a6o=Hw!vGdO7yh2=JYPpYi8*{0f--Wu)T&%8lS3^!+v2&L*@3&!` z&zIkUIwic=x-LHTBpvbog3I^54_?9^qvm{^zg)KGd~<5}{kfd!Zy5h+&*tp?C-q0L z&Tb8JzRvU&HJ)=FlYfnZ_2QhnN-UT0%>_Q9)4T3s-rw1o%XNFi<}&UPtCRZrPWq5QR@EEhx%GM7#k7yn}cHVq!EQF+q|hCi(!T3ehIDHMS-G^x1qW|CsoF zXP<$+PbM>K&CJ@TGcD97Hm8cBR!sBk7wIe%6EHaxsdE`8)H}UqZCA#dpOPu=JXMJ*>)R%bs?kAeQX7QLf_3>dkz3wsC_m%gg-t(OO zPg1HlWIoPAyN!wG@lDJ*zT*bx?b%F@ntI8(G3)*88)vzM*TL?@yt=tw%p%v53q5C& z>xtEK-|F4L=Xx=tTu(0aoK>zTR?nl}Ha^#jx#fCtq37&!J+XR`-y?ROH?T#f;2hS; znm1nJPVZ}Bo)(xNOlk7V;0t(Td(2G$c61z{a zbyx7dlbpI@_ZjbbmAHkeuzh|D)=y#{ zck>sffmhQV`nN`Z0P7BBG56$dUQ^>-<}mp;C|EC!+%;mkz}GAM8$EmdPC0M+0~qIh z3v*}g#eQ|=tPPzOajv^w;n3Xxt1BP6o5b!!-4>>EESmDrp1Xfo*bhn7z pnY#9?D`#!!`cEO(eOBSneGXPvK6GCYyHj=TS69y3e{A+8_8%{0V7CAO diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 571d25252..48c211a50 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -41,6 +41,10 @@ Algorithm::init(std::string shaderFilePath, this->createPipeline(); } +void Algorithm::createDescriptorPool() { + +} + void Algorithm::createParameters(std::vector>& tensorParams) { diff --git a/src/Algorithm.hpp b/src/Algorithm.hpp index 103960a27..934e9ba53 100644 --- a/src/Algorithm.hpp +++ b/src/Algorithm.hpp @@ -55,9 +55,12 @@ class Algorithm bool mFreePipeline = false; // Create util functions - void createParameters(std::vector>& tensorParams); void createShaderModule(std::string shaderFilePath); void createPipeline(); + // Parameters + void createParameters(std::vector>& tensorParams); + void createDescriptorPool(); + }; } // End namespace kp diff --git a/src/OpCreateTensor.cpp b/src/OpCreateTensor.cpp index 63d32f9ca..4362ffb64 100644 --- a/src/OpCreateTensor.cpp +++ b/src/OpCreateTensor.cpp @@ -37,7 +37,6 @@ OpCreateTensor::init(std::vector> tensors) } this->mPrimaryTensor = tensors[0]; - std::vector data = this->mPrimaryTensor->data(); if (this->mPrimaryTensor->tensorType() == Tensor::TensorTypes::eDevice) { this->mPrimaryTensor->init( @@ -47,11 +46,13 @@ OpCreateTensor::init(std::vector> tensors) this->mPrimaryTensor->data(), Tensor::TensorTypes::eStaging); this->mStagingTensor->init( - this->mPhysicalDevice, this->mDevice, this->mCommandBuffer, data); + this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); + + this->mStagingTensor->mapDataIntoHostMemory(); } else { this->mPrimaryTensor->init( - this->mPhysicalDevice, this->mDevice, this->mCommandBuffer, data); + this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); } } @@ -69,6 +70,10 @@ void OpCreateTensor::postSubmit() { SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called"); + + this->mStagingTensor->mapDataFromHostMemory(); + + this->mPrimaryTensor->setData(this->mStagingTensor->data()); } } diff --git a/src/OpMult.cpp b/src/OpMult.cpp index 0fa47845b..65c8fbc80 100644 --- a/src/OpMult.cpp +++ b/src/OpMult.cpp @@ -96,8 +96,7 @@ OpMult::init(std::vector> tensors) this->mTensorOutputStaging->init(this->mPhysicalDevice, this->mDevice, - this->mCommandBuffer, - this->mTensorOutput->data()); + this->mCommandBuffer); // TODO: Make this path configurable this->mAlgorithm->init("shaders/glsl/opmult.comp.spv", tensors); @@ -110,30 +109,20 @@ OpMult::record() SPDLOG_DEBUG("Kompute OpMult record called"); // Barrier to ensure the data is finished writing to buffer memory - //this->mTensorLHS->recordBufferMemoryBarrier( - // vk::AccessFlagBits::eHostWrite, - // vk::AccessFlagBits::eShaderRead, - // vk::PipelineStageFlagBits::eHost, - // vk::PipelineStageFlagBits::eComputeShader); - //this->mTensorRHS->recordBufferMemoryBarrier( - // vk::AccessFlagBits::eHostWrite, - // vk::AccessFlagBits::eShaderRead, - // vk::PipelineStageFlagBits::eHost, - // vk::PipelineStageFlagBits::eComputeShader); + this->mTensorLHS->recordBufferMemoryBarrier( + vk::AccessFlagBits::eHostWrite, + vk::AccessFlagBits::eShaderRead, + vk::PipelineStageFlagBits::eHost, + vk::PipelineStageFlagBits::eComputeShader); + this->mTensorRHS->recordBufferMemoryBarrier( + vk::AccessFlagBits::eHostWrite, + vk::AccessFlagBits::eShaderRead, + vk::PipelineStageFlagBits::eHost, + vk::PipelineStageFlagBits::eComputeShader); this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); // Barrier to ensure the shader code is executed before buffer read - //this->mTensorLHS->recordBufferMemoryBarrier( - // vk::AccessFlagBits::eShaderWrite, - // vk::AccessFlagBits::eTransferRead, - // vk::PipelineStageFlagBits::eComputeShader, - // vk::PipelineStageFlagBits::eTransfer); - //this->mTensorRHS->recordBufferMemoryBarrier( - // vk::AccessFlagBits::eShaderWrite, - // vk::AccessFlagBits::eTransferRead, - // vk::PipelineStageFlagBits::eComputeShader, - // vk::PipelineStageFlagBits::eTransfer); this->mTensorOutput->recordBufferMemoryBarrier( vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eTransferRead, @@ -148,16 +137,6 @@ OpMult::record() vk::AccessFlagBits::eHostRead, vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eHost); - //this->mTensorLHS->recordBufferMemoryBarrier( - // vk::AccessFlagBits::eTransferWrite, - // vk::AccessFlagBits::eHostRead, - // vk::PipelineStageFlagBits::eTransfer, - // vk::PipelineStageFlagBits::eHost); - //this->mTensorRHS->recordBufferMemoryBarrier( - // vk::AccessFlagBits::eTransferWrite, - // vk::AccessFlagBits::eHostRead, - // vk::PipelineStageFlagBits::eTransfer, - // vk::PipelineStageFlagBits::eHost); } template @@ -166,7 +145,7 @@ OpMult::postSubmit() { SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called"); - this->mTensorOutputStaging->copyDataFromHostBuffer(); + this->mTensorOutputStaging->mapDataFromHostMemory(); this->mTensorOutput->setData(this->mTensorOutputStaging->data()); } diff --git a/src/Sequence.cpp b/src/Sequence.cpp index a06797fc6..6cd06c9ac 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -43,7 +43,7 @@ Sequence::~Sequence() } this->mDevice->freeCommandBuffers( *this->mCommandPool, 1, this->mCommandBuffer.get()); - SPDLOG_DEBUG("Kompute Manager Freed CommandBuffer"); + SPDLOG_DEBUG("Kompute Sequence Freed CommandBuffer"); } if (this->mFreeCommandPool) { @@ -54,7 +54,7 @@ Sequence::~Sequence() return; } this->mDevice->destroy(*this->mCommandPool); - SPDLOG_DEBUG("Kompute Manager Destroyed CommandPool"); + SPDLOG_DEBUG("Kompute Sequence Destroyed CommandPool"); } } @@ -139,7 +139,7 @@ Sequence::createCommandPool() this->mCommandPool = std::make_shared(); this->mDevice->createCommandPool( &commandPoolInfo, nullptr, this->mCommandPool.get()); - SPDLOG_DEBUG("Kompute Manager Command Pool Created"); + SPDLOG_DEBUG("Kompute Sequence Command Pool Created"); } void @@ -161,7 +161,7 @@ Sequence::createCommandBuffer() this->mCommandBuffer = std::make_shared(); this->mDevice->allocateCommandBuffers(&commandBufferAllocateInfo, this->mCommandBuffer.get()); - SPDLOG_DEBUG("Kompute Manager Command Buffer Created"); + SPDLOG_DEBUG("Kompute Sequence Command Buffer Created"); } } diff --git a/src/Sequence.hpp b/src/Sequence.hpp index 8eeec6aff..47f051671 100644 --- a/src/Sequence.hpp +++ b/src/Sequence.hpp @@ -36,15 +36,19 @@ class Sequence static_assert(std::is_base_of::value, "Template only valid with OpBase derived classes"); - SPDLOG_DEBUG("Kompute Sequence record"); + SPDLOG_DEBUG("Kompute Sequence record function started"); + SPDLOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); T* op = new T(this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); OpBase* baseOp = dynamic_cast(op); std::unique_ptr baseOpPtr{ baseOp }; + SPDLOG_DEBUG("Kompute Sequence running init on OpBase derived class instance"); baseOpPtr->init(std::forward(args)...); + + SPDLOG_DEBUG("Kompute Sequence running record on OpBase derived class instance"); baseOpPtr->record(); mOperations.push_back(std::move(baseOpPtr)); diff --git a/src/Tensor.cpp b/src/Tensor.cpp index b4cc1434f..d24826314 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -1,4 +1,8 @@ +#if DEBUG +#include +#endif + #include "Tensor.hpp" namespace kp { @@ -11,7 +15,7 @@ Tensor::Tensor() Tensor::Tensor(std::vector data, TensorTypes tensorType) { - SPDLOG_DEBUG("Kompute Tensor constructor data and type"); + SPDLOG_DEBUG("Kompute Tensor constructor data: {}, and type: {}", data, tensorType); this->mData = data; this->mShape = { data.size() }; @@ -54,11 +58,10 @@ Tensor::~Tensor() void Tensor::init(std::shared_ptr physicalDevice, std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector data) + std::shared_ptr commandBuffer) { SPDLOG_DEBUG( - "Kompute Tensor running init with Vulkan params and data size: {}", data.size()); + "Kompute Tensor running init with Vulkan params and num data elementS: {}", this->mData.size()); this->mPhysicalDevice = physicalDevice; this->mDevice = device; @@ -66,7 +69,7 @@ Tensor::init(std::shared_ptr physicalDevice, this->mIsInit = true; - this->createBuffer(data.data()); + this->createBuffer(); } std::vector @@ -131,9 +134,6 @@ Tensor::recordCopyFrom(std::shared_ptr copyFromTensor) // TODO: Ensure command buffer is in same device from buffer this->mCommandBuffer->copyBuffer( *copyFromTensor->mBuffer, *this->mBuffer, copyRegion); - - // TODO: Ensure copied data is consistent with device - this->mData = copyFromTensor->mData; } void @@ -173,13 +173,14 @@ Tensor::constructDescriptorBufferInfo() } void -Tensor::copyDataFromHostBuffer() +Tensor::mapDataFromHostMemory() { - SPDLOG_DEBUG("Kompute Tensor copying data from host buffer"); + SPDLOG_DEBUG("Kompute Tensor mapping data from host buffer"); if (this->mTensorType != TensorTypes::eStaging) { - spdlog::warn("Copying tensor data manually to DEVICE buffer instead of " - "using record GPU command"); + spdlog::error("Mapping tensor data manually from DEVICE buffer instead of " + "using record GPU command with staging buffer"); + return; } vk::DeviceSize bufferSize = this->memorySize(); @@ -192,14 +193,16 @@ Tensor::copyDataFromHostBuffer() } void -Tensor::copyDataToHostBuffer() +Tensor::mapDataIntoHostMemory() { - SPDLOG_DEBUG("Kompute Tensor copying data to buffer"); + SPDLOG_DEBUG("Kompute Tensor local mapping tensor data to host buffer"); + // TODO: Verify if there are situations where we want to copy to device memory if (this->mTensorType != TensorTypes::eStaging) { - spdlog::warn("Copying tensor data manually to DEVICE buffer instead of " - "using record GPU command"); + spdlog::error("Mapping tensor data manually to DEVICE memory instead of " + "using record GPU command with staging buffer"); + return; } vk::DeviceSize bufferSize = this->memorySize(); @@ -253,7 +256,7 @@ Tensor::getMemoryPropertyFlags() } void -Tensor::createBuffer(void* data) +Tensor::createBuffer() { SPDLOG_DEBUG("Kompute Tensor creating buffer"); @@ -331,10 +334,6 @@ Tensor::createBuffer(void* data) this->mDevice->bindBufferMemory(*this->mBuffer, *this->mMemory, 0); SPDLOG_DEBUG("Kompute Tensor buffer & memory creation successful"); - - if (data != nullptr) { - this->copyDataToHostBuffer(); - } } } diff --git a/src/Tensor.hpp b/src/Tensor.hpp index 8c5c2e95b..cda6f6cab 100644 --- a/src/Tensor.hpp +++ b/src/Tensor.hpp @@ -33,11 +33,10 @@ class Tensor void init(std::shared_ptr physicalDevice, std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector data = std::vector()); + std::shared_ptr commandBuffer); // Create functions - void createBuffer(void* data = nullptr); + void createBuffer(); // Getter functions std::vector data(); @@ -60,8 +59,8 @@ class Tensor // Util functions vk::DescriptorBufferInfo constructDescriptorBufferInfo(); - void copyDataFromHostBuffer(); - void copyDataToHostBuffer(); + void mapDataFromHostMemory(); + void mapDataIntoHostMemory(); private: std::shared_ptr mPhysicalDevice;