Updated Tensor to copy data to memory explicitly; still assessing why the copied data doesn't persist across operations

2020-08-22 18:08:56 +01:00 · 2020-08-22 18:08:56 +01:00 · e047aa3b43
commit e047aa3b43
parent 6f0203b863
10 changed files with 65 additions and 79 deletions
--- a/shaders/glsl/opmult.comp
+++ b/shaders/glsl/opmult.comp
@ -1,21 +1,17 @@
 #version 450

-layout(binding = 0) buffer tensorLhs {
+layout(set = 0, binding = 0) buffer tensorLhs {
   uint valuesLhs[ ];
 };

-layout(binding = 1) buffer tensorRhs {
+layout(set = 0, binding = 1) buffer tensorRhs {
   uint valuesRhs[ ];
 };

-layout(binding = 2) buffer tensorOutput {
+layout(set = 0, binding = 2) buffer tensorOutput {
   uint valuesOutput[ ];
 };

-layout(binding = 3) buffer tensorInvalid {
-   uint valuesInvalid[ ];
-};
-
 // TODO: Explore how to make layout inside shader dynamic
 layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

@ -25,10 +21,7 @@ void main()

    //valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
    // FOR TESTING
-    valuesOutput[index] = 100 + index;
-    valuesRhs[index] = 100 + index;
-    valuesLhs[index] = 100 + index;
-    valuesInvalid[index] = 100 + index;
+    valuesOutput[index] = valuesLhs[index] + valuesRhs[index];
 }


--- a/shaders/glsl/opmult.comp.spv
+++ b/shaders/glsl/opmult.comp.spv
--- a/src/Algorithm.cpp
+++ b/src/Algorithm.cpp
@ -41,6 +41,10 @@ Algorithm::init(std::string shaderFilePath,
    this->createPipeline();
 }

+void Algorithm::createDescriptorPool() {
+
+}
+
 void
 Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
 {
--- a/src/Algorithm.hpp
+++ b/src/Algorithm.hpp
@ -55,9 +55,12 @@ class Algorithm
    bool mFreePipeline = false;

    // Create util functions
-    void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
    void createShaderModule(std::string shaderFilePath);
    void createPipeline();
+    // Parameters
+    void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
+    void createDescriptorPool();
+
 };

 } // End namespace kp
--- a/src/OpCreateTensor.cpp
+++ b/src/OpCreateTensor.cpp
@ -37,7 +37,6 @@ OpCreateTensor::init(std::vector<std::shared_ptr<Tensor>> tensors)
    }

    this->mPrimaryTensor = tensors[0];
-    std::vector<uint32_t> data = this->mPrimaryTensor->data();

    if (this->mPrimaryTensor->tensorType() == Tensor::TensorTypes::eDevice) {
        this->mPrimaryTensor->init(
@ -47,11 +46,13 @@ OpCreateTensor::init(std::vector<std::shared_ptr<Tensor>> tensors)
          this->mPrimaryTensor->data(), Tensor::TensorTypes::eStaging);

        this->mStagingTensor->init(
-          this->mPhysicalDevice, this->mDevice, this->mCommandBuffer, data);
+          this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
+
+        this->mStagingTensor->mapDataIntoHostMemory();

    } else {
        this->mPrimaryTensor->init(
-          this->mPhysicalDevice, this->mDevice, this->mCommandBuffer, data);
+          this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
    }
 }

@ -69,6 +70,10 @@ void
 OpCreateTensor::postSubmit()
 {
    SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called");
+
+    this->mStagingTensor->mapDataFromHostMemory();
+
+    this->mPrimaryTensor->setData(this->mStagingTensor->data());
 }

 }
--- a/src/OpMult.cpp
+++ b/src/OpMult.cpp
@ -96,8 +96,7 @@ OpMult<tX, tY, tZ>::init(std::vector<std::shared_ptr<Tensor>> tensors)

    this->mTensorOutputStaging->init(this->mPhysicalDevice,
                                     this->mDevice,
-                                     this->mCommandBuffer,
-                                     this->mTensorOutput->data());
+                                     this->mCommandBuffer);

    // TODO: Make this path configurable
    this->mAlgorithm->init("shaders/glsl/opmult.comp.spv", tensors);
@ -110,30 +109,20 @@ OpMult<tX, tY, tZ>::record()
    SPDLOG_DEBUG("Kompute OpMult record called");

    // Barrier to ensure the data is finished writing to buffer memory
-    //this->mTensorLHS->recordBufferMemoryBarrier(
-    //    vk::AccessFlagBits::eHostWrite,
-    //    vk::AccessFlagBits::eShaderRead,
-    //    vk::PipelineStageFlagBits::eHost,
-    //    vk::PipelineStageFlagBits::eComputeShader);
-    //this->mTensorRHS->recordBufferMemoryBarrier(
-    //    vk::AccessFlagBits::eHostWrite,
-    //    vk::AccessFlagBits::eShaderRead,
-    //    vk::PipelineStageFlagBits::eHost,
-    //    vk::PipelineStageFlagBits::eComputeShader);
+    this->mTensorLHS->recordBufferMemoryBarrier(
+        vk::AccessFlagBits::eHostWrite,
+        vk::AccessFlagBits::eShaderRead,
+        vk::PipelineStageFlagBits::eHost,
+        vk::PipelineStageFlagBits::eComputeShader);
+    this->mTensorRHS->recordBufferMemoryBarrier(
+        vk::AccessFlagBits::eHostWrite,
+        vk::AccessFlagBits::eShaderRead,
+        vk::PipelineStageFlagBits::eHost,
+        vk::PipelineStageFlagBits::eComputeShader);

    this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);

    // Barrier to ensure the shader code is executed before buffer read
-    //this->mTensorLHS->recordBufferMemoryBarrier(
-    //    vk::AccessFlagBits::eShaderWrite,
-    //    vk::AccessFlagBits::eTransferRead,
-    //    vk::PipelineStageFlagBits::eComputeShader,
-    //    vk::PipelineStageFlagBits::eTransfer);
-    //this->mTensorRHS->recordBufferMemoryBarrier(
-    //    vk::AccessFlagBits::eShaderWrite,
-    //    vk::AccessFlagBits::eTransferRead,
-    //    vk::PipelineStageFlagBits::eComputeShader,
-    //    vk::PipelineStageFlagBits::eTransfer);
    this->mTensorOutput->recordBufferMemoryBarrier(
        vk::AccessFlagBits::eShaderWrite,
        vk::AccessFlagBits::eTransferRead,
@ -148,16 +137,6 @@ OpMult<tX, tY, tZ>::record()
        vk::AccessFlagBits::eHostRead,
        vk::PipelineStageFlagBits::eTransfer,
        vk::PipelineStageFlagBits::eHost);
-    //this->mTensorLHS->recordBufferMemoryBarrier(
-    //    vk::AccessFlagBits::eTransferWrite,
-    //    vk::AccessFlagBits::eHostRead,
-    //    vk::PipelineStageFlagBits::eTransfer,
-    //    vk::PipelineStageFlagBits::eHost);
-    //this->mTensorRHS->recordBufferMemoryBarrier(
-    //    vk::AccessFlagBits::eTransferWrite,
-    //    vk::AccessFlagBits::eHostRead,
-    //    vk::PipelineStageFlagBits::eTransfer,
-    //    vk::PipelineStageFlagBits::eHost);
 }

 template<uint32_t tX, uint32_t tY, uint32_t tZ>
@ -166,7 +145,7 @@ OpMult<tX, tY, tZ>::postSubmit()
 {
    SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called");

-    this->mTensorOutputStaging->copyDataFromHostBuffer();
+    this->mTensorOutputStaging->mapDataFromHostMemory();

    this->mTensorOutput->setData(this->mTensorOutputStaging->data());
 }
--- a/src/Sequence.cpp
+++ b/src/Sequence.cpp
@ -43,7 +43,7 @@ Sequence::~Sequence()
        }
        this->mDevice->freeCommandBuffers(
          *this->mCommandPool, 1, this->mCommandBuffer.get());
-        SPDLOG_DEBUG("Kompute Manager Freed CommandBuffer");
+        SPDLOG_DEBUG("Kompute Sequence Freed CommandBuffer");
    }

    if (this->mFreeCommandPool) {
@ -54,7 +54,7 @@ Sequence::~Sequence()
            return;
        }
        this->mDevice->destroy(*this->mCommandPool);
-        SPDLOG_DEBUG("Kompute Manager Destroyed CommandPool");
+        SPDLOG_DEBUG("Kompute Sequence Destroyed CommandPool");
    }
 }

@ -139,7 +139,7 @@ Sequence::createCommandPool()
    this->mCommandPool = std::make_shared<vk::CommandPool>();
    this->mDevice->createCommandPool(
      &commandPoolInfo, nullptr, this->mCommandPool.get());
-    SPDLOG_DEBUG("Kompute Manager Command Pool Created");
+    SPDLOG_DEBUG("Kompute Sequence Command Pool Created");
 }

 void
@ -161,7 +161,7 @@ Sequence::createCommandBuffer()
    this->mCommandBuffer = std::make_shared<vk::CommandBuffer>();
    this->mDevice->allocateCommandBuffers(&commandBufferAllocateInfo,
                                          this->mCommandBuffer.get());
-    SPDLOG_DEBUG("Kompute Manager Command Buffer Created");
+    SPDLOG_DEBUG("Kompute Sequence Command Buffer Created");
 }

 }
--- a/src/Sequence.hpp
+++ b/src/Sequence.hpp
@ -36,15 +36,19 @@ class Sequence
        static_assert(std::is_base_of<OpBase, T>::value,
                      "Template only valid with OpBase derived classes");

-        SPDLOG_DEBUG("Kompute Sequence record");
+        SPDLOG_DEBUG("Kompute Sequence record function started");

+        SPDLOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
        T* op =
          new T(this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
        OpBase* baseOp = dynamic_cast<OpBase*>(op);

        std::unique_ptr<OpBase> baseOpPtr{ baseOp };

+        SPDLOG_DEBUG("Kompute Sequence running init on OpBase derived class instance");
        baseOpPtr->init(std::forward<TArgs>(args)...);
+
+        SPDLOG_DEBUG("Kompute Sequence running record on OpBase derived class instance");
        baseOpPtr->record();

        mOperations.push_back(std::move(baseOpPtr));
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@ -1,4 +1,8 @@

+#if DEBUG
+#include <spdlog/fmt/bundled/ranges.h>
+#endif
+
 #include "Tensor.hpp"

 namespace kp {
@ -11,7 +15,7 @@ Tensor::Tensor()

 Tensor::Tensor(std::vector<uint32_t> data, TensorTypes tensorType)
 {
-    SPDLOG_DEBUG("Kompute Tensor constructor data and type");
+    SPDLOG_DEBUG("Kompute Tensor constructor data: {}, and type: {}", data, tensorType);

    this->mData = data;
    this->mShape = { data.size() };
@ -54,11 +58,10 @@ Tensor::~Tensor()
 void
 Tensor::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
             std::shared_ptr<vk::Device> device,
-             std::shared_ptr<vk::CommandBuffer> commandBuffer,
-             std::vector<uint32_t> data)
+             std::shared_ptr<vk::CommandBuffer> commandBuffer)
 {
    SPDLOG_DEBUG(
-      "Kompute Tensor running init with Vulkan params and data size: {}", data.size());
+      "Kompute Tensor running init with Vulkan params and num data elementS: {}", this->mData.size());

    this->mPhysicalDevice = physicalDevice;
    this->mDevice = device;
@ -66,7 +69,7 @@ Tensor::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,

    this->mIsInit = true;

-    this->createBuffer(data.data());
+    this->createBuffer();
 }

 std::vector<uint32_t>
@ -131,9 +134,6 @@ Tensor::recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor)
    // TODO: Ensure command buffer is in same device from buffer
    this->mCommandBuffer->copyBuffer(
      *copyFromTensor->mBuffer, *this->mBuffer, copyRegion);
-
-    // TODO: Ensure copied data is consistent with device
-    this->mData = copyFromTensor->mData;
 }

 void
@ -173,13 +173,14 @@ Tensor::constructDescriptorBufferInfo()
 }

 void
-Tensor::copyDataFromHostBuffer()
+Tensor::mapDataFromHostMemory()
 {
-    SPDLOG_DEBUG("Kompute Tensor copying data from host buffer");
+    SPDLOG_DEBUG("Kompute Tensor mapping data from host buffer");

    if (this->mTensorType != TensorTypes::eStaging) {
-        spdlog::warn("Copying tensor data manually to DEVICE buffer instead of "
-                     "using record GPU command");
+        spdlog::error("Mapping tensor data manually from DEVICE buffer instead of "
+                     "using record GPU command with staging buffer");
+        return;
    }

    vk::DeviceSize bufferSize = this->memorySize();
@ -192,14 +193,16 @@ Tensor::copyDataFromHostBuffer()
 }

 void
-Tensor::copyDataToHostBuffer()
+Tensor::mapDataIntoHostMemory()
 {

-    SPDLOG_DEBUG("Kompute Tensor copying data to buffer");
+    SPDLOG_DEBUG("Kompute Tensor local mapping tensor data to host buffer");

+    // TODO: Verify if there are situations where we want to copy to device memory
    if (this->mTensorType != TensorTypes::eStaging) {
-        spdlog::warn("Copying tensor data manually to DEVICE buffer instead of "
-                     "using record GPU command");
+        spdlog::error("Mapping tensor data manually to DEVICE memory instead of "
+                     "using record GPU command with staging buffer");
+        return;
    }

    vk::DeviceSize bufferSize = this->memorySize();
@ -253,7 +256,7 @@ Tensor::getMemoryPropertyFlags()
 }

 void
-Tensor::createBuffer(void* data)
+Tensor::createBuffer()
 {
    SPDLOG_DEBUG("Kompute Tensor creating buffer");

@ -331,10 +334,6 @@ Tensor::createBuffer(void* data)
    this->mDevice->bindBufferMemory(*this->mBuffer, *this->mMemory, 0);

    SPDLOG_DEBUG("Kompute Tensor buffer & memory creation successful");
-
-    if (data != nullptr) {
-        this->copyDataToHostBuffer();
-    }
 }

 }
--- a/src/Tensor.hpp
+++ b/src/Tensor.hpp
@ -33,11 +33,10 @@ class Tensor

    void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
              std::shared_ptr<vk::Device> device,
-              std::shared_ptr<vk::CommandBuffer> commandBuffer,
-              std::vector<uint32_t> data = std::vector<uint32_t>());
+              std::shared_ptr<vk::CommandBuffer> commandBuffer);

    // Create functions
-    void createBuffer(void* data = nullptr);
+    void createBuffer();

    // Getter functions
    std::vector<uint32_t> data();
@ -60,8 +59,8 @@ class Tensor

    // Util functions
    vk::DescriptorBufferInfo constructDescriptorBufferInfo();
-    void copyDataFromHostBuffer();
-    void copyDataToHostBuffer();
+    void mapDataFromHostMemory();
+    void mapDataIntoHostMemory();

  private:
    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;