Added option to create a memory barrier after the copy in Tensor::recordCopyFrom

2020-08-29 15:49:43 +01:00 · 2020-08-29 15:49:43 +01:00 · 2298159586
commit 2298159586
parent a2efc441db
4 changed files with 20 additions and 38 deletions
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@ -246,8 +246,12 @@ class Tensor
     * Records a copy from the memory of the tensor provided to the current
     * thensor. This is intended to pass memory into a processing, to perform
     * a staging buffer transfer, or to gather output (between others).
+     *
+     * @param copyFromTensor Tensor to copy the data from
+     * @param createBarrier Whether to record a buffer memory barrier on this tensor after the copy (transfer write to host read) so that the copy completes before subsequent host reads. Default is true.
     */
-    void recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor);
+    void recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor,
+            bool createBarrier = true);

    /**
     * Records the buffer memory barrier into the command buffer which
@ -1077,7 +1081,6 @@ OpMult<tX, tY, tZ>::init()
 #endif

    SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component");
-    SPDLOG_DEBUG("Kompute vector size {}", shaderFileData.size());

    this->mAlgorithm->init(shaderFileData, this->mTensors);
 }
@ -1103,11 +1106,6 @@ OpMult<tX, tY, tZ>::record()
    this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);

    // Barrier to ensure the shader code is executed before buffer read
-    this->mTensorLHS->recordBufferMemoryBarrier(
-      vk::AccessFlagBits::eShaderWrite,
-      vk::AccessFlagBits::eTransferRead,
-      vk::PipelineStageFlagBits::eComputeShader,
-      vk::PipelineStageFlagBits::eTransfer);
    this->mTensorOutput->recordBufferMemoryBarrier(
      vk::AccessFlagBits::eShaderWrite,
      vk::AccessFlagBits::eTransferRead,
@ -1115,18 +1113,6 @@ OpMult<tX, tY, tZ>::record()
      vk::PipelineStageFlagBits::eTransfer);

    this->mTensorOutputStaging->recordCopyFrom(this->mTensorOutput);
-
-    // Buffer to ensure wait until data is copied to staging buffer
-    this->mTensorLHS->recordBufferMemoryBarrier(
-      vk::AccessFlagBits::eTransferWrite,
-      vk::AccessFlagBits::eHostRead,
-      vk::PipelineStageFlagBits::eTransfer,
-      vk::PipelineStageFlagBits::eHost);
-    this->mTensorOutput->recordBufferMemoryBarrier(
-      vk::AccessFlagBits::eTransferWrite,
-      vk::AccessFlagBits::eHostRead,
-      vk::PipelineStageFlagBits::eTransfer,
-      vk::PipelineStageFlagBits::eHost);
 }

 template<uint32_t tX, uint32_t tY, uint32_t tZ>
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@ -95,7 +95,7 @@ Tensor::setData(const std::vector<uint32_t>& data)
 }

 void
-Tensor::recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor)
+Tensor::recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor, bool createBarrier)
 {
    SPDLOG_DEBUG("Kompute Tensor recordCopyFrom called");

@ -114,6 +114,15 @@ Tensor::recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor)
    // TODO: Ensure command buffer is in same device from buffer
    this->mCommandBuffer->copyBuffer(
      *copyFromTensor->mBuffer, *this->mBuffer, copyRegion);
+
+    if (createBarrier) {
+        // Barrier to ensure the copy into this tensor's buffer has finished before the host reads it
+        this->recordBufferMemoryBarrier(
+          vk::AccessFlagBits::eTransferWrite,
+          vk::AccessFlagBits::eHostRead,
+          vk::PipelineStageFlagBits::eTransfer,
+          vk::PipelineStageFlagBits::eHost);
+        }
 }

 void
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@ -96,8 +96,12 @@ class Tensor
     * Records a copy from the memory of the tensor provided to the current
     * thensor. This is intended to pass memory into a processing, to perform
     * a staging buffer transfer, or to gather output (between others).
+     *
+     * @param copyFromTensor Tensor to copy the data from
+     * @param createBarrier Whether to record a buffer memory barrier on this tensor after the copy (transfer write to host read) so that the copy completes before subsequent host reads. Default is true.
     */
-    void recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor);
+    void recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor,
+            bool createBarrier = true);

    /**
     * Records the buffer memory barrier into the command buffer which
--- a/src/include/kompute/operations/OpMult.hpp
+++ b/src/include/kompute/operations/OpMult.hpp
@ -198,11 +198,6 @@ OpMult<tX, tY, tZ>::record()
    this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);

    // Barrier to ensure the shader code is executed before buffer read
-    this->mTensorLHS->recordBufferMemoryBarrier(
-      vk::AccessFlagBits::eShaderWrite,
-      vk::AccessFlagBits::eTransferRead,
-      vk::PipelineStageFlagBits::eComputeShader,
-      vk::PipelineStageFlagBits::eTransfer);
    this->mTensorOutput->recordBufferMemoryBarrier(
      vk::AccessFlagBits::eShaderWrite,
      vk::AccessFlagBits::eTransferRead,
@ -210,18 +205,6 @@ OpMult<tX, tY, tZ>::record()
      vk::PipelineStageFlagBits::eTransfer);

    this->mTensorOutputStaging->recordCopyFrom(this->mTensorOutput);
-
-    // Buffer to ensure wait until data is copied to staging buffer
-    this->mTensorLHS->recordBufferMemoryBarrier(
-      vk::AccessFlagBits::eTransferWrite,
-      vk::AccessFlagBits::eHostRead,
-      vk::PipelineStageFlagBits::eTransfer,
-      vk::PipelineStageFlagBits::eHost);
-    this->mTensorOutput->recordBufferMemoryBarrier(
-      vk::AccessFlagBits::eTransferWrite,
-      vk::AccessFlagBits::eHostRead,
-      vk::PipelineStageFlagBits::eTransfer,
-      vk::PipelineStageFlagBits::eHost);
 }

 template<uint32_t tX, uint32_t tY, uint32_t tZ>