Added initial implementation for algorithm and opMult

2020-08-21 19:15:07 +01:00 · 2020-08-21 19:15:07 +01:00 · d59dc41ffc
commit d59dc41ffc
parent 0d18dc50e6
11 changed files with 385 additions and 28 deletions
--- a/src/Algorithm.cpp
+++ b/src/Algorithm.cpp
@ -0,0 +1,178 @@
+#include <fstream>
+
+#include "Algorithm.hpp"
+
+namespace kp {
+
+// Default constructor: only emits a debug log. No device or command buffer
+// is assigned here, so mDevice and mCommandBuffer stay empty.
+Algorithm::Algorithm()
+{
+    SPDLOG_DEBUG("Kompute Algorithm base constructor");
+}
+
+/**
+ * Constructs an algorithm bound to an existing device and command buffer.
+ * Both are stored as shared pointers; no Vulkan objects are created here.
+ *
+ * @param device Device later used to create the descriptor and pipeline objects
+ * @param commandBuffer Command buffer that recordDispatch records into
+ */
+Algorithm::Algorithm(std::shared_ptr<vk::Device> device, std::shared_ptr<vk::CommandBuffer> commandBuffer)
+  : mDevice(device)
+  , mCommandBuffer(commandBuffer)
+{
+    SPDLOG_DEBUG("Kompute Algorithm Constructor with device");
+}
+
+/**
+ * Destructor. Logs an error when no device was ever attached; it does not
+ * release any of the Vulkan objects created by init().
+ */
+Algorithm::~Algorithm()
+{
+    SPDLOG_DEBUG("Kompute Algorithm Destructor started");
+
+    const bool hasDevice = static_cast<bool>(this->mDevice);
+    if (!hasDevice) {
+        spdlog::error(
+          "Kompute Algorithm destructor reached with null Device pointer");
+    }
+}
+
+/**
+ * Prepares the algorithm for recording by running the three creation steps
+ * in order: descriptor resources for the tensors, the shader module read
+ * from the given path, and finally the compute pipeline.
+ *
+ * @param shaderFilePath Path forwarded to createShaderModule
+ * @param tensorParams Tensors forwarded to createParameters
+ */
+void
+Algorithm::init(std::string shaderFilePath,
+                std::vector<std::shared_ptr<Tensor>> tensorParams)
+{
+    SPDLOG_DEBUG("Kompute Algorithm init started");
+    spdlog::info("Loading shader with file path {}", shaderFilePath);
+
+    // TODO: Move to util function
+    createParameters(tensorParams);
+    createShaderModule(shaderFilePath);
+    createPipeline();
+}
+
+/**
+ * Creates the descriptor pool, the descriptor set layout and the single
+ * descriptor set used by the compute pipeline, then points binding i of that
+ * set at the buffer of tensorParams[i] (as a storage buffer).
+ *
+ * @param tensorParams Tensors to expose to the shader; must not be empty
+ * @throws std::runtime_error if tensorParams is empty or if the allocation
+ *         does not return exactly one descriptor set
+ */
+void Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams) {
+    if (tensorParams.empty()) {
+        throw std::runtime_error("Kompute Algorithm createParameters called with no tensors");
+    }
+
+    const uint32_t tensorCount = static_cast<uint32_t>(tensorParams.size());
+
+    // A single pool size entry that reserves one storage buffer descriptor
+    // per tensor
+    vk::DescriptorPoolSize descriptorPoolSize(
+        vk::DescriptorType::eStorageBuffer,
+        tensorCount // Descriptor count
+    );
+
+    // TODO: Explore design for having more than 1 set configurable
+    vk::DescriptorPoolCreateInfo descriptorPoolInfo(
+        vk::DescriptorPoolCreateFlags(),
+        1, // Max sets
+        1, // Pool size count
+        &descriptorPoolSize);
+
+    this->mFreeDescriptorPool = true;
+    this->mDescriptorPool = std::make_shared<vk::DescriptorPool>();
+    this->mDevice->createDescriptorPool(&descriptorPoolInfo, nullptr, this->mDescriptorPool.get());
+
+    // TODO: Explore allowing descriptor set bind index
+    std::vector<vk::DescriptorSetLayoutBinding> descriptorSetBindings;
+    descriptorSetBindings.reserve(tensorParams.size());
+    for (uint32_t i = 0; i < tensorCount; i++) {
+        descriptorSetBindings.push_back(
+            vk::DescriptorSetLayoutBinding(
+                i, // Binding index
+                vk::DescriptorType::eStorageBuffer,
+                1, // Descriptor count
+                vk::ShaderStageFlagBits::eCompute)
+        );
+    }
+
+    // This is the component that is fed into the pipeline
+    vk::DescriptorSetLayoutCreateInfo descriptorSetLayoutInfo(
+        vk::DescriptorSetLayoutCreateFlags(),
+        static_cast<uint32_t>(descriptorSetBindings.size()),
+        descriptorSetBindings.data()
+    );
+
+    // TODO: We create a single descriptor set layout, which would have to be
+    // extended if multiple set layouts are to be supported
+    this->mFreeDescriptorSetLayout = true;
+    this->mDescriptorSetLayout = std::make_shared<vk::DescriptorSetLayout>();
+    this->mDevice->createDescriptorSetLayout(&descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get());
+
+    vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo(
+        *this->mDescriptorPool,
+        1, // Descriptor set layout count
+        this->mDescriptorSetLayout.get());
+
+    std::vector<vk::DescriptorSet> descriptorSets =
+        this->mDevice->allocateDescriptorSets(descriptorSetAllocateInfo);
+
+    // One layout was requested, so exactly one set is expected regardless of
+    // how many tensors (bindings) that set contains
+    if (descriptorSets.size() != 1) {
+        throw std::runtime_error("Kompute Algorithm expected exactly one descriptor set to be allocated");
+    }
+
+    const vk::DescriptorSet& descriptorSet = descriptorSets.front();
+    this->mDescriptorSets.push_back(std::make_shared<vk::DescriptorSet>(descriptorSet));
+
+    // The write structs hold pointers into descriptorBufferInfos, so the
+    // vector is reserved up front to keep those pointers valid until
+    // updateDescriptorSets has been called
+    std::vector<vk::DescriptorBufferInfo> descriptorBufferInfos;
+    descriptorBufferInfos.reserve(tensorParams.size());
+
+    std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
+    computeWriteDescriptorSets.reserve(tensorParams.size());
+
+    for (uint32_t i = 0; i < tensorCount; i++) {
+        descriptorBufferInfos.push_back(
+            tensorParams[i]->constructDescriptorBufferInfo());
+
+        computeWriteDescriptorSets.push_back(
+            vk::WriteDescriptorSet(
+                descriptorSet,
+                i, // Destination binding
+                0, // Destination array element
+                1, // Descriptor count
+                vk::DescriptorType::eStorageBuffer,
+                nullptr, // Descriptor image info
+                &descriptorBufferInfos.back()));
+    }
+
+    this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr);
+}
+
+/**
+ * Reads a compiled shader binary from disk and creates the shader module
+ * used by the compute pipeline.
+ *
+ * @param shaderFilePath Path of the shader binary to load
+ * @throws std::runtime_error if the file cannot be opened, is empty, has a
+ *         size that is not a multiple of 4 bytes, or cannot be read in full
+ */
+void Algorithm::createShaderModule(std::string shaderFilePath) {
+    std::ifstream fileStream(
+      shaderFilePath, std::ios::binary | std::ios::in | std::ios::ate);
+
+    if (!fileStream.is_open()) {
+        throw std::runtime_error("Kompute Algorithm could not open shader file: " + shaderFilePath);
+    }
+
+    // The stream was opened at the end, so the position is the file size;
+    // tellg reports -1 on failure
+    const std::streamoff endPosition = fileStream.tellg();
+    if (endPosition <= 0) {
+        throw std::runtime_error("Kompute Algorithm shader file is empty or unreadable: " + shaderFilePath);
+    }
+
+    const size_t shaderFileSize = static_cast<size_t>(endPosition);
+    if (shaderFileSize % sizeof(uint32_t) != 0) {
+        throw std::runtime_error("Kompute Algorithm shader file size is not a multiple of 4 bytes: " + shaderFilePath);
+    }
+
+    // Storage is owned by the vector (released on every exit path) and is
+    // correctly aligned for the uint32_t pointer the create info expects
+    std::vector<uint32_t> shaderFileData(shaderFileSize / sizeof(uint32_t));
+
+    fileStream.seekg(0, std::ios::beg);
+    fileStream.read(reinterpret_cast<char*>(shaderFileData.data()),
+                    static_cast<std::streamsize>(shaderFileSize));
+    if (!fileStream) {
+        throw std::runtime_error("Kompute Algorithm failed to read shader file: " + shaderFilePath);
+    }
+    fileStream.close();
+
+    vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(), shaderFileSize, shaderFileData.data());
+
+    this->mFreeShaderModule = true;
+    // The handle must exist before its address is passed as the output pointer
+    this->mShaderModule = std::make_shared<vk::ShaderModule>();
+    this->mDevice->createShaderModule(&shaderModuleInfo, nullptr, this->mShaderModule.get());
+}
+
+/**
+ * Creates the pipeline layout, the pipeline cache and the compute pipeline.
+ * Uses the descriptor set layout and shader module stored on this object,
+ * so createParameters and createShaderModule must have run first.
+ *
+ * @throws std::runtime_error if pipeline creation does not report success
+ */
+void Algorithm::createPipeline() {
+    SPDLOG_DEBUG("Kompute Algorithm calling create Pipeline");
+
+    vk::PipelineLayoutCreateInfo pipelineLayoutInfo(
+        vk::PipelineLayoutCreateFlags(),
+        1, // Set layout count
+        this->mDescriptorSetLayout.get());
+
+    this->mFreePipelineLayout = true;
+    this->mPipelineLayout = std::make_shared<vk::PipelineLayout>();
+    this->mDevice->createPipelineLayout(&pipelineLayoutInfo, nullptr, this->mPipelineLayout.get());
+
+    vk::PipelineShaderStageCreateInfo shaderStage(
+        vk::PipelineShaderStageCreateFlags(),
+        vk::ShaderStageFlagBits::eCompute,
+        *this->mShaderModule,
+        "main", // Shader entry point
+        nullptr);
+
+    vk::ComputePipelineCreateInfo pipelineInfo(
+        vk::PipelineCreateFlags(),
+        shaderStage,
+        *this->mPipelineLayout,
+        vk::Pipeline(),
+        0);
+
+    // TODO: Confirm what the best structure is with pipeline cache
+    // The cache has to be created through the device; a create info struct
+    // is not itself a cache handle
+    vk::PipelineCacheCreateInfo pipelineCacheInfo = vk::PipelineCacheCreateInfo();
+    this->mFreePipelineCache = true;
+    this->mPipelineCache = std::make_shared<vk::PipelineCache>();
+    this->mDevice->createPipelineCache(&pipelineCacheInfo, nullptr, this->mPipelineCache.get());
+
+    vk::ResultValue<vk::Pipeline> pipelineResult = this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo);
+
+    if (pipelineResult.result != vk::Result::eSuccess) {
+        throw std::runtime_error("Failed to create pipeline result: " + vk::to_string(pipelineResult.result));
+    }
+
+    this->mFreePipeline = true;
+    this->mPipeline = std::make_shared<vk::Pipeline>(pipelineResult.value);
+}
+
+/**
+ * Records the commands that run the compute shader into the command buffer:
+ * binds the compute pipeline, binds the stored descriptor sets starting at
+ * set index 0, and dispatches the requested number of workgroups.
+ *
+ * @param x Workgroup count in the x dimension
+ * @param y Workgroup count in the y dimension
+ * @param z Workgroup count in the z dimension
+ */
+void Algorithm::recordDispatch(uint32_t x, uint32_t y, uint32_t z) {
+    SPDLOG_DEBUG("Kompute Algorithm calling record dispatch");
+
+    this->mCommandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, *this->mPipeline);
+
+    // TODO: Simplify interaction given we store array of pointers
+    // A vector cannot hold references, so the handles are copied by value
+    // into a contiguous array for the bind call
+    std::vector<vk::DescriptorSet> descriptorSets;
+    descriptorSets.reserve(this->mDescriptorSets.size());
+    for (const std::shared_ptr<vk::DescriptorSet>& descriptorSet : this->mDescriptorSets) {
+        descriptorSets.push_back(*descriptorSet);
+    }
+
+    this->mCommandBuffer->bindDescriptorSets(
+        vk::PipelineBindPoint::eCompute,
+        *this->mPipelineLayout,
+        0, // First set
+        descriptorSets,
+        nullptr); // No dynamic offsets
+
+    this->mCommandBuffer->dispatch(x, y, z);
+}
+
+}
--- a/src/Algorithm.hpp
+++ b/src/Algorithm.hpp
@ -19,13 +19,43 @@ class Algorithm
  public:
    Algorithm();

-    Algorithm(std::shared_ptr<vk::Device> device);
+    Algorithm(std::shared_ptr<vk::Device> device, std::shared_ptr<vk::CommandBuffer> commandBuffer);

    // TODO: Add specialisation data
+    // TODO: Explore other ways of passing shader (ie raw bytes)
    void init(std::string shaderFilePath,
              std::vector<std::shared_ptr<Tensor>> tensorParams);

    ~Algorithm();
+
+    // Record commands
+    void recordDispatch(uint32_t x, uint32_t y, uint32_t z);
+
+private:
+    // Shared resources
+    std::shared_ptr<vk::Device> mDevice;
+    std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
+
+    // Resources owned by default
+    std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
+    bool mFreeDescriptorSetLayout = false;
+    std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
+    bool mFreeDescriptorPool = false;
+    std::vector<std::shared_ptr<vk::DescriptorSet>> mDescriptorSets;
+    bool mFreeDescriptorSet = false;
+    std::shared_ptr<vk::ShaderModule> mShaderModule;
+    bool mFreeShaderModule = false;
+    std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
+    bool mFreePipelineLayout = false;
+    std::shared_ptr<vk::PipelineCache> mPipelineCache;
+    bool mFreePipelineCache = false;
+    std::shared_ptr<vk::Pipeline> mPipeline;
+    bool mFreePipeline = false;
+
+    // Create util functions
+    void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
+    void createShaderModule(std::string shaderFilePath);
+    void createPipeline();
 };

 } // End namespace kp
--- a/src/OpBase.hpp
+++ b/src/OpBase.hpp
@ -42,6 +42,12 @@ class OpBase

    virtual void record() { SPDLOG_DEBUG("Kompute OpBase record called"); }

+    // Default postSubmit hook: only emits a debug log. Derived operations
+    // override it (see the postSubmit overrides added in this commit).
+    virtual void postSubmit()
+    {
+        SPDLOG_DEBUG("Kompute OpBase postSubmit called");
+    }
+
+
  protected:
    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
    std::shared_ptr<vk::Device> mDevice;
--- a/src/OpCreateTensor.cpp
+++ b/src/OpCreateTensor.cpp
@ -65,4 +65,10 @@ OpCreateTensor::record()
    }
 }

+// postSubmit override for tensor creation: only emits a debug log.
+void OpCreateTensor::postSubmit()
+{
+    SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called");
+
+}
+
 }
--- a/src/OpCreateTensor.hpp
+++ b/src/OpCreateTensor.hpp
@ -31,6 +31,8 @@ class OpCreateTensor : public OpBase

    void record() override;

+    void postSubmit() override;
+
  private:
    std::shared_ptr<Tensor> mPrimaryTensor;
    std::shared_ptr<Tensor> mStagingTensor;
--- a/src/OpMult.cpp
+++ b/src/OpMult.cpp
@ -11,12 +11,15 @@ OpMult::OpMult()
    SPDLOG_DEBUG("Kompute OpMult constructor base");
 }

+// TODO: Remove physicalDevice from main initialiser
 OpMult::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
               std::shared_ptr<vk::Device> device,
               std::shared_ptr<vk::CommandBuffer> commandBuffer)
  : OpBase(physicalDevice, device, commandBuffer)
 {
    SPDLOG_DEBUG("Kompute OpMult constructor with params");
+
+    // mAlgorithm is not in the initialiser list, so it has already been
+    // default-constructed; replace it with an instance bound to this
+    // operation's device and command buffer.
+    this->mAlgorithm = Algorithm(device, commandBuffer);
 }

 OpMult::~OpMult()
@ -29,18 +32,40 @@ OpMult::init(std::vector<std::shared_ptr<Tensor>> tensors)
 {
    SPDLOG_DEBUG("Kompute OpMult init called");

-    if (tensors.size() < 2) {
+    if (tensors.size() < 3) {
        throw std::runtime_error(
          "Kompute OpMult called with less than 1 tensor");
-    } else if (tensors.size() > 2) {
+    } else if (tensors.size() > 3) {
        spdlog::warn("Kompute OpMult called with more than 2 tensor");
    }
+
+    this->mTensorLHS = tensors[0];
+    this->mTensorRHS = tensors[1];
+    this->mTensorOutput = tensors[2];
+
+    this->mTensorOutputStaging= std::make_shared<Tensor>(
+      this->mTensorOutput->data(), Tensor::TensorTypes::eStaging);
+
+    this->mAlgorithm.init(
+        "shaders/glsl/computeheadless.comp.spv", tensors);
 }

 void
 OpMult::record()
 {
    SPDLOG_DEBUG("Kompute OpMult record called");
+
+    // Record a dispatch of a single 1x1x1 group of workgroups
+    this->mAlgorithm.recordDispatch(1, 1, 1);
+
+    // Record a copy of the output tensor's buffer into the staging tensor.
+    // NOTE(review): no barrier is recorded in this function between the
+    // dispatch and the copy - confirm the shader writes are guaranteed to be
+    // visible to the copy.
+    this->mTensorOutputStaging->recordCopyFrom(this->mTensorOutput);
+}
+
+// Runs after submission: reads the staging buffer back into the staging
+// tensor's host data and then copies that data into the output tensor.
+void OpMult::postSubmit()
+{
+    SPDLOG_DEBUG("Kompute OpMult postSubmit called");
+
+    this->mTensorOutputStaging->copyDataFromHostBuffer();
+    this->mTensorOutput->setData(this->mTensorOutputStaging->data());
 }

 }
--- a/src/OpMult.hpp
+++ b/src/OpMult.hpp
@ -11,6 +11,7 @@
 #include <spdlog/spdlog.h>

 #include "Tensor.hpp"
+#include "Algorithm.hpp"

 #include "OpBase.hpp"

@ -31,9 +32,14 @@ class OpMult : public OpBase

    void record() override;

+    void postSubmit() override;
+
  private:
-    std::shared_ptr<Tensor> mPrimaryTensor;
-    std::shared_ptr<Tensor> mStagingTensor;
+    Algorithm mAlgorithm;
+    std::shared_ptr<Tensor> mTensorLHS;
+    std::shared_ptr<Tensor> mTensorRHS;
+    std::shared_ptr<Tensor> mTensorOutput;
+    std::shared_ptr<Tensor> mTensorOutputStaging;
 };

 } // End namespace kp
--- a/src/Parameter.hpp
+++ b/src/Parameter.hpp
@ -1,13 +1,57 @@
 #pragma once

+#include <vulkan/vulkan.h>
+#include <vulkan/vulkan.hpp>
+
+// SPDLOG_ACTIVE_LEVEL must be defined before spdlog.h import
+#if DEBUG
+#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_DEBUG
+#endif
+
+#include <spdlog/spdlog.h>
+
+#include "Tensor.hpp"
+
 namespace kp {

-class Parameter
+// NOTE(review): this header is src/Parameter.hpp, yet the class it declares
+// is kp::Algorithm, which src/Algorithm.hpp also declares in this commit with
+// a different constructor and createParameters signature - confirm this
+// file's contents are intentional.
+class Algorithm
 {
-  private:
  public:
-    Parameter();
-    virtual ~Parameter();
+    Algorithm();
+
+    Algorithm(std::shared_ptr<vk::Device> device);
+
+    // TODO: Add specialisation data
+    // TODO: Explore other ways of passing shader (ie raw bytes)
+    void init(std::string shaderFilePath,
+              std::vector<std::shared_ptr<Tensor>> tensorParams);
+
+    ~Algorithm();
+
+private:
+    // Shared resources
+    std::shared_ptr<vk::Device> mDevice;
+
+    // Resources owned by default
+    std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
+    bool mFreeDescriptorSetLayout = false;
+    std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
+    bool mFreeDescriptorPool = false;
+    std::shared_ptr<vk::DescriptorSet> mDescriptorSet;
+    bool mFreeDescriptorSet = false;
+    std::shared_ptr<vk::ShaderModule> mShaderModule;
+    bool mFreeShaderModule = false;
+    std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
+    bool mFreePipelineLayout = false;
+    std::shared_ptr<vk::PipelineCache> mPipelineCache;
+    bool mFreePipelineCache = false;
+    std::shared_ptr<vk::Pipeline> mPipeline;
+    bool mFreePipeline = false;
+
+    // Create util functions
+    void createParameters();
+    void createShaderModule(std::string shaderFilePath);
+    void createPipeline();
 };

 } // End namespace kp
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@ -105,12 +105,16 @@ Tensor::isInit()
    return this->mIsInit;
 }

+// Replaces the tensor's host-side data vector with a copy of `data`.
+// Only mData is assigned here; no buffer or memory object is touched.
+void Tensor::setData(const std::vector<uint32_t>& data) {
+    this->mData = data;
+}
+
 void
 Tensor::recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor)
 {
    SPDLOG_DEBUG("Kompute Tensor recordCopyFrom called");

-    if (!this->mIsInit) {
+    if (!this->mIsInit || !copyFromTensor->mIsInit) {
        throw std::runtime_error(
          "Kompute Tensor attempted to run createBuffer without init");
    }
@ -126,9 +130,53 @@ Tensor::recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor)
    this->mCommandBuffer->copyBuffer(
      *copyFromTensor->mBuffer, *this->mBuffer, copyRegion);

+    // TODO: Ensure copied data is consistent with device
    this->mData = copyFromTensor->mData;
 }

+// TODO: Explore if this function should be here or expose buffer
+// Builds the descriptor buffer info for this tensor's buffer, starting at
+// offset 0 and spanning memorySize() bytes.
+vk::DescriptorBufferInfo Tensor::constructDescriptorBufferInfo() {
+    const vk::DeviceSize bufferOffset = 0;
+    vk::DescriptorBufferInfo bufferInfo(
+        *this->mBuffer, bufferOffset, this->memorySize());
+    return bufferInfo;
+}
+
+// Reads the contents of this tensor's memory back into its host-side data
+// vector: maps memorySize() bytes, invalidates the mapped range, copies the
+// bytes into mData and unmaps. Logs a warning when the tensor is not of the
+// staging type.
+void Tensor::copyDataFromHostBuffer() {
+    SPDLOG_DEBUG("Kompute Tensor copying data from host buffer");
+
+    if (this->mTensorType != TensorTypes::eStaging) {
+        // This function reads from the buffer, so the warning says "from"
+        spdlog::warn("Copying tensor data manually from DEVICE buffer instead of using record GPU command");
+    }
+
+    vk::DeviceSize bufferSize = this->memorySize();
+    void* mapped = this->mDevice->mapMemory(*this->mMemory, 0, bufferSize, vk::MemoryMapFlags());
+    // Invalidate before reading so the host sees the latest device writes
+    vk::MappedMemoryRange mappedMemoryRange(*this->mMemory, 0, bufferSize);
+    this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange);
+    memcpy(this->mData.data(), mapped, bufferSize);
+    this->mDevice->unmapMemory(*this->mMemory);
+}
+
+// Writes the tensor's host-side data vector into its memory: maps
+// memorySize() bytes, copies mData into the mapping, flushes the mapped
+// range and unmaps. Logs a warning when the tensor is not of the staging type.
+void Tensor::copyDataToHostBuffer() {
+    SPDLOG_DEBUG("Kompute Tensor copying data to buffer");
+
+    const bool isStaging = this->mTensorType == TensorTypes::eStaging;
+    if (!isStaging) {
+        spdlog::warn("Copying tensor data manually to DEVICE buffer instead of using record GPU command");
+    }
+
+    vk::DeviceSize byteCount = this->memorySize();
+    vk::MappedMemoryRange flushRange(*this->mMemory, 0, byteCount);
+
+    // TODO: Verify if flushed memory ranges should happen in sequence
+    void* hostPtr = this->mDevice->mapMemory(
+      *this->mMemory, 0, byteCount, vk::MemoryMapFlags());
+    memcpy(hostPtr, this->mData.data(), byteCount);
+    this->mDevice->flushMappedMemoryRanges(1, &flushRange);
+    this->mDevice->unmapMemory(*this->mMemory);
+}
+
 vk::BufferUsageFlags
 Tensor::getBufferUsageFlags()
 {
@ -249,17 +297,7 @@ Tensor::createBuffer(void* data)
    SPDLOG_DEBUG("Kompute Tensor buffer & memory creation successful");

    if (data != nullptr) {
-        SPDLOG_DEBUG("Kompute Tensor mapping data to buffer");
-
-        // TODO: Verify if flushed memory ranges should happend in sequence
-        void* mapped = this->mDevice->mapMemory(
-          *this->mMemory, 0, bufferSize, vk::MemoryMapFlags());
-        memcpy(mapped, data, bufferSize);
-        vk::MappedMemoryRange mappedRange(*this->mMemory, 0, bufferSize);
-        this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
-        this->mDevice->unmapMemory(*this->mMemory);
-
-        SPDLOG_DEBUG("Kompute Tensor successful copy data to tensor");
+        this->copyDataToHostBuffer();
    }
 }

--- a/src/Tensor.hpp
+++ b/src/Tensor.hpp
@ -46,8 +46,18 @@ class Tensor
    TensorTypes tensorType();
    bool isInit();

+    // Setters
+    void setData(const std::vector<uint32_t>& data);
+
    // Record functions
    void recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor);
+    // TODO: Add memory buffer barrier capabilities
+    //void recordBufferMemoryBarrier();
+
+    // Util functions
+    vk::DescriptorBufferInfo constructDescriptorBufferInfo();
+    void copyDataFromHostBuffer();
+    void copyDataToHostBuffer();

  private:
    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
--- a/src/main.cpp
+++ b/src/main.cpp
@ -22,6 +22,7 @@

 #include "Manager.hpp"
 #include "OpCreateTensor.hpp"
+#include "OpMult.hpp"
 #include "Tensor.hpp"

 #define BUFFER_ELEMENTS 32
@ -493,6 +494,7 @@ class VulkanCompute
                  nullptr,
                  bufferMemoryBarrier,
                  nullptr);
+
                this->mCommandBuffer.bindPipeline(
                  vk::PipelineBindPoint::eCompute, this->mPipeline);
                this->mCommandBuffer.bindDescriptorSets(
@ -623,18 +625,28 @@ main()
        kp::Manager mgr;

        spdlog::info("Creating first tensor");
-        std::shared_ptr<kp::Tensor> tensorOne{ new kp::Tensor(
+        std::shared_ptr<kp::Tensor> tensorLHS{ new kp::Tensor(
          { 0.0, 1.0, 2.0 }) };
-        mgr.evalOp<kp::OpCreateTensor>({ tensorOne });
+        mgr.evalOp<kp::OpCreateTensor>({ tensorLHS });

        spdlog::info("Creating second tensor");
-        std::shared_ptr<kp::Tensor> tensorTwo{ new kp::Tensor(
-          { 0.0, 1.0, 2.0 }) };
-        mgr.evalOp<kp::OpCreateTensor>({ tensorTwo });
+        std::shared_ptr<kp::Tensor> tensorRHS{ new kp::Tensor(
+          { 2.0, 4.0, 6.0 }) };
+        mgr.evalOp<kp::OpCreateTensor>({ tensorRHS });
+
+        // TODO: Add capabilities for just output tensor types
+        spdlog::info("Creating output tensor");
+        std::shared_ptr<kp::Tensor> tensorOutput{ new kp::Tensor(
+            { 0.0, 0.0, 0.0 }) };
+        mgr.evalOp<kp::OpCreateTensor>({ tensorOutput });

        spdlog::info("Called manager eval success");
-        spdlog::info("Tensor one: {}", tensorOne->data());
-        spdlog::info("Tensor two: {}", tensorTwo->data());
+        spdlog::info("Tensor one: {}", tensorLHS->data());
+        spdlog::info("Tensor two: {}", tensorRHS->data());
+        spdlog::info("Tensor two: {}", tensorOutput->data());
+
+        spdlog::info("Calling op mult");
+        mgr.evalOp<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });

        return 0;
    } catch (const std::exception& exc) {