Added initial implementation for algorithm and opMult

This commit is contained in:
Alejandro Saucedo 2020-08-21 19:15:07 +01:00
parent 0d18dc50e6
commit d59dc41ffc
11 changed files with 385 additions and 28 deletions

178
src/Algorithm.cpp Normal file
View file

@ -0,0 +1,178 @@
#include <fstream>
#include "Algorithm.hpp"
namespace kp {
// Default constructor. Leaves every member in its default state (no device,
// no command buffer); it only emits a debug trace.
Algorithm::Algorithm()
{
    SPDLOG_DEBUG("Kompute Algorithm base constructor");
}
// Builds an Algorithm bound to an existing logical device and to the command
// buffer that recordDispatch() will later record into. Both pointers are
// stored as shared handles; nothing is created on the device here.
Algorithm::Algorithm(std::shared_ptr<vk::Device> device,
                     std::shared_ptr<vk::CommandBuffer> commandBuffer)
  : mDevice(device)
  , mCommandBuffer(commandBuffer)
{
    SPDLOG_DEBUG("Kompute Algorithm Constructor with device");
}
// Releases the Vulkan objects this Algorithm created.
//
// An object is destroyed only when its mFree* flag is set, its wrapper
// pointer is non-null, and this instance is the last holder of that wrapper.
// The last-holder check matters because Algorithm is copy-assignable (the
// shared_ptr members are shared between copies), so without it two copies
// would destroy the same handle.
//
// Destruction order is the reverse of creation: pipeline, pipeline cache,
// pipeline layout, shader module, descriptor set layout, descriptor pool.
// Descriptor sets are not freed individually; they are released together
// with the pool they were allocated from.
Algorithm::~Algorithm()
{
    SPDLOG_DEBUG("Kompute Algorithm Destructor started");

    if (!this->mDevice) {
        spdlog::error(
          "Kompute Algorithm destructor reached with null Device pointer");
        return;
    }

    if (this->mFreePipeline && this->mPipeline &&
        this->mPipeline.use_count() == 1) {
        SPDLOG_DEBUG("Kompute Algorithm destroying pipeline");
        this->mDevice->destroyPipeline(*this->mPipeline);
    }

    if (this->mFreePipelineCache && this->mPipelineCache &&
        this->mPipelineCache.use_count() == 1) {
        SPDLOG_DEBUG("Kompute Algorithm destroying pipeline cache");
        this->mDevice->destroyPipelineCache(*this->mPipelineCache);
    }

    if (this->mFreePipelineLayout && this->mPipelineLayout &&
        this->mPipelineLayout.use_count() == 1) {
        SPDLOG_DEBUG("Kompute Algorithm destroying pipeline layout");
        this->mDevice->destroyPipelineLayout(*this->mPipelineLayout);
    }

    if (this->mFreeShaderModule && this->mShaderModule &&
        this->mShaderModule.use_count() == 1) {
        SPDLOG_DEBUG("Kompute Algorithm destroying shader module");
        this->mDevice->destroyShaderModule(*this->mShaderModule);
    }

    if (this->mFreeDescriptorSetLayout && this->mDescriptorSetLayout &&
        this->mDescriptorSetLayout.use_count() == 1) {
        SPDLOG_DEBUG("Kompute Algorithm destroying descriptor set layout");
        this->mDevice->destroyDescriptorSetLayout(
          *this->mDescriptorSetLayout);
    }

    if (this->mFreeDescriptorPool && this->mDescriptorPool &&
        this->mDescriptorPool.use_count() == 1) {
        SPDLOG_DEBUG("Kompute Algorithm destroying descriptor pool");
        this->mDevice->destroyDescriptorPool(*this->mDescriptorPool);
    }
}
void Algorithm::init(std::string shaderFilePath,
std::vector<std::shared_ptr<Tensor>> tensorParams) {
SPDLOG_DEBUG("Kompute Algorithm init started");
spdlog::info("Loading shader with file path {}", shaderFilePath);
// TODO: Move to util function
this->createParameters(tensorParams);
this->createShaderModule(shaderFilePath);
this->createPipeline();
}
void Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams) {
std::vector<vk::DescriptorPoolSize> descriptorPoolSizes;
for (std::shared_ptr<Tensor> tensorParam : tensorParams) {
descriptorPoolSizes.push_back(
vk::DescriptorPoolSize(
vk::DescriptorType::eStorageBuffer,
1 // Descriptor count
)
);
}
// TODO: Explore design for having more than 1 set configurable
vk::DescriptorPoolCreateInfo descriptorPoolInfo(
vk::DescriptorPoolCreateFlags(),
1, // Max sets
descriptorPoolSizes.size(),
descriptorPoolSizes.data());
this->mDescriptorPool = std::make_shared<vk::DescriptorPool>();
this->mDevice->createDescriptorPool(&descriptorPoolInfo, nullptr, this->mDescriptorPool.get());
// TODO: Explore allowing descriptor set bind index
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetBindings;
for (size_t i = 0; i < tensorParams.size(); i++) {
descriptorSetBindings.push_back(
vk::DescriptorSetLayoutBinding(
i, // Binding index
vk::DescriptorType::eStorageBuffer,
1, // Descriptor count
vk::ShaderStageFlagBits::eCompute)
);
}
// This is the component that is fed into the pipeline
vk::DescriptorSetLayoutCreateInfo descriptorSetLayoutInfo(
vk::DescriptorSetLayoutCreateFlags(),
descriptorSetBindings.size(),
descriptorSetBindings.data()
);
// TODO: We createa signle descriptor set layout which would have to be extended if multiple set layouts to be supported
this->mDescriptorSetLayout = std::make_shared<vk::DescriptorSetLayout>();
this->mDevice->createDescriptorSetLayout(&descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get());
vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo(
*this->mDescriptorPool,
1, // Descriptor set layout count
this->mDescriptorSetLayout.get());
std::vector<vk::DescriptorSet> descriptorSets =
this->mDevice->allocateDescriptorSets(descriptorSetAllocateInfo);
if (descriptorSets.size() != tensorParams.size()) {
throw std::runtime_error("Number of descriptor sets does not match number of paramters");
}
std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
for (size_t i = 0; i < descriptorSets.size(); i++) {
std::shared_ptr<Tensor> currTensor = tensorParams[i];
vk::DescriptorSet& currDescriptorSet = descriptorSets[i];
this->mDescriptorSets.push_back(std::make_shared<vk::DescriptorSet>(currDescriptorSet));
vk::DescriptorBufferInfo descriptorBufferInfo = currTensor->constructDescriptorBufferInfo();
computeWriteDescriptorSets.push_back(
vk::WriteDescriptorSet());
}
this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr);
}
void Algorithm::createShaderModule(std::string shaderFilePath) {
std::ifstream fileStream(
shaderFilePath, std::ios::binary | std::ios::in | std::ios::ate);
size_t shaderFileSize = fileStream.tellg();
fileStream.seekg(0, std::ios::beg);
char* shaderFileData = new char[shaderFileSize];
fileStream.read(shaderFileData, shaderFileSize);
fileStream.close();
vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(), shaderFileSize, (uint32_t*)shaderFileData);
this->mFreeShaderModule = true;
this->mShaderModule = std::shared_ptr<vk::ShaderModule>();
this->mDevice->createShaderModule(&shaderModuleInfo, nullptr, this->mShaderModule.get());
}
void Algorithm::createPipeline() {
SPDLOG_DEBUG("Kompute Algorithm calling create Pipeline");
vk::PipelineLayoutCreateInfo pipelineLayoutInfo(
vk::PipelineLayoutCreateFlags(),
1, // Set layout count
this->mDescriptorSetLayout.get());
this->mPipelineLayout = std::make_shared<vk::PipelineLayout>();
this->mDevice->createPipelineLayout(&pipelineLayoutInfo, nullptr, this->mPipelineLayout.get());
vk::PipelineShaderStageCreateInfo shaderStage(vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eCompute, *this->mShaderModule, "main", nullptr);
vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(), shaderStage, *this->mPipelineLayout, vk::Pipeline(), 0);
// TODO: Confirm what the best structure is with pipeline cache
this->mFreePipelineCache = true;
this->mPipelineCache = std::make_shared<vk::PipelineCache>(vk::PipelineCacheCreateInfo());
vk::ResultValue<vk::Pipeline> pipelineResult = this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo);
if (pipelineResult.result != vk::Result::eSuccess) {
throw std::runtime_error("Failed to create pipeline result: " + vk::to_string(pipelineResult.result));
}
this->mFreePipeline = true;
this->mPipeline = std::make_shared<vk::Pipeline>(pipelineResult.value);
}
void Algorithm::recordDispatch(uint32_t x, uint32_t y, uint32_t z) {
SPDLOG_DEBUG("Kompute Algorithm calling record dispatch");
this->mCommandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, *this->mPipeline);
// TODO: Simplify interaction given we store array of pointers
std::vector<vk::DescriptorSet&> descriptorSetRefs(this->mDescriptorSets.size());
for (size_t i = 0; i < this->mDescriptorSets.size(); i++) {
descriptorSetRefs[i] = this->mDescriptorSets[i];
}
this->mCommandBuffer->bindDescriptorSets(vk::PipelineBindPoint::eCompute, *this->mPipelineLayout, 0, descriptorSetRefs, nullptr);
this->mCommandBuffer->dispatch(x, y, z);
}
}

View file

@ -19,13 +19,43 @@ class Algorithm
public:
Algorithm();
Algorithm(std::shared_ptr<vk::Device> device);
Algorithm(std::shared_ptr<vk::Device> device, std::shared_ptr<vk::CommandBuffer> commandBuffer);
// TODO: Add specialisation data
// TODO: Explore other ways of passing shader (ie raw bytes)
void init(std::string shaderFilePath,
std::vector<std::shared_ptr<Tensor>> tensorParams);
~Algorithm();
// Record commands
void recordDispatch(uint32_t x, uint32_t y, uint32_t z);
private:
// Shared resources
std::shared_ptr<vk::Device> mDevice;
std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
// Resources owned by default
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
bool mFreeDescriptorSetLayout = false;
std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
bool mFreeDescriptorPool = false;
std::vector<std::shared_ptr<vk::DescriptorSet>> mDescriptorSets;
bool mFreeDescriptorSet = false;
std::shared_ptr<vk::ShaderModule> mShaderModule;
bool mFreeShaderModule = false;
std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
bool mFreePipelineLayout = false;
std::shared_ptr<vk::PipelineCache> mPipelineCache;
bool mFreePipelineCache = false;
std::shared_ptr<vk::Pipeline> mPipeline;
bool mFreePipeline = false;
// Create util functions
void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
void createShaderModule(std::string shaderFilePath);
void createPipeline();
};
} // End namespace kp

View file

@ -42,6 +42,12 @@ class OpBase
// Default record step: only emits a debug trace. Declared virtual so that
// operations can override it.
virtual void record()
{
    SPDLOG_DEBUG("Kompute OpBase record called");
}
// Default postSubmit step: only emits a debug trace. Declared virtual so
// that operations can override it.
// The trace used to read "init called" (copied from init), which made the
// debug log misleading; it now names the function that actually ran.
virtual void postSubmit()
{
    SPDLOG_DEBUG("Kompute OpBase postSubmit called");
}
protected:
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
std::shared_ptr<vk::Device> mDevice;

View file

@ -65,4 +65,10 @@ OpCreateTensor::record()
}
}
// postSubmit override for tensor creation: performs no work, only emits a
// debug trace.
void
OpCreateTensor::postSubmit()
{
    SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called");
}
}

View file

@ -31,6 +31,8 @@ class OpCreateTensor : public OpBase
void record() override;
void postSubmit() override;
private:
std::shared_ptr<Tensor> mPrimaryTensor;
std::shared_ptr<Tensor> mStagingTensor;

View file

@ -11,12 +11,15 @@ OpMult::OpMult()
SPDLOG_DEBUG("Kompute OpMult constructor base");
}
// TODO: Remove physicalDevice from main initialiser
// Forwards all three handles to OpBase, then replaces the default-built
// mAlgorithm member with one bound to the same device and command buffer.
OpMult::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
               std::shared_ptr<vk::Device> device,
               std::shared_ptr<vk::CommandBuffer> commandBuffer)
  : OpBase(physicalDevice, device, commandBuffer)
{
    SPDLOG_DEBUG("Kompute OpMult constructor with params");

    mAlgorithm = Algorithm(device, commandBuffer);
}
OpMult::~OpMult()
@ -29,18 +32,40 @@ OpMult::init(std::vector<std::shared_ptr<Tensor>> tensors)
{
SPDLOG_DEBUG("Kompute OpMult init called");
if (tensors.size() < 2) {
if (tensors.size() < 3) {
throw std::runtime_error(
"Kompute OpMult called with less than 1 tensor");
} else if (tensors.size() > 2) {
} else if (tensors.size() > 3) {
spdlog::warn("Kompute OpMult called with more than 2 tensor");
}
this->mTensorLHS = tensors[0];
this->mTensorRHS = tensors[1];
this->mTensorOutput = tensors[2];
this->mTensorOutputStaging= std::make_shared<Tensor>(
this->mTensorOutput->data(), Tensor::TensorTypes::eStaging);
this->mAlgorithm.init(
"shaders/glsl/computeheadless.comp.spv", tensors);
}
void
OpMult::record()
{
SPDLOG_DEBUG("Kompute OpMult record called");
this->mAlgorithm.recordDispatch(1, 1, 1);
this->mTensorOutputStaging->recordCopyFrom(this->mTensorOutput);
}
// Runs after submission: copies the staging buffer contents into the
// staging tensor's data vector, then stores that data on the output tensor.
// The trace used to name OpCreateTensor (copy/paste), which made the debug
// log point at the wrong operation; it now names OpMult.
void
OpMult::postSubmit()
{
    SPDLOG_DEBUG("Kompute OpMult postSubmit called");

    this->mTensorOutputStaging->copyDataFromHostBuffer();

    this->mTensorOutput->setData(this->mTensorOutputStaging->data());
}
}

View file

@ -11,6 +11,7 @@
#include <spdlog/spdlog.h>
#include "Tensor.hpp"
#include "Algorithm.hpp"
#include "OpBase.hpp"
@ -31,9 +32,14 @@ class OpMult : public OpBase
void record() override;
void postSubmit() override;
private:
std::shared_ptr<Tensor> mPrimaryTensor;
std::shared_ptr<Tensor> mStagingTensor;
Algorithm mAlgorithm;
std::shared_ptr<Tensor> mTensorLHS;
std::shared_ptr<Tensor> mTensorRHS;
std::shared_ptr<Tensor> mTensorOutput;
std::shared_ptr<Tensor> mTensorOutputStaging;
};
} // End namespace kp

View file

@ -1,13 +1,57 @@
#pragma once
#include <vulkan/vulkan.h>
#include <vulkan/vulkan.hpp>
// SPDLOG_ACTIVE_LEVEL must be defined before spdlog.h import
#if DEBUG
#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_DEBUG
#endif
#include <spdlog/spdlog.h>
#include "Tensor.hpp"
namespace kp {
class Parameter
class Algorithm
{
private:
public:
Parameter();
virtual ~Parameter();
Algorithm();
Algorithm(std::shared_ptr<vk::Device> device);
// TODO: Add specialisation data
// TODO: Explore other ways of passing shader (ie raw bytes)
void init(std::string shaderFilePath,
std::vector<std::shared_ptr<Tensor>> tensorParams);
~Algorithm();
private:
// Shared resources
std::shared_ptr<vk::Device> mDevice;
// Resources owned by default
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
bool mFreeDescriptorSetLayout = false;
std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
bool mFreeDescriptorPool = false;
std::shared_ptr<vk::DescriptorSet> mDescriptorSet;
bool mFreeDescriptorSet = false;
std::shared_ptr<vk::ShaderModule> mShaderModule;
bool mFreeShaderModule = false;
std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
bool mFreePipelineLayout = false;
std::shared_ptr<vk::PipelineCache> mPipelineCache;
bool mFreePipelineCache = false;
std::shared_ptr<vk::Pipeline> mPipeline;
bool mFreePipeline = false;
// Create util functions
void createParameters();
void createShaderModule(std::string shaderFilePath);
void createPipeline();
};
} // End namespace kp

View file

@ -105,12 +105,16 @@ Tensor::isInit()
return this->mIsInit;
}
void Tensor::setData(const std::vector<uint32_t>& data) {
this->mData = data;
}
void
Tensor::recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor)
{
SPDLOG_DEBUG("Kompute Tensor recordCopyFrom called");
if (!this->mIsInit) {
if (!this->mIsInit || !copyFromTensor->mIsInit) {
throw std::runtime_error(
"Kompute Tensor attempted to run createBuffer without init");
}
@ -126,9 +130,53 @@ Tensor::recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor)
this->mCommandBuffer->copyBuffer(
*copyFromTensor->mBuffer, *this->mBuffer, copyRegion);
// TODO: Ensure copied data is consistent with device
this->mData = copyFromTensor->mData;
}
// TODO: Explore if this function should be here or expose buffer
// Builds the descriptor buffer info for this tensor's buffer: offset 0 and
// a range of memorySize().
vk::DescriptorBufferInfo
Tensor::constructDescriptorBufferInfo()
{
    const vk::DeviceSize byteRange = this->memorySize();

    vk::DescriptorBufferInfo bufferInfo(*this->mBuffer,
                                        0, // offset
                                        byteRange);
    return bufferInfo;
}
// Reads the tensor's memory back into mData: maps memorySize() bytes,
// invalidates the mapped range, copies the bytes into mData and unmaps.
//
// A warning is logged when the tensor is not a staging tensor. The warning
// text used to say "to DEVICE buffer" (copied from copyDataToHostBuffer);
// it now says "from", matching the direction of this copy.
void
Tensor::copyDataFromHostBuffer()
{
    SPDLOG_DEBUG("Kompute Tensor copying data from host buffer");

    if (this->mTensorType != TensorTypes::eStaging) {
        spdlog::warn("Copying tensor data manually from DEVICE buffer "
                     "instead of using record GPU command");
    }

    vk::DeviceSize bufferSize = this->memorySize();

    void* mapped = this->mDevice->mapMemory(
      *this->mMemory, 0, bufferSize, vk::MemoryMapFlags());

    // Invalidate before reading through the mapped pointer
    vk::MappedMemoryRange mappedMemoryRange(*this->mMemory, 0, bufferSize);
    this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange);

    memcpy(this->mData.data(), mapped, bufferSize);

    this->mDevice->unmapMemory(*this->mMemory);
}
void Tensor::copyDataToHostBuffer() {
SPDLOG_DEBUG("Kompute Tensor copying data to buffer");
if (this->mTensorType != TensorTypes::eStaging) {
spdlog::warn("Copying tensor data manually to DEVICE buffer instead of using record GPU command");
}
vk::DeviceSize bufferSize = this->memorySize();
// TODO: Verify if flushed memory ranges should happend in sequence
void* mapped = this->mDevice->mapMemory(
*this->mMemory, 0, bufferSize, vk::MemoryMapFlags());
memcpy(mapped, this->mData.data(), bufferSize);
vk::MappedMemoryRange mappedRange(*this->mMemory, 0, bufferSize);
this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
this->mDevice->unmapMemory(*this->mMemory);
}
vk::BufferUsageFlags
Tensor::getBufferUsageFlags()
{
@ -249,17 +297,7 @@ Tensor::createBuffer(void* data)
SPDLOG_DEBUG("Kompute Tensor buffer & memory creation successful");
if (data != nullptr) {
SPDLOG_DEBUG("Kompute Tensor mapping data to buffer");
// TODO: Verify if flushed memory ranges should happen in sequence
void* mapped = this->mDevice->mapMemory(
*this->mMemory, 0, bufferSize, vk::MemoryMapFlags());
memcpy(mapped, data, bufferSize);
vk::MappedMemoryRange mappedRange(*this->mMemory, 0, bufferSize);
this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
this->mDevice->unmapMemory(*this->mMemory);
SPDLOG_DEBUG("Kompute Tensor successful copy data to tensor");
this->copyDataToHostBuffer();
}
}

View file

@ -46,8 +46,18 @@ class Tensor
TensorTypes tensorType();
bool isInit();
// Setters
void setData(const std::vector<uint32_t>& data);
// Record functions
void recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor);
// TODO: Add memory buffer barrier capabilities
//void recordBufferMemoryBarrier();
// Util functions
vk::DescriptorBufferInfo constructDescriptorBufferInfo();
void copyDataFromHostBuffer();
void copyDataToHostBuffer();
private:
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;

View file

@ -22,6 +22,7 @@
#include "Manager.hpp"
#include "OpCreateTensor.hpp"
#include "OpMult.hpp"
#include "Tensor.hpp"
#define BUFFER_ELEMENTS 32
@ -493,6 +494,7 @@ class VulkanCompute
nullptr,
bufferMemoryBarrier,
nullptr);
this->mCommandBuffer.bindPipeline(
vk::PipelineBindPoint::eCompute, this->mPipeline);
this->mCommandBuffer.bindDescriptorSets(
@ -623,18 +625,28 @@ main()
kp::Manager mgr;
spdlog::info("Creating first tensor");
std::shared_ptr<kp::Tensor> tensorOne{ new kp::Tensor(
std::shared_ptr<kp::Tensor> tensorLHS{ new kp::Tensor(
{ 0.0, 1.0, 2.0 }) };
mgr.evalOp<kp::OpCreateTensor>({ tensorOne });
mgr.evalOp<kp::OpCreateTensor>({ tensorLHS });
spdlog::info("Creating second tensor");
std::shared_ptr<kp::Tensor> tensorTwo{ new kp::Tensor(
{ 0.0, 1.0, 2.0 }) };
mgr.evalOp<kp::OpCreateTensor>({ tensorTwo });
std::shared_ptr<kp::Tensor> tensorRHS{ new kp::Tensor(
{ 2.0, 4.0, 6.0 }) };
mgr.evalOp<kp::OpCreateTensor>({ tensorRHS });
// TODO: Add capabilities for just output tensor types
spdlog::info("Creating output tensor");
std::shared_ptr<kp::Tensor> tensorOutput{ new kp::Tensor(
{ 0.0, 0.0, 0.0 }) };
mgr.evalOp<kp::OpCreateTensor>({ tensorOutput });
spdlog::info("Called manager eval success");
spdlog::info("Tensor one: {}", tensorOne->data());
spdlog::info("Tensor two: {}", tensorTwo->data());
spdlog::info("Tensor one: {}", tensorLHS->data());
spdlog::info("Tensor two: {}", tensorRHS->data());
spdlog::info("Tensor two: {}", tensorOutput->data());
spdlog::info("Calling op mult");
mgr.evalOp<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });
return 0;
} catch (const std::exception& exc) {