Initial checkpoint with reasonable workflow
This commit is contained in:
parent
5db9abd06e
commit
9aae5d69db
46 changed files with 1158 additions and 695 deletions
|
|
@ -4,36 +4,62 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
// Default constructor: only emits a debug log entry; no members are assigned
// here.
Algorithm::Algorithm()
{
    KP_LOG_DEBUG("Kompute Algorithm base constructor");
}
|
||||
|
||||
Algorithm::Algorithm(std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
const Constants& specializationConstants)
|
||||
Algorithm::Algorithm(
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants,
|
||||
const Constants& pushConstants)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
|
||||
|
||||
this->mDevice = device;
|
||||
this->mCommandBuffer = commandBuffer;
|
||||
this->mSpecializationConstants = specializationConstants;
|
||||
this->setWorkgroup(workgroup);
|
||||
this->mPushConstants = pushConstants;
|
||||
this->rebuild(tensors, spirv, workgroup, specializationConstants, pushConstants);
|
||||
}
|
||||
|
||||
// Destructor: logs and then hands all cleanup over to
// freeMemoryDestroyGPUResources().
Algorithm::~Algorithm()
{
    KP_LOG_DEBUG("Kompute Algorithm Destructor started");

    freeMemoryDestroyGPUResources();
}
|
||||
|
||||
void
|
||||
Algorithm::rebuild(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants,
|
||||
const Constants& pushConstants)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm rebuild started");
|
||||
|
||||
// Descriptor pool is created first so if available then destroy all before rebuild
|
||||
if (this->mFreeDescriptorPool) {
|
||||
this->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
|
||||
this->createParameters(tensors);
|
||||
this->createShaderModule();
|
||||
this->createPipeline();
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::freeMemoryDestroyGPUResources() {
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_ERROR(
|
||||
"Kompute Algorithm destructor reached with null Device pointer");
|
||||
KP_LOG_WARN(
|
||||
"Kompute Algorithm destroy function reached with null Device pointer");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->mFreePipeline) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline");
|
||||
if (!this->mPipeline) {
|
||||
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
|
|
@ -44,7 +70,7 @@ Algorithm::~Algorithm()
|
|||
if (this->mFreePipelineCache) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache");
|
||||
if (!this->mPipelineCache) {
|
||||
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline cache but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
|
|
@ -55,7 +81,7 @@ Algorithm::~Algorithm()
|
|||
if (this->mFreePipelineLayout) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout");
|
||||
if (!this->mPipelineLayout) {
|
||||
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline layout but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
|
|
@ -66,7 +92,7 @@ Algorithm::~Algorithm()
|
|||
if (this->mFreeShaderModule) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying shader module");
|
||||
if (!this->mShaderModule) {
|
||||
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy shader "
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy shader "
|
||||
"module but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
|
|
@ -77,7 +103,7 @@ Algorithm::~Algorithm()
|
|||
if (this->mFreeDescriptorSet) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Freeing Descriptor Set");
|
||||
if (!this->mDescriptorSet) {
|
||||
KP_LOG_ERROR(
|
||||
KP_LOG_WARN(
|
||||
"Kompute Algorithm Error requested to free descriptor set");
|
||||
}
|
||||
this->mDevice->freeDescriptorSets(
|
||||
|
|
@ -87,7 +113,7 @@ Algorithm::~Algorithm()
|
|||
if (this->mFreeDescriptorSetLayout) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout");
|
||||
if (!this->mDescriptorSetLayout) {
|
||||
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"descriptor set layout but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
|
|
@ -98,7 +124,7 @@ Algorithm::~Algorithm()
|
|||
if (this->mFreeDescriptorPool) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Pool");
|
||||
if (!this->mDescriptorPool) {
|
||||
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"descriptor pool but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
|
|
@ -108,27 +134,7 @@ Algorithm::~Algorithm()
|
|||
}
|
||||
|
||||
void
|
||||
Algorithm::init(const std::vector<uint32_t>& shaderFileData,
|
||||
std::vector<std::shared_ptr<Tensor>> tensorParams)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm init started");
|
||||
|
||||
this->createParameters(tensorParams);
|
||||
this->createShaderModule(shaderFileData);
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : tensorParams) {
|
||||
this->mSpecializationConstants.push_back(tensor->size());
|
||||
}
|
||||
|
||||
this->createPipeline();
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::createDescriptorPool()
|
||||
{}
|
||||
|
||||
void
|
||||
Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
|
||||
Algorithm::createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorParams)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm createParameters started");
|
||||
|
||||
|
|
@ -207,17 +213,17 @@ Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
|
|||
}
|
||||
|
||||
void
|
||||
Algorithm::createShaderModule(const std::vector<uint32_t>& shaderFileData)
|
||||
Algorithm::createShaderModule()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm createShaderModule started");
|
||||
|
||||
vk::ShaderModuleCreateInfo shaderModuleInfo(
|
||||
vk::ShaderModuleCreateFlags(),
|
||||
sizeof(uint32_t) * shaderFileData.size(),
|
||||
shaderFileData.data());
|
||||
sizeof(uint32_t) * this->mSpirv.size(),
|
||||
this->mSpirv.data());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}",
|
||||
shaderFileData.size());
|
||||
this->mSpirv.size());
|
||||
this->mFreeShaderModule = true;
|
||||
this->mShaderModule = std::make_shared<vk::ShaderModule>();
|
||||
this->mDevice->createShaderModule(
|
||||
|
|
@ -300,21 +306,42 @@ Algorithm::createPipeline()
|
|||
}
|
||||
|
||||
void
|
||||
Algorithm::recordDispatch(uint32_t x, uint32_t y, uint32_t z)
|
||||
Algorithm::recordDispatch(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm calling record dispatch");
|
||||
|
||||
this->mCommandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute,
|
||||
commandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute,
|
||||
*this->mPipeline);
|
||||
|
||||
this->mCommandBuffer->bindDescriptorSets(vk::PipelineBindPoint::eCompute,
|
||||
commandBuffer->bindDescriptorSets(vk::PipelineBindPoint::eCompute,
|
||||
*this->mPipelineLayout,
|
||||
0, // First set
|
||||
*this->mDescriptorSet,
|
||||
nullptr // Dispatcher
|
||||
);
|
||||
|
||||
this->mCommandBuffer->dispatch(x, y, z);
|
||||
commandBuffer->dispatch(this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]);
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) {
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
if (workgroup[0] > 0) {
|
||||
// If at least the x value is provided we use mainly the parameters
|
||||
// provided
|
||||
this->mWorkgroup = {
|
||||
workgroup[0],
|
||||
workgroup[1] > 0 ? workgroup[1] : 1,
|
||||
workgroup[2] > 0 ? workgroup[2] : 1
|
||||
};
|
||||
} else {
|
||||
this->mWorkgroup = { minSize, 1, 1 };
|
||||
}
|
||||
KP_LOG_INFO("Kompute OpAlgoCreate dispatch size X: {}, Y: {}, Z: {}",
|
||||
this->mWorkgroup[0],
|
||||
this->mWorkgroup[1],
|
||||
this->mWorkgroup[2]);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,21 +61,30 @@ Manager::~Manager()
|
|||
if (this->mManagedSequences.size()) {
|
||||
KP_LOG_DEBUG("Kompute Manager explicitly running destructor for "
|
||||
"managed sequences");
|
||||
for (const std::pair<std::string, std::shared_ptr<Sequence>>& sqPair :
|
||||
this->mManagedSequences) {
|
||||
sqPair.second->freeMemoryDestroyGPUResources();
|
||||
for (const std::weak_ptr<Sequence>& weakSq : this->mManagedSequences) {
|
||||
if (std::shared_ptr<Sequence> sq = weakSq.lock()) {
|
||||
sq->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
}
|
||||
this->mManagedSequences.clear();
|
||||
}
|
||||
|
||||
// Explicitly release the GPU resources of every managed algorithm that is
// still alive (the registry holds weak references, so expired entries are
// skipped), then empty the algorithm registry.
if (this->mManagedAlgorithms.size()) {
    KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
    for (const std::weak_ptr<Algorithm>& weakAlgorithm :
         this->mManagedAlgorithms) {
        if (std::shared_ptr<Algorithm> algorithm = weakAlgorithm.lock()) {
            algorithm->freeMemoryDestroyGPUResources();
        }
    }
    // Clear the algorithm registry itself. Clearing mManagedTensors here
    // instead would empty the tensor registry before the tensor cleanup
    // below has a chance to run, and would leave this registry populated.
    this->mManagedAlgorithms.clear();
}
|
||||
|
||||
if (this->mManagedTensors.size()) {
|
||||
KP_LOG_DEBUG("Kompute Manager explicitly freeing tensors");
|
||||
for (const std::shared_ptr<Tensor>& tensor : this->mManagedTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
KP_LOG_ERROR("Kompute Manager attempted to free managed tensor "
|
||||
"but not tensor is not initialised");
|
||||
for (const std::weak_ptr<Tensor>& weakTensor : this->mManagedTensors) {
|
||||
if (std::shared_ptr<Tensor> tensor = weakTensor.lock()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
this->mManagedTensors.clear();
|
||||
}
|
||||
|
|
@ -111,32 +120,21 @@ Manager::~Manager()
|
|||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Manager::sequence(std::string sequenceName, uint32_t queueIndex)
|
||||
Manager::sequence(uint32_t queueIndex)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager sequence() with sequenceName: {} "
|
||||
"and queueIndex: {}",
|
||||
sequenceName,
|
||||
queueIndex);
|
||||
|
||||
std::shared_ptr<Sequence> sq = nullptr;
|
||||
std::shared_ptr<Sequence> sq =
|
||||
std::make_shared<Sequence>(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueues[queueIndex],
|
||||
this->mComputeQueueFamilyIndices[queueIndex]);
|
||||
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator found =
|
||||
this->mManagedSequences.find(sequenceName);
|
||||
this->mManagedSequences.insert(sq);
|
||||
|
||||
if (found == this->mManagedSequences.end()) {
|
||||
std::shared_ptr<Sequence> sq =
|
||||
std::make_shared<Sequence>(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueues[queueIndex],
|
||||
this->mComputeQueueFamilyIndices[queueIndex]);
|
||||
sq->init();
|
||||
|
||||
this->mManagedSequences.insert({ sequenceName, sq });
|
||||
|
||||
return sq;
|
||||
} else {
|
||||
return found->second;
|
||||
}
|
||||
return sq;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -334,13 +332,10 @@ Manager::tensor(
|
|||
Tensor::TensorTypes tensorType,
|
||||
bool syncDataToGPU)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager tensor triggered");
|
||||
KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager creating new tensor shared ptr");
|
||||
std::shared_ptr<Tensor> tensor =
|
||||
std::make_shared<Tensor>(kp::Tensor(data, tensorType));
|
||||
|
||||
tensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>(
|
||||
kp::Tensor(this->mPhysicalDevice, this->mDevice, data, tensorType));
|
||||
|
||||
if (syncDataToGPU) {
|
||||
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
|
||||
|
|
@ -349,6 +344,29 @@ Manager::tensor(
|
|||
|
||||
return tensor;
|
||||
}
|
||||
std::shared_ptr<Algorithm>
|
||||
Manager::algorithm(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants,
|
||||
const Constants& pushConstants) {
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
|
||||
|
||||
std::shared_ptr<Algorithm> algorithm = std::make_shared<Algorithm>(
|
||||
kp::Algorithm(
|
||||
this->mDevice,
|
||||
tensors,
|
||||
spirv,
|
||||
workgroup,
|
||||
specializationConstants,
|
||||
pushConstants));
|
||||
|
||||
this->mManagedAlgorithms.insert(algorithm);
|
||||
|
||||
return algorithm;
|
||||
}
|
||||
|
||||
void
|
||||
Manager::rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
|
|
|
|||
|
|
@ -1,176 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/operations/OpAlgoBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpAlgoBase::OpAlgoBase()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase constructor base");
|
||||
}
|
||||
|
||||
OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const Workgroup& komputeWorkgroup,
|
||||
const Constants& specializationConstants)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}",
|
||||
tensors.size());
|
||||
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
if (komputeWorkgroup[0] > 0) {
|
||||
// If at least the x value is provided we use mainly the parameters
|
||||
// provided
|
||||
this->mKomputeWorkgroup = {
|
||||
komputeWorkgroup[0],
|
||||
komputeWorkgroup[1] > 0 ? komputeWorkgroup[1] : 1,
|
||||
komputeWorkgroup[2] > 0 ? komputeWorkgroup[2] : 1
|
||||
};
|
||||
} else {
|
||||
this->mKomputeWorkgroup = { tensors[0]->size(), 1, 1 };
|
||||
}
|
||||
KP_LOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}",
|
||||
this->mKomputeWorkgroup[0],
|
||||
this->mKomputeWorkgroup[1],
|
||||
this->mKomputeWorkgroup[2]);
|
||||
|
||||
this->mAlgorithm = std::make_shared<Algorithm>(device, commandBuffer, specializationConstants);
|
||||
}
|
||||
|
||||
OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::string shaderFilePath,
|
||||
const Workgroup& komputeWorkgroup,
|
||||
const Constants& specializationConstants)
|
||||
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup, specializationConstants)
|
||||
{
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}",
|
||||
shaderFilePath);
|
||||
|
||||
this->mShaderFilePath = shaderFilePath;
|
||||
}
|
||||
|
||||
OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& shaderDataRaw,
|
||||
const Workgroup& komputeWorkgroup,
|
||||
const Constants& specializationConstants)
|
||||
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup, specializationConstants)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw "
|
||||
"data length: {}",
|
||||
shaderDataRaw.size());
|
||||
|
||||
this->mShaderDataRaw = shaderDataRaw;
|
||||
}
|
||||
|
||||
OpAlgoBase::~OpAlgoBase()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoBase::init()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase init called");
|
||||
|
||||
if (this->mTensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoBase called with less than 1 tensor");
|
||||
}
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoBase validation failed; all tensor parameters "
|
||||
"must be initialised.");
|
||||
}
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase fetching spirv data");
|
||||
|
||||
std::vector<uint32_t> shaderFileData = this->fetchSpirvBinaryData();
|
||||
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase Initialising algorithm component");
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoBase::record()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
tensor->recordBufferMemoryBarrier(
|
||||
this->mCommandBuffer,
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
}
|
||||
|
||||
this->mAlgorithm->recordDispatch(this->mKomputeWorkgroup[0],
|
||||
this->mKomputeWorkgroup[1],
|
||||
this->mKomputeWorkgroup[2]);
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoBase::preEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoBase::postEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase postSubmit called");
|
||||
}
|
||||
|
||||
std::vector<uint32_t>
|
||||
OpAlgoBase::fetchSpirvBinaryData()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase Running fetchSpirvBinaryData");
|
||||
|
||||
if (this->mShaderFilePath.size()) {
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase Reading data from file path");
|
||||
|
||||
std::ifstream fileStream(this->mShaderFilePath,
|
||||
std::ios::binary | std::ios::in |
|
||||
std::ios::ate);
|
||||
|
||||
if (!fileStream.good()) {
|
||||
throw std::runtime_error("Error reading file: " +
|
||||
this->mShaderFilePath);
|
||||
}
|
||||
|
||||
size_t shaderFileSize = fileStream.tellg();
|
||||
fileStream.seekg(0, std::ios::beg);
|
||||
char* shaderDataRaw = new char[shaderFileSize];
|
||||
fileStream.read(shaderDataRaw, shaderFileSize);
|
||||
fileStream.close();
|
||||
|
||||
KP_LOG_WARN("Kompute OpAlgoBase fetched {} bytes", shaderFileSize);
|
||||
|
||||
return std::vector<uint32_t>((uint32_t*)shaderDataRaw, (uint32_t*)(shaderDataRaw + shaderFileSize));
|
||||
} else if (this->mShaderDataRaw.size()) {
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase Reading data from data provided");
|
||||
return this->mShaderDataRaw;
|
||||
} else {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither "
|
||||
"filepath nor data provided");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
51
src/OpAlgoCreate.cpp
Normal file
51
src/OpAlgoCreate.cpp
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/operations/OpAlgoCreate.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpAlgoCreate::OpAlgoCreate(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::shared_ptr<Algorithm> algorithm)
|
||||
: OpBase(tensors, algorithm)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoCreate constructor");
|
||||
|
||||
this->mManagesAlgorithm = true;
|
||||
this->mManagesTensors = false;
|
||||
}
|
||||
|
||||
OpAlgoCreate::~OpAlgoCreate()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoCreate destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoCreate::init(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) {
|
||||
|
||||
KP_LOG_DEBUG("Kompute OpAlgoCreate init started");
|
||||
|
||||
// Explicitly calling top level function to create algo
|
||||
OpBase::init(physicalDevice, device);
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoCreate::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoCreate record called");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoCreate::preEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoCreate preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoCreate::postEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoCreate postSubmit called");
|
||||
}
|
||||
|
||||
}
|
||||
59
src/OpAlgoDispatch.cpp
Normal file
59
src/OpAlgoDispatch.cpp
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/operations/OpAlgoDispatch.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpAlgoDispatch::OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::shared_ptr<Algorithm> algorithm)
|
||||
: OpBase(tensors, algorithm)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
|
||||
|
||||
this->mManagesAlgorithm = false;
|
||||
this->mManagesTensors = false;
|
||||
}
|
||||
|
||||
OpAlgoDispatch::~OpAlgoDispatch()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoDispatch::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch init called");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoDispatch::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
tensor->recordBufferMemoryBarrier(
|
||||
commandBuffer,
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
}
|
||||
|
||||
this->mAlgorithm->recordDispatch(commandBuffer);
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoDispatch::preEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoDispatch::postEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch postSubmit called");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -10,15 +10,12 @@ OpAlgoLhsRhsOut::OpAlgoLhsRhsOut()
|
|||
}
|
||||
|
||||
OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
const Workgroup& komputeWorkgroup)
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::shared_ptr<Algorithm> algorithm)
|
||||
// The inheritance is initialised with the copyOutputData to false given that
|
||||
// this depencendant class handles the transfer of data via staging buffers in
|
||||
// a granular way.
|
||||
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup)
|
||||
: OpAlgoCreate(tensors, algorithm)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params");
|
||||
}
|
||||
|
|
@ -29,7 +26,8 @@ OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut()
|
|||
}
|
||||
|
||||
void
|
||||
OpAlgoLhsRhsOut::init()
|
||||
OpAlgoLhsRhsOut::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut init called");
|
||||
|
||||
|
|
@ -70,12 +68,10 @@ OpAlgoLhsRhsOut::init()
|
|||
std::vector<uint32_t> shaderFileData = this->fetchSpirvBinaryData();
|
||||
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component");
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoLhsRhsOut::record()
|
||||
OpAlgoLhsRhsOut::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut record called");
|
||||
|
||||
|
|
|
|||
|
|
@ -3,18 +3,13 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
// Default constructor: only emits a debug log entry.
OpTensorCopy::OpTensorCopy()
{
    KP_LOG_DEBUG("Kompute OpTensorCopy constructor base");
}
|
||||
|
||||
OpTensorCopy::OpTensorCopy(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors)
|
||||
OpTensorCopy::OpTensorCopy(std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(tensors, nullptr)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy constructor with params");
|
||||
|
||||
this->mManagesTensors = false;
|
||||
this->mManagesAlgorithm = false;
|
||||
}
|
||||
|
||||
OpTensorCopy::~OpTensorCopy()
|
||||
|
|
@ -23,7 +18,8 @@ OpTensorCopy::~OpTensorCopy()
|
|||
}
|
||||
|
||||
void
|
||||
OpTensorCopy::init()
|
||||
OpTensorCopy::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy init called");
|
||||
|
||||
|
|
@ -46,14 +42,14 @@ OpTensorCopy::init()
|
|||
}
|
||||
|
||||
void
|
||||
OpTensorCopy::record()
|
||||
OpTensorCopy::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy record called");
|
||||
|
||||
// We iterate from the second tensor onwards and record a copy to all
|
||||
for (size_t i = 1; i < this->mTensors.size(); i++) {
|
||||
this->mTensors[i]->recordCopyFrom(
|
||||
this->mCommandBuffer, this->mTensors[0], false);
|
||||
commandBuffer, this->mTensors[0], false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
46
src/OpTensorCreate.cpp
Normal file
46
src/OpTensorCreate.cpp
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
|
||||
#include "kompute/operations/OpTensorCreate.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpTensorCreate::OpTensorCreate(
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
: OpBase(tensors, nullptr)
|
||||
{
|
||||
KP_LOG_DEBUG("Compute OpTensorCreate constructor with params");
|
||||
this->mManagesTensors = true;
|
||||
}
|
||||
|
||||
OpTensorCreate::~OpTensorCreate()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCreate destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCreate::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCreate init called");
|
||||
|
||||
OpBase::init(physicalDevice, device);
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCreate::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCreate record called");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCreate::preEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCreate preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCreate::postEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCreate postEval called");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,21 +1,11 @@
|
|||
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpTensorSyncDevice.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
// Default constructor: only emits a debug log entry.
OpTensorSyncDevice::OpTensorSyncDevice()
{
    KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor base");
}
|
||||
|
||||
OpTensorSyncDevice::OpTensorSyncDevice(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors)
|
||||
: OpBase(tensors, nullptr)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor with params");
|
||||
}
|
||||
|
|
@ -26,7 +16,8 @@ OpTensorSyncDevice::~OpTensorSyncDevice()
|
|||
}
|
||||
|
||||
void
|
||||
OpTensorSyncDevice::init()
|
||||
OpTensorSyncDevice::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice init called");
|
||||
|
||||
|
|
@ -50,14 +41,14 @@ OpTensorSyncDevice::init()
|
|||
}
|
||||
|
||||
void
|
||||
OpTensorSyncDevice::record()
|
||||
OpTensorSyncDevice::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice record called");
|
||||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensors[i]->recordCopyFromStagingToDevice(
|
||||
this->mCommandBuffer, false);
|
||||
commandBuffer, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,19 +5,14 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
// Default constructor: only emits a debug log entry.
OpTensorSyncLocal::OpTensorSyncLocal()
{
    KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor base");
}
|
||||
|
||||
OpTensorSyncLocal::OpTensorSyncLocal(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors)
|
||||
: OpBase(tensors, nullptr)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor with params");
|
||||
|
||||
this->mManagesTensors = false;
|
||||
this->mManagesAlgorithm = false;
|
||||
}
|
||||
|
||||
OpTensorSyncLocal::~OpTensorSyncLocal()
|
||||
|
|
@ -26,7 +21,8 @@ OpTensorSyncLocal::~OpTensorSyncLocal()
|
|||
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::init()
|
||||
OpTensorSyncLocal::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal init called");
|
||||
|
||||
|
|
@ -40,24 +36,18 @@ OpTensorSyncLocal::init()
|
|||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncLocal: Tensor has not been initialized");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
|
||||
KP_LOG_WARN(
|
||||
"Kompute OpTensorSyncLocal tensor parameter is of type "
|
||||
"TensorTypes::eStorage and hence cannot be used to receive or "
|
||||
"pass data.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::record()
|
||||
OpTensorSyncLocal::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal record called");
|
||||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensors[i]->recordCopyFromDeviceToStaging(
|
||||
this->mCommandBuffer, true);
|
||||
commandBuffer, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,12 +3,6 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
// Default constructor: logs and marks the sequence as not initialised.
Sequence::Sequence()
{
    KP_LOG_DEBUG("Kompute Sequence base constructor");
    mIsInit = false;
}
|
||||
|
||||
Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::Queue> computeQueue,
|
||||
|
|
@ -20,28 +14,16 @@ Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
|||
this->mDevice = device;
|
||||
this->mComputeQueue = computeQueue;
|
||||
this->mQueueIndex = queueIndex;
|
||||
this->mIsInit = false;
|
||||
|
||||
this->createCommandPool();
|
||||
this->createCommandBuffer();
|
||||
}
|
||||
|
||||
Sequence::~Sequence()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence Destructor started");
|
||||
|
||||
if (!this->mIsInit) {
|
||||
KP_LOG_INFO("Kompute Sequence destructor called but sequence is not "
|
||||
"initialized so no need to removing GPU resources.");
|
||||
return;
|
||||
} else {
|
||||
this->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::init()
|
||||
{
|
||||
this->createCommandPool();
|
||||
this->createCommandBuffer();
|
||||
this->mIsInit = true;
|
||||
this->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -194,28 +176,14 @@ Sequence::isRecording()
|
|||
return this->mRecording;
|
||||
}
|
||||
|
||||
bool
|
||||
Sequence::isInit()
|
||||
{
|
||||
return this->mIsInit;
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::freeMemoryDestroyGPUResources()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence freeMemoryDestroyGPUResources called");
|
||||
|
||||
if (!this->mIsInit) {
|
||||
KP_LOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called "
|
||||
"but Sequence is not initialized so there's no relevant "
|
||||
"GPU resources.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called "
|
||||
"with null Device pointer");
|
||||
this->mIsInit = false;
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -225,7 +193,6 @@ Sequence::freeMemoryDestroyGPUResources()
|
|||
KP_LOG_ERROR(
|
||||
"Kompute Sequence freeMemoryDestroyGPUResources called with null "
|
||||
"CommandPool pointer");
|
||||
this->mIsInit = false;
|
||||
return;
|
||||
}
|
||||
this->mDevice->freeCommandBuffers(
|
||||
|
|
@ -239,7 +206,6 @@ Sequence::freeMemoryDestroyGPUResources()
|
|||
KP_LOG_ERROR(
|
||||
"Kompute Sequence freeMemoryDestroyGPUResources called with null "
|
||||
"CommandPool pointer");
|
||||
this->mIsInit = false;
|
||||
return;
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
|
|
@ -253,7 +219,6 @@ Sequence::freeMemoryDestroyGPUResources()
|
|||
this->mOperations.clear();
|
||||
}
|
||||
|
||||
this->mIsInit = false;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -3,23 +3,19 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
Tensor::Tensor()
|
||||
Tensor::Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<float>& data,
|
||||
const TensorTypes& tensorType)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor base constructor");
|
||||
this->mTensorType = TensorTypes::eDevice;
|
||||
}
|
||||
|
||||
Tensor::Tensor(const std::vector<float>& data, TensorTypes tensorType)
|
||||
{
|
||||
#if DEBUG
|
||||
KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}",
|
||||
data.size(),
|
||||
tensorType);
|
||||
#endif
|
||||
|
||||
this->mData = data;
|
||||
this->mShape = { static_cast<uint32_t>(data.size()) };
|
||||
this->mTensorType = tensorType;
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
|
||||
this->rebuild(data, tensorType);
|
||||
}
|
||||
|
||||
Tensor::~Tensor()
|
||||
|
|
@ -27,25 +23,25 @@ Tensor::~Tensor()
|
|||
KP_LOG_DEBUG("Kompute Tensor destructor started. Type: {}",
|
||||
this->tensorType());
|
||||
|
||||
if (this->isInit()) {
|
||||
this->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
this->freeMemoryDestroyGPUResources();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor destructor success");
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
Tensor::rebuild(const std::vector<float>& data,
|
||||
TensorTypes tensorType)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor running init with Vulkan params and num data "
|
||||
"elementS: {}",
|
||||
this->mData.size());
|
||||
KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}",
|
||||
data.size());
|
||||
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
this->mData = data;
|
||||
this->mTensorType = tensorType;
|
||||
|
||||
this->mIsInit = true;
|
||||
if (this->mPrimaryBuffer || this->mPrimaryMemory) {
|
||||
KP_LOG_DEBUG("Kompute Tensor destroying existing resources before rebuild");
|
||||
this->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
|
||||
this->allocateMemoryCreateGPUResources();
|
||||
}
|
||||
|
|
@ -71,13 +67,7 @@ Tensor::memorySize()
|
|||
uint32_t
|
||||
Tensor::size()
|
||||
{
|
||||
return this->mShape[0];
|
||||
}
|
||||
|
||||
std::array<uint32_t, KP_MAX_DIM_SIZE>
|
||||
Tensor::shape()
|
||||
{
|
||||
return this->mShape;
|
||||
return static_cast<uint32_t>(this->mData.size());
|
||||
}
|
||||
|
||||
Tensor::TensorTypes
|
||||
|
|
@ -86,12 +76,6 @@ Tensor::tensorType()
|
|||
return this->mTensorType;
|
||||
}
|
||||
|
||||
bool
|
||||
Tensor::isInit()
|
||||
{
|
||||
return this->mIsInit && this->mPrimaryBuffer && this->mPrimaryMemory;
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::setData(const std::vector<float>& data)
|
||||
{
|
||||
|
|
@ -166,11 +150,6 @@ Tensor::copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
|||
bool createBarrier)
|
||||
{
|
||||
|
||||
if (!this->mIsInit) {
|
||||
throw std::runtime_error(
|
||||
"Kompute Tensor attempted to run copyBuffer without init");
|
||||
}
|
||||
|
||||
commandBuffer->copyBuffer(*bufferFrom, *bufferTo, copyRegion);
|
||||
|
||||
if (createBarrier) {
|
||||
|
|
@ -344,11 +323,6 @@ Tensor::allocateMemoryCreateGPUResources()
|
|||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor creating buffer");
|
||||
|
||||
if (!this->mIsInit) {
|
||||
throw std::runtime_error(
|
||||
"Kompute Tensor attempted to run createBuffer without init");
|
||||
}
|
||||
|
||||
if (!this->mPhysicalDevice) {
|
||||
throw std::runtime_error("Kompute Tensor phyisical device is null");
|
||||
}
|
||||
|
|
@ -457,9 +431,7 @@ Tensor::allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
|
|||
void
|
||||
Tensor::freeMemoryDestroyGPUResources()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor started freeMemoryDestroyGPUResources");
|
||||
|
||||
this->mIsInit = false;
|
||||
KP_LOG_DEBUG("Kompute Tensor started freeMemoryDestroyGPUResources()");
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_ERROR(
|
||||
|
|
@ -519,7 +491,7 @@ Tensor::freeMemoryDestroyGPUResources()
|
|||
}
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor successful freeMemoryDestroyGPUResources");
|
||||
KP_LOG_DEBUG("Kompute Tensor successful freeMemoryDestroyGPUResources()");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,11 +13,6 @@ namespace kp {
|
|||
class Algorithm
|
||||
{
|
||||
public:
|
||||
/**
|
||||
Base constructor for Algorithm. Should not be used unless explicit
|
||||
intended.
|
||||
*/
|
||||
Algorithm();
|
||||
|
||||
/**
|
||||
* Default constructor for Algorithm
|
||||
|
|
@ -26,9 +21,13 @@ public:
|
|||
* @param commandBuffer The vulkan command buffer to bind the pipeline and
|
||||
* shaders
|
||||
*/
|
||||
Algorithm(std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
const Constants& specializationConstants = {});
|
||||
Algorithm(
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
|
||||
/**
|
||||
* Initialiser for the shader data provided to the algorithm as well as
|
||||
|
|
@ -39,8 +38,16 @@ public:
|
|||
* @param specializationConstants The specialization parameters to pass to the function
|
||||
* processing
|
||||
*/
|
||||
void init(const std::vector<uint32_t>& shaderFileData,
|
||||
std::vector<std::shared_ptr<Tensor>> tensorParams);
|
||||
void rebuild(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
|
||||
bool isInit();
|
||||
|
||||
void freeMemoryDestroyGPUResources();
|
||||
|
||||
/**
|
||||
* Destructor for Algorithm which is responsible for freeing and destroying
|
||||
|
|
@ -56,12 +63,13 @@ public:
|
|||
* @param y Layout Y dispatch value
|
||||
* @param z Layout Z dispatch value
|
||||
*/
|
||||
void recordDispatch(uint32_t x = 1, uint32_t y = 1, uint32_t z = 1);
|
||||
void recordDispatch(std::shared_ptr<vk::CommandBuffer> commandBuffer);
|
||||
|
||||
void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
|
||||
|
|
@ -80,15 +88,19 @@ private:
|
|||
bool mFreePipeline = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<uint32_t> mSpirv;
|
||||
Constants mSpecializationConstants;
|
||||
Constants mPushConstants;
|
||||
Workgroup mWorkgroup;
|
||||
|
||||
bool mIsInit;
|
||||
|
||||
// Create util functions
|
||||
void createShaderModule(const std::vector<uint32_t>& shaderFileData);
|
||||
void createShaderModule();
|
||||
void createPipeline();
|
||||
|
||||
// Parameters
|
||||
void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
|
||||
void createDescriptorPool();
|
||||
void createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorParams);
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -67,9 +67,7 @@ class Manager
|
|||
* @param queueIndex The queue to use from the available queues
|
||||
* @return Shared pointer to the manager owned sequence resource
|
||||
*/
|
||||
std::shared_ptr<Sequence> sequence(
|
||||
std::string sequenceName = KP_DEFAULT_SESSION,
|
||||
uint32_t queueIndex = 0);
|
||||
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0);
|
||||
|
||||
/**
|
||||
* Function that evaluates operation against named sequence.
|
||||
|
|
@ -228,6 +226,13 @@ class Manager
|
|||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice,
|
||||
bool syncDataToGPU = true);
|
||||
|
||||
std::shared_ptr<Algorithm> algorithm(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
|
||||
/**
|
||||
* Function that simplifies the common workflow of tensor initialisation. It
|
||||
* will take the constructor parameters for a Tensor and will use it to
|
||||
|
|
@ -312,10 +317,10 @@ class Manager
|
|||
bool mFreeDevice = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::set<std::shared_ptr<Tensor>> mManagedTensors;
|
||||
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>
|
||||
mManagedSequences;
|
||||
std::set<std::weak_ptr<Tensor>> mManagedTensors;
|
||||
std::set<std::weak_ptr<Sequence>> mManagedSequences;
|
||||
std::set<std::weak_ptr<Algorithm>> mManagedAlgorithms;
|
||||
//std::unique_ptr<Sequence> mDefaultSequence;
|
||||
|
||||
std::vector<uint32_t> mComputeQueueFamilyIndices;
|
||||
std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
|
||||
|
|
|
|||
|
|
@ -12,11 +12,6 @@ namespace kp {
|
|||
class Sequence
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Base constructor for Sequence. Should not be used unless explicit
|
||||
* intended.
|
||||
*/
|
||||
Sequence();
|
||||
/**
|
||||
* Main constructor for sequence which requires core vulkan components to
|
||||
* generate all dependent resources.
|
||||
|
|
@ -36,12 +31,6 @@ class Sequence
|
|||
*/
|
||||
~Sequence();
|
||||
|
||||
/**
|
||||
* Initialises sequence including the creation of the command pool and the
|
||||
* command buffer.
|
||||
*/
|
||||
void init();
|
||||
|
||||
/**
|
||||
* Begins recording commands for commands to be submitted into the command
|
||||
* buffer.
|
||||
|
|
@ -99,13 +88,6 @@ class Sequence
|
|||
*/
|
||||
bool isRunning();
|
||||
|
||||
/**
|
||||
* Returns true if the sequence has been successfully initialised.
|
||||
*
|
||||
* @return Boolean stating if sequence has been initialised.
|
||||
*/
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Destroys and frees the GPU resources which include the buffer and memory
|
||||
* and sets the sequence as init=False.
|
||||
|
|
@ -179,7 +161,6 @@ class Sequence
|
|||
std::vector<std::unique_ptr<OpBase>> mOperations;
|
||||
|
||||
// State
|
||||
bool mIsInit = false;
|
||||
bool mRecording = false;
|
||||
bool mIsRunning = false;
|
||||
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@
|
|||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#define KP_MAX_DIM_SIZE 1
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
|
|
@ -30,11 +28,6 @@ class Tensor
|
|||
eStorage = 2, ///< Type is Device memory (only)
|
||||
};
|
||||
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
Tensor();
|
||||
|
||||
/**
|
||||
* Default constructor with data provided which would be used to create the
|
||||
* respective vulkan buffer and memory.
|
||||
|
|
@ -43,8 +36,10 @@ class Tensor
|
|||
* tensor
|
||||
* @param tensorType Type for the tensor which is of type TensorTypes
|
||||
*/
|
||||
Tensor(const std::vector<float>& data,
|
||||
TensorTypes tensorType = TensorTypes::eDevice);
|
||||
Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<float>& data,
|
||||
const TensorTypes& tensorType = TensorTypes::eDevice);
|
||||
|
||||
/**
|
||||
* Destructor which is in charge of freeing vulkan resources unless they
|
||||
|
|
@ -58,8 +53,8 @@ class Tensor
|
|||
* would only be created for the tensors of type TensorType::eDevice as
|
||||
* otherwise there is no need to copy from host memory.
|
||||
*/
|
||||
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device);
|
||||
void rebuild(const std::vector<float>& data,
|
||||
TensorTypes tensorType = TensorTypes::eDevice);
|
||||
|
||||
/**
|
||||
* Destroys and frees the GPU resources which include the buffer and memory.
|
||||
|
|
@ -91,26 +86,13 @@ class Tensor
|
|||
* @return Unsigned integer representing the total number of elements
|
||||
*/
|
||||
uint32_t size();
|
||||
/**
|
||||
* Returns the shape of the tensor, which includes the number of dimensions
|
||||
* and the size per dimension.
|
||||
*
|
||||
* @return Array containing the sizes for each dimension. Zero means
|
||||
* respective dimension is not active.
|
||||
*/
|
||||
std::array<uint32_t, KP_MAX_DIM_SIZE> shape();
|
||||
|
||||
/**
|
||||
* Retrieve the tensor type of the Tensor
|
||||
*
|
||||
* @return Tensor type of tensor
|
||||
*/
|
||||
TensorTypes tensorType();
|
||||
/**
|
||||
* Returns true if the tensor initialisation function has been carried out
|
||||
* successful, which would mean that the buffer and memory will have been
|
||||
* provisioned.
|
||||
*/
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Sets / resets the vector data of the tensor. This function does not
|
||||
|
|
@ -214,9 +196,6 @@ class Tensor
|
|||
|
||||
TensorTypes mTensorType = TensorTypes::eDevice;
|
||||
|
||||
std::array<uint32_t, KP_MAX_DIM_SIZE> mShape;
|
||||
bool mIsInit = false;
|
||||
|
||||
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
|
||||
void createBuffer(std::shared_ptr<vk::Buffer> buffer,
|
||||
vk::BufferUsageFlags bufferUsageFlags);
|
||||
|
|
|
|||
77
src/include/kompute/operations/OpAlgoCreate.hpp
Normal file
77
src/include/kompute/operations/OpAlgoCreate.hpp
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/shaders/shaderopmult.hpp"
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that provides a general abstraction that simplifies the use of
|
||||
* algorithm and parameter components which can be used with shaders.
|
||||
* By default it enables the user to provide a dynamic number of tensors
|
||||
* which are then passed as inputs.
|
||||
*/
|
||||
class OpAlgoCreate : public OpBase
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param algorithm Shared pointer to the Algorithm passed to this operation
|
||||
*
|
||||
* The Vulkan physical and logical devices are not constructor arguments:
|
||||
* they are passed to init(), and the command buffer is passed to
|
||||
* record().
|
||||
*/
|
||||
OpAlgoCreate(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::shared_ptr<kp::Algorithm> algorithm);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
virtual ~OpAlgoCreate() override;
|
||||
|
||||
|
||||
/**
* Overrides the base-class init; receives the Vulkan physical device and
* the Vulkan logical device as shared pointers.
*
* @param physicalDevice Shared pointer to the Vulkan physical device
* @param device Shared pointer to the Vulkan logical device
*/
virtual void init(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
* the barriers that ensure the memory has been copied before going in and
|
||||
* out of the shader, as well as the dispatch operation that sends the
|
||||
* shader processing to the gpu. This function also records the GPU memory
|
||||
* copy of the output data for the staging buffer so it can be read by the
|
||||
* host.
*
* @param commandBuffer Vulkan command buffer supplied by the caller
|
||||
*/
|
||||
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
|
||||
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*/
|
||||
virtual void preEval() override;
|
||||
|
||||
/**
|
||||
* Executes after the recorded commands are submitted, and performs a copy
|
||||
* of the GPU Device memory into the staging buffer so the output data can
|
||||
* be retrieved.
|
||||
*/
|
||||
virtual void postEval() override;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
|
|
@ -1,14 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/shaders/shaderopmult.hpp"
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
|
@ -19,15 +13,10 @@ namespace kp {
|
|||
* By default it enables the user to provide a dynamic number of tensors
|
||||
* which are then passed as inputs.
|
||||
*/
|
||||
class OpAlgoBase : public OpBase
|
||||
class OpAlgoDispatch : public OpBase
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpAlgoBase();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
|
|
@ -40,12 +29,8 @@ class OpAlgoBase : public OpBase
|
|||
* @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format)
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const Workgroup& komputeWorkgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::shared_ptr<kp::Algorithm> algorithm);
|
||||
|
||||
/**
|
||||
* Constructor that enables a file to be passed to the operation with
|
||||
|
|
@ -59,13 +44,9 @@ class OpAlgoBase : public OpBase
|
|||
* @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format)
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::string shaderFilePath,
|
||||
const Workgroup& komputeWorkgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::shared_ptr<kp::Algorithm>& algorithm,
|
||||
std::string shaderFilePath);
|
||||
|
||||
/**
|
||||
* Constructor that enables raw shader data to be passed to the main operation
|
||||
|
|
@ -78,19 +59,15 @@ class OpAlgoBase : public OpBase
|
|||
* @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& shaderDataRaw,
|
||||
const Workgroup& komputeWorkgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::shared_ptr<kp::Algorithm>& algorithm,
|
||||
const std::vector<uint32_t>& shaderDataRaw);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
virtual ~OpAlgoBase() override;
|
||||
virtual ~OpAlgoDispatch() override;
|
||||
|
||||
/**
|
||||
* The init function is responsible for the initialisation of the algorithm
|
||||
|
|
@ -98,7 +75,8 @@ class OpAlgoBase : public OpBase
|
|||
* on the options provided. Further dependent classes can perform more
|
||||
* specific checks such as ensuring tensors provided are initialised, etc.
|
||||
*/
|
||||
virtual void init() override;
|
||||
virtual void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
|
|
@ -108,7 +86,7 @@ class OpAlgoBase : public OpBase
|
|||
* copy of the output data for the staging buffer so it can be read by the
|
||||
* host.
|
||||
*/
|
||||
virtual void record() override;
|
||||
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -123,21 +101,6 @@ class OpAlgoBase : public OpBase
|
|||
*/
|
||||
virtual void postEval() override;
|
||||
|
||||
protected:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
bool mFreeAlgorithm = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
|
||||
Workgroup mKomputeWorkgroup;
|
||||
|
||||
std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing
|
||||
std::vector<uint32_t> mShaderDataRaw; ///< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content
|
||||
|
||||
virtual std::vector<uint32_t> fetchSpirvBinaryData();
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
@ -7,7 +7,7 @@
|
|||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpAlgoBase.hpp"
|
||||
#include "kompute/operations/OpAlgoCreate.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
|
|
@ -16,13 +16,9 @@ namespace kp {
|
|||
* right hand and left hand side datapoints together with a single output.
|
||||
* The expected data passed is two input tensors and one output tensor.
|
||||
*/
|
||||
class OpAlgoLhsRhsOut : public OpAlgoBase
|
||||
class OpAlgoLhsRhsOut : public OpAlgoCreate
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpAlgoLhsRhsOut();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
|
|
@ -36,11 +32,8 @@ class OpAlgoLhsRhsOut : public OpAlgoBase
|
|||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
const Workgroup& komputeWorkgroup = {});
|
||||
OpAlgoLhsRhsOut(std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::shared_ptr<Algorithm> algorithm);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
|
|
@ -54,7 +47,8 @@ class OpAlgoLhsRhsOut : public OpAlgoBase
|
|||
* tensors, and creates the algorithm component which processes the
|
||||
* computation.
|
||||
*/
|
||||
virtual void init() override;
|
||||
virtual void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
|
|
@ -64,7 +58,7 @@ class OpAlgoLhsRhsOut : public OpAlgoBase
|
|||
* copy of the output data for the staging buffer so it can be read by the
|
||||
* host.
|
||||
*/
|
||||
virtual void record() override;
|
||||
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Executes after the recorded commands are submitted, and performs a copy
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
|
|
@ -17,10 +18,6 @@ namespace kp {
|
|||
class OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpBase() { KP_LOG_DEBUG("Compute OpBase base constructor"); }
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
|
|
@ -32,17 +29,13 @@ class OpBase
|
|||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
*/
|
||||
OpBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
OpBase(std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::shared_ptr<Algorithm> algorithm)
|
||||
{
|
||||
KP_LOG_DEBUG("Compute OpBase constructor with params");
|
||||
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
this->mCommandBuffer = commandBuffer;
|
||||
this->mTensors = tensors;
|
||||
this->mAlgorithm = algorithm;
|
||||
this->mIsInit = false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -53,37 +46,89 @@ class OpBase
|
|||
virtual ~OpBase()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpBase destructor started");
|
||||
this->destroy();
|
||||
}
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_WARN("Kompute OpBase destructor called with empty device");
|
||||
return;
|
||||
}
|
||||
virtual std::shared_ptr<kp::Algorithm> algorithm() {
|
||||
return this->mAlgorithm;
|
||||
}
|
||||
|
||||
if (this->mFreeTensors) {
|
||||
KP_LOG_DEBUG("Kompute OpBase freeing tensors");
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (tensor && tensor->isInit()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
} else {
|
||||
KP_LOG_WARN("Kompute OpBase expected to free "
|
||||
"tensor but has already been freed.");
|
||||
}
|
||||
}
|
||||
}
|
||||
virtual std::vector<std::shared_ptr<kp::Tensor>> tensors() {
|
||||
return this->mTensors;
|
||||
}
|
||||
|
||||
virtual bool isInit() {
|
||||
return this->mIsInit;
|
||||
}
|
||||
|
||||
/**
|
||||
* The init function is responsible for setting up all the resources and
|
||||
* should be called after the Operation has been created.
|
||||
*/
|
||||
virtual void init() = 0;
|
||||
// TODO: Potentially remove physicalDevice in favour of memoryProperties (for tensor)
|
||||
virtual void init(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) {
|
||||
|
||||
if (this->mTensors.size() < 1) {
|
||||
throw std::runtime_error("Kompute OpBase init called with 0 tensors");
|
||||
}
|
||||
|
||||
if (this->mManagesTensors) {
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (tensor->isInit()) {
|
||||
// TODO: Evaluate whether throwing runtime error or just writing error log
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorCreate: Tensor has already been initialized");
|
||||
}
|
||||
else {
|
||||
tensor->init(physicalDevice, device);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (this->mManagesAlgorithm) {
|
||||
this->mAlgorithm->init(device, this->mTensors);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void destroy() {
|
||||
if (!this->mIsInit) {
|
||||
KP_LOG_WARN("Kompute OpBase destroy called but not initialised");
|
||||
}
|
||||
|
||||
if (this->mManagesTensors) {
|
||||
for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
KP_LOG_WARN("Kompute OpBase attempted to free managed tensor "
|
||||
"but tensor is not initialised");
|
||||
} else {
|
||||
KP_LOG_DEBUG("Kompute OpBase freeing tensor");
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
}
|
||||
this->mTensors.clear();
|
||||
}
|
||||
|
||||
if (this->mManagesAlgorithm) {
|
||||
if (this->mAlgorithm && this->mAlgorithm->isInit()) {
|
||||
KP_LOG_DEBUG("Kompute OpBase freeing tensor");
|
||||
this->mAlgorithm->freeMemoryDestroyGPUResources();
|
||||
} else {
|
||||
KP_LOG_WARN("Kompute OpBase attempted to free managed algorithm"
|
||||
"but algorithm is not initialised");
|
||||
}
|
||||
}
|
||||
|
||||
this->mIsInit = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* The record function is intended to only send a record command or run
|
||||
* commands that are expected to record operations that are to be submitted
|
||||
* as a batch into the GPU.
|
||||
*/
|
||||
virtual void record() = 0;
|
||||
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) = 0;
|
||||
|
||||
/**
|
||||
* Pre eval is called before the Sequence has called eval and submitted the commands to
|
||||
|
|
@ -106,19 +151,14 @@ class OpBase
|
|||
virtual void postEval() = 0;
|
||||
|
||||
protected:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::PhysicalDevice>
|
||||
mPhysicalDevice; ///< Vulkan Physical Device
|
||||
std::shared_ptr<vk::Device> mDevice; ///< Vulkan Logical Device
|
||||
std::shared_ptr<vk::CommandBuffer>
|
||||
mCommandBuffer; ///< Vulkan Command Buffer
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>>
|
||||
mTensors; ///< Tensors referenced by operation that can be managed
|
||||
///< optionally by operation
|
||||
bool mFreeTensors = false; ///< Explicit boolean that specifies whether the
|
||||
///< tensors are freed (if they are managed)
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
bool mManagesTensors = false;
|
||||
std::shared_ptr<kp::Algorithm> mAlgorithm;
|
||||
bool mManagesAlgorithm = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
bool mIsInit;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpAlgoBase.hpp"
|
||||
#include "kompute/operations/OpAlgoCreate.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
|
|
@ -19,7 +19,7 @@ namespace kp {
|
|||
* Operation that performs multiplication on two tensors and outputs on third
|
||||
* tensor.
|
||||
*/
|
||||
class OpMult : public OpAlgoBase
|
||||
class OpMult : public OpAlgoCreate
|
||||
{
|
||||
public:
|
||||
/**
|
||||
|
|
@ -45,7 +45,7 @@ class OpMult : public OpAlgoBase
|
|||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
const Workgroup& komputeWorkgroup = {})
|
||||
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup)
|
||||
: OpAlgoCreate(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpMult constructor with params");
|
||||
|
||||
|
|
|
|||
|
|
@ -14,8 +14,6 @@ namespace kp {
|
|||
class OpTensorCopy : public OpBase
|
||||
{
|
||||
public:
|
||||
OpTensorCopy();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation.
|
||||
*
|
||||
|
|
@ -24,10 +22,7 @@ class OpTensorCopy : public OpBase
|
|||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used in the operation.
|
||||
*/
|
||||
OpTensorCopy(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
OpTensorCopy(std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
|
|
@ -37,12 +32,13 @@ class OpTensorCopy : public OpBase
|
|||
/**
|
||||
* Performs basic checks such as ensuring there are at least two tensors provided, that they are initialised and that they are not of type TensorTypes::eStorage.
|
||||
*/
|
||||
void init() override;
|
||||
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* Records the copy commands from the first tensor into all the other tensors provided. Also optionally records a barrier.
|
||||
*/
|
||||
void record() override;
|
||||
void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
|
|
|
|||
71
src/include/kompute/operations/OpTensorCreate.hpp
Normal file
71
src/include/kompute/operations/OpTensorCreate.hpp
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Base Operation which provides the high level interface that Kompute
|
||||
* operations implement in order to perform a set of actions in the GPU.
|
||||
*
|
||||
* Operations can perform actions on tensors, and optionally can also own an
|
||||
* Algorithm with respective parameters. kp::Operations with kp::Algorithms
|
||||
* would inherit from kp::OpBaseAlgo.
|
||||
*/
|
||||
class OpTensorCreate : public OpBase
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
*/
|
||||
OpTensorCreate(std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor for OpTensorCreate class. This OpTensorCreate destructor class should
|
||||
* always be called to destroy and free owned resources unless it is
|
||||
* intended to destroy the resources in the parent class.
|
||||
*/
|
||||
virtual ~OpTensorCreate() override;
|
||||
|
||||
/**
|
||||
* The init function is responsible for setting up all the resources and
|
||||
* should be called after the Operation has been created.
|
||||
*/
|
||||
virtual void init(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* Record runs the core actions to create the tensors. For device tensors
|
||||
* it records a copyCommand to move the data from the staging tensor to the
|
||||
* device tensor. The mapping for staging tensors happens in the init function
|
||||
* not in the record function.
|
||||
*/
|
||||
void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*/
|
||||
virtual void preEval() override;
|
||||
|
||||
/**
|
||||
* Performs a copy back into the main tensor to ensure that the data
|
||||
* contained is the one that is now being stored in the GPU.
|
||||
*/
|
||||
virtual void postEval() override;
|
||||
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
@ -1,9 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
|
@ -14,8 +13,6 @@ namespace kp {
|
|||
class OpTensorSyncDevice : public OpBase
|
||||
{
|
||||
public:
|
||||
OpTensorSyncDevice();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
|
||||
*
|
||||
|
|
@ -24,10 +21,7 @@ class OpTensorSyncDevice : public OpBase
|
|||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used in the operation.
|
||||
*/
|
||||
OpTensorSyncDevice(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
OpTensorSyncDevice(std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
|
|
@ -37,12 +31,13 @@ class OpTensorSyncDevice : public OpBase
|
|||
/**
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
|
||||
*/
|
||||
void init() override;
|
||||
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
|
||||
*/
|
||||
void record() override;
|
||||
void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
|
|
|
|||
|
|
@ -14,8 +14,6 @@ namespace kp {
|
|||
class OpTensorSyncLocal : public OpBase
|
||||
{
|
||||
public:
|
||||
OpTensorSyncLocal();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
|
||||
*
|
||||
|
|
@ -24,10 +22,7 @@ class OpTensorSyncLocal : public OpBase
|
|||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used in the operation.
|
||||
*/
|
||||
OpTensorSyncLocal(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
OpTensorSyncLocal(std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
|
|
@ -37,12 +32,13 @@ class OpTensorSyncLocal : public OpBase
|
|||
/**
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
|
||||
*/
|
||||
void init() override;
|
||||
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory.
|
||||
*/
|
||||
void record() override;
|
||||
void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue