Added baseline functionality including new memory models
This commit is contained in:
parent
9aae5d69db
commit
635fdb02be
22 changed files with 283 additions and 1919 deletions
|
|
@ -6,8 +6,6 @@
|
|||
#include "kompute/Manager.hpp"
|
||||
#include "kompute/Sequence.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
#include "kompute/operations/OpAlgoCreate.hpp"
|
||||
#include "kompute/operations/OpAlgoLhsRhsOut.hpp"
|
||||
#include "kompute/operations/OpMult.hpp"
|
||||
#include "kompute/operations/OpTensorCopy.hpp"
|
||||
#include "kompute/operations/OpTensorSyncDevice.hpp"
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
134
src/Manager.cpp
134
src/Manager.cpp
|
|
@ -132,7 +132,7 @@ Manager::sequence(uint32_t queueIndex)
|
|||
this->mComputeQueues[queueIndex],
|
||||
this->mComputeQueueFamilyIndices[queueIndex]);
|
||||
|
||||
this->mManagedSequences.insert(sq);
|
||||
this->mManagedSequences.push_back(sq);
|
||||
|
||||
return sq;
|
||||
}
|
||||
|
|
@ -337,10 +337,7 @@ Manager::tensor(
|
|||
std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>(
|
||||
kp::Tensor(this->mPhysicalDevice, this->mDevice, data, tensorType));
|
||||
|
||||
if (syncDataToGPU) {
|
||||
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
|
||||
}
|
||||
this->mManagedTensors.insert(tensor);
|
||||
this->mManagedTensors.push_back(tensor);
|
||||
|
||||
return tensor;
|
||||
}
|
||||
|
|
@ -363,134 +360,9 @@ Manager::algorithm(
|
|||
specializationConstants,
|
||||
pushConstants));
|
||||
|
||||
this->mManagedAlgorithms.insert(algorithm);
|
||||
this->mManagedAlgorithms.push_back(algorithm);
|
||||
|
||||
return algorithm;
|
||||
}
|
||||
|
||||
void
|
||||
Manager::rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
bool syncDataToGPU)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild triggered");
|
||||
for (std::shared_ptr<Tensor> tensor : tensors) {
|
||||
|
||||
// False syncData to run all tensors at once instead one by one
|
||||
this->rebuild(tensor, false);
|
||||
}
|
||||
|
||||
if (syncDataToGPU) {
|
||||
this->evalOpDefault<OpTensorSyncDevice>(tensors);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::rebuild(std::shared_ptr<kp::Tensor> tensor,
|
||||
bool syncDataToGPU)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Tensor triggered");
|
||||
|
||||
if (tensor->isInit()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
|
||||
tensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
std::set<std::shared_ptr<Tensor>>::iterator it =
|
||||
this->mManagedTensors.find(tensor);
|
||||
if (it == this->mManagedTensors.end()) {
|
||||
this->mManagedTensors.insert(tensor);
|
||||
}
|
||||
|
||||
if (syncDataToGPU) {
|
||||
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::destroy(std::shared_ptr<kp::Tensor> tensor)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Tensor triggered");
|
||||
|
||||
if (tensor->isInit()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
|
||||
// TODO: Confirm not limiting destroying tensors owned by this manager allowed
|
||||
std::set<std::shared_ptr<Tensor>>::iterator it =
|
||||
this->mManagedTensors.find(tensor);
|
||||
|
||||
if (it != this->mManagedTensors.end()) {
|
||||
this->mManagedTensors.erase(tensor);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::destroy(std::vector<std::shared_ptr<kp::Tensor>> tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Tensor triggered");
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : tensors) {
|
||||
this->destroy(tensor);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::destroy(std::vector<std::shared_ptr<kp::Sequence>> sequences)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Sequence triggered");
|
||||
|
||||
for (std::shared_ptr<kp::Sequence> sequence : sequences) {
|
||||
this->destroy(sequence);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::destroy(std::shared_ptr<kp::Sequence> sequence)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Sequence triggered");
|
||||
|
||||
// Inefficient but required to delete by value
|
||||
// Depending on the amount of named sequences created may be worth creating
|
||||
// a set to ensure efficient delete.
|
||||
for (std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator it = this->mManagedSequences.begin(); it != this->mManagedSequences.end(); it++) {
|
||||
if (it->second == sequence) {
|
||||
this->mManagedSequences.erase(it);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (sequence->isInit()) {
|
||||
sequence->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::destroy(const std::string& sequenceName)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Sequence triggered");
|
||||
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator
|
||||
found = this->mManagedSequences.find(sequenceName);
|
||||
|
||||
if (found != this->mManagedSequences.end()) {
|
||||
// We don't call destroy(sequence) as erasing sequence by name more efficient
|
||||
if (found->second->isInit()) {
|
||||
found->second->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
this->mManagedSequences.erase(sequenceName);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::destroy(const std::vector<std::string>& sequenceNames)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Sequence triggered");
|
||||
|
||||
for (const std::string& sequenceName : sequenceNames) {
|
||||
this->destroy(sequenceName);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,51 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/operations/OpAlgoCreate.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpAlgoCreate::OpAlgoCreate(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::shared_ptr<Algorithm> algorithm)
|
||||
: OpBase(tensors, algorithm)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoCreate constructor");
|
||||
|
||||
this->mManagesAlgorithm = true;
|
||||
this->mManagesTensors = false;
|
||||
}
|
||||
|
||||
OpAlgoCreate::~OpAlgoCreate()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoCreate destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoCreate::init(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) {
|
||||
|
||||
KP_LOG_DEBUG("Kompute OpAlgoCreate init started");
|
||||
|
||||
// Explicitly calling top level function to create algo
|
||||
OpBase::init(physicalDevice, device);
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoCreate::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoCreate record called");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoCreate::preEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoCreate preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoCreate::postEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoCreate postSubmit called");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -4,14 +4,11 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
OpAlgoDispatch::OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::shared_ptr<Algorithm> algorithm)
|
||||
: OpBase(tensors, algorithm)
|
||||
OpAlgoDispatch::OpAlgoDispatch(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::shared_ptr<kp::Algorithm>& algorithm)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
|
||||
|
||||
this->mManagesAlgorithm = false;
|
||||
this->mManagesTensors = false;
|
||||
}
|
||||
|
||||
OpAlgoDispatch::~OpAlgoDispatch()
|
||||
|
|
@ -19,13 +16,6 @@ OpAlgoDispatch::~OpAlgoDispatch()
|
|||
KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoDispatch::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch init called");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoDispatch::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,118 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/operations/OpAlgoLhsRhsOut.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpAlgoLhsRhsOut::OpAlgoLhsRhsOut()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base");
|
||||
}
|
||||
|
||||
OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::shared_ptr<Algorithm> algorithm)
|
||||
// The inheritance is initialised with the copyOutputData to false given that
|
||||
// this depencendant class handles the transfer of data via staging buffers in
|
||||
// a granular way.
|
||||
: OpAlgoCreate(tensors, algorithm)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params");
|
||||
}
|
||||
|
||||
OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoLhsRhsOut::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut init called");
|
||||
|
||||
if (this->mTensors.size() < 3) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoLhsRhsOut called with less than 1 tensor");
|
||||
} else if (this->mTensors.size() > 3) {
|
||||
KP_LOG_WARN(
|
||||
"Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors");
|
||||
}
|
||||
|
||||
this->mTensorLHS = this->mTensors[0];
|
||||
this->mTensorRHS = this->mTensors[1];
|
||||
this->mTensorOutput = this->mTensors[2];
|
||||
|
||||
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
|
||||
this->mTensorOutput->isInit())) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. "
|
||||
"LHS: " +
|
||||
std::to_string(this->mTensorLHS->isInit()) +
|
||||
" RHS: " + std::to_string(this->mTensorRHS->isInit()) +
|
||||
" Output: " + std::to_string(this->mTensorOutput->isInit()));
|
||||
}
|
||||
|
||||
if (!(this->mTensorLHS->size() == this->mTensorRHS->size() &&
|
||||
this->mTensorRHS->size() == this->mTensorOutput->size())) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size "
|
||||
"LHS: " +
|
||||
std::to_string(this->mTensorLHS->size()) +
|
||||
" RHS: " + std::to_string(this->mTensorRHS->size()) +
|
||||
" Output: " + std::to_string(this->mTensorOutput->size()));
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data");
|
||||
|
||||
std::vector<uint32_t> shaderFileData = this->fetchSpirvBinaryData();
|
||||
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoLhsRhsOut::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
this->mTensorLHS->recordBufferMemoryBarrier(
|
||||
this->mCommandBuffer,
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
this->mTensorRHS->recordBufferMemoryBarrier(
|
||||
this->mCommandBuffer,
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
|
||||
this->mAlgorithm->recordDispatch(this->mKomputeWorkgroup[0],
|
||||
this->mKomputeWorkgroup[1],
|
||||
this->mKomputeWorkgroup[2]);
|
||||
|
||||
// Barrier to ensure the shader code is executed before buffer read
|
||||
this->mTensorOutput->recordBufferMemoryBarrier(
|
||||
this->mCommandBuffer,
|
||||
vk::AccessFlagBits::eShaderWrite,
|
||||
vk::AccessFlagBits::eTransferRead,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eTransfer);
|
||||
|
||||
if (this->mTensorOutput->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensorOutput->recordCopyFromDeviceToStaging(this->mCommandBuffer,
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoLhsRhsOut::postEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called");
|
||||
|
||||
this->mTensorOutput->mapDataFromHostMemory();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -3,42 +3,21 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
OpTensorCopy::OpTensorCopy(std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(tensors, nullptr)
|
||||
OpTensorCopy::OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy constructor with params");
|
||||
|
||||
this->mManagesTensors = false;
|
||||
this->mManagesAlgorithm = false;
|
||||
}
|
||||
|
||||
OpTensorCopy::~OpTensorCopy()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCopy::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy init called");
|
||||
|
||||
if (this->mTensors.size() < 2) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorCopy called with less than 2 tensor");
|
||||
}
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorCopy tensor parameter has not been initialized");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
|
||||
throw std::runtime_error("Kompute OpTensorCopy tensor parameter is "
|
||||
"of TensorTypes::eStorage and hence "
|
||||
"cannot be used to receive or pass data.");
|
||||
}
|
||||
}
|
||||
this->mTensors = tensors;
|
||||
}
|
||||
|
||||
OpTensorCopy::~OpTensorCopy()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -1,46 +0,0 @@
|
|||
|
||||
#include "kompute/operations/OpTensorCreate.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpTensorCreate::OpTensorCreate(
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
: OpBase(tensors, nullptr)
|
||||
{
|
||||
KP_LOG_DEBUG("Compute OpTensorCreate constructor with params");
|
||||
this->mManagesTensors = true;
|
||||
}
|
||||
|
||||
OpTensorCreate::~OpTensorCreate()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCreate destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCreate::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCreate init called");
|
||||
|
||||
OpBase::init(physicalDevice, device);
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCreate::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCreate record called");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCreate::preEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCreate preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCreate::postEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCreate postEval called");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -4,10 +4,16 @@
|
|||
namespace kp {
|
||||
|
||||
OpTensorSyncDevice::OpTensorSyncDevice(
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(tensors, nullptr)
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor with params");
|
||||
|
||||
if (tensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncDevice called with less than 1 tensor");
|
||||
}
|
||||
|
||||
this->mTensors = tensors;
|
||||
}
|
||||
|
||||
OpTensorSyncDevice::~OpTensorSyncDevice()
|
||||
|
|
@ -15,31 +21,6 @@ OpTensorSyncDevice::~OpTensorSyncDevice()
|
|||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncDevice::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice init called");
|
||||
|
||||
if (this->mTensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncDevice called with less than 1 tensor");
|
||||
}
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
throw std::runtime_error("Kompute OpTensorSyncDevice: Tensor param "
|
||||
"has not been initialized");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
|
||||
KP_LOG_WARN(
|
||||
"Kompute OpTensorSyncLocal tensor parameter is of type "
|
||||
"TensorTypes::eStorage and hence cannot be used to receive or "
|
||||
"pass data.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncDevice::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -6,13 +6,16 @@
|
|||
namespace kp {
|
||||
|
||||
OpTensorSyncLocal::OpTensorSyncLocal(
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(tensors, nullptr)
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor with params");
|
||||
|
||||
this->mManagesTensors = false;
|
||||
this->mManagesAlgorithm = false;
|
||||
if (tensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncLocal called with less than 1 tensor");
|
||||
}
|
||||
|
||||
this->mTensors = tensors;
|
||||
}
|
||||
|
||||
OpTensorSyncLocal::~OpTensorSyncLocal()
|
||||
|
|
@ -20,25 +23,6 @@ OpTensorSyncLocal::~OpTensorSyncLocal()
|
|||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal init called");
|
||||
|
||||
if (this->mTensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncLocal called with less than 1 tensor");
|
||||
}
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncLocal: Tensor has not been initialized");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -26,96 +26,60 @@ Sequence::~Sequence()
|
|||
this->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
|
||||
bool
|
||||
void
|
||||
Sequence::begin()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute sequence called BEGIN");
|
||||
|
||||
if (this->isRecording()) {
|
||||
KP_LOG_WARN("Kompute Sequence begin called when already recording");
|
||||
return false;
|
||||
KP_LOG_DEBUG("Kompute Sequence begin called when already recording");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->isRunning()) {
|
||||
KP_LOG_WARN(
|
||||
"Kompute Sequence begin called when sequence still running");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!this->mCommandPool) {
|
||||
throw std::runtime_error("Kompute Sequence command pool is null");
|
||||
}
|
||||
|
||||
if (this->mOperations.size()) {
|
||||
KP_LOG_INFO("Kompute Sequence clearing previous operations");
|
||||
this->mOperations.clear();
|
||||
throw std::runtime_error("Kompute Sequence begin called when sequence still running");
|
||||
}
|
||||
|
||||
if (!this->mRecording) {
|
||||
KP_LOG_INFO("Kompute Sequence command recording BEGIN");
|
||||
this->mCommandBuffer->begin(vk::CommandBufferBeginInfo());
|
||||
this->mRecording = true;
|
||||
} else {
|
||||
KP_LOG_WARN("Kompute Sequence attempted to start command recording "
|
||||
"but recording already started");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
void
|
||||
Sequence::end()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence calling END");
|
||||
|
||||
if (!this->isRecording()) {
|
||||
KP_LOG_WARN("Kompute Sequence end called when not recording");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!this->mCommandPool) {
|
||||
throw std::runtime_error("Kompute Sequence command pool is null");
|
||||
}
|
||||
|
||||
if (this->mRecording) {
|
||||
return;
|
||||
}
|
||||
else {
|
||||
KP_LOG_INFO("Kompute Sequence command recording END");
|
||||
this->mCommandBuffer->end();
|
||||
this->mRecording = false;
|
||||
} else {
|
||||
KP_LOG_WARN("Kompute Sequence attempted to end command recording but "
|
||||
"recording not started");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::eval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute sequence EVAL BEGIN");
|
||||
|
||||
bool evalResult = this->evalAsync();
|
||||
if (!evalResult) {
|
||||
KP_LOG_DEBUG("Kompute sequence EVAL FAILURE");
|
||||
return false;
|
||||
}
|
||||
|
||||
evalResult = this->evalAwait();
|
||||
|
||||
KP_LOG_DEBUG("Kompute sequence EVAL SUCCESS");
|
||||
|
||||
return evalResult;
|
||||
return this->evalAsync()->evalAwait();
|
||||
}
|
||||
|
||||
bool
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::evalAsync()
|
||||
{
|
||||
if (this->isRecording()) {
|
||||
KP_LOG_WARN("Kompute Sequence evalAsync called when still recording");
|
||||
return false;
|
||||
this->end();
|
||||
}
|
||||
if (this->mIsRunning) {
|
||||
KP_LOG_WARN("Kompute Sequence evalAsync called when an eval async was "
|
||||
throw std::runtime_error("Kompute Sequence evalAsync called when an eval async was "
|
||||
"called without successful wait");
|
||||
return false;
|
||||
}
|
||||
|
||||
this->mIsRunning = true;
|
||||
|
|
@ -134,15 +98,15 @@ Sequence::evalAsync()
|
|||
|
||||
this->mComputeQueue->submit(1, &submitInfo, this->mFence);
|
||||
|
||||
return true;
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
bool
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::evalAwait(uint64_t waitFor)
|
||||
{
|
||||
if (!this->mIsRunning) {
|
||||
KP_LOG_WARN("Kompute Sequence evalAwait called without existing eval");
|
||||
return false;
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
vk::Result result =
|
||||
|
|
@ -153,15 +117,15 @@ Sequence::evalAwait(uint64_t waitFor)
|
|||
this->mIsRunning = false;
|
||||
|
||||
if (result == vk::Result::eTimeout) {
|
||||
KP_LOG_WARN("Kompute Sequence evalAwait timed out");
|
||||
return false;
|
||||
KP_LOG_WARN("Kompute Sequence evalAwait reached timeout of {}", waitFor);
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < this->mOperations.size(); i++) {
|
||||
this->mOperations[i]->postEval();
|
||||
}
|
||||
|
||||
return true;
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -221,6 +185,22 @@ Sequence::freeMemoryDestroyGPUResources()
|
|||
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::record(std::shared_ptr<OpBase> op)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
this->begin();
|
||||
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute Sequence running record on OpBase derived class instance");
|
||||
op->record(this->mCommandBuffer);
|
||||
|
||||
this->mOperations.push_back(op);
|
||||
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::createCommandPool()
|
||||
{
|
||||
|
|
|
|||
|
|
@ -69,147 +69,6 @@ class Manager
|
|||
*/
|
||||
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0);
|
||||
|
||||
/**
|
||||
* Function that evaluates operation against named sequence.
|
||||
*
|
||||
* @param tensors The tensors to be used in the operation recorded
|
||||
* @param sequenceName The name of the sequence to be retrieved or created
|
||||
* @param TArgs Template parameters that will be used to initialise
|
||||
* Operation to allow for extensible configurations on initialisation
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
void evalOp(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::string sequenceName,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp triggered");
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
this->sequence(sequenceName);
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
|
||||
sq->begin();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp running sequence RECORD");
|
||||
sq->record<T>(tensors, std::forward<TArgs>(params)...);
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp running sequence END");
|
||||
sq->end();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp running sequence EVAL");
|
||||
sq->eval();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp running sequence SUCCESS");
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that evaluates operation against a newly created sequence.
|
||||
*
|
||||
* @param tensors The tensors to be used in the operation recorded
|
||||
* @param TArgs Template parameters that will be used to initialise
|
||||
* Operation to allow for extensible configurations on initialisation
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
void evalOpDefault(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp Default triggered");
|
||||
this->mCurrentSequenceIndex++;
|
||||
this->evalOp<T>(
|
||||
tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that evaluates operation against named sequence asynchronously.
|
||||
*
|
||||
* @param tensors The tensors to be used in the operation recorded
|
||||
* @param sequenceName The name of the sequence to be retrieved or created
|
||||
* @param params Template parameters that will be used to initialise
|
||||
* Operation to allow for extensible configurations on initialisation
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
void evalOpAsync(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::string sequenceName,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsync triggered");
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
this->sequence(sequenceName);
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN");
|
||||
sq->begin();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence RECORD");
|
||||
sq->record<T>(tensors, std::forward<TArgs>(params)...);
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence END");
|
||||
sq->end();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence EVAL");
|
||||
sq->evalAsync();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence SUCCESS");
|
||||
}
|
||||
|
||||
/**
|
||||
* Operation that evaluates operation against default sequence
|
||||
* asynchronously.
|
||||
*
|
||||
* @param tensors The tensors to be used in the operation recorded
|
||||
* @param params Template parameters that will be used to initialise
|
||||
* Operation to allow for extensible configurations on initialisation
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
void evalOpAsyncDefault(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered");
|
||||
this->mCurrentSequenceIndex++;
|
||||
this->evalOpAsync<T>(
|
||||
tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
|
||||
}
|
||||
|
||||
/**
|
||||
* Operation that awaits for named sequence to finish.
|
||||
*
|
||||
* @param sequenceName The name of the sequence to wait for termination
|
||||
* @param waitFor The amount of time to wait before timing out
|
||||
*/
|
||||
void evalOpAwait(std::string sequenceName, uint64_t waitFor = UINT64_MAX)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAwait triggered with sequence {}",
|
||||
sequenceName);
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator
|
||||
found = this->mManagedSequences.find(sequenceName);
|
||||
|
||||
if (found != this->mManagedSequences.end()) {
|
||||
if (std::shared_ptr<kp::Sequence> sq = found->second) {
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAwait running sequence "
|
||||
"Sequence EVAL AWAIT");
|
||||
if (sq->isRunning()) {
|
||||
sq->evalAwait(waitFor);
|
||||
}
|
||||
}
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute Manager evalOpAwait running sequence SUCCESS");
|
||||
} else {
|
||||
KP_LOG_ERROR("Kompute Manager evalOpAwait Sequence not found");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Operation that awaits for default sequence to finish.
|
||||
*
|
||||
* @param tensors The tensors to be used in the operation recorded
|
||||
* @param params Template parameters that will be used to initialise
|
||||
* Operation to allow for extensible configurations on initialisation
|
||||
*/
|
||||
void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered");
|
||||
this->evalOpAwait(KP_DEFAULT_SESSION, waitFor);
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that simplifies the common workflow of tensor creation and
|
||||
* initialization. It will take the constructor parameters for a Tensor
|
||||
|
|
@ -233,80 +92,6 @@ class Manager
|
|||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
|
||||
/**
|
||||
* Function that simplifies the common workflow of tensor initialisation. It
|
||||
* will take the constructor parameters for a Tensor and will use it to
|
||||
* create a new Tensor. The tensor memory will then be managed and owned by
|
||||
* the manager.
|
||||
*
|
||||
* @param tensors Array of tensors to rebuild
|
||||
* @param syncDataToGPU Whether to sync the data to GPU memory
|
||||
*/
|
||||
void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
bool syncDataToGPU = true);
|
||||
|
||||
/**
|
||||
* Function that simplifies the common workflow of tensor initialisation. It
|
||||
* will take the constructor parameters for a Tensor and will use it to
|
||||
* create a new Tensor. The tensor memory will then be managed and owned by
|
||||
* the manager.
|
||||
*
|
||||
* @param tensor Single tensor to rebuild
|
||||
* @param syncDataToGPU Whether to sync the data to GPU memory
|
||||
*/
|
||||
void rebuild(std::shared_ptr<kp::Tensor> tensor,
|
||||
bool syncDataToGPU = true);
|
||||
|
||||
/**
|
||||
* Destroy owned Vulkan GPU resources and free GPU memory for
|
||||
* single tensor.
|
||||
*
|
||||
* @param tensor Single tensor to destroy
|
||||
*/
|
||||
void destroy(std::shared_ptr<kp::Tensor> tensor);
|
||||
|
||||
/**
|
||||
* Destroy owned Vulkan GPU resources and free GPU memory for
|
||||
* vector of tensors.
|
||||
*
|
||||
* @param tensors Vector of tensors to destroy
|
||||
*/
|
||||
void destroy(std::vector<std::shared_ptr<kp::Tensor>> tensors);
|
||||
|
||||
/**
|
||||
* Destroy owned Vulkan GPU resources and free GPU memory for
|
||||
* vector of sequences. Destroying by sequence name is more efficient
|
||||
* and hence recommended instead of by object.
|
||||
*
|
||||
* @param sequences Vector for shared ptrs with sequences to destroy
|
||||
*/
|
||||
void destroy(std::vector<std::shared_ptr<kp::Sequence>> sequences);
|
||||
|
||||
/**
|
||||
* Destroy owned Vulkan GPU resources and free GPU memory for
|
||||
* single sequence. Destroying by sequence name is more efficient
|
||||
* and hence recommended instead of by object.
|
||||
*
|
||||
* @param sequence Single sequence to destroy
|
||||
*/
|
||||
void destroy(std::shared_ptr<kp::Sequence> sequence);
|
||||
|
||||
/**
|
||||
* Destroy owned Vulkan GPU resources and free GPU memory for
|
||||
* sequence by name.
|
||||
*
|
||||
* @param sequenceName Single name of named sequence to destroy
|
||||
*/
|
||||
void destroy(const std::string& sequenceName);
|
||||
|
||||
/**
|
||||
* Destroy owned Vulkan GPU resources and free GPU memory for
|
||||
* sequences using vector of named sequence names.
|
||||
*
|
||||
* @param sequenceNames Vector of sequence names to destroy
|
||||
*/
|
||||
void destroy(const std::vector<std::string>& sequenceNames);
|
||||
|
||||
private:
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::Instance> mInstance = nullptr;
|
||||
|
|
@ -317,10 +102,9 @@ class Manager
|
|||
bool mFreeDevice = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::set<std::weak_ptr<Tensor>> mManagedTensors;
|
||||
std::set<std::weak_ptr<Sequence>> mManagedSequences;
|
||||
std::set<std::weak_ptr<Algorithm>> mManagedAlgorithms;
|
||||
//std::unique_ptr<Sequence> mDefaultSequence;
|
||||
std::vector<std::weak_ptr<Tensor>> mManagedTensors;
|
||||
std::vector<std::weak_ptr<Sequence>> mManagedSequences;
|
||||
std::vector<std::weak_ptr<Algorithm>> mManagedAlgorithms;
|
||||
|
||||
std::vector<uint32_t> mComputeQueueFamilyIndices;
|
||||
std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ namespace kp {
|
|||
/**
|
||||
* Container of operations that can be sent to GPU as batch
|
||||
*/
|
||||
class Sequence
|
||||
class Sequence: public std::enable_shared_from_this<Sequence>
|
||||
{
|
||||
public:
|
||||
/**
|
||||
|
|
@ -31,13 +31,30 @@ class Sequence
|
|||
*/
|
||||
~Sequence();
|
||||
|
||||
/**
|
||||
* Record function for operation to be added to the GPU queue in batch. This
|
||||
* template requires classes to be derived from the OpBase class. This
|
||||
* function also requires the Sequence to be recording, otherwise it will
|
||||
* not be able to add the operation.
|
||||
*
|
||||
* @param op Operation (an instance of a class derived from OpBase) to record
|
||||
* @return Shared pointer to a Sequence; presumably this sequence, to allow chained calls (TODO confirm)
|
||||
*/
|
||||
std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
|
||||
|
||||
/**
|
||||
* Clear function clears all operations currently recorded and starts recording again.
|
||||
*/
|
||||
void clear();
|
||||
|
||||
/**
|
||||
* Begins recording commands for commands to be submitted into the command
|
||||
* buffer.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
bool begin();
|
||||
void begin();
|
||||
|
||||
/**
|
||||
* Ends the recording and stops recording commands when the record command
|
||||
|
|
@ -45,7 +62,7 @@ class Sequence
|
|||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
bool end();
|
||||
void end();
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
|
|
@ -53,7 +70,7 @@ class Sequence
|
|||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
bool eval();
|
||||
std::shared_ptr<Sequence> eval();
|
||||
|
||||
/**
|
||||
* Eval Async sends all the recorded and stored operations in the vector of
|
||||
|
|
@ -62,7 +79,7 @@ class Sequence
|
|||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
bool evalAsync();
|
||||
std::shared_ptr<Sequence> evalAsync();
|
||||
|
||||
/**
|
||||
* Eval Await waits for the fence to finish processing and then once it
|
||||
|
|
@ -71,7 +88,7 @@ class Sequence
|
|||
* @param waitFor Number of milliseconds to wait before timing out.
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
bool evalAwait(uint64_t waitFor = UINT64_MAX);
|
||||
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
|
||||
|
||||
/**
|
||||
* Returns true if the sequence is currently recording.
|
||||
|
|
@ -94,55 +111,6 @@ class Sequence
|
|||
*/
|
||||
void freeMemoryDestroyGPUResources();
|
||||
|
||||
/**
|
||||
* Record function for operation to be added to the GPU queue in batch. This
|
||||
* template requires classes to be derived from the OpBase class. This
|
||||
* function also requires the Sequence to be recording, otherwise it will
|
||||
* not be able to add the operation.
|
||||
*
|
||||
* @param tensors Vector of tensors to use for the operation
|
||||
* @param TArgs Template parameters that are used to initialise operation
|
||||
* which allows for extensible configurations on initialisation.
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
bool record(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
{
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
if (!this->isRecording()) {
|
||||
KP_LOG_ERROR(
|
||||
"Kompute sequence record attempted when not record BEGIN");
|
||||
return false;
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
T* op = new T(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mCommandBuffer,
|
||||
tensors,
|
||||
std::forward<TArgs>(params)...);
|
||||
|
||||
OpBase* baseOp = dynamic_cast<OpBase*>(op);
|
||||
|
||||
std::unique_ptr<OpBase> baseOpPtr{ baseOp };
|
||||
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute Sequence running init on OpBase derived class instance");
|
||||
baseOpPtr->init();
|
||||
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute Sequence running record on OpBase derived class instance");
|
||||
baseOpPtr->record();
|
||||
|
||||
mOperations.push_back(std::move(baseOpPtr));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
|
||||
|
|
@ -158,7 +126,7 @@ class Sequence
|
|||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
vk::Fence mFence;
|
||||
std::vector<std::unique_ptr<OpBase>> mOperations;
|
||||
std::vector<std::shared_ptr<OpBase>> mOperations;
|
||||
|
||||
// State
|
||||
bool mRecording = false;
|
||||
|
|
|
|||
|
|
@ -1,77 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/shaders/shaderopmult.hpp"
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that provides a general abstraction that simplifies the use of
|
||||
* algorithm and parameter components which can be used with shaders.
|
||||
* By default it enables the user to provide a dynamic number of tensors
|
||||
* which are then passed as inputs.
|
||||
*/
|
||||
class OpAlgoCreate : public OpBase
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param algorithm Algorithm to be used in this operation
|
||||
*/
|
||||
OpAlgoCreate(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::shared_ptr<kp::Algorithm> algorithm);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
virtual ~OpAlgoCreate() override;
|
||||
|
||||
|
||||
virtual void init(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
* the barriers that ensure the memory has been copied before going in and
|
||||
* out of the shader, as well as the dispatch operation that sends the
|
||||
* shader processing to the gpu. This function also records the GPU memory
|
||||
* copy of the output data for the staging buffer so it can be read by the
|
||||
* host.
|
||||
*/
|
||||
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
|
||||
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*/
|
||||
virtual void preEval() override;
|
||||
|
||||
/**
|
||||
* Executes after the recorded commands are submitted, and performs a copy
|
||||
* of the GPU Device memory into the staging buffer so the output data can
|
||||
* be retrieved.
|
||||
*/
|
||||
virtual void postEval() override;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
|
|
@ -17,51 +17,8 @@ class OpAlgoDispatch : public OpBase
|
|||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format)
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::shared_ptr<kp::Algorithm> algorithm);
|
||||
|
||||
/**
|
||||
* Constructor that enables a file to be passed to the operation with
|
||||
* the contents of the shader. This can be either in raw format or in
|
||||
* compiled SPIR-V binary format.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format)
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::shared_ptr<kp::Algorithm>& algorithm,
|
||||
std::string shaderFilePath);
|
||||
|
||||
/**
|
||||
* Constructor that enables raw shader data to be passed to the main operation
|
||||
* which can be either in raw shader glsl code or in compiled SPIR-V binary.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::shared_ptr<kp::Algorithm>& algorithm,
|
||||
const std::vector<uint32_t>& shaderDataRaw);
|
||||
OpAlgoDispatch(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::shared_ptr<kp::Algorithm>& algorithm);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
|
|
@ -69,15 +26,6 @@ class OpAlgoDispatch : public OpBase
|
|||
*/
|
||||
virtual ~OpAlgoDispatch() override;
|
||||
|
||||
/**
|
||||
* The init function is responsible for the initialisation of the algorithm
|
||||
* component based on the parameters specified, and allows for extensibility
|
||||
* on the options provided. Further dependent classes can perform more
|
||||
* specific checks such as ensuring tensors provided are initialised, etc.
|
||||
*/
|
||||
virtual void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
* the barriers that ensure the memory has been copied before going in and
|
||||
|
|
@ -88,7 +36,6 @@ class OpAlgoDispatch : public OpBase
|
|||
*/
|
||||
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
|
||||
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*/
|
||||
|
|
@ -101,6 +48,10 @@ class OpAlgoDispatch : public OpBase
|
|||
*/
|
||||
virtual void postEval() override;
|
||||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -1,78 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpAlgoCreate.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation base class to simplify the creation of operations that require
|
||||
* right hand and left hand side datapoints together with a single output.
|
||||
* The expected data passed is two input tensors and one output tensor.
|
||||
*/
|
||||
class OpAlgoLhsRhsOut : public OpAlgoCreate
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoLhsRhsOut(std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::shared_ptr<Algorithm> algorithm);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
virtual ~OpAlgoLhsRhsOut() override;
|
||||
|
||||
/**
|
||||
* The init function is responsible for ensuring that all of the tensors
|
||||
* provided are aligned with requirements such as LHS, RHS and Output
|
||||
* tensors, and creates the algorithm component which processes the
|
||||
* computation.
|
||||
*/
|
||||
virtual void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
* the barriers that ensure the memory has been copied before going in and
|
||||
* out of the shader, as well as the dispatch operation that sends the
|
||||
* shader processing to the gpu. This function also records the GPU memory
|
||||
* copy of the output data for the staging buffer so it can be read by the
|
||||
* host.
|
||||
*/
|
||||
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Executes after the recorded commands are submitted, and performs a copy
|
||||
* of the GPU Device memory into the staging buffer so the output data can
|
||||
* be retrieved.
|
||||
*/
|
||||
virtual void postEval() override;
|
||||
|
||||
protected:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<Tensor> mTensorLHS; ///< Reference to the parameter used in the left hand side equation of the shader
|
||||
std::shared_ptr<Tensor> mTensorRHS; ///< Reference to the parameter used in the right hand side equation of the shader
|
||||
std::shared_ptr<Tensor> mTensorOutput; ///< Reference to the parameter used in the output of the shader and will be copied with a staging vector
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
|
|
@ -19,25 +19,6 @@ class OpBase
|
|||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param algorithm Algorithm to be used in this operation
|
||||
*/
|
||||
OpBase(std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::shared_ptr<Algorithm> algorithm)
|
||||
{
|
||||
KP_LOG_DEBUG("Compute OpBase constructor with params");
|
||||
this->mTensors = tensors;
|
||||
this->mAlgorithm = algorithm;
|
||||
this->mIsInit = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default destructor for OpBase class. This OpBase destructor class should
|
||||
* always be called to destroy and free owned resources unless it is
|
||||
|
|
@ -46,81 +27,6 @@ class OpBase
|
|||
virtual ~OpBase()
{
    KP_LOG_DEBUG("Kompute OpBase destructor started");
    // Within a destructor a virtual call resolves to this class's own
    // version, so the call is written with its qualified name to make
    // that explicit.
    OpBase::destroy();
}
|
||||
|
||||
virtual std::shared_ptr<kp::Algorithm> algorithm() {
|
||||
return this->mAlgorithm;
|
||||
}
|
||||
|
||||
virtual std::vector<std::shared_ptr<kp::Tensor>> tensors()
{
    // Accessor: returns (by value) the vector of tensor pointers held by
    // this operation.
    return mTensors;
}
|
||||
|
||||
virtual bool isInit() {
|
||||
return this->mIsInit;
|
||||
}
|
||||
|
||||
/**
|
||||
* The init function is responsible for setting up all the resources and
|
||||
* should be called after the Operation has been created.
|
||||
*/
|
||||
// TODO: Potentially remove physicalDevice in favour of memoryProperties (for tensor)
|
||||
virtual void init(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) {
|
||||
|
||||
if (this->mTensors.size() < 1) {
|
||||
throw std::runtime_error("Kompute OpBase init called with 0 tensors");
|
||||
}
|
||||
|
||||
if (this->mManagesTensors) {
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (tensor->isInit()) {
|
||||
// TODO: Evaluate whether throwing runtime error or just writing error log
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorCreate: Tensor has already been initialized");
|
||||
}
|
||||
else {
|
||||
tensor->init(physicalDevice, device);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (this->mManagesAlgorithm) {
|
||||
this->mAlgorithm->init(device, this->mTensors);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void destroy() {
|
||||
if (!this->mIsInit) {
|
||||
KP_LOG_WARN("Kompute OpBase destroy called but not initialised");
|
||||
}
|
||||
|
||||
if (this->mManagesTensors) {
|
||||
for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
KP_LOG_WARN("Kompute OpBase attempted to free managed tensor "
|
||||
"but tensor is not initialised");
|
||||
} else {
|
||||
KP_LOG_DEBUG("Kompute OpBase freeing tensor");
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
}
|
||||
this->mTensors.clear();
|
||||
}
|
||||
|
||||
if (this->mManagesAlgorithm) {
|
||||
if (this->mAlgorithm && this->mAlgorithm->isInit()) {
|
||||
KP_LOG_DEBUG("Kompute OpBase freeing tensor");
|
||||
this->mAlgorithm->freeMemoryDestroyGPUResources();
|
||||
} else {
|
||||
KP_LOG_WARN("Kompute OpBase attempted to free managed algorithm"
|
||||
"but algorithm is not initialised");
|
||||
}
|
||||
}
|
||||
|
||||
this->mIsInit = false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -149,16 +55,6 @@ class OpBase
|
|||
* provided by the user.
|
||||
*/
|
||||
virtual void postEval() = 0;
|
||||
|
||||
protected:
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
bool mManagesTensors = false;
|
||||
std::shared_ptr<kp::Algorithm> mAlgorithm;
|
||||
bool mManagesAlgorithm = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
bool mIsInit;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -4,14 +4,12 @@
|
|||
|
||||
#include <string>

#include "kompute/Core.hpp"
|
||||
|
||||
#if RELEASE
|
||||
#include "kompute/shaders/shaderopmult.hpp"
|
||||
#endif
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpAlgoCreate.hpp"
|
||||
#include "kompute/operations/OpAlgoDispatch.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
|
|
@ -19,15 +17,9 @@ namespace kp {
|
|||
* Operation that performs multiplication on two tensors and outputs to a third
|
||||
* tensor.
|
||||
*/
|
||||
class OpMult : public OpAlgoCreate
|
||||
class OpMult : public OpAlgoDispatch
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpMult() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
|
|
@ -40,46 +32,30 @@ class OpMult : public OpAlgoCreate
|
|||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
const Workgroup& komputeWorkgroup = {})
|
||||
: OpAlgoCreate(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup)
|
||||
OpMult(std::vector<std::shared_ptr<Tensor>> tensors, std::shared_ptr<Algorithm> algorithm)
|
||||
: OpAlgoDispatch(tensors, algorithm)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpMult constructor with params");
|
||||
|
||||
#ifndef RELEASE
|
||||
this->mShaderFilePath = "shaders/glsl/opmult.comp.spv";
|
||||
#endif
|
||||
}
|
||||
if (tensors.size() != 3) {
|
||||
// The count must be converted with std::to_string: adding an integer to a
// string literal offsets the char pointer instead of appending the number.
throw std::runtime_error("Kompute OpMult expected 3 tensors but got " + std::to_string(tensors.size()));
|
||||
}
|
||||
|
||||
#if RELEASE
|
||||
/**
|
||||
* If RELEASE=1 it will be using the static version of the shader which is
|
||||
* loaded using this file directly. Otherwise it should not override the function.
|
||||
*/
|
||||
std::vector<uint32_t> fetchSpirvBinaryData() override
|
||||
{
|
||||
KP_LOG_WARN(
|
||||
"Kompute OpMult Running shaders directly from header");
|
||||
|
||||
return std::vector<uint32_t>(
|
||||
std::vector<uint32_t> spirv(
|
||||
(uint32_t*)shader_data::shaders_glsl_opmult_comp_spv,
|
||||
(uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv +
|
||||
kp::shader_data::shaders_glsl_opmult_comp_spv_len));
|
||||
|
||||
algorithm->rebuild(tensors, spirv, Workgroup({tensors[0]->size()}));
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
~OpMult() override {
|
||||
virtual ~OpMult() override {
|
||||
KP_LOG_DEBUG("Kompute OpMult destructor started");
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -22,19 +22,13 @@ class OpTensorCopy : public OpBase
|
|||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used to create in operation.
|
||||
*/
|
||||
OpTensorCopy(std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorCopy() override;
|
||||
|
||||
/**
|
||||
* Performs basic checks such as ensuring there are at least two tensors provided, that they are initialised and that they are not of type TensorTypes::eStorage.
|
||||
*/
|
||||
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* Records the copy commands from the first tensor into all the other tensors provided. Also optionally records a barrier.
|
||||
*/
|
||||
|
|
@ -51,6 +45,8 @@ class OpTensorCopy : public OpBase
|
|||
virtual void postEval() override;
|
||||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -1,71 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Base Operation which provides the high level interface that Kompute
|
||||
* operations implement in order to perform a set of actions in the GPU.
|
||||
*
|
||||
* Operations can perform actions on tensors, and optionally can also own an
|
||||
* Algorithm with respective parameters. kp::Operations with kp::Algorithms
|
||||
* would inherit from kp::OpBaseAlgo.
|
||||
*/
|
||||
class OpTensorCreate : public OpBase
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
*/
|
||||
OpTensorCreate(std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor for OpTensorCreate class. This OpTensorCreate destructor class should
|
||||
* always be called to destroy and free owned resources unless it is
|
||||
* intended to destroy the resources in the parent class.
|
||||
*/
|
||||
virtual ~OpTensorCreate() override;
|
||||
|
||||
/**
|
||||
* The init function is responsible for setting up all the resources and
|
||||
* should be called after the Operation has been created.
|
||||
*/
|
||||
virtual void init(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* Record runs the core actions to create the tensors. For device tensors
|
||||
* it records a copyCommand to move the data from the staging tensor to the
|
||||
* device tensor. The mapping for staging tensors happens in the init function
|
||||
* not in the record function.
|
||||
*/
|
||||
void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*/
|
||||
virtual void preEval() override;
|
||||
|
||||
/**
|
||||
* Performs a copy back into the main tensor to ensure that the data
|
||||
* contained is the one that is now being stored in the GPU.
|
||||
*/
|
||||
virtual void postEval() override;
|
||||
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
@ -21,19 +21,13 @@ class OpTensorSyncDevice : public OpBase
|
|||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used to create in operation.
|
||||
*/
|
||||
OpTensorSyncDevice(std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
OpTensorSyncDevice(const std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorSyncDevice() override;
|
||||
|
||||
/**
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
|
||||
*/
|
||||
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
|
||||
*/
|
||||
|
|
@ -50,6 +44,8 @@ class OpTensorSyncDevice : public OpBase
|
|||
virtual void postEval() override;
|
||||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -22,19 +22,13 @@ class OpTensorSyncLocal : public OpBase
|
|||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used to create in operation.
|
||||
*/
|
||||
OpTensorSyncLocal(std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
OpTensorSyncLocal(const std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorSyncLocal() override;
|
||||
|
||||
/**
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
|
||||
*/
|
||||
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device) override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory.
|
||||
*/
|
||||
|
|
@ -52,6 +46,8 @@ class OpTensorSyncLocal : public OpBase
|
|||
|
||||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue