Added functionality for named sequences to be created
This commit is contained in:
parent
c8db55aa1b
commit
b91c392f5e
15 changed files with 299 additions and 173 deletions
51
Makefile
51
Makefile
|
|
@ -1,28 +1,26 @@
|
|||
|
||||
####### SRC Build Params #######
|
||||
|
||||
CC="/c/Program Files (x86)/Microsoft Visual Studio/2019/Community/MSBuild/Current/Bin/MSBuild.exe"
|
||||
|
||||
|
||||
####### Shader Build Params #######
|
||||
|
||||
ifeq ($(OS),Windows_NT) # is Windows_NT on XP, 2000, 7, Vista, 10...
|
||||
SCMP=C:\VulkanSDK\1.2.141.2\Bin32\glslangValidator.exe
|
||||
CMAKE_BIN ?= "C:\Program Files\CMake\bin\cmake.exe"
|
||||
SCMP_BIN="C:\\VulkanSDK\\1.2.141.2\\Bin32\\glslangValidator.exe"
|
||||
MSBUILD_BIN ?= "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\MSBuild\\Current\\Bin\\MSBuild.exe"
|
||||
VCPKG_CMAKE ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsystems\\vcpkg.cmake"
|
||||
else
|
||||
SCMP=/c/VulkanSDK/1.2.141.2/Bin32/glslangValidator.exe
|
||||
CLANG_FORMAT_BIN ?= "/home/alejandro/Programming/lib/clang+llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04/bin/clang-format"
|
||||
CMAKE_BIN ?= "/c/Program Files/CMake/bin/cmake.exe"
|
||||
SCMP_BIN ?= "/c/VulkanSDK/1.2.141.2/Bin32/glslangValidator.exe"
|
||||
MSBUILD_BIN ?= "/c/Program Files (x86)/Microsoft Visual Studio/2019/Community/MSBuild/Current/Bin/MSBuild.exe"
|
||||
VCPKG_CMAKE ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsystems\\vcpkg.cmake"
|
||||
endif
|
||||
|
||||
####### Package manager #######
|
||||
|
||||
VCPKG=/c/Users/axsau/Programming/lib/vcpkg/vcpkg
|
||||
|
||||
####### Main Target Rules #######
|
||||
|
||||
run_cmake:
|
||||
cmake \
|
||||
$(CMAKE_BIN) \
|
||||
-Bbuild \
|
||||
-DCMAKE_TOOLCHAIN_FILE=C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsystems\\vcpkg.cmake \
|
||||
-DCMAKE_TOOLCHAIN_FILE=$(VCPKG_CMAKE) \
|
||||
-DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
|
||||
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
|
||||
-G "Visual Studio 16 2019"
|
||||
|
||||
push_docs_to_ghpages:
|
||||
|
|
@ -31,10 +29,25 @@ push_docs_to_ghpages:
|
|||
GIT_DEPLOY_REPO="origin" \
|
||||
./scripts/push_folder_to_branch.sh
|
||||
|
||||
build_vs:
|
||||
$(CC) build/kompute.sln
|
||||
####### Visual studio build shortcut commands #######
|
||||
|
||||
run_tests:
|
||||
build_all:
|
||||
$(MSBUILD_BIN) build/kompute.sln
|
||||
|
||||
build_docs:
|
||||
$(MSBUILD_BIN) build/docs/gendoxygen.vcxproj
|
||||
$(MSBUILD_BIN) build/docs/gensphinx.vcxproj
|
||||
|
||||
build_kompute:
|
||||
$(MSBUILD_BIN) build/src/kompute.vcxproj
|
||||
|
||||
build_tests:
|
||||
$(MSBUILD_BIN) build/test/test_kompute.vcxproj
|
||||
|
||||
run_docs: build_docs
|
||||
(cd build/docs/sphinx && python2.7 -m SimpleHTTPServer)
|
||||
|
||||
run_tests: build_tests
|
||||
./build/test/Debug/test_kompute.exe
|
||||
|
||||
clean_cmake:
|
||||
|
|
@ -46,7 +59,7 @@ install_python_reqs:
|
|||
build_shaders:
|
||||
python scripts/convert_shaders.py \
|
||||
--shader-path shaders/glsl \
|
||||
--shader-binary $(SCMP) \
|
||||
--shader-binary $(SCMP_BIN) \
|
||||
--header-path src/include/kompute/shaders/ \
|
||||
-v
|
||||
|
||||
|
|
@ -85,7 +98,7 @@ build_single_header:
|
|||
"single_include/kompute/Kompute.hpp"
|
||||
|
||||
format:
|
||||
clang-format -i -style="{BasedOnStyle: mozilla, IndentWidth: 4}" src/*.cpp src/include/kompute/*.hpp
|
||||
$(CLANG_FORMAT_BIN) -i -style="{BasedOnStyle: mozilla, IndentWidth: 4}" src/*.cpp src/include/kompute/*.hpp
|
||||
|
||||
clean:
|
||||
find src -name "*gch" -exec rm {} \; || "No ghc files"
|
||||
|
|
|
|||
|
|
@ -22,6 +22,9 @@
|
|||
</tr>
|
||||
</table>
|
||||
|
||||
🔋 [Documentation]() 💻 [Import to your project]() ⌨ [Tutorials]() 💾
|
||||
|
||||
|
||||
## Principles & Features
|
||||
|
||||
* Single-header static library that is easy to import
|
||||
|
|
|
|||
|
|
@ -150,6 +150,8 @@ static unsigned const int shaders_glsl_opmult_comp_spv_len = 1308;
|
|||
}
|
||||
#endif // define SHADEROP_SHADEROPMULT_HPP
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#define KP_MAX_DIM_SIZE 1
|
||||
|
||||
namespace kp {
|
||||
|
|
@ -234,42 +236,81 @@ class Tensor
|
|||
namespace kp {
|
||||
|
||||
/**
|
||||
Base Operation
|
||||
*/
|
||||
* Base Operation which provides the high-level interface that Kompute
|
||||
* operations implement in order to perform a set of actions on the GPU.
|
||||
*
|
||||
* Operations can perform actions on tensors, and optionally can also own an
|
||||
* Algorithm with respective parameters. kp::Operations with kp::Algorithms
|
||||
* would inherit from kp::OpBaseAlgo.
|
||||
*/
|
||||
class OpBase
|
||||
{
|
||||
private:
|
||||
public:
|
||||
/**
|
||||
Constructor
|
||||
*/
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpBase() { SPDLOG_DEBUG("Compute OpBase base constructor"); }
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*/
|
||||
OpBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Compute OpBase constructor with params");
|
||||
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
this->mCommandBuffer = commandBuffer;
|
||||
this->mTensors = tensors;
|
||||
}
|
||||
|
||||
~OpBase() {
|
||||
SPDLOG_DEBUG("Compute OpBase destructor started");
|
||||
}
|
||||
|
||||
virtual void init(std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
/**
|
||||
* Default destructor for the OpBase class. This OpBase destructor should
|
||||
* always be called to destroy and free owned resources unless it is
|
||||
* intended to destroy the resources in the parent class. This can be done
|
||||
* by setting mFreeTensors to false.
|
||||
*/
|
||||
~OpBase()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpBase init called");
|
||||
SPDLOG_DEBUG("Kompute OpBase destructor started");
|
||||
|
||||
if (!this->mDevice) {
|
||||
spdlog::warn("Kompute OpBase destructor called with empty device");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->mFreeTensors) {
|
||||
SPDLOG_DEBUG("Kompute OpBase freeing tensors");
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (tensor && tensor->isInit()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
} else {
|
||||
spdlog::error("Kompute OpBase expected to free "
|
||||
"tensor but has already been freed.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
virtual void record() { SPDLOG_DEBUG("Kompute OpBase record called"); }
|
||||
virtual void init() = 0;
|
||||
|
||||
virtual void postSubmit() { SPDLOG_DEBUG("Kompute OpBase init called"); }
|
||||
virtual void record() = 0;
|
||||
|
||||
virtual void postSubmit() = 0;
|
||||
|
||||
protected:
|
||||
// Sometimes owned resources
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
bool mFreeTensors =
|
||||
false; // TODO: Provide granularity to specify which to free
|
||||
|
||||
// Always external resources
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
|
||||
|
|
@ -295,6 +336,9 @@ class Sequence
|
|||
uint32_t queueIndex);
|
||||
~Sequence();
|
||||
|
||||
// Initialiser
|
||||
void init();
|
||||
|
||||
// Record command functions
|
||||
void begin();
|
||||
void end();
|
||||
|
|
@ -310,15 +354,15 @@ class Sequence
|
|||
SPDLOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
SPDLOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
T* op =
|
||||
new T(this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
|
||||
T* op = new T(
|
||||
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer, tensors);
|
||||
OpBase* baseOp = dynamic_cast<OpBase*>(op);
|
||||
|
||||
std::unique_ptr<OpBase> baseOpPtr{ baseOp };
|
||||
|
||||
SPDLOG_DEBUG(
|
||||
"Kompute Sequence running init on OpBase derived class instance");
|
||||
baseOpPtr->init(tensors);
|
||||
baseOpPtr->init();
|
||||
|
||||
SPDLOG_DEBUG(
|
||||
"Kompute Sequence running record on OpBase derived class instance");
|
||||
|
|
@ -350,6 +394,8 @@ class Sequence
|
|||
|
||||
} // End namespace kp
|
||||
|
||||
#define KP_DEFAULT_SESSION "DEFAULT"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
|
|
@ -370,28 +416,34 @@ class Manager
|
|||
|
||||
~Manager();
|
||||
|
||||
std::weak_ptr<Sequence> managedSequence();
|
||||
std::weak_ptr<Sequence> getOrCreateManagedSequence(std::string sessionName);
|
||||
|
||||
template<typename T, typename... TArgs>
|
||||
void evalOp(std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
void evalOp(std::vector<std::shared_ptr<Tensor>> tensors, std::string sessionName = KP_DEFAULT_SESSION)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp triggered");
|
||||
Sequence sq(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueue,
|
||||
this->mComputeQueueFamilyIndex);
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
|
||||
sq.begin();
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence RECORD");
|
||||
sq.record<T>(tensors);
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence END");
|
||||
sq.end();
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence EVAL");
|
||||
sq.eval();
|
||||
std::weak_ptr<Sequence> sqWeakPtr =
|
||||
this->getOrCreateManagedSequence(sessionName);
|
||||
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock())
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
|
||||
sq->begin();
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence RECORD");
|
||||
sq->record<T>(tensors);
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence END");
|
||||
sq->end();
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence EVAL");
|
||||
sq->eval();
|
||||
}
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence SUCCESS");
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
std::shared_ptr<vk::Instance> mInstance = nullptr;
|
||||
bool mFreeInstance = false;
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
|
||||
|
|
@ -402,7 +454,7 @@ class Manager
|
|||
std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
|
||||
|
||||
// Always owned resources
|
||||
std::vector<std::shared_ptr<Sequence>> mManagedSequences;
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>> mManagedSequences;
|
||||
|
||||
#if DEBUG
|
||||
vk::DebugReportCallbackEXT mDebugReportCallback;
|
||||
|
|
@ -439,15 +491,16 @@ class Algorithm
|
|||
void recordDispatch(uint32_t x = 1, uint32_t y = 1, uint32_t z = 1);
|
||||
|
||||
private:
|
||||
// Shared resources
|
||||
// Never Owned Resources
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
|
||||
|
||||
// Resources owned by default
|
||||
// Optionally owned resources
|
||||
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
|
||||
bool mFreeDescriptorSetLayout = false;
|
||||
std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
|
||||
bool mFreeDescriptorPool = false;
|
||||
|
||||
// TODO: Explore design for multiple descriptor sets
|
||||
std::shared_ptr<vk::DescriptorSet> mDescriptorSet;
|
||||
bool mFreeDescriptorSet = false;
|
||||
|
|
@ -463,6 +516,7 @@ class Algorithm
|
|||
// Create util functions
|
||||
void createShaderModule(const std::vector<char>& shaderFileData);
|
||||
void createPipeline();
|
||||
|
||||
// Parameters
|
||||
void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
|
||||
void createDescriptorPool();
|
||||
|
|
@ -486,11 +540,13 @@ class OpMult : public OpBase
|
|||
|
||||
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer);
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors = false);
|
||||
|
||||
~OpMult();
|
||||
|
||||
void init(std::vector<std::shared_ptr<Tensor>> tensors) override;
|
||||
void init() override;
|
||||
|
||||
void record() override;
|
||||
|
||||
|
|
@ -532,8 +588,10 @@ OpMult<tX, tY, tZ>::OpMult()
|
|||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpMult<tX, tY, tZ>::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
: OpBase(physicalDevice, device, commandBuffer)
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult constructor with params");
|
||||
|
||||
|
|
@ -548,20 +606,20 @@ OpMult<tX, tY, tZ>::~OpMult()
|
|||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpMult<tX, tY, tZ>::init(std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
OpMult<tX, tY, tZ>::init()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult init called");
|
||||
|
||||
if (tensors.size() < 3) {
|
||||
if (this->mTensors.size() < 3) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpMult called with less than 1 tensor");
|
||||
} else if (tensors.size() > 3) {
|
||||
spdlog::warn("Kompute OpMult called with more than 3 tensors");
|
||||
} else if (this->mTensors.size() > 3) {
|
||||
spdlog::warn("Kompute OpMult called with more than 3 this->mTensors");
|
||||
}
|
||||
|
||||
this->mTensorLHS = tensors[0];
|
||||
this->mTensorRHS = tensors[1];
|
||||
this->mTensorOutput = tensors[2];
|
||||
this->mTensorLHS = this->mTensors[0];
|
||||
this->mTensorRHS = this->mTensors[1];
|
||||
this->mTensorOutput = this->mTensors[2];
|
||||
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
|
|
@ -635,7 +693,7 @@ OpMult<tX, tY, tZ>::init(std::vector<std::shared_ptr<Tensor>> tensors)
|
|||
|
||||
SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component");
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, tensors);
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
|
|
@ -709,21 +767,22 @@ class OpCreateTensor : public OpBase
|
|||
|
||||
OpCreateTensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer);
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors = true);
|
||||
|
||||
~OpCreateTensor();
|
||||
|
||||
void init(std::vector<std::shared_ptr<Tensor>> tensors) override;
|
||||
void init() override;
|
||||
|
||||
void record() override;
|
||||
|
||||
void postSubmit() override;
|
||||
|
||||
private:
|
||||
// Never owned resources
|
||||
std::shared_ptr<Tensor> mPrimaryTensor;
|
||||
bool mFreePrimaryTensorResources = false;
|
||||
std::shared_ptr<Tensor> mStagingTensor;
|
||||
bool mFreeStagingTensorResources = false;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -71,16 +71,25 @@ Manager::~Manager()
|
|||
}
|
||||
|
||||
std::weak_ptr<Sequence>
|
||||
Manager::managedSequence()
|
||||
Manager::getOrCreateManagedSequence(std::string sessionName)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager creating Sequence object");
|
||||
std::shared_ptr<Sequence> sq = std::make_shared<Sequence>(
|
||||
this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueue,
|
||||
this->mComputeQueueFamilyIndex);
|
||||
this->mManagedSequences.push_back(sq);
|
||||
return sq;
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator
|
||||
found = this->mManagedSequences.find(sessionName);
|
||||
|
||||
if (found == this->mManagedSequences.end()) {
|
||||
std::shared_ptr<Sequence> sq =
|
||||
std::make_shared<Sequence>(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueue,
|
||||
this->mComputeQueueFamilyIndex);
|
||||
sq->init();
|
||||
this->mManagedSequences.insert({sessionName, sq});
|
||||
return sq;
|
||||
}
|
||||
else {
|
||||
return found->second;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -13,8 +13,10 @@ OpCreateTensor::OpCreateTensor()
|
|||
OpCreateTensor::OpCreateTensor(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
: OpBase(physicalDevice, device, commandBuffer)
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpCreateTensor constructor with params");
|
||||
}
|
||||
|
|
@ -22,47 +24,21 @@ OpCreateTensor::OpCreateTensor(
|
|||
OpCreateTensor::~OpCreateTensor()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpCreateTensor destructor started");
|
||||
|
||||
if(!this->mDevice) {
|
||||
spdlog::warn("Kompute OpCreateTensor destructor called with empty device");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!this->mFreePrimaryTensorResources) {
|
||||
SPDLOG_DEBUG("Kompute OpCreateTensor removing primary tensor");
|
||||
if (this->mPrimaryTensor && this->mPrimaryTensor->isInit()) {
|
||||
this->mPrimaryTensor->freeMemoryDestroyGPUResources();
|
||||
} else {
|
||||
spdlog::error("Kompute OpCreateTensor expected to free primary tensor but has already been freed.");
|
||||
}
|
||||
}
|
||||
|
||||
if (!this->mFreeStagingTensorResources) {
|
||||
SPDLOG_DEBUG("Kompute OpCreateTensor removing primary tensor");
|
||||
if (this->mStagingTensor && this->mStagingTensor->isInit()) {
|
||||
this->mStagingTensor->freeMemoryDestroyGPUResources();
|
||||
} else {
|
||||
spdlog::error("Kompute OpCreateTensor expected to free secondary tensor but has already been freed.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpCreateTensor::init(std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
OpCreateTensor::init()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpCreateTensor init called");
|
||||
|
||||
if (tensors.size() < 1) {
|
||||
if (this->mTensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpCreateTensor called with less than 1 tensor");
|
||||
} else if (tensors.size() > 1) {
|
||||
} else if (this->mTensors.size() > 1) {
|
||||
spdlog::warn("Kompute OpCreateTensor called with more than 1 tensor");
|
||||
}
|
||||
|
||||
this->mFreePrimaryTensorResources = true;
|
||||
this->mFreeStagingTensorResources = true;
|
||||
|
||||
this->mPrimaryTensor = tensors[0];
|
||||
this->mPrimaryTensor = this->mTensors[0];
|
||||
|
||||
if (this->mPrimaryTensor->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mPrimaryTensor->init(
|
||||
|
|
@ -76,6 +52,9 @@ OpCreateTensor::init(std::vector<std::shared_ptr<Tensor>> tensors)
|
|||
|
||||
this->mStagingTensor->mapDataIntoHostMemory();
|
||||
|
||||
// Adding to the OpBase owned resource so they are freed
|
||||
this->mTensors.push_back(this->mStagingTensor);
|
||||
|
||||
} else {
|
||||
this->mPrimaryTensor->init(
|
||||
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
|
||||
|
|
|
|||
|
|
@ -19,9 +19,6 @@ Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
|||
this->mDevice = device;
|
||||
this->mComputeQueue = computeQueue;
|
||||
this->mQueueIndex = queueIndex;
|
||||
|
||||
this->createCommandPool();
|
||||
this->createCommandBuffer();
|
||||
}
|
||||
|
||||
Sequence::~Sequence()
|
||||
|
|
@ -58,10 +55,17 @@ Sequence::~Sequence()
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::init()
|
||||
{
|
||||
this->createCommandPool();
|
||||
this->createCommandBuffer();
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::begin()
|
||||
{
|
||||
if (this->mCommandPool == nullptr) {
|
||||
if (!this->mCommandPool) {
|
||||
throw std::runtime_error("Kompute Sequence command pool is null");
|
||||
}
|
||||
|
||||
|
|
@ -78,7 +82,7 @@ Sequence::begin()
|
|||
void
|
||||
Sequence::end()
|
||||
{
|
||||
if (this->mCommandPool == nullptr) {
|
||||
if (!this->mCommandPool) {
|
||||
throw std::runtime_error("Kompute Sequence command pool is null");
|
||||
}
|
||||
|
||||
|
|
@ -125,7 +129,7 @@ Sequence::createCommandPool()
|
|||
{
|
||||
SPDLOG_DEBUG("Kompute Sequence creating command pool");
|
||||
|
||||
if (this->mDevice == nullptr) {
|
||||
if (!this->mDevice) {
|
||||
throw std::runtime_error("Kompute Sequence device is null");
|
||||
}
|
||||
if (this->mQueueIndex < 0) {
|
||||
|
|
@ -146,10 +150,10 @@ void
|
|||
Sequence::createCommandBuffer()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Sequence creating command buffer");
|
||||
if (this->mDevice == nullptr) {
|
||||
if (!this->mDevice) {
|
||||
throw std::runtime_error("Kompute Sequence device is null");
|
||||
}
|
||||
if (this->mCommandPool == nullptr) {
|
||||
if (!this->mCommandPool) {
|
||||
throw std::runtime_error("Kompute Sequence command pool is null");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -323,7 +323,9 @@ Tensor::createBuffer()
|
|||
SPDLOG_DEBUG("Kompute Tensor buffer & memory creation successful");
|
||||
}
|
||||
|
||||
void Tensor::freeMemoryDestroyGPUResources() {
|
||||
void
|
||||
Tensor::freeMemoryDestroyGPUResources()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Tensor started freeMemoryDestroyGPUResources");
|
||||
|
||||
this->mIsInit = false;
|
||||
|
|
@ -355,7 +357,6 @@ void Tensor::freeMemoryDestroyGPUResources() {
|
|||
}
|
||||
|
||||
SPDLOG_DEBUG("Kompute Tensor successful freeMemoryDestroyGPUResources");
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,15 +25,16 @@ class Algorithm
|
|||
void recordDispatch(uint32_t x = 1, uint32_t y = 1, uint32_t z = 1);
|
||||
|
||||
private:
|
||||
// Shared resources
|
||||
// Never Owned Resources
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
|
||||
|
||||
// Resources owned by default
|
||||
// Optionally owned resources
|
||||
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
|
||||
bool mFreeDescriptorSetLayout = false;
|
||||
std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
|
||||
bool mFreeDescriptorPool = false;
|
||||
|
||||
// TODO: Explore design for multiple descriptor sets
|
||||
std::shared_ptr<vk::DescriptorSet> mDescriptorSet;
|
||||
bool mFreeDescriptorSet = false;
|
||||
|
|
@ -49,6 +50,7 @@ class Algorithm
|
|||
// Create util functions
|
||||
void createShaderModule(const std::vector<char>& shaderFileData);
|
||||
void createPipeline();
|
||||
|
||||
// Parameters
|
||||
void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
|
||||
void createDescriptorPool();
|
||||
|
|
|
|||
|
|
@ -1,9 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/Sequence.hpp"
|
||||
|
||||
#define KP_DEFAULT_SESSION "DEFAULT"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
|
|
@ -24,28 +28,34 @@ class Manager
|
|||
|
||||
~Manager();
|
||||
|
||||
std::weak_ptr<Sequence> managedSequence();
|
||||
std::weak_ptr<Sequence> getOrCreateManagedSequence(std::string sessionName);
|
||||
|
||||
template<typename T, typename... TArgs>
|
||||
void evalOp(std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
void evalOp(std::vector<std::shared_ptr<Tensor>> tensors, std::string sessionName = KP_DEFAULT_SESSION)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp triggered");
|
||||
Sequence sq(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueue,
|
||||
this->mComputeQueueFamilyIndex);
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
|
||||
sq.begin();
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence RECORD");
|
||||
sq.record<T>(tensors);
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence END");
|
||||
sq.end();
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence EVAL");
|
||||
sq.eval();
|
||||
std::weak_ptr<Sequence> sqWeakPtr =
|
||||
this->getOrCreateManagedSequence(sessionName);
|
||||
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock())
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
|
||||
sq->begin();
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence RECORD");
|
||||
sq->record<T>(tensors);
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence END");
|
||||
sq->end();
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence EVAL");
|
||||
sq->eval();
|
||||
}
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence SUCCESS");
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
std::shared_ptr<vk::Instance> mInstance = nullptr;
|
||||
bool mFreeInstance = false;
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
|
||||
|
|
@ -56,7 +66,7 @@ class Manager
|
|||
std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
|
||||
|
||||
// Always owned resources
|
||||
std::vector<std::shared_ptr<Sequence>> mManagedSequences;
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>> mManagedSequences;
|
||||
|
||||
#if DEBUG
|
||||
vk::DebugReportCallbackEXT mDebugReportCallback;
|
||||
|
|
|
|||
|
|
@ -7,42 +7,81 @@
|
|||
namespace kp {
|
||||
|
||||
/**
|
||||
Base Operation
|
||||
*/
|
||||
* Base Operation which provides the high-level interface that Kompute
|
||||
* operations implement in order to perform a set of actions on the GPU.
|
||||
*
|
||||
* Operations can perform actions on tensors, and optionally can also own an
|
||||
* Algorithm with respective parameters. kp::Operations with kp::Algorithms
|
||||
* would inherit from kp::OpBaseAlgo.
|
||||
*/
|
||||
class OpBase
|
||||
{
|
||||
private:
|
||||
public:
|
||||
/**
|
||||
Constructor
|
||||
*/
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpBase() { SPDLOG_DEBUG("Compute OpBase base constructor"); }
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*/
|
||||
OpBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Compute OpBase constructor with params");
|
||||
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
this->mCommandBuffer = commandBuffer;
|
||||
this->mTensors = tensors;
|
||||
}
|
||||
|
||||
~OpBase() {
|
||||
SPDLOG_DEBUG("Compute OpBase destructor started");
|
||||
}
|
||||
|
||||
virtual void init(std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
/**
|
||||
* Default destructor for the OpBase class. This OpBase destructor should
|
||||
* always be called to destroy and free owned resources unless it is
|
||||
* intended to destroy the resources in the parent class. This can be done
|
||||
* by setting mFreeTensors to false.
|
||||
*/
|
||||
~OpBase()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpBase init called");
|
||||
SPDLOG_DEBUG("Kompute OpBase destructor started");
|
||||
|
||||
if (!this->mDevice) {
|
||||
spdlog::warn("Kompute OpBase destructor called with empty device");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->mFreeTensors) {
|
||||
SPDLOG_DEBUG("Kompute OpBase freeing tensors");
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (tensor && tensor->isInit()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
} else {
|
||||
spdlog::error("Kompute OpBase expected to free "
|
||||
"tensor but has already been freed.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
virtual void record() { SPDLOG_DEBUG("Kompute OpBase record called"); }
|
||||
virtual void init() = 0;
|
||||
|
||||
virtual void postSubmit() { SPDLOG_DEBUG("Kompute OpBase init called"); }
|
||||
virtual void record() = 0;
|
||||
|
||||
virtual void postSubmit() = 0;
|
||||
|
||||
protected:
|
||||
// Sometimes owned resources
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
bool mFreeTensors =
|
||||
false; // TODO: Provide granularity to specify which to free
|
||||
|
||||
// Always external resources
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
|
||||
|
|
|
|||
|
|
@ -15,21 +15,22 @@ class OpCreateTensor : public OpBase
|
|||
|
||||
OpCreateTensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer);
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors = true);
|
||||
|
||||
~OpCreateTensor();
|
||||
|
||||
void init(std::vector<std::shared_ptr<Tensor>> tensors) override;
|
||||
void init() override;
|
||||
|
||||
void record() override;
|
||||
|
||||
void postSubmit() override;
|
||||
|
||||
private:
|
||||
// Never owned resources
|
||||
std::shared_ptr<Tensor> mPrimaryTensor;
|
||||
bool mFreePrimaryTensorResources = false;
|
||||
std::shared_ptr<Tensor> mStagingTensor;
|
||||
bool mFreeStagingTensorResources = false;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -27,11 +27,13 @@ class OpMult : public OpBase
|
|||
|
||||
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer);
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors = false);
|
||||
|
||||
~OpMult();
|
||||
|
||||
void init(std::vector<std::shared_ptr<Tensor>> tensors) override;
|
||||
void init() override;
|
||||
|
||||
void record() override;
|
||||
|
||||
|
|
@ -73,8 +75,10 @@ OpMult<tX, tY, tZ>::OpMult()
|
|||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpMult<tX, tY, tZ>::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
: OpBase(physicalDevice, device, commandBuffer)
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult constructor with params");
|
||||
|
||||
|
|
@ -89,20 +93,20 @@ OpMult<tX, tY, tZ>::~OpMult()
|
|||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpMult<tX, tY, tZ>::init(std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
OpMult<tX, tY, tZ>::init()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult init called");
|
||||
|
||||
if (tensors.size() < 3) {
|
||||
if (this->mTensors.size() < 3) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpMult called with less than 1 tensor");
|
||||
} else if (tensors.size() > 3) {
|
||||
spdlog::warn("Kompute OpMult called with more than 3 tensors");
|
||||
} else if (this->mTensors.size() > 3) {
|
||||
spdlog::warn("Kompute OpMult called with more than 3 this->mTensors");
|
||||
}
|
||||
|
||||
this->mTensorLHS = tensors[0];
|
||||
this->mTensorRHS = tensors[1];
|
||||
this->mTensorOutput = tensors[2];
|
||||
this->mTensorLHS = this->mTensors[0];
|
||||
this->mTensorRHS = this->mTensors[1];
|
||||
this->mTensorOutput = this->mTensors[2];
|
||||
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
|
|
@ -176,7 +180,7 @@ OpMult<tX, tY, tZ>::init(std::vector<std::shared_ptr<Tensor>> tensors)
|
|||
|
||||
SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component");
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, tensors);
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
|
|
|
|||
|
|
@ -22,6 +22,9 @@ class Sequence
|
|||
uint32_t queueIndex);
|
||||
~Sequence();
|
||||
|
||||
// Initialiser
|
||||
void init();
|
||||
|
||||
// Record command functions
|
||||
void begin();
|
||||
void end();
|
||||
|
|
@ -37,15 +40,15 @@ class Sequence
|
|||
SPDLOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
SPDLOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
T* op =
|
||||
new T(this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
|
||||
T* op = new T(
|
||||
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer, tensors);
|
||||
OpBase* baseOp = dynamic_cast<OpBase*>(op);
|
||||
|
||||
std::unique_ptr<OpBase> baseOpPtr{ baseOp };
|
||||
|
||||
SPDLOG_DEBUG(
|
||||
"Kompute Sequence running init on OpBase derived class instance");
|
||||
baseOpPtr->init(tensors);
|
||||
baseOpPtr->init();
|
||||
|
||||
SPDLOG_DEBUG(
|
||||
"Kompute Sequence running record on OpBase derived class instance");
|
||||
|
|
|
|||
|
|
@ -57,7 +57,6 @@ class Tensor
|
|||
void mapDataFromHostMemory();
|
||||
void mapDataIntoHostMemory();
|
||||
|
||||
|
||||
private:
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
|
|
|
|||
|
|
@ -74,8 +74,8 @@ TEST_CASE("End to end OpMult Flow should execute correctly from sequence") {
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::weak_ptr<kp::Sequence> sq_ref = mgr.managedSequence();
|
||||
if (std::shared_ptr<kp::Sequence> sq = sq_ref.lock()) {
|
||||
std::weak_ptr<kp::Sequence> sqWeakPtr = mgr.getOrCreateManagedSequence("newSequence");
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock()) {
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpCreateTensor>({ tensorLHS });
|
||||
|
|
@ -93,7 +93,7 @@ TEST_CASE("End to end OpMult Flow should execute correctly from sequence") {
|
|||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
sq_ref.reset();
|
||||
sqWeakPtr.reset();
|
||||
|
||||
spdlog::info("OpMult call success");
|
||||
spdlog::info("Tensor output: {}", tensorOutput->data());
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue