Reformatted
This commit is contained in:
parent
c22247534a
commit
1748694b3a
8 changed files with 82 additions and 113 deletions
|
|
@ -41,9 +41,9 @@ Algorithm::init(const std::vector<char>& shaderFileData,
|
|||
this->createPipeline();
|
||||
}
|
||||
|
||||
void Algorithm::createDescriptorPool() {
|
||||
|
||||
}
|
||||
void
|
||||
Algorithm::createDescriptorPool()
|
||||
{}
|
||||
|
||||
void
|
||||
Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
|
||||
|
|
@ -52,9 +52,10 @@ Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
|
|||
|
||||
// TODO: Explore design for having multiple descriptor pool sizes
|
||||
std::vector<vk::DescriptorPoolSize> descriptorPoolSizes = {
|
||||
vk::DescriptorPoolSize(vk::DescriptorType::eStorageBuffer,
|
||||
static_cast<uint32_t>(tensorParams.size()) // Descriptor count
|
||||
)
|
||||
vk::DescriptorPoolSize(
|
||||
vk::DescriptorType::eStorageBuffer,
|
||||
static_cast<uint32_t>(tensorParams.size()) // Descriptor count
|
||||
)
|
||||
};
|
||||
|
||||
// TODO: Explore design for having more than 1 set configurable
|
||||
|
|
@ -120,11 +121,12 @@ Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
|
|||
nullptr, // Descriptor image info
|
||||
&descriptorBufferInfo));
|
||||
|
||||
this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr);
|
||||
this->mDevice->updateDescriptorSets(computeWriteDescriptorSets,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
SPDLOG_DEBUG("Kompute Algorithm updating descriptor sets");
|
||||
//this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr);
|
||||
// this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr);
|
||||
|
||||
SPDLOG_DEBUG("Kompue Algorithm successfully run init");
|
||||
}
|
||||
|
|
@ -135,7 +137,9 @@ Algorithm::createShaderModule(const std::vector<char>& shaderFileData)
|
|||
SPDLOG_DEBUG("Kompute Algorithm createShaderModule started");
|
||||
|
||||
vk::ShaderModuleCreateInfo shaderModuleInfo(
|
||||
vk::ShaderModuleCreateFlags(), shaderFileData.size(), (uint32_t*)shaderFileData.data());
|
||||
vk::ShaderModuleCreateFlags(),
|
||||
shaderFileData.size(),
|
||||
(uint32_t*)shaderFileData.data());
|
||||
|
||||
SPDLOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}",
|
||||
shaderFileData.size());
|
||||
|
|
|
|||
|
|
@ -66,9 +66,13 @@ Manager::~Manager()
|
|||
}
|
||||
|
||||
Sequence
|
||||
Manager::constructSequence() {
|
||||
Manager::constructSequence()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager creating Sequence object");
|
||||
return Sequence(this->mPhysicalDevice, this->mDevice, this->mComputeQueue, this->mComputeQueueFamilyIndex);
|
||||
return Sequence(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueue,
|
||||
this->mComputeQueueFamilyIndex);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
21
src/Tensor.cpp
Executable file → Normal file
21
src/Tensor.cpp
Executable file → Normal file
|
|
@ -15,7 +15,8 @@ Tensor::Tensor()
|
|||
|
||||
Tensor::Tensor(std::vector<uint32_t> data, TensorTypes tensorType)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Tensor constructor data: {}, and type: {}", data, tensorType);
|
||||
SPDLOG_DEBUG(
|
||||
"Kompute Tensor constructor data: {}, and type: {}", data, tensorType);
|
||||
|
||||
this->mData = data;
|
||||
this->mShape = { static_cast<uint32_t>(data.size()) };
|
||||
|
|
@ -60,8 +61,9 @@ Tensor::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
|||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
SPDLOG_DEBUG(
|
||||
"Kompute Tensor running init with Vulkan params and num data elementS: {}", this->mData.size());
|
||||
SPDLOG_DEBUG("Kompute Tensor running init with Vulkan params and num data "
|
||||
"elementS: {}",
|
||||
this->mData.size());
|
||||
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
|
|
@ -178,8 +180,9 @@ Tensor::mapDataFromHostMemory()
|
|||
SPDLOG_DEBUG("Kompute Tensor mapping data from host buffer");
|
||||
|
||||
if (this->mTensorType != TensorTypes::eStaging) {
|
||||
spdlog::error("Mapping tensor data manually from DEVICE buffer instead of "
|
||||
"using record GPU command with staging buffer");
|
||||
spdlog::error(
|
||||
"Mapping tensor data manually from DEVICE buffer instead of "
|
||||
"using record GPU command with staging buffer");
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -198,10 +201,12 @@ Tensor::mapDataIntoHostMemory()
|
|||
|
||||
SPDLOG_DEBUG("Kompute Tensor local mapping tensor data to host buffer");
|
||||
|
||||
// TODO: Verify if there are situations where we want to copy to device memory
|
||||
// TODO: Verify if there are situations where we want to copy to device
|
||||
// memory
|
||||
if (this->mTensorType != TensorTypes::eStaging) {
|
||||
spdlog::error("Mapping tensor data manually to DEVICE memory instead of "
|
||||
"using record GPU command with staging buffer");
|
||||
spdlog::error(
|
||||
"Mapping tensor data manually to DEVICE memory instead of "
|
||||
"using record GPU command with staging buffer");
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -52,7 +52,6 @@ class Algorithm
|
|||
// Parameters
|
||||
void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
|
||||
void createDescriptorPool();
|
||||
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -8,5 +8,3 @@
|
|||
#endif
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -66,8 +66,8 @@ OpMult<tX, tY, tZ>::OpMult()
|
|||
// TODO: Remove physicalDevice from main initialiser
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpMult<tX, tY, tZ>::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
: OpBase(physicalDevice, device, commandBuffer)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult constructor with params");
|
||||
|
|
@ -79,7 +79,6 @@ template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
|||
OpMult<tX, tY, tZ>::~OpMult()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult destructor started");
|
||||
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
|
|
@ -99,20 +98,24 @@ OpMult<tX, tY, tZ>::init(std::vector<std::shared_ptr<Tensor>> tensors)
|
|||
this->mTensorRHS = tensors[1];
|
||||
this->mTensorOutput = tensors[2];
|
||||
|
||||
// The dispatch size is set up based on either explicitly provided template parameters or by default it would take the shape and size of the tensors
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
if (tX > 0) {
|
||||
// If at least the x value is provided we use mainly the parameters provided
|
||||
// If at least the x value is provided we use mainly the parameters
|
||||
// provided
|
||||
this->mX = tX;
|
||||
this->mY = tY > 0 ? tY : 1;
|
||||
this->mZ = tZ > 0 ? tZ : 1;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
// TODO: Fully support the full size dispatch using size for the shape
|
||||
this->mX = this->mTensorLHS->size();
|
||||
this->mY = 1;
|
||||
this->mZ = 1;
|
||||
}
|
||||
spdlog::info("Kompute OpMult dispatch size X: {}, Y: {}, Z: {}", this->mX, this->mY, this->mZ);
|
||||
spdlog::info("Kompute OpMult dispatch size X: {}, Y: {}, Z: {}",
|
||||
this->mX,
|
||||
this->mY,
|
||||
this->mZ);
|
||||
|
||||
// TODO: Explore adding a validate function
|
||||
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
|
||||
|
|
@ -138,16 +141,17 @@ OpMult<tX, tY, tZ>::init(std::vector<std::shared_ptr<Tensor>> tensors)
|
|||
this->mTensorOutputStaging = std::make_shared<Tensor>(
|
||||
this->mTensorOutput->data(), Tensor::TensorTypes::eStaging);
|
||||
|
||||
this->mTensorOutputStaging->init(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mCommandBuffer);
|
||||
this->mTensorOutputStaging->init(
|
||||
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
|
||||
|
||||
#if RELEASE
|
||||
std::vector<char> shaderFileData(
|
||||
shader_data::shaders_glsl_opmult_comp_spv,
|
||||
shader_data::shaders_glsl_opmult_comp_spv + kp::shader_data::shaders_glsl_opmult_comp_spv_len);
|
||||
shader_data::shaders_glsl_opmult_comp_spv,
|
||||
shader_data::shaders_glsl_opmult_comp_spv +
|
||||
kp::shader_data::shaders_glsl_opmult_comp_spv_len);
|
||||
#else
|
||||
SPDLOG_DEBUG("Kompute OpMult Running debug loading shaders directly from spirv file");
|
||||
SPDLOG_DEBUG(
|
||||
"Kompute OpMult Running debug loading shaders directly from spirv file");
|
||||
|
||||
// TODO: Move to utility function
|
||||
std::string shaderFilePath = "shaders/glsl/opmult.comp.spv";
|
||||
|
|
@ -160,7 +164,8 @@ OpMult<tX, tY, tZ>::init(std::vector<std::shared_ptr<Tensor>> tensors)
|
|||
fileStream.read(shaderDataRaw, shaderFileSize);
|
||||
fileStream.close();
|
||||
|
||||
std::vector<char> shaderFileData(shaderDataRaw, shaderDataRaw + shaderFileSize);
|
||||
std::vector<char> shaderFileData(shaderDataRaw,
|
||||
shaderDataRaw + shaderFileSize);
|
||||
#endif
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component");
|
||||
|
|
@ -176,43 +181,43 @@ OpMult<tX, tY, tZ>::record()
|
|||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
this->mTensorLHS->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
this->mTensorRHS->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
|
||||
this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
|
||||
|
||||
// Barrier to ensure the shader code is executed before buffer read
|
||||
this->mTensorLHS->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eShaderWrite,
|
||||
vk::AccessFlagBits::eTransferRead,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eTransfer);
|
||||
vk::AccessFlagBits::eShaderWrite,
|
||||
vk::AccessFlagBits::eTransferRead,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eTransfer);
|
||||
this->mTensorOutput->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eShaderWrite,
|
||||
vk::AccessFlagBits::eTransferRead,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eTransfer);
|
||||
vk::AccessFlagBits::eShaderWrite,
|
||||
vk::AccessFlagBits::eTransferRead,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eTransfer);
|
||||
|
||||
this->mTensorOutputStaging->recordCopyFrom(this->mTensorOutput);
|
||||
|
||||
// Barrier to ensure we wait until the data is copied to the staging buffer
|
||||
this->mTensorLHS->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eTransferWrite,
|
||||
vk::AccessFlagBits::eHostRead,
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::PipelineStageFlagBits::eHost);
|
||||
vk::AccessFlagBits::eTransferWrite,
|
||||
vk::AccessFlagBits::eHostRead,
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::PipelineStageFlagBits::eHost);
|
||||
this->mTensorOutput->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eTransferWrite,
|
||||
vk::AccessFlagBits::eHostRead,
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::PipelineStageFlagBits::eHost);
|
||||
vk::AccessFlagBits::eTransferWrite,
|
||||
vk::AccessFlagBits::eHostRead,
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::PipelineStageFlagBits::eHost);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
|
|
@ -229,5 +234,3 @@ OpMult<tX, tY, tZ>::postSubmit()
|
|||
}
|
||||
|
||||
#endif // #ifndef OPMULT_CPP
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -37,10 +37,12 @@ class Sequence
|
|||
|
||||
std::unique_ptr<OpBase> baseOpPtr{ baseOp };
|
||||
|
||||
SPDLOG_DEBUG("Kompute Sequence running init on OpBase derived class instance");
|
||||
SPDLOG_DEBUG(
|
||||
"Kompute Sequence running init on OpBase derived class instance");
|
||||
baseOpPtr->init(tensors);
|
||||
|
||||
SPDLOG_DEBUG("Kompute Sequence running record on OpBase derived class instance");
|
||||
SPDLOG_DEBUG(
|
||||
"Kompute Sequence running record on OpBase derived class instance");
|
||||
baseOpPtr->record();
|
||||
|
||||
mOperations.push_back(std::move(baseOpPtr));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue