Updated tensor to copy its data to memory explicitly; still assessing why the copied data doesn't persist through operations
This commit is contained in:
parent
6f0203b863
commit
e047aa3b43
10 changed files with 65 additions and 79 deletions
|
|
@ -1,21 +1,17 @@
|
|||
#version 450
|
||||
|
||||
layout(binding = 0) buffer tensorLhs {
|
||||
layout(set = 0, binding = 0) buffer tensorLhs {
|
||||
uint valuesLhs[ ];
|
||||
};
|
||||
|
||||
layout(binding = 1) buffer tensorRhs {
|
||||
layout(set = 0, binding = 1) buffer tensorRhs {
|
||||
uint valuesRhs[ ];
|
||||
};
|
||||
|
||||
layout(binding = 2) buffer tensorOutput {
|
||||
layout(set = 0, binding = 2) buffer tensorOutput {
|
||||
uint valuesOutput[ ];
|
||||
};
|
||||
|
||||
layout(binding = 3) buffer tensorInvalid {
|
||||
uint valuesInvalid[ ];
|
||||
};
|
||||
|
||||
// TODO: Explore how to make layout inside shader dynamic
|
||||
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
|
@ -25,10 +21,7 @@ void main()
|
|||
|
||||
//valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
|
||||
// FOR TESTING
|
||||
valuesOutput[index] = 100 + index;
|
||||
valuesRhs[index] = 100 + index;
|
||||
valuesLhs[index] = 100 + index;
|
||||
valuesInvalid[index] = 100 + index;
|
||||
valuesOutput[index] = valuesLhs[index] + valuesRhs[index];
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -41,6 +41,10 @@ Algorithm::init(std::string shaderFilePath,
|
|||
this->createPipeline();
|
||||
}
|
||||
|
||||
// Descriptor pool creation step for Algorithm.
// NOTE(review): the body contains no statements in this view, so calling it
// currently does nothing — presumably a stub to be filled in; confirm.
void Algorithm::createDescriptorPool() {
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -55,9 +55,12 @@ class Algorithm
|
|||
bool mFreePipeline = false;
|
||||
|
||||
// Create util functions
|
||||
void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
|
||||
void createShaderModule(std::string shaderFilePath);
|
||||
void createPipeline();
|
||||
// Parameters
|
||||
void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
|
||||
void createDescriptorPool();
|
||||
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -37,7 +37,6 @@ OpCreateTensor::init(std::vector<std::shared_ptr<Tensor>> tensors)
|
|||
}
|
||||
|
||||
this->mPrimaryTensor = tensors[0];
|
||||
std::vector<uint32_t> data = this->mPrimaryTensor->data();
|
||||
|
||||
if (this->mPrimaryTensor->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mPrimaryTensor->init(
|
||||
|
|
@ -47,11 +46,13 @@ OpCreateTensor::init(std::vector<std::shared_ptr<Tensor>> tensors)
|
|||
this->mPrimaryTensor->data(), Tensor::TensorTypes::eStaging);
|
||||
|
||||
this->mStagingTensor->init(
|
||||
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer, data);
|
||||
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
|
||||
|
||||
this->mStagingTensor->mapDataIntoHostMemory();
|
||||
|
||||
} else {
|
||||
this->mPrimaryTensor->init(
|
||||
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer, data);
|
||||
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -69,6 +70,10 @@ void
|
|||
OpCreateTensor::postSubmit()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called");
|
||||
|
||||
this->mStagingTensor->mapDataFromHostMemory();
|
||||
|
||||
this->mPrimaryTensor->setData(this->mStagingTensor->data());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -96,8 +96,7 @@ OpMult<tX, tY, tZ>::init(std::vector<std::shared_ptr<Tensor>> tensors)
|
|||
|
||||
this->mTensorOutputStaging->init(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mCommandBuffer,
|
||||
this->mTensorOutput->data());
|
||||
this->mCommandBuffer);
|
||||
|
||||
// TODO: Make this path configurable
|
||||
this->mAlgorithm->init("shaders/glsl/opmult.comp.spv", tensors);
|
||||
|
|
@ -110,30 +109,20 @@ OpMult<tX, tY, tZ>::record()
|
|||
SPDLOG_DEBUG("Kompute OpMult record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
//this->mTensorLHS->recordBufferMemoryBarrier(
|
||||
// vk::AccessFlagBits::eHostWrite,
|
||||
// vk::AccessFlagBits::eShaderRead,
|
||||
// vk::PipelineStageFlagBits::eHost,
|
||||
// vk::PipelineStageFlagBits::eComputeShader);
|
||||
//this->mTensorRHS->recordBufferMemoryBarrier(
|
||||
// vk::AccessFlagBits::eHostWrite,
|
||||
// vk::AccessFlagBits::eShaderRead,
|
||||
// vk::PipelineStageFlagBits::eHost,
|
||||
// vk::PipelineStageFlagBits::eComputeShader);
|
||||
this->mTensorLHS->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
this->mTensorRHS->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
|
||||
this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
|
||||
|
||||
// Barrier to ensure the shader code is executed before buffer read
|
||||
//this->mTensorLHS->recordBufferMemoryBarrier(
|
||||
// vk::AccessFlagBits::eShaderWrite,
|
||||
// vk::AccessFlagBits::eTransferRead,
|
||||
// vk::PipelineStageFlagBits::eComputeShader,
|
||||
// vk::PipelineStageFlagBits::eTransfer);
|
||||
//this->mTensorRHS->recordBufferMemoryBarrier(
|
||||
// vk::AccessFlagBits::eShaderWrite,
|
||||
// vk::AccessFlagBits::eTransferRead,
|
||||
// vk::PipelineStageFlagBits::eComputeShader,
|
||||
// vk::PipelineStageFlagBits::eTransfer);
|
||||
this->mTensorOutput->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eShaderWrite,
|
||||
vk::AccessFlagBits::eTransferRead,
|
||||
|
|
@ -148,16 +137,6 @@ OpMult<tX, tY, tZ>::record()
|
|||
vk::AccessFlagBits::eHostRead,
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::PipelineStageFlagBits::eHost);
|
||||
//this->mTensorLHS->recordBufferMemoryBarrier(
|
||||
// vk::AccessFlagBits::eTransferWrite,
|
||||
// vk::AccessFlagBits::eHostRead,
|
||||
// vk::PipelineStageFlagBits::eTransfer,
|
||||
// vk::PipelineStageFlagBits::eHost);
|
||||
//this->mTensorRHS->recordBufferMemoryBarrier(
|
||||
// vk::AccessFlagBits::eTransferWrite,
|
||||
// vk::AccessFlagBits::eHostRead,
|
||||
// vk::PipelineStageFlagBits::eTransfer,
|
||||
// vk::PipelineStageFlagBits::eHost);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
|
|
@ -166,7 +145,7 @@ OpMult<tX, tY, tZ>::postSubmit()
|
|||
{
|
||||
SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called");
|
||||
|
||||
this->mTensorOutputStaging->copyDataFromHostBuffer();
|
||||
this->mTensorOutputStaging->mapDataFromHostMemory();
|
||||
|
||||
this->mTensorOutput->setData(this->mTensorOutputStaging->data());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ Sequence::~Sequence()
|
|||
}
|
||||
this->mDevice->freeCommandBuffers(
|
||||
*this->mCommandPool, 1, this->mCommandBuffer.get());
|
||||
SPDLOG_DEBUG("Kompute Manager Freed CommandBuffer");
|
||||
SPDLOG_DEBUG("Kompute Sequence Freed CommandBuffer");
|
||||
}
|
||||
|
||||
if (this->mFreeCommandPool) {
|
||||
|
|
@ -54,7 +54,7 @@ Sequence::~Sequence()
|
|||
return;
|
||||
}
|
||||
this->mDevice->destroy(*this->mCommandPool);
|
||||
SPDLOG_DEBUG("Kompute Manager Destroyed CommandPool");
|
||||
SPDLOG_DEBUG("Kompute Sequence Destroyed CommandPool");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -139,7 +139,7 @@ Sequence::createCommandPool()
|
|||
this->mCommandPool = std::make_shared<vk::CommandPool>();
|
||||
this->mDevice->createCommandPool(
|
||||
&commandPoolInfo, nullptr, this->mCommandPool.get());
|
||||
SPDLOG_DEBUG("Kompute Manager Command Pool Created");
|
||||
SPDLOG_DEBUG("Kompute Sequence Command Pool Created");
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -161,7 +161,7 @@ Sequence::createCommandBuffer()
|
|||
this->mCommandBuffer = std::make_shared<vk::CommandBuffer>();
|
||||
this->mDevice->allocateCommandBuffers(&commandBufferAllocateInfo,
|
||||
this->mCommandBuffer.get());
|
||||
SPDLOG_DEBUG("Kompute Manager Command Buffer Created");
|
||||
SPDLOG_DEBUG("Kompute Sequence Command Buffer Created");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,15 +36,19 @@ class Sequence
|
|||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Template only valid with OpBase derived classes");
|
||||
|
||||
SPDLOG_DEBUG("Kompute Sequence record");
|
||||
SPDLOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
SPDLOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
T* op =
|
||||
new T(this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
|
||||
OpBase* baseOp = dynamic_cast<OpBase*>(op);
|
||||
|
||||
std::unique_ptr<OpBase> baseOpPtr{ baseOp };
|
||||
|
||||
SPDLOG_DEBUG("Kompute Sequence running init on OpBase derived class instance");
|
||||
baseOpPtr->init(std::forward<TArgs>(args)...);
|
||||
|
||||
SPDLOG_DEBUG("Kompute Sequence running record on OpBase derived class instance");
|
||||
baseOpPtr->record();
|
||||
|
||||
mOperations.push_back(std::move(baseOpPtr));
|
||||
|
|
|
|||
|
|
@ -1,4 +1,8 @@
|
|||
|
||||
#if DEBUG
|
||||
#include <spdlog/fmt/bundled/ranges.h>
|
||||
#endif
|
||||
|
||||
#include "Tensor.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
|
@ -11,7 +15,7 @@ Tensor::Tensor()
|
|||
|
||||
Tensor::Tensor(std::vector<uint32_t> data, TensorTypes tensorType)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Tensor constructor data and type");
|
||||
SPDLOG_DEBUG("Kompute Tensor constructor data: {}, and type: {}", data, tensorType);
|
||||
|
||||
this->mData = data;
|
||||
this->mShape = { data.size() };
|
||||
|
|
@ -54,11 +58,10 @@ Tensor::~Tensor()
|
|||
void
|
||||
Tensor::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<uint32_t> data)
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
||||
{
|
||||
SPDLOG_DEBUG(
|
||||
"Kompute Tensor running init with Vulkan params and data size: {}", data.size());
|
||||
"Kompute Tensor running init with Vulkan params and num data elementS: {}", this->mData.size());
|
||||
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
|
|
@ -66,7 +69,7 @@ Tensor::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
|||
|
||||
this->mIsInit = true;
|
||||
|
||||
this->createBuffer(data.data());
|
||||
this->createBuffer();
|
||||
}
|
||||
|
||||
std::vector<uint32_t>
|
||||
|
|
@ -131,9 +134,6 @@ Tensor::recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor)
|
|||
// TODO: Ensure command buffer is in same device from buffer
|
||||
this->mCommandBuffer->copyBuffer(
|
||||
*copyFromTensor->mBuffer, *this->mBuffer, copyRegion);
|
||||
|
||||
// TODO: Ensure copied data is consistent with device
|
||||
this->mData = copyFromTensor->mData;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -173,13 +173,14 @@ Tensor::constructDescriptorBufferInfo()
|
|||
}
|
||||
|
||||
void
|
||||
Tensor::copyDataFromHostBuffer()
|
||||
Tensor::mapDataFromHostMemory()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Tensor copying data from host buffer");
|
||||
SPDLOG_DEBUG("Kompute Tensor mapping data from host buffer");
|
||||
|
||||
if (this->mTensorType != TensorTypes::eStaging) {
|
||||
spdlog::warn("Copying tensor data manually to DEVICE buffer instead of "
|
||||
"using record GPU command");
|
||||
spdlog::error("Mapping tensor data manually from DEVICE buffer instead of "
|
||||
"using record GPU command with staging buffer");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::DeviceSize bufferSize = this->memorySize();
|
||||
|
|
@ -192,14 +193,16 @@ Tensor::copyDataFromHostBuffer()
|
|||
}
|
||||
|
||||
void
|
||||
Tensor::copyDataToHostBuffer()
|
||||
Tensor::mapDataIntoHostMemory()
|
||||
{
|
||||
|
||||
SPDLOG_DEBUG("Kompute Tensor copying data to buffer");
|
||||
SPDLOG_DEBUG("Kompute Tensor local mapping tensor data to host buffer");
|
||||
|
||||
// TODO: Verify if there are situations where we want to copy to device memory
|
||||
if (this->mTensorType != TensorTypes::eStaging) {
|
||||
spdlog::warn("Copying tensor data manually to DEVICE buffer instead of "
|
||||
"using record GPU command");
|
||||
spdlog::error("Mapping tensor data manually to DEVICE memory instead of "
|
||||
"using record GPU command with staging buffer");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::DeviceSize bufferSize = this->memorySize();
|
||||
|
|
@ -253,7 +256,7 @@ Tensor::getMemoryPropertyFlags()
|
|||
}
|
||||
|
||||
void
|
||||
Tensor::createBuffer(void* data)
|
||||
Tensor::createBuffer()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Tensor creating buffer");
|
||||
|
||||
|
|
@ -331,10 +334,6 @@ Tensor::createBuffer(void* data)
|
|||
this->mDevice->bindBufferMemory(*this->mBuffer, *this->mMemory, 0);
|
||||
|
||||
SPDLOG_DEBUG("Kompute Tensor buffer & memory creation successful");
|
||||
|
||||
if (data != nullptr) {
|
||||
this->copyDataToHostBuffer();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,11 +33,10 @@ class Tensor
|
|||
|
||||
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<uint32_t> data = std::vector<uint32_t>());
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer);
|
||||
|
||||
// Create functions
|
||||
void createBuffer(void* data = nullptr);
|
||||
void createBuffer();
|
||||
|
||||
// Getter functions
|
||||
std::vector<uint32_t> data();
|
||||
|
|
@ -60,8 +59,8 @@ class Tensor
|
|||
|
||||
// Util functions
|
||||
vk::DescriptorBufferInfo constructDescriptorBufferInfo();
|
||||
void copyDataFromHostBuffer();
|
||||
void copyDataToHostBuffer();
|
||||
void mapDataFromHostMemory();
|
||||
void mapDataIntoHostMemory();
|
||||
|
||||
private:
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue