Updated tensor to copy data to memory explicitly; still assessing why the copy doesn't persist through operations

This commit is contained in:
Alejandro Saucedo 2020-08-22 18:08:56 +01:00
parent 6f0203b863
commit e047aa3b43
10 changed files with 65 additions and 79 deletions

View file

@ -1,21 +1,17 @@
#version 450
layout(binding = 0) buffer tensorLhs {
layout(set = 0, binding = 0) buffer tensorLhs {
uint valuesLhs[ ];
};
layout(binding = 1) buffer tensorRhs {
layout(set = 0, binding = 1) buffer tensorRhs {
uint valuesRhs[ ];
};
layout(binding = 2) buffer tensorOutput {
layout(set = 0, binding = 2) buffer tensorOutput {
uint valuesOutput[ ];
};
layout(binding = 3) buffer tensorInvalid {
uint valuesInvalid[ ];
};
// TODO: Explore how to make layout inside shader dynamic
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
@ -25,10 +21,7 @@ void main()
//valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
// FOR TESTING
valuesOutput[index] = 100 + index;
valuesRhs[index] = 100 + index;
valuesLhs[index] = 100 + index;
valuesInvalid[index] = 100 + index;
valuesOutput[index] = valuesLhs[index] + valuesRhs[index];
}

Binary file not shown.

View file

@ -41,6 +41,10 @@ Algorithm::init(std::string shaderFilePath,
this->createPipeline();
}
// Placeholder for descriptor pool creation: the body is currently empty, so
// calling this function performs no work.
void Algorithm::createDescriptorPool() {
}
void
Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
{

View file

@ -55,9 +55,12 @@ class Algorithm
bool mFreePipeline = false;
// Create util functions
void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
void createShaderModule(std::string shaderFilePath);
void createPipeline();
// Parameters
void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
void createDescriptorPool();
};
} // End namespace kp

View file

@ -37,7 +37,6 @@ OpCreateTensor::init(std::vector<std::shared_ptr<Tensor>> tensors)
}
this->mPrimaryTensor = tensors[0];
std::vector<uint32_t> data = this->mPrimaryTensor->data();
if (this->mPrimaryTensor->tensorType() == Tensor::TensorTypes::eDevice) {
this->mPrimaryTensor->init(
@ -47,11 +46,13 @@ OpCreateTensor::init(std::vector<std::shared_ptr<Tensor>> tensors)
this->mPrimaryTensor->data(), Tensor::TensorTypes::eStaging);
this->mStagingTensor->init(
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer, data);
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
this->mStagingTensor->mapDataIntoHostMemory();
} else {
this->mPrimaryTensor->init(
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer, data);
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
}
}
@ -69,6 +70,10 @@ void
OpCreateTensor::postSubmit()
{
// Post-submit step: calls mapDataFromHostMemory() on the staging tensor and
// then stores the staging tensor's data() on the primary tensor via setData().
SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called");
// NOTE(review): init() appears to initialise mStagingTensor only when the
// primary tensor is of type eDevice; confirm mStagingTensor is valid here for
// other tensor types before it is dereferenced.
this->mStagingTensor->mapDataFromHostMemory();
this->mPrimaryTensor->setData(this->mStagingTensor->data());
}
}

View file

@ -96,8 +96,7 @@ OpMult<tX, tY, tZ>::init(std::vector<std::shared_ptr<Tensor>> tensors)
this->mTensorOutputStaging->init(this->mPhysicalDevice,
this->mDevice,
this->mCommandBuffer,
this->mTensorOutput->data());
this->mCommandBuffer);
// TODO: Make this path configurable
this->mAlgorithm->init("shaders/glsl/opmult.comp.spv", tensors);
@ -110,30 +109,20 @@ OpMult<tX, tY, tZ>::record()
SPDLOG_DEBUG("Kompute OpMult record called");
// Barrier to ensure the data is finished writing to buffer memory
//this->mTensorLHS->recordBufferMemoryBarrier(
// vk::AccessFlagBits::eHostWrite,
// vk::AccessFlagBits::eShaderRead,
// vk::PipelineStageFlagBits::eHost,
// vk::PipelineStageFlagBits::eComputeShader);
//this->mTensorRHS->recordBufferMemoryBarrier(
// vk::AccessFlagBits::eHostWrite,
// vk::AccessFlagBits::eShaderRead,
// vk::PipelineStageFlagBits::eHost,
// vk::PipelineStageFlagBits::eComputeShader);
this->mTensorLHS->recordBufferMemoryBarrier(
vk::AccessFlagBits::eHostWrite,
vk::AccessFlagBits::eShaderRead,
vk::PipelineStageFlagBits::eHost,
vk::PipelineStageFlagBits::eComputeShader);
this->mTensorRHS->recordBufferMemoryBarrier(
vk::AccessFlagBits::eHostWrite,
vk::AccessFlagBits::eShaderRead,
vk::PipelineStageFlagBits::eHost,
vk::PipelineStageFlagBits::eComputeShader);
this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
// Barrier to ensure the shader code is executed before buffer read
//this->mTensorLHS->recordBufferMemoryBarrier(
// vk::AccessFlagBits::eShaderWrite,
// vk::AccessFlagBits::eTransferRead,
// vk::PipelineStageFlagBits::eComputeShader,
// vk::PipelineStageFlagBits::eTransfer);
//this->mTensorRHS->recordBufferMemoryBarrier(
// vk::AccessFlagBits::eShaderWrite,
// vk::AccessFlagBits::eTransferRead,
// vk::PipelineStageFlagBits::eComputeShader,
// vk::PipelineStageFlagBits::eTransfer);
this->mTensorOutput->recordBufferMemoryBarrier(
vk::AccessFlagBits::eShaderWrite,
vk::AccessFlagBits::eTransferRead,
@ -148,16 +137,6 @@ OpMult<tX, tY, tZ>::record()
vk::AccessFlagBits::eHostRead,
vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eHost);
//this->mTensorLHS->recordBufferMemoryBarrier(
// vk::AccessFlagBits::eTransferWrite,
// vk::AccessFlagBits::eHostRead,
// vk::PipelineStageFlagBits::eTransfer,
// vk::PipelineStageFlagBits::eHost);
//this->mTensorRHS->recordBufferMemoryBarrier(
// vk::AccessFlagBits::eTransferWrite,
// vk::AccessFlagBits::eHostRead,
// vk::PipelineStageFlagBits::eTransfer,
// vk::PipelineStageFlagBits::eHost);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
@ -166,7 +145,7 @@ OpMult<tX, tY, tZ>::postSubmit()
{
SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called");
this->mTensorOutputStaging->copyDataFromHostBuffer();
this->mTensorOutputStaging->mapDataFromHostMemory();
this->mTensorOutput->setData(this->mTensorOutputStaging->data());
}

View file

@ -43,7 +43,7 @@ Sequence::~Sequence()
}
this->mDevice->freeCommandBuffers(
*this->mCommandPool, 1, this->mCommandBuffer.get());
SPDLOG_DEBUG("Kompute Manager Freed CommandBuffer");
SPDLOG_DEBUG("Kompute Sequence Freed CommandBuffer");
}
if (this->mFreeCommandPool) {
@ -54,7 +54,7 @@ Sequence::~Sequence()
return;
}
this->mDevice->destroy(*this->mCommandPool);
SPDLOG_DEBUG("Kompute Manager Destroyed CommandPool");
SPDLOG_DEBUG("Kompute Sequence Destroyed CommandPool");
}
}
@ -139,7 +139,7 @@ Sequence::createCommandPool()
this->mCommandPool = std::make_shared<vk::CommandPool>();
this->mDevice->createCommandPool(
&commandPoolInfo, nullptr, this->mCommandPool.get());
SPDLOG_DEBUG("Kompute Manager Command Pool Created");
SPDLOG_DEBUG("Kompute Sequence Command Pool Created");
}
void
@ -161,7 +161,7 @@ Sequence::createCommandBuffer()
this->mCommandBuffer = std::make_shared<vk::CommandBuffer>();
this->mDevice->allocateCommandBuffers(&commandBufferAllocateInfo,
this->mCommandBuffer.get());
SPDLOG_DEBUG("Kompute Manager Command Buffer Created");
SPDLOG_DEBUG("Kompute Sequence Command Buffer Created");
}
}

View file

@ -36,15 +36,19 @@ class Sequence
static_assert(std::is_base_of<OpBase, T>::value,
"Template only valid with OpBase derived classes");
SPDLOG_DEBUG("Kompute Sequence record");
SPDLOG_DEBUG("Kompute Sequence record function started");
SPDLOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
T* op =
new T(this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
OpBase* baseOp = dynamic_cast<OpBase*>(op);
std::unique_ptr<OpBase> baseOpPtr{ baseOp };
SPDLOG_DEBUG("Kompute Sequence running init on OpBase derived class instance");
baseOpPtr->init(std::forward<TArgs>(args)...);
SPDLOG_DEBUG("Kompute Sequence running record on OpBase derived class instance");
baseOpPtr->record();
mOperations.push_back(std::move(baseOpPtr));

View file

@ -1,4 +1,8 @@
#if DEBUG
#include <spdlog/fmt/bundled/ranges.h>
#endif
#include "Tensor.hpp"
namespace kp {
@ -11,7 +15,7 @@ Tensor::Tensor()
Tensor::Tensor(std::vector<uint32_t> data, TensorTypes tensorType)
{
SPDLOG_DEBUG("Kompute Tensor constructor data and type");
SPDLOG_DEBUG("Kompute Tensor constructor data: {}, and type: {}", data, tensorType);
this->mData = data;
this->mShape = { data.size() };
@ -54,11 +58,10 @@ Tensor::~Tensor()
void
Tensor::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<uint32_t> data)
std::shared_ptr<vk::CommandBuffer> commandBuffer)
{
SPDLOG_DEBUG(
"Kompute Tensor running init with Vulkan params and data size: {}", data.size());
"Kompute Tensor running init with Vulkan params and num data elementS: {}", this->mData.size());
this->mPhysicalDevice = physicalDevice;
this->mDevice = device;
@ -66,7 +69,7 @@ Tensor::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
this->mIsInit = true;
this->createBuffer(data.data());
this->createBuffer();
}
std::vector<uint32_t>
@ -131,9 +134,6 @@ Tensor::recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor)
// TODO: Ensure command buffer is in same device from buffer
this->mCommandBuffer->copyBuffer(
*copyFromTensor->mBuffer, *this->mBuffer, copyRegion);
// TODO: Ensure copied data is consistent with device
this->mData = copyFromTensor->mData;
}
void
@ -173,13 +173,14 @@ Tensor::constructDescriptorBufferInfo()
}
void
Tensor::copyDataFromHostBuffer()
Tensor::mapDataFromHostMemory()
{
SPDLOG_DEBUG("Kompute Tensor copying data from host buffer");
SPDLOG_DEBUG("Kompute Tensor mapping data from host buffer");
if (this->mTensorType != TensorTypes::eStaging) {
spdlog::warn("Copying tensor data manually to DEVICE buffer instead of "
"using record GPU command");
spdlog::error("Mapping tensor data manually from DEVICE buffer instead of "
"using record GPU command with staging buffer");
return;
}
vk::DeviceSize bufferSize = this->memorySize();
@ -192,14 +193,16 @@ Tensor::copyDataFromHostBuffer()
}
void
Tensor::copyDataToHostBuffer()
Tensor::mapDataIntoHostMemory()
{
SPDLOG_DEBUG("Kompute Tensor copying data to buffer");
SPDLOG_DEBUG("Kompute Tensor local mapping tensor data to host buffer");
// TODO: Verify if there are situations where we want to copy to device memory
if (this->mTensorType != TensorTypes::eStaging) {
spdlog::warn("Copying tensor data manually to DEVICE buffer instead of "
"using record GPU command");
spdlog::error("Mapping tensor data manually to DEVICE memory instead of "
"using record GPU command with staging buffer");
return;
}
vk::DeviceSize bufferSize = this->memorySize();
@ -253,7 +256,7 @@ Tensor::getMemoryPropertyFlags()
}
void
Tensor::createBuffer(void* data)
Tensor::createBuffer()
{
SPDLOG_DEBUG("Kompute Tensor creating buffer");
@ -331,10 +334,6 @@ Tensor::createBuffer(void* data)
this->mDevice->bindBufferMemory(*this->mBuffer, *this->mMemory, 0);
SPDLOG_DEBUG("Kompute Tensor buffer & memory creation successful");
if (data != nullptr) {
this->copyDataToHostBuffer();
}
}
}

View file

@ -33,11 +33,10 @@ class Tensor
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<uint32_t> data = std::vector<uint32_t>());
std::shared_ptr<vk::CommandBuffer> commandBuffer);
// Create functions
void createBuffer(void* data = nullptr);
void createBuffer();
// Getter functions
std::vector<uint32_t> data();
@ -60,8 +59,8 @@ class Tensor
// Util functions
vk::DescriptorBufferInfo constructDescriptorBufferInfo();
void copyDataFromHostBuffer();
void copyDataToHostBuffer();
void mapDataFromHostMemory();
void mapDataIntoHostMemory();
private:
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;