Updated memory barriers to include staging buffers

This commit is contained in:
Alejandro Saucedo 2021-03-09 08:06:27 +00:00
parent 1d1018fa0c
commit 263f392cbb
7 changed files with 230 additions and 175 deletions

View file

@ -26,7 +26,7 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer)
// Barrier to ensure the data is finished writing to buffer memory
for (const std::shared_ptr<Tensor>& tensor :
this->mAlgorithm->getTensors()) {
tensor->recordBufferMemoryBarrier(
tensor->recordPrimaryBufferMemoryBarrier(
commandBuffer,
vk::AccessFlagBits::eHostWrite,
vk::AccessFlagBits::eShaderRead,

View file

@ -45,7 +45,7 @@ OpTensorCopy::record(const vk::CommandBuffer& commandBuffer)
// We iterate from the second tensor onwards and record a copy to all
for (size_t i = 1; i < this->mTensors.size(); i++) {
this->mTensors[i]->recordCopyFrom(
commandBuffer, this->mTensors[0], false);
commandBuffer, this->mTensors[0]);
}
}

View file

@ -30,8 +30,7 @@ OpTensorSyncDevice::record(const vk::CommandBuffer& commandBuffer)
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mTensors[i]->recordCopyFromStagingToDevice(commandBuffer,
false);
this->mTensors[i]->recordCopyFromStagingToDevice(commandBuffer);
}
}
}

View file

@ -30,8 +30,14 @@ OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer)
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mTensors[i]->recordCopyFromDeviceToStaging(commandBuffer,
true);
this->mTensors[i]->recordCopyFromDeviceToStaging(commandBuffer);
this->mTensors[i]->recordStagingBufferMemoryBarrier(commandBuffer,
vk::AccessFlagBits::eTransferWrite,
vk::AccessFlagBits::eHostRead,
vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eHost);
}
}
}

View file

@ -70,8 +70,7 @@ Tensor::isInit()
void
Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
std::shared_ptr<Tensor> copyFromTensor,
bool createBarrier)
std::shared_ptr<Tensor> copyFromTensor)
{
vk::DeviceSize bufferSize(this->memorySize());
@ -83,13 +82,11 @@ Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
copyFromTensor->mPrimaryBuffer,
this->mPrimaryBuffer,
bufferSize,
copyRegion,
createBarrier);
copyRegion);
}
void
Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer,
bool createBarrier)
Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer)
{
vk::DeviceSize bufferSize(this->memorySize());
vk::BufferCopy copyRegion(0, 0, bufferSize);
@ -100,13 +97,11 @@ Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer,
this->mStagingBuffer,
this->mPrimaryBuffer,
bufferSize,
copyRegion,
createBarrier);
copyRegion);
}
void
Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer,
bool createBarrier)
Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer)
{
vk::DeviceSize bufferSize(this->memorySize());
vk::BufferCopy copyRegion(0, 0, bufferSize);
@ -117,8 +112,7 @@ Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer,
this->mPrimaryBuffer,
this->mStagingBuffer,
bufferSize,
copyRegion,
createBarrier);
copyRegion);
}
void
@ -126,24 +120,49 @@ Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
std::shared_ptr<vk::Buffer> bufferFrom,
std::shared_ptr<vk::Buffer> bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion,
bool createBarrier)
vk::BufferCopy copyRegion)
{
commandBuffer.copyBuffer(*bufferFrom, *bufferTo, copyRegion);
}
if (createBarrier) {
// Buffer to ensure wait until data is copied to staging buffer
this->recordBufferMemoryBarrier(commandBuffer,
vk::AccessFlagBits::eTransferWrite,
vk::AccessFlagBits::eHostRead,
vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eHost);
}
// Records a buffer memory barrier for this tensor's primary buffer into the
// provided command buffer. All access and stage masks are forwarded unchanged
// to the private recordBufferMemoryBarrier helper.
void
Tensor::recordPrimaryBufferMemoryBarrier(
  const vk::CommandBuffer& commandBuffer,
  vk::AccessFlagBits srcAccessMask,
  vk::AccessFlagBits dstAccessMask,
  vk::PipelineStageFlagBits srcStageMask,
  vk::PipelineStageFlagBits dstStageMask)
{
    KP_LOG_DEBUG("Kompute Tensor recording PRIMARY buffer memory barrier");

    // The barrier targets the primary buffer held by this tensor.
    const vk::Buffer& primaryBuffer = *this->mPrimaryBuffer;
    this->recordBufferMemoryBarrier(commandBuffer,
                                    primaryBuffer,
                                    srcAccessMask,
                                    dstAccessMask,
                                    srcStageMask,
                                    dstStageMask);
}
// Records a buffer memory barrier for this tensor's staging buffer into the
// provided command buffer. All access and stage masks are forwarded unchanged
// to the private recordBufferMemoryBarrier helper.
//
// NOTE(review): mStagingBuffer is dereferenced unconditionally; presumably
// only tensors of type eDevice own a staging buffer - confirm callers guard
// on the tensor type before invoking this.
void
Tensor::recordStagingBufferMemoryBarrier(
  const vk::CommandBuffer& commandBuffer,
  vk::AccessFlagBits srcAccessMask,
  vk::AccessFlagBits dstAccessMask,
  vk::PipelineStageFlagBits srcStageMask,
  vk::PipelineStageFlagBits dstStageMask)
{
    // The message names the STAGING buffer so debug traces can tell this
    // barrier apart from the one recorded on the primary buffer.
    KP_LOG_DEBUG("Kompute Tensor recording STAGING buffer memory barrier");
    this->recordBufferMemoryBarrier(commandBuffer,
                                    *this->mStagingBuffer,
                                    srcAccessMask,
                                    dstAccessMask,
                                    srcStageMask,
                                    dstStageMask);
}
void
Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
const vk::Buffer& buffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
@ -154,7 +173,7 @@ Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::DeviceSize bufferSize = this->memorySize();
vk::BufferMemoryBarrier bufferMemoryBarrier;
bufferMemoryBarrier.buffer = *this->mPrimaryBuffer;
bufferMemoryBarrier.buffer = buffer;
bufferMemoryBarrier.size = bufferSize;
bufferMemoryBarrier.srcAccessMask = srcAccessMask;
bufferMemoryBarrier.dstAccessMask = dstAccessMask;

View file

@ -97,12 +97,9 @@ class Tensor
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param copyFromTensor Tensor to copy the data from
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. Default is true.
*/
void recordCopyFrom(const vk::CommandBuffer& commandBuffer,
std::shared_ptr<Tensor> copyFromTensor,
bool createBarrier);
std::shared_ptr<Tensor> copyFromTensor);
/**
* Records a copy from the internal staging memory to the device memory
@ -110,11 +107,8 @@ class Tensor
* only be relevant for kp::Tensors of type eDevice.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. Default is true.
*/
void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer,
bool createBarrier);
void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer);
/**
* Records a copy from the internal device memory to the staging memory
@ -122,14 +116,11 @@ class Tensor
* only be relevant for kp::Tensors of type eDevice.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. Default is true.
*/
void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer,
bool createBarrier);
void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer);
/**
* Records the buffer memory barrier into the command buffer which
* Records a memory barrier for the primary buffer into the command buffer, which
* ensures that relevant data transfers are carried out correctly.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
@ -138,11 +129,27 @@ class Tensor
* @param srcStageMask Pipeline stage flags for source stage mask
* @param dstStageMask Pipeline stage flags for destination stage mask
*/
void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask);
void recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask);
/**
* Records a memory barrier for the staging buffer into the command buffer, which
* ensures that relevant data transfers are carried out correctly.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param srcAccessMask Access flags for source access mask
* @param dstAccessMask Access flags for destination access mask
* @param srcStageMask Pipeline stage flags for source stage mask
* @param dstStageMask Pipeline stage flags for destination stage mask
*/
void recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask);
/**
* Constructs a vulkan descriptor buffer info which can be used to specify
@ -284,8 +291,13 @@ class Tensor
std::shared_ptr<vk::Buffer> bufferFrom,
std::shared_ptr<vk::Buffer> bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion,
bool createBarrier);
vk::BufferCopy copyRegion);
void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
const vk::Buffer& buffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask);
// Private util functions
vk::BufferUsageFlags getPrimaryBufferUsageFlags();