Merge pull request #182 from EthicalML/181_memory_barriers

Updated memory barriers to include staging buffers
This commit is contained in:
Alejandro Saucedo 2021-03-13 10:03:14 +00:00 committed by GitHub
commit 00f02cb9ad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 345 additions and 72 deletions

View file

@ -72,8 +72,7 @@ Tensor::isInit()
void
Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
std::shared_ptr<Tensor> copyFromTensor,
bool createBarrier)
std::shared_ptr<Tensor> copyFromTensor)
{
vk::DeviceSize bufferSize(this->memorySize());
@ -85,13 +84,11 @@ Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
copyFromTensor->mPrimaryBuffer,
this->mPrimaryBuffer,
bufferSize,
copyRegion,
createBarrier);
copyRegion);
}
void
Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer,
bool createBarrier)
Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer)
{
vk::DeviceSize bufferSize(this->memorySize());
vk::BufferCopy copyRegion(0, 0, bufferSize);
@ -102,13 +99,11 @@ Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer,
this->mStagingBuffer,
this->mPrimaryBuffer,
bufferSize,
copyRegion,
createBarrier);
copyRegion);
}
void
Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer,
bool createBarrier)
Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer)
{
vk::DeviceSize bufferSize(this->memorySize());
vk::BufferCopy copyRegion(0, 0, bufferSize);
@ -119,8 +114,7 @@ Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer,
this->mPrimaryBuffer,
this->mStagingBuffer,
bufferSize,
copyRegion,
createBarrier);
copyRegion);
}
void
@ -128,24 +122,49 @@ Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
std::shared_ptr<vk::Buffer> bufferFrom,
std::shared_ptr<vk::Buffer> bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion,
bool createBarrier)
vk::BufferCopy copyRegion)
{
commandBuffer.copyBuffer(*bufferFrom, *bufferTo, copyRegion);
}
if (createBarrier) {
// Buffer to ensure wait until data is copied to staging buffer
this->recordBufferMemoryBarrier(commandBuffer,
vk::AccessFlagBits::eTransferWrite,
vk::AccessFlagBits::eHostRead,
vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eHost);
}
// Records a buffer memory barrier for this tensor's primary buffer into the
// given command buffer.
//
// The access masks and pipeline stage masks are passed through unchanged to
// recordBufferMemoryBarrier; this wrapper only selects which buffer the
// barrier applies to.
//
// @param commandBuffer Command buffer the barrier is recorded into
// @param srcAccessMask Access mask forwarded as the barrier's source access
// @param dstAccessMask Access mask forwarded as the barrier's destination access
// @param srcStageMask Pipeline stage forwarded as the source stage
// @param dstStageMask Pipeline stage forwarded as the destination stage
void
Tensor::recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
                                         vk::AccessFlagBits srcAccessMask,
                                         vk::AccessFlagBits dstAccessMask,
                                         vk::PipelineStageFlagBits srcStageMask,
                                         vk::PipelineStageFlagBits dstStageMask)
{
    KP_LOG_DEBUG("Kompute Tensor recording PRIMARY buffer memory barrier");

    // Name the target buffer explicitly so the forwarding call reads clearly.
    const vk::Buffer& primaryBuffer = *this->mPrimaryBuffer;

    this->recordBufferMemoryBarrier(
      commandBuffer, primaryBuffer, srcAccessMask, dstAccessMask, srcStageMask, dstStageMask);
}
// Records a buffer memory barrier for this tensor's staging buffer into the
// given command buffer.
//
// The access masks and pipeline stage masks are passed through unchanged to
// recordBufferMemoryBarrier; this wrapper only selects the staging buffer as
// the barrier's target.
//
// @param commandBuffer Command buffer the barrier is recorded into
// @param srcAccessMask Access mask forwarded as the barrier's source access
// @param dstAccessMask Access mask forwarded as the barrier's destination access
// @param srcStageMask Pipeline stage forwarded as the source stage
// @param dstStageMask Pipeline stage forwarded as the destination stage
void
Tensor::recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
                                         vk::AccessFlagBits srcAccessMask,
                                         vk::AccessFlagBits dstAccessMask,
                                         vk::PipelineStageFlagBits srcStageMask,
                                         vk::PipelineStageFlagBits dstStageMask)
{
    // The log message names the STAGING buffer so it can be told apart from
    // the primary-buffer barrier when reading debug output.
    KP_LOG_DEBUG("Kompute Tensor recording STAGING buffer memory barrier");

    this->recordBufferMemoryBarrier(commandBuffer,
                                    *this->mStagingBuffer,
                                    srcAccessMask,
                                    dstAccessMask,
                                    srcStageMask,
                                    dstStageMask);
}
void
Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
const vk::Buffer& buffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
@ -156,7 +175,7 @@ Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::DeviceSize bufferSize = this->memorySize();
vk::BufferMemoryBarrier bufferMemoryBarrier;
bufferMemoryBarrier.buffer = *this->mPrimaryBuffer;
bufferMemoryBarrier.buffer = buffer;
bufferMemoryBarrier.size = bufferSize;
bufferMemoryBarrier.srcAccessMask = srcAccessMask;
bufferMemoryBarrier.dstAccessMask = dstAccessMask;
@ -241,7 +260,8 @@ Tensor::getStagingMemoryPropertyFlags()
{
switch (this->mTensorType) {
case TensorTypes::eDevice:
return vk::MemoryPropertyFlagBits::eHostVisible;
return vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent;
break;
default:
throw std::runtime_error("Kompute Tensor invalid tensor type");