Updated Tensor Memory to hold staging within class
This commit is contained in:
parent
b61f3f2297
commit
04853df469
11 changed files with 97 additions and 115 deletions
|
|
@ -65,11 +65,6 @@ OpAlgoLhsRhsOut::init()
|
|||
" Output: " + std::to_string(this->mTensorOutput->size()));
|
||||
}
|
||||
|
||||
this->mTensorOutputStaging = std::make_shared<Tensor>(
|
||||
this->mTensorOutput->data(), Tensor::TensorTypes::eStaging);
|
||||
|
||||
this->mTensorOutputStaging->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data");
|
||||
|
||||
std::vector<char> shaderFileData = this->fetchSpirvBinaryData();
|
||||
|
|
@ -110,8 +105,10 @@ OpAlgoLhsRhsOut::record()
|
|||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eTransfer);
|
||||
|
||||
this->mTensorOutputStaging->recordCopyFrom(
|
||||
this->mCommandBuffer, this->mTensorOutput, true);
|
||||
if (this->mTensorOutput->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensorOutput->recordCopyFromDeviceToStaging(
|
||||
this->mCommandBuffer, true);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -119,9 +116,7 @@ OpAlgoLhsRhsOut::postEval()
|
|||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called");
|
||||
|
||||
this->mTensorOutputStaging->mapDataFromHostMemory();
|
||||
|
||||
this->mTensorOutput->setData(this->mTensorOutputStaging->data());
|
||||
this->mTensorOutput->mapDataFromHostMemory();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,16 +23,6 @@ OpTensorCreate::OpTensorCreate(
|
|||
OpTensorCreate::~OpTensorCreate()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorCreate destructor started");
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpTensorCreate freeing staging tensors");
|
||||
for (std::shared_ptr<Tensor> tensor : this->mStagingTensors) {
|
||||
if (tensor && tensor->isInit()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
} else {
|
||||
SPDLOG_ERROR("Kompute OpTensorCreate expected to free "
|
||||
"tensor but has already been freed.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -50,27 +40,10 @@ OpTensorCreate::init()
|
|||
throw std::runtime_error(
|
||||
"Kompute OpTensorCreate: Tensor has already been initialized");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
tensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
std::shared_ptr<Tensor> stagingTensor = std::make_shared<Tensor>(
|
||||
tensor->data(), Tensor::TensorTypes::eStaging);
|
||||
|
||||
stagingTensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
stagingTensor->mapDataIntoHostMemory();
|
||||
|
||||
this->mStagingTensors.push_back(stagingTensor);
|
||||
|
||||
} else {
|
||||
|
||||
if (tensor->tensorType() != Tensor::TensorTypes::eStorage) {
|
||||
tensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
tensor->mapDataIntoHostMemory();
|
||||
|
||||
// We push a nullptr when no staging tensor is needed to match
|
||||
// index number in array to have one to one mapping with tensors
|
||||
this->mStagingTensors.push_back(nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -82,8 +55,8 @@ OpTensorCreate::record()
|
|||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensors[i]->recordCopyFrom(
|
||||
this->mCommandBuffer, this->mStagingTensors[i], false);
|
||||
this->mTensors[i]->recordCopyFromStagingToDevice(
|
||||
this->mCommandBuffer, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,25 +41,11 @@ OpTensorSyncDevice::init()
|
|||
"has not been initialized");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
|
||||
throw std::runtime_error(
|
||||
SPDLOG_WARN(
|
||||
"Kompute OpTensorSyncLocal tensor parameter is of type "
|
||||
"TensorTypes::eStorage and hence cannot be used to receive or "
|
||||
"pass data.");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
|
||||
std::shared_ptr<Tensor> stagingTensor = std::make_shared<Tensor>(
|
||||
tensor->data(), Tensor::TensorTypes::eStaging);
|
||||
|
||||
stagingTensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
this->mStagingTensors.push_back(stagingTensor);
|
||||
|
||||
} else {
|
||||
// We push a nullptr when no staging tensor is needed to match
|
||||
// index number in array to have one to one mapping with tensors
|
||||
this->mStagingTensors.push_back(nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -70,8 +56,8 @@ OpTensorSyncDevice::record()
|
|||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensors[i]->recordCopyFrom(
|
||||
this->mCommandBuffer, this->mStagingTensors[i], false);
|
||||
this->mTensors[i]->recordCopyFromStagingToDevice(
|
||||
this->mCommandBuffer, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -83,11 +69,8 @@ OpTensorSyncDevice::preEval()
|
|||
|
||||
// Performing sync of data as eval can be called multiple times with same op
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mStagingTensors[i]->setData(this->mTensors[i]->data());
|
||||
this->mStagingTensors[i]->mapDataIntoHostMemory();
|
||||
} else {
|
||||
this->mTensors[i]->mapDataFromHostMemory();
|
||||
if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) {
|
||||
this->mTensors[i]->mapDataIntoHostMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,26 +41,11 @@ OpTensorSyncLocal::init()
|
|||
"Kompute OpTensorSyncLocal: Tensor has not been initialized");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
|
||||
throw std::runtime_error(
|
||||
SPDLOG_WARN(
|
||||
"Kompute OpTensorSyncLocal tensor parameter is of type "
|
||||
"TensorTypes::eStorage and hence cannot be used to receive or "
|
||||
"pass data.");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
|
||||
std::shared_ptr<Tensor> stagingTensor = std::make_shared<Tensor>(
|
||||
tensor->data(), Tensor::TensorTypes::eStaging);
|
||||
|
||||
stagingTensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
this->mStagingTensors.push_back(stagingTensor);
|
||||
|
||||
} else {
|
||||
|
||||
// We push a nullptr when no staging tensor is needed to match
|
||||
// index number in array to have one to one mapping with tensors
|
||||
this->mStagingTensors.push_back(nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -71,8 +56,8 @@ OpTensorSyncLocal::record()
|
|||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mStagingTensors[i]->recordCopyFrom(
|
||||
this->mCommandBuffer, this->mTensors[i], true);
|
||||
this->mTensors[i]->recordCopyFromDeviceToStaging(
|
||||
this->mCommandBuffer, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -90,10 +75,7 @@ OpTensorSyncLocal::postEval()
|
|||
|
||||
SPDLOG_DEBUG("Kompute OpTensorSyncLocal mapping data into tensor local");
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mStagingTensors[i]->mapDataFromHostMemory();
|
||||
this->mTensors[i]->setData(this->mStagingTensors[i]->data());
|
||||
} else {
|
||||
if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) {
|
||||
this->mTensors[i]->mapDataFromHostMemory();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -107,20 +107,51 @@ Tensor::recordCopyFrom(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
|||
std::shared_ptr<Tensor> copyFromTensor,
|
||||
bool createBarrier)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Tensor recordCopyFrom called");
|
||||
|
||||
if (!this->mIsInit || !copyFromTensor->mIsInit) {
|
||||
throw std::runtime_error(
|
||||
"Kompute Tensor attempted to run createBuffer without init");
|
||||
}
|
||||
vk::DeviceSize bufferSize(this->memorySize());
|
||||
vk::BufferCopy copyRegion(0, 0, bufferSize);
|
||||
|
||||
SPDLOG_DEBUG("Kompute Tensor recordCopyFrom data size {}.", bufferSize);
|
||||
|
||||
this->copyBuffer(commandBuffer, copyFromTensor->mPrimaryBuffer, this->mPrimaryBuffer, bufferSize, copyRegion, createBarrier);
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::recordCopyFromStagingToDevice(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
bool createBarrier)
|
||||
{
|
||||
vk::DeviceSize bufferSize(this->memorySize());
|
||||
vk::BufferCopy copyRegion(0, 0, bufferSize);
|
||||
|
||||
SPDLOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize);
|
||||
|
||||
this->copyBuffer(commandBuffer, this->mStagingBuffer, this->mPrimaryBuffer, bufferSize, copyRegion, createBarrier);
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::recordCopyFromDeviceToStaging(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
bool createBarrier)
|
||||
{
|
||||
vk::DeviceSize bufferSize(this->memorySize());
|
||||
vk::BufferCopy copyRegion(0, 0, bufferSize);
|
||||
|
||||
SPDLOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize);
|
||||
|
||||
this->copyBuffer(commandBuffer, this->mPrimaryBuffer, this->mStagingBuffer, bufferSize, copyRegion, createBarrier);
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer, std::shared_ptr<vk::Buffer> bufferFrom, std::shared_ptr<vk::Buffer> bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion, bool createBarrier) {
|
||||
|
||||
if (!this->mIsInit) {
|
||||
throw std::runtime_error(
|
||||
"Kompute Tensor attempted to run copyBuffer without init");
|
||||
}
|
||||
|
||||
commandBuffer->copyBuffer(
|
||||
*copyFromTensor->mPrimaryBuffer, *this->mPrimaryBuffer, copyRegion);
|
||||
*bufferFrom, *bufferTo, copyRegion);
|
||||
|
||||
if (createBarrier) {
|
||||
// Buffer to ensure wait until data is copied to staging buffer
|
||||
|
|
@ -275,7 +306,7 @@ Tensor::getStagingMemoryPropertyFlags()
|
|||
{
|
||||
switch (this->mTensorType) {
|
||||
case TensorTypes::eDevice:
|
||||
return vk::MemoryPropertyFlagBits::eDeviceLocal;
|
||||
return vk::MemoryPropertyFlagBits::eHostVisible;
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("Kompute Tensor invalid tensor type");
|
||||
|
|
@ -299,16 +330,22 @@ Tensor::allocateMemoryCreateGPUResources()
|
|||
throw std::runtime_error("Kompute Tensor device is null");
|
||||
}
|
||||
|
||||
SPDLOG_DEBUG("Kompute Tensor creating primary buffer and memory");
|
||||
|
||||
this->mPrimaryBuffer = std::make_shared<vk::Buffer>();
|
||||
this->createBuffer(this->mPrimaryBuffer, this->getPrimaryBufferUsageFlags());
|
||||
this->mFreePrimaryBuffer = true;
|
||||
this->mPrimaryMemory = std::make_shared<vk::DeviceMemory>();
|
||||
this->allocateBindMemory(this->mPrimaryBuffer, this->mPrimaryMemory, this->getPrimaryMemoryPropertyFlags());
|
||||
this->mFreePrimaryMemory = true;
|
||||
|
||||
if (this->mTensorType == TensorTypes::eDevice) {
|
||||
SPDLOG_DEBUG("Kompute Tensor creating staging buffer and memory");
|
||||
|
||||
this->mStagingBuffer = std::make_shared<vk::Buffer>();
|
||||
this->createBuffer(this->mStagingBuffer, this->getStagingBufferUsageFlags());
|
||||
this->mFreeStagingBuffer = true;
|
||||
this->mStagingMemory = std::make_shared<vk::DeviceMemory>();
|
||||
this->allocateBindMemory(this->mStagingBuffer, this->mStagingMemory, this->getStagingMemoryPropertyFlags());
|
||||
this->mFreeStagingMemory = true;
|
||||
}
|
||||
|
|
@ -330,7 +367,7 @@ Tensor::createBuffer(std::shared_ptr<vk::Buffer> buffer, vk::BufferUsageFlags bu
|
|||
SPDLOG_DEBUG("Kompute Tensor creating buffer with memory size: {}, and "
|
||||
"usage flags: {}",
|
||||
bufferSize,
|
||||
vk::to_string(usageFlags));
|
||||
vk::to_string(bufferUsageFlags));
|
||||
|
||||
// TODO: Explore having concurrent sharing mode (with option)
|
||||
vk::BufferCreateInfo bufferInfo(vk::BufferCreateFlags(),
|
||||
|
|
@ -430,7 +467,7 @@ Tensor::freeMemoryDestroyGPUResources()
|
|||
this->mDevice->freeMemory(
|
||||
*this->mPrimaryMemory,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mDevice = nullptr;
|
||||
this->mPrimaryMemory = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -443,7 +480,7 @@ Tensor::freeMemoryDestroyGPUResources()
|
|||
this->mDevice->freeMemory(
|
||||
*this->mStagingMemory,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mDevice = nullptr;
|
||||
this->mStagingMemory = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -131,6 +131,26 @@ class Tensor
|
|||
std::shared_ptr<Tensor> copyFromTensor,
|
||||
bool createBarrier);
|
||||
|
||||
/**
|
||||
* Records a copy from the internal staging memory to the device memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice.
|
||||
*
|
||||
* @param commandBuffer Vulkan Command Buffer to record the commands into
|
||||
* @param createBarrier Whether to create a barrier that ensures the data is
|
||||
* copied before further operations. Default is true.
|
||||
*/
|
||||
void recordCopyFromStagingToDevice(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
bool createBarrier);
|
||||
|
||||
/**
|
||||
* Records a copy from the internal device memory to the staging memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice.
|
||||
*
|
||||
* @param commandBuffer Vulkan Command Buffer to record the commands into
|
||||
* @param createBarrier Whether to create a barrier that ensures the data is
|
||||
* copied before further operations. Default is true.
|
||||
*/
|
||||
void recordCopyFromDeviceToStaging(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
bool createBarrier);
|
||||
|
||||
/**
|
||||
* Records the buffer memory barrier into the command buffer which
|
||||
* ensures that relevant data transfers are carried out correctly.
|
||||
|
|
@ -174,13 +194,13 @@ class Tensor
|
|||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::Buffer> mPrimaryBuffer;
|
||||
bool mFreePrimaryBuffer;
|
||||
bool mFreePrimaryBuffer = false;
|
||||
std::shared_ptr<vk::Buffer> mStagingBuffer;
|
||||
bool mFreeStagingBuffer;
|
||||
bool mFreeStagingBuffer = false;
|
||||
std::shared_ptr<vk::DeviceMemory> mPrimaryMemory;
|
||||
bool mFreePrimaryMemory;
|
||||
bool mFreePrimaryMemory = false;
|
||||
std::shared_ptr<vk::DeviceMemory> mStagingMemory;
|
||||
bool mFreeStagingMemory;
|
||||
bool mFreeStagingMemory = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<float> mData;
|
||||
|
|
@ -193,6 +213,7 @@ class Tensor
|
|||
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
|
||||
void createBuffer(std::shared_ptr<vk::Buffer> buffer, vk::BufferUsageFlags bufferUsageFlags);
|
||||
void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer, std::shared_ptr<vk::DeviceMemory> memory, vk::MemoryPropertyFlags memoryPropertyFlags);
|
||||
void copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer, std::shared_ptr<vk::Buffer> bufferFrom, std::shared_ptr<vk::Buffer> bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion, bool createBarrier);
|
||||
|
||||
// Private util functions
|
||||
vk::BufferUsageFlags getPrimaryBufferUsageFlags();
|
||||
|
|
|
|||
|
|
@ -78,9 +78,6 @@ class OpAlgoLhsRhsOut : public OpAlgoBase
|
|||
std::shared_ptr<Tensor> mTensorLHS; ///< Reference to the parameter used in the left hand side equation of the shader
|
||||
std::shared_ptr<Tensor> mTensorRHS; ///< Reference to the parameter used in the right hand side equation of the shader
|
||||
std::shared_ptr<Tensor> mTensorOutput; ///< Reference to the parameter used in the output of the shader and will be copied with a staging vector
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::shared_ptr<Tensor> mTensorOutputStaging; ///< Staging temporary tensor user do to copy the output of the tensor
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ class OpBase
|
|||
if (tensor && tensor->isInit()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
} else {
|
||||
SPDLOG_ERROR("Kompute OpBase expected to free "
|
||||
SPDLOG_WARN("Kompute OpBase expected to free "
|
||||
"tensor but has already been freed.");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -69,8 +69,6 @@ class OpTensorCreate : public OpBase
|
|||
|
||||
|
||||
private:
|
||||
// Never owned resources
|
||||
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
namespace kp {
|
||||
|
||||
/**
|
||||
Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the record (as opposed to during the sequence eval/submit). This function cannot be carried out for TensorTypes::eStaging.
|
||||
Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
|
||||
*/
|
||||
class OpTensorSyncDevice : public OpBase
|
||||
{
|
||||
|
|
@ -35,12 +35,12 @@ class OpTensorSyncDevice : public OpBase
|
|||
~OpTensorSyncDevice() override;
|
||||
|
||||
/**
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTpes::eStaging. For staging tensors in host memory, the map is performed during the init function.
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command to the device tensor from the temporary staging tensor.
|
||||
* For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
|
|
@ -55,8 +55,6 @@ class OpTensorSyncDevice : public OpBase
|
|||
virtual void postEval() override;
|
||||
|
||||
private:
|
||||
// Never owned resources
|
||||
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
namespace kp {
|
||||
|
||||
/**
|
||||
Operation that syncs tensor's local data by mapping the data from device memory into the local vector. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the postSubmit (there will be no copy during the sequence eval/submit). This function cannot be carried out for TensorTypes::eStaging.
|
||||
Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
|
||||
*/
|
||||
class OpTensorSyncLocal : public OpBase
|
||||
{
|
||||
|
|
@ -30,17 +30,17 @@ class OpTensorSyncLocal : public OpBase
|
|||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class manages the memory of the staging tensors it owns but these are released in the postSubmit, before it arrives to the destructor.
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorSyncLocal() override;
|
||||
|
||||
/**
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTpes::eStaging.
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command into the staging tensor from the device tensor.
|
||||
* For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
|
|
@ -56,8 +56,6 @@ class OpTensorSyncLocal : public OpBase
|
|||
|
||||
|
||||
private:
|
||||
// Never owned resources
|
||||
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue