Updated Tensor Memory to hold staging within class

This commit is contained in:
Alejandro Saucedo 2021-02-08 07:17:54 +00:00
parent b61f3f2297
commit 04853df469
11 changed files with 97 additions and 115 deletions

View file

@ -131,6 +131,26 @@ class Tensor
std::shared_ptr<Tensor> copyFromTensor,
bool createBarrier);
/**
* Records a copy from the internal staging memory to the device memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. No default value is declared, so callers must pass it explicitly.
*/
void recordCopyFromStagingToDevice(std::shared_ptr<vk::CommandBuffer> commandBuffer,
bool createBarrier);
/**
* Records a copy from the internal device memory to the staging memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. No default value is declared, so callers must pass it explicitly.
*/
void recordCopyFromDeviceToStaging(std::shared_ptr<vk::CommandBuffer> commandBuffer,
bool createBarrier);
/**
* Records the buffer memory barrier into the command buffer which
* ensures that relevant data transfers are carried out correctly.
@ -174,13 +194,13 @@ class Tensor
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::Buffer> mPrimaryBuffer;
bool mFreePrimaryBuffer;
bool mFreePrimaryBuffer = false;
std::shared_ptr<vk::Buffer> mStagingBuffer;
bool mFreeStagingBuffer;
bool mFreeStagingBuffer = false;
std::shared_ptr<vk::DeviceMemory> mPrimaryMemory;
bool mFreePrimaryMemory;
bool mFreePrimaryMemory = false;
std::shared_ptr<vk::DeviceMemory> mStagingMemory;
bool mFreeStagingMemory;
bool mFreeStagingMemory = false;
// -------------- ALWAYS OWNED RESOURCES
std::vector<float> mData;
@ -193,6 +213,7 @@ class Tensor
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
void createBuffer(std::shared_ptr<vk::Buffer> buffer, vk::BufferUsageFlags bufferUsageFlags);
void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer, std::shared_ptr<vk::DeviceMemory> memory, vk::MemoryPropertyFlags memoryPropertyFlags);
void copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer, std::shared_ptr<vk::Buffer> bufferFrom, std::shared_ptr<vk::Buffer> bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion, bool createBarrier);
// Private util functions
vk::BufferUsageFlags getPrimaryBufferUsageFlags();

View file

@ -78,9 +78,6 @@ class OpAlgoLhsRhsOut : public OpAlgoBase
std::shared_ptr<Tensor> mTensorLHS; ///< Reference to the parameter used in the left hand side equation of the shader
std::shared_ptr<Tensor> mTensorRHS; ///< Reference to the parameter used in the right hand side equation of the shader
std::shared_ptr<Tensor> mTensorOutput; ///< Reference to the parameter used in the output of the shader and will be copied with a staging vector
// -------------- ALWAYS OWNED RESOURCES
std::shared_ptr<Tensor> mTensorOutputStaging; ///< Staging temporary tensor used to copy the output of the tensor
};
} // End namespace kp

View file

@ -69,7 +69,7 @@ class OpBase
if (tensor && tensor->isInit()) {
tensor->freeMemoryDestroyGPUResources();
} else {
SPDLOG_ERROR("Kompute OpBase expected to free "
SPDLOG_WARN("Kompute OpBase expected to free "
"tensor but has already been freed.");
}
}

View file

@ -69,8 +69,6 @@ class OpTensorCreate : public OpBase
private:
// Never owned resources
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
};
} // End namespace kp

View file

@ -9,7 +9,7 @@
namespace kp {
/**
Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the record (as opposed to during the sequence eval/submit). This function cannot be carried out for TensorTypes::eStaging.
Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be synced into GPU memory, which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
*/
class OpTensorSyncDevice : public OpBase
{
@ -35,12 +35,12 @@ class OpTensorSyncDevice : public OpBase
~OpTensorSyncDevice() override;
/**
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStaging. For staging tensors in host memory, the map is performed during the init function.
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
*/
void init() override;
/**
* For device tensors, it records the copy command to the device tensor from the temporary staging tensor.
* For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
*/
void record() override;
@ -55,8 +55,6 @@ class OpTensorSyncDevice : public OpBase
virtual void postEval() override;
private:
// Never owned resources
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
};
} // End namespace kp

View file

@ -9,7 +9,7 @@
namespace kp {
/**
Operation that syncs tensor's local data by mapping the data from device memory into the local vector. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the postSubmit (there will be no copy during the sequence eval/submit). This function cannot be carried out for TensorTypes::eStaging.
Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be synced from GPU memory, which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
*/
class OpTensorSyncLocal : public OpBase
{
@ -30,17 +30,17 @@ class OpTensorSyncLocal : public OpBase
std::vector<std::shared_ptr<Tensor>> tensors);
/**
* Default destructor. This class manages the memory of the staging tensors it owns but these are released in the postSubmit, before it arrives to the destructor.
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
*/
~OpTensorSyncLocal() override;
/**
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStaging.
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
*/
void init() override;
/**
* For device tensors, it records the copy command into the staging tensor from the device tensor.
* For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory.
*/
void record() override;
@ -56,8 +56,6 @@ class OpTensorSyncLocal : public OpBase
private:
// Never owned resources
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
};
} // End namespace kp