Merge pull request #146 from EthicalML/36_delete_sequence

Added destroy functions for tensors and sequences (named and object)
This commit is contained in:
Alejandro Saucedo 2021-02-10 18:58:44 +00:00 committed by GitHub
commit e1f7575be2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 810 additions and 203 deletions

View file

@ -170,6 +170,12 @@ run_ci:
####### General project commands #######
# Regenerates python/src/docstrings.hpp by running pybind11_mkdoc over the
# single-include header single_include/kompute/Kompute.hpp.
# NOTE(review): the -I path below is pinned to the 7.5.0 C++ headers under
# /usr/include — presumably it must be adjusted on hosts with another toolchain.
generate_python_docstrings:
python -m pybind11_mkdoc \
-o python/src/docstrings.hpp \
single_include/kompute/Kompute.hpp \
-I/usr/include/c++/7.5.0/
# Installs the Python packages listed in scripts/requirements.txt using pip.
install_python_reqs:
python3 -m pip install -r scripts/requirements.txt

View file

@ -119,20 +119,48 @@ integrate with the vulkan kompute use.
@param device Vulkan logical device to use for all base resources
@param physicalDeviceIndex Index for vulkan physical device used)doc";
static const char *__doc_kp_Manager_tensor =
R"doc(Function that simplifies the common workflow of tensor creation and
initialization. It will take the constructor parameters for a Tensor
and will will us it to create a new Tensor and then create it using
the OpCreateTensor command.
@param data The data to initialize the tensor with @param tensorType
The type of tensor to initialize @returns Initialized Tensor with
memory Syncd to GPU device)doc";
static const char *__doc_kp_Manager_createDevice = R"doc()doc";
static const char *__doc_kp_Manager_createInstance = R"doc()doc";
// Docstrings for the kp::Manager::destroy overloads. The numeric suffix is the
// overload index used by the DOC(kp, Manager, destroy[, N]) lookups.
// NOTE(review): this looks like pybind11_mkdoc output — the same wording
// should live in the header comments or it will be lost on regeneration.
static const char *__doc_kp_Manager_destroy =
R"doc(Destroy owned Vulkan GPU resources and free GPU memory for single
tensor.
@param tensor Single tensor to destroy)doc";
static const char *__doc_kp_Manager_destroy_2 =
R"doc(Destroy owned Vulkan GPU resources and free GPU memory for vector of
tensors.
@param tensors Vector of tensors to destroy)doc";
static const char *__doc_kp_Manager_destroy_3 =
R"doc(Destroy owned Vulkan GPU resources and free GPU memory for vector of
sequences. Destroying by sequence name is more efficient and hence
recommended instead of by object.
@param sequences Vector of shared ptrs with sequences to destroy)doc";
static const char *__doc_kp_Manager_destroy_4 =
R"doc(Destroy owned Vulkan GPU resources and free GPU memory for single
sequence. Destroying by sequence name is more efficient and hence
recommended instead of by object.
@param sequence Single sequence to destroy)doc";
static const char *__doc_kp_Manager_destroy_5 =
R"doc(Destroy owned Vulkan GPU resources and free GPU memory for sequence by
name.
@param sequenceName Single name of named sequence to destroy)doc";
static const char *__doc_kp_Manager_destroy_6 =
R"doc(Destroy owned Vulkan GPU resources and free GPU memory for sequences
using vector of named sequence names.
@param sequenceNames Vector of sequence names to destroy)doc";
static const char *__doc_kp_Manager_evalOp =
R"doc(Function that evaluates operation against named sequence.
@ -178,14 +206,6 @@ R"doc(Function that evaluates operation against a newly created sequence.
TArgs Template parameters that will be used to initialise Operation to
allow for extensible configurations on initialisation)doc";
static const char *__doc_kp_Manager_sequence =
R"doc(Get or create a managed Sequence that will be contained by this
manager. If the named sequence does not currently exist, it would be
created and initialised.
@param sequenceName The name for the named sequence to be retrieved or
created @return Shared pointer to the manager owned sequence resource)doc";
static const char *__doc_kp_Manager_mComputeQueueFamilyIndices = R"doc()doc";
static const char *__doc_kp_Manager_mComputeQueues = R"doc()doc";
@ -202,10 +222,50 @@ static const char *__doc_kp_Manager_mInstance = R"doc()doc";
static const char *__doc_kp_Manager_mManagedSequences = R"doc()doc";
static const char *__doc_kp_Manager_mManagedTensors = R"doc()doc";
static const char *__doc_kp_Manager_mPhysicalDevice = R"doc()doc";
static const char *__doc_kp_Manager_mPhysicalDeviceIndex = R"doc()doc";
// Docstrings for kp::Manager::rebuild (both overloads), kp::Manager::sequence
// and kp::Manager::tensor, looked up through the DOC(...) macro by name.
static const char *__doc_kp_Manager_rebuild =
R"doc(Function that simplifies the common workflow of tensor initialisation.
It will take the constructor parameters for a Tensor and will use them
to create a new Tensor. The tensor memory will then be managed and
owned by the manager.
@param tensors Array of tensors to rebuild @param syncDataToGPU
Whether to sync the data to GPU memory)doc";
static const char *__doc_kp_Manager_rebuild_2 =
R"doc(Function that simplifies the common workflow of tensor initialisation.
It will take the constructor parameters for a Tensor and will use them
to create a new Tensor. The tensor memory will then be managed and
owned by the manager.
@param tensors Single tensor to rebuild @param syncDataToGPU Whether
to sync the data to GPU memory)doc";
static const char *__doc_kp_Manager_sequence =
R"doc(Get or create a managed Sequence that will be contained by this
manager. If the named sequence does not currently exist, it would be
created and initialised.
@param sequenceName The name for the named sequence to be retrieved or
created @param queueIndex The queue to use from the available queues
@return Shared pointer to the manager owned sequence resource)doc";
static const char *__doc_kp_Manager_tensor =
R"doc(Function that simplifies the common workflow of tensor creation and
initialization. It will take the constructor parameters for a Tensor
and will use them to create a new Tensor and then create it. The
tensor memory will then be managed and owned by the manager.
@param data The data to initialize the tensor with @param tensorType
The type of tensor to initialize @param syncDataToGPU Whether to sync
the data to GPU memory @returns Initialized Tensor with memory Syncd
to GPU device)doc";
static const char *__doc_kp_OpAlgoBase =
R"doc(Operation that provides a general abstraction that simplifies the use
of algorithm and parameter components which can be used with shaders.
@ -334,8 +394,6 @@ static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorOutput =
R"doc(< Reference to the parameter used in the output of the shader and will
be copied with a staging vector)doc";
static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorOutputStaging = R"doc(< Staging temporary tensor user do to copy the output of the tensor)doc";
static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorRHS =
R"doc(< Reference to the parameter used in the right hand side equation of
the shader)doc";
@ -371,8 +429,7 @@ sub-components.
@param physicalDevice Vulkan physical device used to find device
queues @param device Vulkan logical device for passing to Algorithm
@param commandBuffer Vulkan Command Buffer to record commands into
@param tensors Tensors that are to be used in this operation @param
freeTensors Whether operation manages the memory of the Tensors)doc";
@param tensors Tensors that are to be used in this operation)doc";
static const char *__doc_kp_OpBase_init =
R"doc(The init function is responsible for setting up all the resources and
@ -463,51 +520,15 @@ static const char *__doc_kp_OpTensorCopy_record =
R"doc(Records the copy commands from the first tensor into all the other
tensors provided. Also optionally records a barrier.)doc";
static const char *__doc_kp_OpTensorCreate =
R"doc(Operation that creates tensor and manages the memory of the components
created)doc";
static const char *__doc_kp_OpTensorCreate_OpTensorCreate = R"doc()doc";
static const char *__doc_kp_OpTensorCreate_OpTensorCreate_2 =
R"doc(Default constructor with parameters that provides the bare minimum
requirements for the operations to be able to create and manage their
sub-components.
@param physicalDevice Vulkan physical device used to find device
queues @param device Vulkan logical device for passing to Algorithm
@param commandBuffer Vulkan Command Buffer to record commands into
@param tensors Tensors that will be used to create in operation.
@param freeTensors Whether operation manages the memory of the Tensors)doc";
static const char *__doc_kp_OpTensorCreate_init =
R"doc(In charge of initialising the primary Tensor as well as the staging
tensor as required. It will only initialise a staging tensor if the
Primary tensor is of type Device. For staging tensors it performs a
mapDataIntoHostMemory which would perform immediately as opposed to on
sequence eval/submission.)doc";
static const char *__doc_kp_OpTensorCreate_mStagingTensors = R"doc()doc";
static const char *__doc_kp_OpTensorCreate_postEval =
R"doc(Performs a copy back into the main tensor to ensure that the data
contained is the one that is now being stored in the GPU.)doc";
static const char *__doc_kp_OpTensorCreate_preEval = R"doc(Does not perform any preEval commands.)doc";
static const char *__doc_kp_OpTensorCreate_record =
R"doc(Record runs the core actions to create the tensors. For device tensors
it records a copyCommand to move the data from the staging tensor to
the device tensor. The mapping for staging tensors happens in the init
function not in the record function.)doc";
static const char *__doc_kp_OpTensorSyncDevice =
R"doc(Operation that syncs tensor's device by mapping local data into the
device memory. For TensorTypes::eDevice it will use a staging tensor
to perform the copy. For TensorTypes::eHost it will only copy the
data and perform a map, which will be executed during the record (as
opposed to during the sequence eval/submit). This function cannot be
carried out for TensorTypes::eHost.)doc";
device memory. For TensorTypes::eDevice it will use a record operation
for the memory to be syncd into GPU memory which means that the
operation will be done in sync with GPU commands. For
TensorTypes::eStaging it will only map the data into host memory which
will happen during preEval before the recorded commands are
dispatched. This operation won't have any effect on
TensorTypes::eStaging.)doc";
static const char *__doc_kp_OpTensorSyncDevice_OpTensorSyncDevice = R"doc()doc";
@ -523,28 +544,25 @@ queues @param device Vulkan logical device for passing to Algorithm
static const char *__doc_kp_OpTensorSyncDevice_init =
R"doc(Performs basic checks such as ensuring that there is at least one
tensor provided, that they are initialized and that they are not of
type TensorTpes::eHost. For staging tensors in host memory, the map
is performed during the init function.)doc";
static const char *__doc_kp_OpTensorSyncDevice_mStagingTensors = R"doc()doc";
tensor provided with min memory of 1 element.)doc";
static const char *__doc_kp_OpTensorSyncDevice_postEval = R"doc(Does not perform any postEval commands.)doc";
static const char *__doc_kp_OpTensorSyncDevice_preEval = R"doc(Does not perform any preEval commands.)doc";
static const char *__doc_kp_OpTensorSyncDevice_record =
R"doc(For device tensors, it records the copy command to the device tensor
from the temporary staging tensor.)doc";
R"doc(For device tensors, it records the copy command for the tensor to copy
the data from its staging to device memory.)doc";
static const char *__doc_kp_OpTensorSyncLocal =
R"doc(Operation that syncs tensor's local data by mapping the data from
device memory into the local vector. For TensorTypes::eDevice it will
use a staging tensor to perform the copy. For TensorTypes::eHost it
will only copy the data and perform a map, which will be executed
during the postSubmit (there will be no copy during the sequence
eval/submit). This function cannot be carried out for
TensorTypes::eHost.)doc";
R"doc(Operation that syncs tensor's local memory by mapping device data into
the local CPU memory. For TensorTypes::eDevice it will use a record
operation for the memory to be syncd into GPU memory which means that
the operation will be done in sync with GPU commands. For
TensorTypes::eStaging it will only map the data into host memory which
will happen during preEval before the recorded commands are
dispatched. This operation won't have any effect on
TensorTypes::eStaging.)doc";
static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal = R"doc()doc";
@ -560,10 +578,7 @@ queues @param device Vulkan logical device for passing to Algorithm
static const char *__doc_kp_OpTensorSyncLocal_init =
R"doc(Performs basic checks such as ensuring that there is at least one
tensor provided, that they are initialized and that they are not of
type TensorTpes::eHost.)doc";
static const char *__doc_kp_OpTensorSyncLocal_mStagingTensors = R"doc()doc";
tensor provided with min memory of 1 element.)doc";
static const char *__doc_kp_OpTensorSyncLocal_postEval =
R"doc(For host tensors it performs the map command from the host memory into
@ -572,8 +587,8 @@ local memory.)doc";
static const char *__doc_kp_OpTensorSyncLocal_preEval = R"doc(Does not perform any preEval commands.)doc";
static const char *__doc_kp_OpTensorSyncLocal_record =
R"doc(For device tensors, it records the copy command into the staging
tensor from the device tensor.)doc";
R"doc(For device tensors, it records the copy command for the tensor to copy
the data from its device to staging memory.)doc";
static const char *__doc_kp_Sequence = R"doc(Container of operations that can be sent to GPU as batch)doc";
@ -699,8 +714,9 @@ static const char *__doc_kp_Tensor_Tensor_2 =
R"doc(Default constructor with data provided which would be used to create
the respective vulkan buffer and memory.
@param data Vector of data that will be used by the tensor @param
tensorType Type for the tensor which is of type TensorTypes)doc";
@param data Non-zero-sized vector of data that will be used by the
tensor @param tensorType Type for the tensor which is of type
TensorTypes)doc";
static const char *__doc_kp_Tensor_TensorTypes =
R"doc(Type for tensors created: Device allows memory to be transferred from
@ -714,6 +730,10 @@ static const char *__doc_kp_Tensor_TensorTypes_eHost = R"doc(< Type is host memo
static const char *__doc_kp_Tensor_TensorTypes_eStorage = R"doc(< Type is Device memory (only))doc";
static const char *__doc_kp_Tensor_allocateBindMemory = R"doc()doc";
static const char *__doc_kp_Tensor_allocateMemoryCreateGPUResources = R"doc()doc";
static const char *__doc_kp_Tensor_constructDescriptorBufferInfo =
R"doc(Constructs a vulkan descriptor buffer info which can be used to
specify and reference the underlying buffer component of the tensor
@ -721,6 +741,8 @@ without exposing it.
@return Descriptor buffer info with own buffer)doc";
static const char *__doc_kp_Tensor_copyBuffer = R"doc()doc";
static const char *__doc_kp_Tensor_createBuffer = R"doc()doc";
static const char *__doc_kp_Tensor_data =
@ -735,9 +757,13 @@ static const char *__doc_kp_Tensor_freeMemoryDestroyGPUResources =
R"doc(Destroys and frees the GPU resources which include the buffer and
memory.)doc";
static const char *__doc_kp_Tensor_getBufferUsageFlags = R"doc()doc";
static const char *__doc_kp_Tensor_getPrimaryBufferUsageFlags = R"doc()doc";
static const char *__doc_kp_Tensor_getMemoryPropertyFlags = R"doc()doc";
static const char *__doc_kp_Tensor_getPrimaryMemoryPropertyFlags = R"doc()doc";
static const char *__doc_kp_Tensor_getStagingBufferUsageFlags = R"doc()doc";
static const char *__doc_kp_Tensor_getStagingMemoryPropertyFlags = R"doc()doc";
static const char *__doc_kp_Tensor_init =
R"doc(Initialiser which calls the initialisation for all the respective
@ -751,24 +777,32 @@ R"doc(Returns true if the tensor initialisation function has been carried
out successful, which would mean that the buffer and memory will have
been provisioned.)doc";
static const char *__doc_kp_Tensor_mBuffer = R"doc()doc";
static const char *__doc_kp_Tensor_mData = R"doc()doc";
static const char *__doc_kp_Tensor_mDevice = R"doc()doc";
static const char *__doc_kp_Tensor_mFreeBuffer = R"doc()doc";
static const char *__doc_kp_Tensor_mFreePrimaryBuffer = R"doc()doc";
static const char *__doc_kp_Tensor_mFreeMemory = R"doc()doc";
static const char *__doc_kp_Tensor_mFreePrimaryMemory = R"doc()doc";
static const char *__doc_kp_Tensor_mFreeStagingBuffer = R"doc()doc";
static const char *__doc_kp_Tensor_mFreeStagingMemory = R"doc()doc";
static const char *__doc_kp_Tensor_mIsInit = R"doc()doc";
static const char *__doc_kp_Tensor_mMemory = R"doc()doc";
static const char *__doc_kp_Tensor_mPhysicalDevice = R"doc()doc";
static const char *__doc_kp_Tensor_mPrimaryBuffer = R"doc()doc";
static const char *__doc_kp_Tensor_mPrimaryMemory = R"doc()doc";
static const char *__doc_kp_Tensor_mShape = R"doc()doc";
static const char *__doc_kp_Tensor_mStagingBuffer = R"doc()doc";
static const char *__doc_kp_Tensor_mStagingMemory = R"doc()doc";
static const char *__doc_kp_Tensor_mTensorType = R"doc()doc";
static const char *__doc_kp_Tensor_mapDataFromHostMemory =
@ -809,6 +843,24 @@ a staging buffer transfer, or to gather output (between others).
createBarrier Whether to create a barrier that ensures the data is
copied before further operations. Default is true.)doc";
static const char *__doc_kp_Tensor_recordCopyFromDeviceToStaging =
R"doc(Records a copy from the internal device memory to the staging memory
using an optional barrier to wait for the operation. This function
would only be relevant for kp::Tensors of type eDevice.
@param commandBuffer Vulkan Command Buffer to record the commands into
@param createBarrier Whether to create a barrier that ensures the data
is copied before further operations. Default is true.)doc";
static const char *__doc_kp_Tensor_recordCopyFromStagingToDevice =
R"doc(Records a copy from the internal staging memory to the device memory
using an optional barrier to wait for the operation. This function
would only be relevant for kp::Tensors of type eDevice.
@param commandBuffer Vulkan Command Buffer to record the commands into
@param createBarrier Whether to create a barrier that ensures the data
is copied before further operations. Default is true.)doc";
static const char *__doc_kp_Tensor_setData =
R"doc(Sets / resets the vector data of the tensor. This function does not
perform any copies into GPU memory and is only performed on the host.)doc";

View file

@ -88,25 +88,24 @@ PYBIND11_MODULE(kp, m) {
py::class_<kp::Sequence, std::shared_ptr<kp::Sequence>>(m, "Sequence")
.def("init", &kp::Sequence::init, "Initialises Vulkan resources within sequence using provided device.")
.def("init", &kp::Sequence::init, DOC(kp, Sequence, init))
// record
.def("begin", &kp::Sequence::begin, "Clears previous commands and starts recording commands in sequence which can be run in batch.")
.def("end", &kp::Sequence::end, "Stops listening and recording for new commands.")
.def("begin", &kp::Sequence::begin, DOC(kp, Sequence, begin))
.def("end", &kp::Sequence::end, DOC(kp, Sequence, end))
// eval
.def("eval", &kp::Sequence::eval, "Executes the currently recorded commands synchronously by waiting on Vulkan Fence.")
.def("eval_async", &kp::Sequence::evalAsync, "Executes the currently recorded commands asynchronously.")
.def("eval_await", &kp::Sequence::evalAwait, "Waits until the execution finishes using Vulkan Fence.")
.def("eval", &kp::Sequence::eval, DOC(kp, Sequence, eval))
.def("eval_async", &kp::Sequence::evalAsync, DOC(kp, Sequence, evalAsync))
.def("eval_await", &kp::Sequence::evalAwait, DOC(kp, Sequence, evalAwait))
// status
.def("is_running", &kp::Sequence::isRunning, "Checks whether the Sequence operations are currently still executing.")
.def("is_rec", &kp::Sequence::isRecording, "Checks whether the Sequence is currently in recording mode.")
.def("is_init", &kp::Sequence::isInit, "Checks if the Sequence has been initialized")
.def("is_running", &kp::Sequence::isRunning, DOC(kp, Sequence, isRunning))
.def("is_rec", &kp::Sequence::isRecording, DOC(kp, Sequence, isRecording))
.def("is_init", &kp::Sequence::isInit, DOC(kp, Sequence, isInit))
// record
.def("record_tensor_copy", &kp::Sequence::record<kp::OpTensorCopy>,
"Records operation to copy one tensor to one or many tensors")
.def("record_tensor_copy", &kp::Sequence::record<kp::OpTensorCopy>, DOC(kp, Sequence, record))
.def("record_tensor_sync_device", &kp::Sequence::record<kp::OpTensorSyncDevice>,
"Records operation to sync tensor from local memory to GPU memory")
.def("record_tensor_sync_local", &kp::Sequence::record<kp::OpTensorSyncLocal>,
@ -166,7 +165,19 @@ PYBIND11_MODULE(kp, m) {
.def("rebuild", py::overload_cast<std::shared_ptr<kp::Tensor>, bool>(&kp::Manager::rebuild),
py::arg("tensor"), py::arg("syncDataToGPU") = true,
"Build and initialise tensor")
.def("destroy", py::overload_cast<std::shared_ptr<kp::Tensor>>(&kp::Manager::destroy),
py::arg("tensor"), DOC(kp, Manager, destroy))
.def("destroy", py::overload_cast<std::vector<std::shared_ptr<kp::Tensor>>>(&kp::Manager::destroy),
py::arg("tensors"), DOC(kp, Manager, destroy, 2))
.def("destroy", py::overload_cast<std::vector<std::shared_ptr<kp::Sequence>>>(&kp::Manager::destroy),
py::arg("sequences"), DOC(kp, Manager, destroy, 3))
.def("destroy", py::overload_cast<std::shared_ptr<kp::Sequence>>(&kp::Manager::destroy),
py::arg("sequence"), DOC(kp, Manager, destroy, 4))
.def("destroy", py::overload_cast<const std::string &>(&kp::Manager::destroy),
py::arg("sequenceName"), DOC(kp, Manager, destroy, 5))
.def("destroy", py::overload_cast<const std::vector<std::string>&>(&kp::Manager::destroy),
py::arg("sequenceNames"), DOC(kp, Manager, destroy, 6))
// Await functions
.def("eval_await", &kp::Manager::evalOpAwait,
py::arg("sequenceName"), py::arg("waitFor") = UINT64_MAX,

View file

@ -109,10 +109,19 @@ def test_sequence():
seq.end()
seq.eval()
mgr.destroy("op")
assert seq.is_init() == False
assert tensor_out.data() == [2.0, 4.0, 6.0]
assert np.all(tensor_out.numpy() == [2.0, 4.0, 6.0])
mgr.destroy(tensor_in_a)
mgr.destroy([tensor_in_b, tensor_out])
assert tensor_in_a.is_init() == False
assert tensor_in_b.is_init() == False
assert tensor_out.is_init() == False
def test_workgroup():
mgr = kp.Manager(0)
@ -146,8 +155,17 @@ def test_workgroup():
seq.end()
seq.eval()
mgr.destroy(seq)
assert seq.is_init() == False
mgr.eval_tensor_sync_local_def([tensor_a, tensor_b])
assert np.all(tensor_a.numpy() == np.stack([np.arange(16)]*8, axis=1).ravel())
assert np.all(tensor_b.numpy() == np.stack([np.arange(8)]*16, axis=0).ravel())
mgr.destroy([tensor_a, tensor_b])
assert tensor_a.is_init() == False
assert tensor_b.is_init() == False

View file

@ -8,3 +8,4 @@ Sphinx==3.2.1
sphinx_material==0.0.30
breathe==4.20.0
m2r2==0.2.5
# pybind11_mkdoc is installed straight from its git repository; fetched over
# HTTPS because the unauthenticated git:// protocol is not served by GitHub.
git+https://github.com/pybind/pybind11_mkdoc.git@master

View file

@ -1513,23 +1513,7 @@ class Manager
std::shared_ptr<Tensor> tensor(
const std::vector<float>& data,
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice,
bool syncDataToGPU = true)
{
SPDLOG_DEBUG("Kompute Manager tensor triggered");
SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr");
std::shared_ptr<Tensor> tensor =
std::make_shared<Tensor>(kp::Tensor(data, tensorType));
tensor->init(this->mPhysicalDevice, this->mDevice);
if (syncDataToGPU) {
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
}
this->mManagedTensors.insert(tensor);
return tensor;
}
bool syncDataToGPU = true);
/**
* Function that simplifies the common workflow of tensor initialisation. It
@ -1539,22 +1523,9 @@ class Manager
*
* @param tensors Array of tensors to rebuild
* @param syncDataToGPU Whether to sync the data to GPU memory
* @returns Initialized Tensor with memory Syncd to GPU device
*/
void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
bool syncDataToGPU = true)
{
SPDLOG_DEBUG("Kompute Manager rebuild triggered");
for (std::shared_ptr<Tensor> tensor : tensors) {
// False syncData to run all tensors at once instead one by one
this->rebuild(tensor, false);
}
if (syncDataToGPU) {
this->evalOpDefault<OpTensorSyncDevice>(tensors);
}
}
bool syncDataToGPU = true);
/**
* Function that simplifies the common workflow of tensor initialisation. It
@ -1564,29 +1535,59 @@ class Manager
*
* @param tensors Single tensor to rebuild
* @param syncDataToGPU Whether to sync the data to GPU memory
* @returns Initialized Tensor with memory Syncd to GPU device
*/
void rebuild(std::shared_ptr<kp::Tensor> tensor,
bool syncDataToGPU = true)
{
SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");
bool syncDataToGPU = true);
if (tensor->isInit()) {
tensor->freeMemoryDestroyGPUResources();
}
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* single tensor.
*
* @param tensor Single tensor to destroy
*/
void destroy(std::shared_ptr<kp::Tensor> tensor);
tensor->init(this->mPhysicalDevice, this->mDevice);
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* vector of tensors.
*
* @param tensors Vector of tensors to destroy
*/
void destroy(std::vector<std::shared_ptr<kp::Tensor>> tensors);
std::set<std::shared_ptr<Tensor>>::iterator it =
this->mManagedTensors.find(tensor);
if (it == this->mManagedTensors.end()) {
this->mManagedTensors.insert(tensor);
}
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* vector of sequences. Destroying by sequence name is more efficient
* and hence recommended instead of by object.
*
* @param sequences Vector for shared ptrs with sequences to destroy
*/
void destroy(std::vector<std::shared_ptr<kp::Sequence>> sequences);
if (syncDataToGPU) {
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
}
}
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* single sequence. Destroying by sequence name is more efficient
* and hence recommended instead of by object.
*
* @param sequence Single sequence to destroy
*/
void destroy(std::shared_ptr<kp::Sequence> sequence);
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* sequence by name.
*
* @param sequenceName Single name of named sequence to destroy
*/
void destroy(const std::string& sequenceName);
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* sequences using vector of named sequence names.
*
* @param sequenceNames Vector of sequence names to destroy
*/
void destroy(const std::vector<std::string>& sequenceNames);
private:
// -------------- OPTIONALLY OWNED RESOURCES

View file

@ -328,4 +328,151 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
SPDLOG_DEBUG("Kompute Manager compute queue obtained");
}
/**
 * Creates a tensor from the provided data, initialises it with this
 * manager's physical and logical device, and registers it in the set of
 * managed tensors.
 *
 * @param data Values the tensor is constructed with
 * @param tensorType Type the tensor is constructed with
 * @param syncDataToGPU When true an OpTensorSyncDevice is evaluated on the
 * new tensor before it is returned
 * @returns Shared pointer to the newly created tensor
 */
std::shared_ptr<Tensor>
Manager::tensor(
const std::vector<float>& data,
Tensor::TensorTypes tensorType,
bool syncDataToGPU)
{
    SPDLOG_DEBUG("Kompute Manager tensor triggered");
    SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr");

    // Construct in place: passing a temporary kp::Tensor to make_shared
    // would build the tensor once and then copy/move it a second time.
    std::shared_ptr<Tensor> tensor =
      std::make_shared<Tensor>(data, tensorType);

    tensor->init(this->mPhysicalDevice, this->mDevice);

    if (syncDataToGPU) {
        this->evalOpDefault<OpTensorSyncDevice>({ tensor });
    }

    this->mManagedTensors.insert(tensor);
    return tensor;
}
/**
 * Rebuilds every tensor in the provided vector and, if requested, syncs all
 * of them to the device with a single OpTensorSyncDevice evaluation.
 *
 * @param tensors Tensors to rebuild
 * @param syncDataToGPU Whether to evaluate OpTensorSyncDevice on the tensors
 * once they have all been rebuilt
 */
void
Manager::rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
bool syncDataToGPU)
{
    SPDLOG_DEBUG("Kompute Manager rebuild triggered");

    // Each tensor is rebuilt with syncing disabled so that the sync can be
    // issued once for the whole batch below rather than once per tensor.
    for (const std::shared_ptr<Tensor>& current : tensors) {
        this->rebuild(current, false);
    }

    if (!syncDataToGPU) {
        return;
    }

    this->evalOpDefault<OpTensorSyncDevice>(tensors);
}
/**
 * Rebuilds a single tensor: frees its GPU resources if it is already
 * initialised, initialises it again with this manager's devices and makes
 * sure it is part of the managed tensor set.
 *
 * @param tensor Tensor to rebuild
 * @param syncDataToGPU Whether to evaluate OpTensorSyncDevice on the tensor
 * after it has been initialised
 */
void
Manager::rebuild(std::shared_ptr<kp::Tensor> tensor,
bool syncDataToGPU)
{
    SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");

    if (tensor->isInit()) {
        tensor->freeMemoryDestroyGPUResources();
    }

    tensor->init(this->mPhysicalDevice, this->mDevice);

    // std::set::insert leaves the set untouched when the element is already
    // present, so no separate find() is needed before inserting.
    this->mManagedTensors.insert(tensor);

    if (syncDataToGPU) {
        this->evalOpDefault<OpTensorSyncDevice>({ tensor });
    }
}
/**
 * Frees the GPU resources of a single tensor (when it is initialised) and
 * removes it from the managed tensor set if it is stored there.
 *
 * @param tensor Tensor to destroy; a null pointer is ignored
 */
void
Manager::destroy(std::shared_ptr<kp::Tensor> tensor)
{
    SPDLOG_DEBUG("Kompute Manager destroy Tensor triggered");

    // Guard against an empty shared pointer, which would otherwise be
    // dereferenced below.
    if (!tensor) {
        return;
    }

    if (tensor->isInit()) {
        tensor->freeMemoryDestroyGPUResources();
    }

    // TODO: Confirm not limiting destroying tensors owned by this manager allowed
    // Erasing by key does nothing when the tensor is not in the set, so a
    // preceding find() is unnecessary.
    this->mManagedTensors.erase(tensor);
}
/**
 * Destroys every tensor in the provided vector by delegating to the
 * single-tensor destroy overload.
 *
 * @param tensors Tensors to destroy
 */
void
Manager::destroy(std::vector<std::shared_ptr<kp::Tensor>> tensors)
{
    SPDLOG_DEBUG("Kompute Manager destroy Tensor triggered");

    // Iterate by const reference to avoid an extra shared_ptr copy per element.
    for (const std::shared_ptr<Tensor>& tensor : tensors) {
        this->destroy(tensor);
    }
}
/**
 * Destroys every sequence in the provided vector by delegating to the
 * single-sequence destroy overload.
 *
 * @param sequences Sequences to destroy
 */
void
Manager::destroy(std::vector<std::shared_ptr<kp::Sequence>> sequences)
{
    SPDLOG_DEBUG("Kompute Manager destroy Sequence triggered");

    // Iterate by const reference to avoid an extra shared_ptr copy per element.
    for (const std::shared_ptr<kp::Sequence>& sequence : sequences) {
        this->destroy(sequence);
    }
}
/**
 * Removes the given sequence from the managed (named) sequences if it is
 * stored there, then frees its GPU resources when it is initialised.
 *
 * @param sequence Sequence to destroy; a null pointer is ignored
 */
void
Manager::destroy(std::shared_ptr<kp::Sequence> sequence)
{
    SPDLOG_DEBUG("Kompute Manager destroy Sequence triggered");

    // Guard against an empty shared pointer, which would otherwise be
    // dereferenced below.
    if (!sequence) {
        return;
    }

    // Inefficient but required to delete by value
    // Depending on the amount of named sequences created may be worth creating
    // a set to ensure efficient delete.
    for (auto it = this->mManagedSequences.begin();
         it != this->mManagedSequences.end();
         ++it) {
        if (it->second == sequence) {
            // The iterator is not used after erase: the loop exits right away.
            this->mManagedSequences.erase(it);
            break;
        }
    }

    if (sequence->isInit()) {
        sequence->freeMemoryDestroyGPUResources();
    }
}
/**
 * Looks up a managed sequence by name; when found, frees its GPU resources
 * (if initialised) and removes it from the managed sequences. Unknown names
 * are ignored.
 *
 * @param sequenceName Name of the managed sequence to destroy
 */
void
Manager::destroy(const std::string& sequenceName)
{
    SPDLOG_DEBUG("Kompute Manager destroy Sequence triggered");

    auto found = this->mManagedSequences.find(sequenceName);
    if (found == this->mManagedSequences.end()) {
        return;
    }

    // We don't call destroy(sequence) as erasing sequence by name more efficient
    if (found->second->isInit()) {
        found->second->freeMemoryDestroyGPUResources();
    }

    // Erase through the iterator already held instead of hashing the name
    // a second time.
    this->mManagedSequences.erase(found);
}
/**
 * Destroys every named sequence in the provided vector by delegating to the
 * destroy-by-name overload.
 *
 * @param sequenceNames Names of the managed sequences to destroy
 */
void
Manager::destroy(const std::vector<std::string>& sequenceNames)
{
    SPDLOG_DEBUG("Kompute Manager destroy Sequence triggered");

    for (const std::string& sequenceName : sequenceNames) {
        this->destroy(sequenceName);
    }
}
}

View file

@ -226,23 +226,7 @@ class Manager
std::shared_ptr<Tensor> tensor(
const std::vector<float>& data,
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice,
bool syncDataToGPU = true)
{
SPDLOG_DEBUG("Kompute Manager tensor triggered");
SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr");
std::shared_ptr<Tensor> tensor =
std::make_shared<Tensor>(kp::Tensor(data, tensorType));
tensor->init(this->mPhysicalDevice, this->mDevice);
if (syncDataToGPU) {
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
}
this->mManagedTensors.insert(tensor);
return tensor;
}
bool syncDataToGPU = true);
/**
* Function that simplifies the common workflow of tensor initialisation. It
@ -252,22 +236,9 @@ class Manager
*
* @param tensors Array of tensors to rebuild
* @param syncDataToGPU Whether to sync the data to GPU memory
* @returns Initialized Tensor with memory Syncd to GPU device
*/
void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
bool syncDataToGPU = true)
{
SPDLOG_DEBUG("Kompute Manager rebuild triggered");
for (std::shared_ptr<Tensor> tensor : tensors) {
// False syncData to run all tensors at once instead one by one
this->rebuild(tensor, false);
}
if (syncDataToGPU) {
this->evalOpDefault<OpTensorSyncDevice>(tensors);
}
}
bool syncDataToGPU = true);
/**
* Function that simplifies the common workflow of tensor initialisation. It
@ -277,29 +248,59 @@ class Manager
*
* @param tensors Single tensor to rebuild
* @param syncDataToGPU Whether to sync the data to GPU memory
* @returns Initialized Tensor with memory Syncd to GPU device
*/
void rebuild(std::shared_ptr<kp::Tensor> tensor,
bool syncDataToGPU = true)
{
SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");
bool syncDataToGPU = true);
if (tensor->isInit()) {
tensor->freeMemoryDestroyGPUResources();
}
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* single tensor.
*
* @param tensor Single tensor to destroy
*/
void destroy(std::shared_ptr<kp::Tensor> tensor);
tensor->init(this->mPhysicalDevice, this->mDevice);
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* vector of tensors.
*
* @param tensors Vector of tensors to destroy
*/
void destroy(std::vector<std::shared_ptr<kp::Tensor>> tensors);
std::set<std::shared_ptr<Tensor>>::iterator it =
this->mManagedTensors.find(tensor);
if (it == this->mManagedTensors.end()) {
this->mManagedTensors.insert(tensor);
}
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* vector of sequences. Destroying by sequence name is more efficient
* and hence recommended instead of by object.
*
* @param sequences Vector of shared ptrs with sequences to destroy
*/
void destroy(std::vector<std::shared_ptr<kp::Sequence>> sequences);
if (syncDataToGPU) {
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
}
}
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* single sequence. Destroying by sequence name is more efficient
* and hence recommended instead of by object.
*
* @param sequence Single sequence to destroy
*/
void destroy(std::shared_ptr<kp::Sequence> sequence);
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* a single sequence identified by name.
*
* @param sequenceName Name of the named sequence to destroy
*/
void destroy(const std::string& sequenceName);
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* sequences using vector of named sequence names.
*
* @param sequenceNames Vector of sequence names to destroy
*/
void destroy(const std::vector<std::string>& sequenceNames);
private:
// -------------- OPTIONALLY OWNED RESOURCES

370
test/TestDestroy.cpp Normal file
View file

@ -0,0 +1,370 @@
#include "gtest/gtest.h"
#include "kompute/Kompute.hpp"
// Runs a shader over one tensor, destroys that tensor through the manager
// and checks that data() still holds the shader result afterwards.
TEST(TestDestroy, TestDestroyTensorSingle)
{
    auto tensorA = std::make_shared<kp::Tensor>(std::vector<float>{ 0, 0, 0 });

    // Shader adds one to every element of the bound buffer.
    const std::string shader(R"(
#version 450
layout (local_size_x = 1) in;
layout(set = 0, binding = 0) buffer a { float pa[]; };
void main() {
uint index = gl_GlobalInvocationID.x;
pa[index] = pa[index] + 1;
})");
    const std::vector<char> shaderBytes(shader.begin(), shader.end());

    {
        // Declared outside the manager's scope, so this handle outlives the
        // manager.
        std::shared_ptr<kp::Sequence> sequence;
        {
            kp::Manager mgr;
            mgr.rebuild({ tensorA });

            sequence = mgr.sequence();
            sequence->begin();
            sequence->record<kp::OpAlgoBase>({ tensorA }, shaderBytes);
            sequence->end();
            sequence->eval();

            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });

            mgr.destroy(tensorA);
            EXPECT_FALSE(tensorA->isInit());
        }
    }

    const std::vector<float> expected({ 1, 1, 1 });
    EXPECT_EQ(tensorA->data(), expected);
}
// Runs a shader over two tensors, destroys both through the vector overload
// of Manager::destroy and checks that data() still holds the shader results.
TEST(TestDestroy, TestDestroyTensorVector)
{
    auto tensorA = std::make_shared<kp::Tensor>(std::vector<float>{ 1, 1, 1 });
    auto tensorB = std::make_shared<kp::Tensor>(std::vector<float>{ 1, 1, 1 });

    // Shader adds one to buffer a and two to buffer b.
    const std::string shader(R"(
#version 450
layout (local_size_x = 1) in;
layout(set = 0, binding = 0) buffer a { float pa[]; };
layout(set = 0, binding = 1) buffer b { float pb[]; };
void main() {
uint index = gl_GlobalInvocationID.x;
pa[index] = pa[index] + 1;
pb[index] = pb[index] + 2;
})");
    const std::vector<char> shaderBytes(shader.begin(), shader.end());

    {
        // Declared outside the manager's scope, so this handle outlives the
        // manager.
        std::shared_ptr<kp::Sequence> sequence;
        {
            kp::Manager mgr;
            mgr.rebuild({ tensorA, tensorB });

            sequence = mgr.sequence();
            sequence->begin();
            sequence->record<kp::OpAlgoBase>({ tensorA, tensorB }, shaderBytes);
            sequence->end();
            sequence->eval();

            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });

            mgr.destroy({ tensorA, tensorB });
            EXPECT_FALSE(tensorA->isInit());
            EXPECT_FALSE(tensorB->isInit());
        }
    }

    const std::vector<float> expectedA({ 2, 2, 2 });
    const std::vector<float> expectedB({ 3, 3, 3 });
    EXPECT_EQ(tensorA->data(), expectedA);
    EXPECT_EQ(tensorB->data(), expectedB);
}
// Rebuilds two tensors and destroys them straight away, without running any
// shader, then checks that both report !isInit() and that data() of BOTH
// tensors is unchanged.
TEST(TestDestroy, TestDestroyTensorVectorUninitialised)
{
    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 1, 1, 1 }) };
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 1, 1, 1 }) };

    {
        kp::Manager mgr;

        mgr.rebuild({ tensorA, tensorB });

        mgr.destroy({ tensorA, tensorB });

        EXPECT_FALSE(tensorA->isInit());
        EXPECT_FALSE(tensorB->isInit());
    }

    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
    // Previously tensorA was asserted twice and tensorB was never verified.
    EXPECT_EQ(tensorB->data(), std::vector<float>({ 1, 1, 1 }));
}
// Runs a shader through an anonymous sequence, destroys that sequence by
// object through the manager and checks the sequence reports !isInit().
TEST(TestDestroy, TestDestroySequenceSingle)
{
    auto tensorA = std::make_shared<kp::Tensor>(std::vector<float>{ 0, 0, 0 });

    // Shader adds one to every element of the bound buffer.
    const std::string shader(R"(
#version 450
layout (local_size_x = 1) in;
layout(set = 0, binding = 0) buffer a { float pa[]; };
void main() {
uint index = gl_GlobalInvocationID.x;
pa[index] = pa[index] + 1;
})");
    const std::vector<char> shaderBytes(shader.begin(), shader.end());

    {
        // Declared outside the manager's scope, so this handle outlives the
        // manager.
        std::shared_ptr<kp::Sequence> sequence;
        {
            kp::Manager mgr;
            mgr.rebuild({ tensorA });

            sequence = mgr.sequence();
            sequence->begin();
            sequence->record<kp::OpAlgoBase>({ tensorA }, shaderBytes);
            sequence->end();
            sequence->eval();

            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });

            mgr.destroy(sequence);
            EXPECT_FALSE(sequence->isInit());
        }
    }

    const std::vector<float> expected({ 1, 1, 1 });
    EXPECT_EQ(tensorA->data(), expected);
}
// Runs the same shader through two named sequences, destroys both by object
// in a single call and checks that each sequence reports !isInit().
TEST(TestDestroy, TestDestroySequenceVector)
{
    auto tensorA = std::make_shared<kp::Tensor>(std::vector<float>{ 0, 0, 0 });

    // Shader adds one to every element of the bound buffer.
    const std::string shader(R"(
#version 450
layout (local_size_x = 1) in;
layout(set = 0, binding = 0) buffer a { float pa[]; };
void main() {
uint index = gl_GlobalInvocationID.x;
pa[index] = pa[index] + 1;
})");
    const std::vector<char> shaderBytes(shader.begin(), shader.end());

    {
        // Both handles are declared outside the manager's scope, so they
        // outlive the manager.
        std::shared_ptr<kp::Sequence> sq1;
        std::shared_ptr<kp::Sequence> sq2;
        {
            kp::Manager mgr;
            mgr.rebuild({ tensorA });

            sq1 = mgr.sequence("One");
            sq1->begin();
            sq1->record<kp::OpAlgoBase>({ tensorA }, shaderBytes);
            sq1->end();
            sq1->eval();

            sq2 = mgr.sequence("Two");
            sq2->begin();
            sq2->record<kp::OpAlgoBase>({ tensorA }, shaderBytes);
            sq2->end();
            sq2->eval();

            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });

            mgr.destroy({ sq1, sq2 });
            EXPECT_FALSE(sq1->isInit());
            EXPECT_FALSE(sq2->isInit());
        }
    }

    const std::vector<float> expected({ 2, 2, 2 });
    EXPECT_EQ(tensorA->data(), expected);
}
// Evaluates the shader on two named sequences created via evalOp and then
// destroys each of them by name, one call per name.
TEST(TestDestroy, TestDestroySequenceNameSingleInsideManager)
{
    auto tensorA = std::make_shared<kp::Tensor>(std::vector<float>{ 0, 0, 0 });

    // Shader adds one to every element of the bound buffer.
    const std::string shader(R"(
#version 450
layout (local_size_x = 1) in;
layout(set = 0, binding = 0) buffer a { float pa[]; };
void main() {
uint index = gl_GlobalInvocationID.x;
pa[index] = pa[index] + 1;
})");
    const std::vector<char> shaderBytes(shader.begin(), shader.end());

    {
        kp::Manager mgr;

        mgr.rebuild({ tensorA });

        mgr.evalOp<kp::OpAlgoBase>({ tensorA }, "one", shaderBytes);
        mgr.evalOp<kp::OpAlgoBase>({ tensorA }, "two", shaderBytes);

        mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });

        mgr.destroy("one");
        mgr.destroy("two");
    }

    const std::vector<float> expected({ 2, 2, 2 });
    EXPECT_EQ(tensorA->data(), expected);
}
// Keeps a handle to a named sequence outside the manager's scope, destroys
// the sequence by name and checks that the handle reports !isInit().
TEST(TestDestroy, TestDestroySequenceNameSingleOutsideManager)
{
    auto tensorA = std::make_shared<kp::Tensor>(std::vector<float>{ 0, 0, 0 });

    // Shader adds one to every element of the bound buffer.
    const std::string shader(R"(
#version 450
layout (local_size_x = 1) in;
layout(set = 0, binding = 0) buffer a { float pa[]; };
void main() {
uint index = gl_GlobalInvocationID.x;
pa[index] = pa[index] + 1;
})");
    const std::vector<char> shaderBytes(shader.begin(), shader.end());

    {
        // Declared outside the manager's scope, so this handle outlives the
        // manager.
        std::shared_ptr<kp::Sequence> sq1;
        {
            kp::Manager mgr;
            mgr.rebuild({ tensorA });

            sq1 = mgr.sequence("One");
            sq1->begin();
            sq1->record<kp::OpAlgoBase>({ tensorA }, shaderBytes);
            sq1->end();
            sq1->eval();

            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });

            mgr.destroy("One");
            EXPECT_FALSE(sq1->isInit());
        }
    }

    const std::vector<float> expected({ 1, 1, 1 });
    EXPECT_EQ(tensorA->data(), expected);
}
// Evaluates the shader on two named sequences created via evalOp and then
// destroys both in one call by passing a vector of their names.
TEST(TestDestroy, TestDestroySequenceNameVectorInsideManager)
{
    auto tensorA = std::make_shared<kp::Tensor>(std::vector<float>{ 0, 0, 0 });

    // Shader adds one to every element of the bound buffer.
    const std::string shader(R"(
#version 450
layout (local_size_x = 1) in;
layout(set = 0, binding = 0) buffer a { float pa[]; };
void main() {
uint index = gl_GlobalInvocationID.x;
pa[index] = pa[index] + 1;
})");
    const std::vector<char> shaderBytes(shader.begin(), shader.end());

    {
        kp::Manager mgr;

        mgr.rebuild({ tensorA });

        mgr.evalOp<kp::OpAlgoBase>({ tensorA }, "one", shaderBytes);
        mgr.evalOp<kp::OpAlgoBase>({ tensorA }, "two", shaderBytes);

        mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });

        mgr.destroy(std::vector<std::string>({ "one", "two" }));
    }

    const std::vector<float> expected({ 2, 2, 2 });
    EXPECT_EQ(tensorA->data(), expected);
}
// Keeps handles to two named sequences outside the manager's scope, destroys
// both sequences with a single call taking a vector of names, and checks that
// each handle reports !isInit().
//
// The previous body was an exact copy of
// TestDestroySequenceNameVectorInsideManager and therefore added no coverage
// for handles held outside the manager.
TEST(TestDestroy, TestDestroySequenceNameVectorOutsideManager)
{
    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };

    // Shader adds one to every element of the bound buffer.
    std::string shader(R"(
#version 450
layout (local_size_x = 1) in;
layout(set = 0, binding = 0) buffer a { float pa[]; };
void main() {
uint index = gl_GlobalInvocationID.x;
pa[index] = pa[index] + 1;
})");

    {
        // Both handles are declared outside the manager's scope, so they
        // outlive the manager.
        std::shared_ptr<kp::Sequence> sq1 = nullptr;
        std::shared_ptr<kp::Sequence> sq2 = nullptr;

        {
            kp::Manager mgr;

            mgr.rebuild({ tensorA });

            sq1 = mgr.sequence("one");
            sq1->begin();
            sq1->record<kp::OpAlgoBase>(
              { tensorA }, std::vector<char>(shader.begin(), shader.end()));
            sq1->end();
            sq1->eval();

            sq2 = mgr.sequence("two");
            sq2->begin();
            sq2->record<kp::OpAlgoBase>(
              { tensorA }, std::vector<char>(shader.begin(), shader.end()));
            sq2->end();
            sq2->eval();

            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });

            mgr.destroy(std::vector<std::string>({ "one", "two" }));

            EXPECT_FALSE(sq1->isInit());
            EXPECT_FALSE(sq2->isInit());
        }
    }

    EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
}
// Evaluates the shader via evalOpDefault and then destroys the sequence
// named KP_DEFAULT_SESSION by name.
TEST(TestDestroy, TestDestroySequenceNameDefaultOutsideManager)
{
    auto tensorA = std::make_shared<kp::Tensor>(std::vector<float>{ 0, 0, 0 });

    // Shader adds one to every element of the bound buffer.
    const std::string shader(R"(
#version 450
layout (local_size_x = 1) in;
layout(set = 0, binding = 0) buffer a { float pa[]; };
void main() {
uint index = gl_GlobalInvocationID.x;
pa[index] = pa[index] + 1;
})");
    const std::vector<char> shaderBytes(shader.begin(), shader.end());

    {
        kp::Manager mgr;

        mgr.rebuild({ tensorA });

        mgr.evalOpDefault<kp::OpAlgoBase>({ tensorA }, shaderBytes);

        mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });

        mgr.destroy(KP_DEFAULT_SESSION);
    }

    const std::vector<float> expected({ 1, 1, 1 });
    EXPECT_EQ(tensorA->data(), expected);
}