Merge pull request #138 from EthicalML/136_memory_hierarchy_tensor_manager

Amend memory ownership hierarchy to have Tensor owned by Manager instead of OpCreateTensor / OpBase
This commit is contained in:
Alejandro Saucedo 2021-02-10 07:07:54 +00:00 committed by GitHub
commit d5df5c1f41
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
42 changed files with 507 additions and 576 deletions

1
.ccls
View file

@ -13,6 +13,7 @@
-DDEBUG=1
-DKOMPUTE_INCLUDE_FOR_SYNTAX
-I/usr/include/python3.6/
-I./python/pybind11/include/
-I./external/Vulkan-Headers/include/
-I./external/googletest/googletest/include/

View file

@ -156,6 +156,11 @@ vs_run_tests: vs_build_tests
./build/test/$(VS_BUILD_TYPE)/test_kompute.exe --gtest_filter=$(FILTER_TESTS)
#### PYTHON ####
test_python:
python -m pytest -s --log-cli-level=DEBUG -v python/test/
####### Run CI Commands #######
# This command uses act to replicate github action

View file

@ -54,9 +54,9 @@ int main() {
kp::Manager mgr;
// 2. Create and initialise Kompute Tensors through manager
auto tensorInA = mgr.buildTensor({ 2., 2., 2. });
auto tensorInB = mgr.buildTensor({ 1., 2., 3. });
auto tensorOut = mgr.buildTensor({ 0., 0., 0. });
auto tensorInA = mgr.tensor({ 2., 2., 2. });
auto tensorInB = mgr.tensor({ 1., 2., 3. });
auto tensorOut = mgr.tensor({ 0., 0., 0. });
// 3. Specify "multiply shader" code (can also be raw string, spir-v bytes or file path)
std::string shaderString = (R"(

View file

@ -97,7 +97,7 @@ Record commands in a single submit by using a Sequence to send in batch to GPU.
mgr.evalOpDefault<kp::OpCreateTensor>({tensorLHS, tensorRHS, tensorOutput});
// Create a new sequence
std::weak_ptr<kp::Sequence> sqWeakPtr = mgr.getOrCreateManagedSequence();
std::weak_ptr<kp::Sequence> sqWeakPtr = mgr.sequence();
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock())
{
@ -226,8 +226,8 @@ Back to `examples list <#simple-examples>`_.
// We need to create explicit sequences with their respective queues
// The second parameter is the index in the familyIndex array which is relative
// to the vector we created the manager with.
mgr.createManagedSequence("queueOne", 0);
mgr.createManagedSequence("queueTwo", 1);
mgr.sequence("queueOne", 0);
mgr.sequence("queueTwo", 1);
// Creates tensor and initializes GPU memory (below we show more granularity)
auto tensorA = std::make_shared<kp::Tensor>(kp::Tensor(std::vector<float>(10, 0.0)));
@ -422,7 +422,7 @@ Now that we have the inputs and outputs we will be able to use them in the proce
kp::Manager mgr;
if (std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("createTensors").lock())
mgr.sequence("createTensors").lock())
{
// ...

View file

@ -208,8 +208,8 @@ It's worth mentioning you can have multiple sequences referencing the same queue
// We need to create explicit sequences with their respective queues
// The second parameter is the index in the familyIndex array which is relative
// to the vector we created the manager with.
mgr.createManagedSequence("queueOne", 0);
mgr.createManagedSequence("queueTwo", 1);
mgr.sequence("queueOne", 0);
mgr.sequence("queueTwo", 1);
We create the tensors without modifications.

View file

@ -86,16 +86,6 @@ The kp::OpMult operation is a sample implementation of the kp::OpAlgoBase class.
.. doxygenclass:: kp::OpMult
:members:
OpTensorCreate
-------
The kp::OpTensorCreate is a tensor only operations which initialises a kp::Tensor by creating the respective vk::Buffer and vk::Memory, as well as transferring the local data into the GPU.
.. image:: ../images/kompute-vulkan-architecture-opcreatetensor.jpg
:width: 100%
.. doxygenclass:: kp::OpTensorCreate
:members:
OpTensorCopy
-------

View file

@ -42,16 +42,9 @@ void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData,
kp::Manager mgr;
{
mgr.rebuild(params);
std::shared_ptr<kp::Sequence> sqTensor =
mgr.createManagedSequence();
sqTensor->begin();
sqTensor->record<kp::OpTensorCreate>(params);
sqTensor->end();
sqTensor->eval();
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
// Record op algo base
sq->begin();

View file

@ -14,9 +14,9 @@ int main()
kp::Manager mgr;
auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 });
auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 });
auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 });
auto tensorInA = mgr.tensor({ 2.0, 4.0, 6.0 });
auto tensorInB = mgr.tensor({ 0.0, 1.0, 2.0 });
auto tensorOut = mgr.tensor({ 0.0, 0.0, 0.0 });
#ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING
std::string shader(R"(

View file

@ -12,7 +12,7 @@ void KomputeSummatorNode::add(float value) {
// Set the new data in the local device
this->mSecondaryTensor->setData({value});
// Execute recorded sequence
if (std::shared_ptr<kp::Sequence> sq = this->mSequence.lock()) {
if (std::shared_ptr<kp::Sequence> sq = this->mSequence) {
sq->eval();
}
else {
@ -29,12 +29,12 @@ float KomputeSummatorNode::get_total() const {
void KomputeSummatorNode::_init() {
std::cout << "CALLING INIT" << std::endl;
this->mPrimaryTensor = this->mManager.buildTensor({ 0.0 });
this->mSecondaryTensor = this->mManager.buildTensor({ 0.0 });
this->mSequence = this->mManager.getOrCreateManagedSequence("AdditionSeq");
this->mPrimaryTensor = this->mManager.tensor({ 0.0 });
this->mSecondaryTensor = this->mManager.tensor({ 0.0 });
this->mSequence = this->mManager.sequence("AdditionSeq");
// We now record the steps in the sequence
if (std::shared_ptr<kp::Sequence> sq = this->mSequence.lock())
if (std::shared_ptr<kp::Sequence> sq = this->mSequence)
{
std::string shader(R"(
@ -59,7 +59,7 @@ void KomputeSummatorNode::_init() {
{ this->mSecondaryTensor });
// Then we run the operation with both tensors
sq->record<kp::OpAlgoBase<>>(
sq->record<kp::OpAlgoBase>(
{ this->mPrimaryTensor, this->mSecondaryTensor },
std::vector<char>(shader.begin(), shader.end()));

View file

@ -28,9 +28,9 @@ float KomputeSummator::get_total() const {
void KomputeSummator::_init() {
std::cout << "CALLING INIT" << std::endl;
this->mPrimaryTensor = this->mManager.buildTensor({ 0.0 });
this->mSecondaryTensor = this->mManager.buildTensor({ 0.0 });
this->mSequence = this->mManager.getOrCreateManagedSequence("AdditionSeq");
this->mPrimaryTensor = this->mManager.tensor({ 0.0 });
this->mSecondaryTensor = this->mManager.tensor({ 0.0 });
this->mSequence = this->mManager.sequence("AdditionSeq");
// We now record the steps in the sequence
{

View file

@ -50,15 +50,10 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
{
kp::Manager mgr;
std::shared_ptr<kp::Sequence> sqTensor =
mgr.createManagedSequence();
mgr.rebuild(params);
sqTensor->begin();
sqTensor->record<kp::OpTensorCreate>(params);
sqTensor->end();
sqTensor->eval();
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
{
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
// Record op algo base
sq->begin();

View file

@ -55,15 +55,9 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
kp::Manager mgr;
{
std::shared_ptr<kp::Sequence> sqTensor =
mgr.createManagedSequence();
mgr.rebuild(params);
sqTensor->begin();
sqTensor->record<kp::OpTensorCreate>(params);
sqTensor->end();
sqTensor->eval();
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
// Record op algo base
sq->begin();

View file

@ -35,15 +35,9 @@ int main()
kp::Manager mgr;
std::shared_ptr<kp::Sequence> sqTensor =
mgr.createManagedSequence();
mgr.rebuild(params);
sqTensor->begin();
sqTensor->record<kp::OpTensorCreate>(params);
sqTensor->end();
sqTensor->eval();
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
// Record op algo base
sq->begin();

View file

@ -119,7 +119,7 @@ integrate with the vulkan kompute use.
@param device Vulkan logical device to use for all base resources
@param physicalDeviceIndex Index for vulkan physical device used)doc";
static const char *__doc_kp_Manager_buildTensor =
static const char *__doc_kp_Manager_tensor =
R"doc(Function that simplifies the common workflow of tensor creation and
initialization. It will take the constructor parameters for a Tensor
and will will us it to create a new Tensor and then create it using
@ -133,15 +133,6 @@ static const char *__doc_kp_Manager_createDevice = R"doc()doc";
static const char *__doc_kp_Manager_createInstance = R"doc()doc";
static const char *__doc_kp_Manager_createManagedSequence =
R"doc(Create a new managed Kompute sequence so it's available within the
manager.
@param sequenceName The name for the named sequence to be created, if
empty then default indexed value is used @param queueIndex The queue
to use from the available queues @return Weak pointer to the manager
owned sequence resource)doc";
static const char *__doc_kp_Manager_evalOp =
R"doc(Function that evaluates operation against named sequence.
@ -187,7 +178,7 @@ R"doc(Function that evaluates operation against a newly created sequence.
TArgs Template parameters that will be used to initialise Operation to
allow for extensible configurations on initialisation)doc";
static const char *__doc_kp_Manager_getOrCreateManagedSequence =
static const char *__doc_kp_Manager_sequence =
R"doc(Get or create a managed Sequence that will be contained by this
manager. If the named sequence does not currently exist, it would be
created and initialised.

View file

@ -105,8 +105,6 @@ PYBIND11_MODULE(kp, m) {
.def("is_init", &kp::Sequence::isInit, "Checks if the Sequence has been initialized")
// record
.def("record_tensor_create", &kp::Sequence::record<kp::OpTensorCreate>,
"Records operation to create and initialise tensor GPU memory and buffer")
.def("record_tensor_copy", &kp::Sequence::record<kp::OpTensorCopy>,
"Records operation to copy one tensor to one or many tensors")
.def("record_tensor_sync_device", &kp::Sequence::record<kp::OpTensorSyncDevice>,
@ -157,11 +155,16 @@ PYBIND11_MODULE(kp, m) {
[](uint32_t physicalDeviceIndex, const std::vector<uint32_t>& familyQueueIndices) {
return std::unique_ptr<kp::Manager>(new kp::Manager(physicalDeviceIndex, familyQueueIndices));
}), "Manager initialiser can provide specified device and array of GPU queueFamilies to load.")
.def("get_create_sequence", &kp::Manager::getOrCreateManagedSequence, "Get a Sequence or create a new one with given name")
.def("create_sequence", &kp::Manager::createManagedSequence,
py::arg("name") = "", py::arg("queueIndex") = 0, "Create a sequence with specific name and specified index of available queues")
.def("build_tensor", &kp::Manager::buildTensor,
py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice,
.def("sequence", &kp::Manager::sequence,
py::arg("name") = "", py::arg("queueIndex") = 0, "Get or create a sequence with specific name and specified index of available queues")
.def("tensor", &kp::Manager::tensor,
py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, py::arg("syncDataToGPU") = true,
"Build and initialise tensor")
.def("rebuild", py::overload_cast<std::vector<std::shared_ptr<kp::Tensor>>, bool>(&kp::Manager::rebuild),
py::arg("tensors"), py::arg("syncDataToGPU") = true,
"Build and initialise list of tensors")
.def("rebuild", py::overload_cast<std::shared_ptr<kp::Tensor>, bool>(&kp::Manager::rebuild),
py::arg("tensor"), py::arg("syncDataToGPU") = true,
"Build and initialise tensor")
// Await functions
@ -172,8 +175,6 @@ PYBIND11_MODULE(kp, m) {
py::arg("waitFor") = UINT64_MAX, "Awaits for asynchronous operation on the last anonymous Sequence created")
// eval default
.def("eval_tensor_create_def", &kp::Manager::evalOpDefault<kp::OpTensorCreate>,
"Evaluates operation to create and initialise tensor GPU memory and buffer with new anonymous Sequence")
.def("eval_tensor_copy_def", &kp::Manager::evalOpDefault<kp::OpTensorCopy>,
"Evaluates operation to copy one tensor to one or many tensors with new anonymous Sequence")
.def("eval_tensor_sync_device_def", &kp::Manager::evalOpDefault<kp::OpTensorSyncDevice>,
@ -209,8 +210,6 @@ PYBIND11_MODULE(kp, m) {
"Evaluates operation to run left right out operation with custom shader with new anonymous Sequence")
// eval
.def("eval_tensor_create", &kp::Manager::evalOp<kp::OpTensorCreate>,
"Evaluates operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence")
.def("eval_tensor_copy", &kp::Manager::evalOp<kp::OpTensorCopy>,
"Evaluates operation to copy one tensor to one or many tensors with explicitly named Sequence")
.def("eval_tensor_sync_device", &kp::Manager::evalOp<kp::OpTensorSyncDevice>,
@ -249,8 +248,6 @@ PYBIND11_MODULE(kp, m) {
"Evaluates operation to run left right out operation with custom shader with explicitly named Sequence")
// eval async default
.def("eval_async_tensor_create_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorCreate>,
"Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with anonymous Sequence")
.def("eval_async_tensor_copy_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorCopy>,
"Evaluates asynchronously operation to copy one tensor to one or many tensors with anonymous Sequence")
.def("eval_async_tensor_sync_device_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorSyncDevice>,
@ -286,8 +283,6 @@ PYBIND11_MODULE(kp, m) {
"Evaluates asynchronously operation to run left right out operation with custom shader with anonymous Sequence")
// eval async
.def("eval_async_tensor_create", &kp::Manager::evalOpAsync<kp::OpTensorCreate>,
"Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence")
.def("eval_async_tensor_copy", &kp::Manager::evalOpAsync<kp::OpTensorCopy>,
"Evaluates asynchronously operation to copy one tensor to one or many tensors with explicitly named Sequence")
.def("eval_async_tensor_sync_device", &kp::Manager::evalOpAsync<kp::OpTensorSyncDevice>,

View file

@ -14,7 +14,7 @@ def test_array_multiplication():
tensor_out = kp.Tensor([0, 0, 0])
# 3. Initialise the Kompute Tensors in the GPU
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
# 4. Define the multiplication shader code to run on the GPU
@ps.python2shader

View file

@ -2,6 +2,7 @@ import os
import kp
import numpy as np
import logging
DIRNAME = os.path.dirname(os.path.abspath(__file__))
@ -16,7 +17,7 @@ def test_opmult():
mgr = kp.Manager()
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
mgr.eval_algo_mult_def([tensor_in_a, tensor_in_b, tensor_out])
@ -41,7 +42,7 @@ def test_opalgobase_data():
layout (local_size_x = 1) in;
// The input tensors bind index is relative to index in parameter passed
// The input tensors bind index is relative to index in parameter passed
layout(set = 0, binding = 0) buffer bina { float tina[]; };
layout(set = 0, binding = 1) buffer binb { float tinb[]; };
layout(set = 0, binding = 2) buffer bout { float tout[]; };
@ -52,7 +53,7 @@ def test_opalgobase_data():
}
"""
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
mgr.eval_algo_str_def([tensor_in_a, tensor_in_b, tensor_out], shaderData)
@ -75,7 +76,7 @@ def test_opalgobase_file():
shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp")
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath)
@ -93,14 +94,14 @@ def test_sequence():
tensor_in_b = kp.Tensor([1, 2, 3])
tensor_out = kp.Tensor([0, 0, 0])
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp")
mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath)
mgr.eval_await_def()
seq = mgr.create_sequence("op")
seq = mgr.sequence("op")
seq.begin()
seq.record_tensor_sync_local([tensor_in_a])
seq.record_tensor_sync_local([tensor_in_b])
@ -118,32 +119,35 @@ def test_workgroup():
tensor_a = kp.Tensor(np.zeros([16,8]))
tensor_b = kp.Tensor(np.zeros([16,8]))
mgr.eval_tensor_create_def([tensor_a, tensor_b])
mgr.rebuild([tensor_a, tensor_b])
shader_src = """
#version 450
layout (local_size_x = 1) in;
// The input tensors bind index is relative to index in parameter passed
// The input tensors bind index is relative to index in parameter passed
layout(set = 0, binding = 0) writeonly buffer bout { float toutx[]; };
layout(set = 0, binding = 1) writeonly buffer bout2 { float touty[]; };
void main() {
uint index = gl_WorkGroupID.x*gl_NumWorkGroups.y + gl_WorkGroupID.y;
toutx[index] = gl_GlobalInvocationID.x;
touty[index] = gl_GlobalInvocationID.y;
}
"""
shader_src = bytes(shader_src, encoding='utf8')
seq = mgr.create_sequence()
seq = mgr.sequence("new")
seq.begin()
seq.record_algo_data([tensor_a, tensor_b], shader_src, (16,8,1))
seq.end()
seq.eval()
mgr.eval_tensor_sync_local_def([tensor_a, tensor_b])
assert np.all(tensor_a.numpy() == np.stack([np.arange(16)]*8, axis=1).ravel())
assert np.all(tensor_b.numpy() == np.stack([np.arange(8)]*16, axis=0).ravel())

View file

@ -66,10 +66,10 @@ def test_logistic_regression():
params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m]
mgr.eval_tensor_create_def(params)
mgr.rebuild(params)
# Create a managed sequence
sq = mgr.create_sequence()
sq = mgr.sequence()
# Clear previous operations and begin recording for new operations
sq.begin()

View file

@ -8,7 +8,6 @@
#include "kompute/operations/OpAlgoBase.hpp"
#include "kompute/operations/OpAlgoLhsRhsOut.hpp"
#include "kompute/operations/OpMult.hpp"
#include "kompute/operations/OpTensorCreate.hpp"
#include "kompute/operations/OpTensorCopy.hpp"
#include "kompute/operations/OpTensorSyncDevice.hpp"
#include "kompute/operations/OpTensorSyncLocal.hpp"

View file

@ -697,6 +697,7 @@ static const unsigned int shaders_glsl_logisticregression_comp_spv_len = 4920;
}
#endif // define SHADEROP_SHADERLOGISTICREGRESSION_HPP
#include <set>
#include <unordered_map>
#define KP_MAX_DIM_SIZE 1
@ -723,7 +724,7 @@ class Tensor
enum class TensorTypes
{
eDevice = 0, ///< Type is device memory, source and destination
eHost = 1, ///< Type is host memory, source and destination
eHost = 1, ///< Type is host memory, source and destination
eStorage = 2, ///< Type is Device memory (only)
};
@ -736,7 +737,8 @@ class Tensor
* Default constructor with data provided which would be used to create the
* respective vulkan buffer and memory.
*
* @param data Non-zero-sized vector of data that will be used by the tensor
* @param data Non-zero-sized vector of data that will be used by the
* tensor
* @param tensorType Type for the tensor which is of type TensorTypes
*/
Tensor(const std::vector<float>& data,
@ -829,24 +831,30 @@ class Tensor
bool createBarrier);
/**
* Records a copy from the internal staging memory to the device memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice.
* Records a copy from the internal staging memory to the device memory
* using an optional barrier to wait for the operation. This function would
* only be relevant for kp::Tensors of type eDevice.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. Default is true.
*/
void recordCopyFromStagingToDevice(std::shared_ptr<vk::CommandBuffer> commandBuffer,
bool createBarrier);
void recordCopyFromStagingToDevice(
std::shared_ptr<vk::CommandBuffer> commandBuffer,
bool createBarrier);
/**
* Records a copy from the internal device memory to the staging memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice.
* Records a copy from the internal device memory to the staging memory
* using an optional barrier to wait for the operation. This function would
* only be relevant for kp::Tensors of type eDevice.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. Default is true.
*/
void recordCopyFromDeviceToStaging(std::shared_ptr<vk::CommandBuffer> commandBuffer,
bool createBarrier);
void recordCopyFromDeviceToStaging(
std::shared_ptr<vk::CommandBuffer> commandBuffer,
bool createBarrier);
/**
* Records the buffer memory barrier into the command buffer which
@ -908,9 +916,17 @@ class Tensor
bool mIsInit = false;
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
void createBuffer(std::shared_ptr<vk::Buffer> buffer, vk::BufferUsageFlags bufferUsageFlags);
void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer, std::shared_ptr<vk::DeviceMemory> memory, vk::MemoryPropertyFlags memoryPropertyFlags);
void copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer, std::shared_ptr<vk::Buffer> bufferFrom, std::shared_ptr<vk::Buffer> bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion, bool createBarrier);
void createBuffer(std::shared_ptr<vk::Buffer> buffer,
vk::BufferUsageFlags bufferUsageFlags);
void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
std::shared_ptr<vk::DeviceMemory> memory,
vk::MemoryPropertyFlags memoryPropertyFlags);
void copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::shared_ptr<vk::Buffer> bufferFrom,
std::shared_ptr<vk::Buffer> bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion,
bool createBarrier);
// Private util functions
vk::BufferUsageFlags getPrimaryBufferUsageFlags();
@ -949,13 +965,11 @@ class OpBase
* @param device Vulkan logical device for passing to Algorithm
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
* @param freeTensors Whether operation manages the memory of the Tensors
*/
OpBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
bool freeTensors)
std::vector<std::shared_ptr<Tensor>>& tensors)
{
SPDLOG_DEBUG("Compute OpBase constructor with params");
@ -963,14 +977,12 @@ class OpBase
this->mDevice = device;
this->mCommandBuffer = commandBuffer;
this->mTensors = tensors;
this->mFreeTensors = freeTensors;
}
/**
* Default destructor for OpBase class. This OpBase destructor class should
* always be called to destroy and free owned resources unless it is
* intended to destroy the resources in the parent class. This can be done
* by passing the mFreeTensors=false.
* intended to destroy the resources in the parent class.
*/
virtual ~OpBase()
{
@ -1234,50 +1246,38 @@ class Sequence
namespace kp {
/**
Operation that creates tensor and manages the memory of the components
created
Operation that syncs tensor's device memory by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be synced into GPU memory, which means that the operation will be done in sync with GPU commands. For TensorTypes::eHost it will only map the data into host memory, which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStorage.
*/
class OpTensorCreate : public OpBase
class OpTensorSyncDevice : public OpBase
{
public:
OpTensorCreate();
OpTensorSyncDevice();
/**
* Default constructor with parameters that provides the bare minimum
* requirements for the operations to be able to create and manage their
* sub-components.
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
*
* @param physicalDevice Vulkan physical device used to find device queues
* @param device Vulkan logical device for passing to Algorithm
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that will be used to create in operation.
* @param freeTensors Whether operation manages the memory of the Tensors
*/
OpTensorCreate(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
OpTensorSyncDevice(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors);
/**
* Default destructor which in this case expects the parent class to free
* the tensors
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
*/
~OpTensorCreate() override;
~OpTensorSyncDevice() override;
/**
* In charge of initialising the primary Tensor as well as the staging
* tensor as required. It will only initialise a staging tensor if the
* Primary tensor is of type Device. For staging tensors it performs a
* mapDataIntoHostMemory which would perform immediately as opposed to
* on sequence eval/submission.
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
*/
void init() override;
/**
* Record runs the core actions to create the tensors. For device tensors
* it records a copyCommand to move the data from the staging tensor to the
* device tensor. The mapping for staging tensors happens in the init function
* not in the record function.
* For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
*/
void record() override;
@ -1287,8 +1287,7 @@ class OpTensorCreate : public OpBase
virtual void preEval() override;
/**
* Performs a copy back into the main tensor to ensure that the data
* contained is the one that is now being stored in the GPU.
* Does not perform any postEval commands.
*/
virtual void postEval() override;
@ -1352,23 +1351,12 @@ class Manager
*
* @param sequenceName The name for the named sequence to be retrieved or
* created
* @param queueIndex The queue to use from the available queues
* @return Shared pointer to the manager owned sequence resource
*/
std::shared_ptr<Sequence> getOrCreateManagedSequence(
std::string sequenceName);
/**
* Create a new managed Kompute sequence so it's available within the
* manager.
*
* @param sequenceName The name for the named sequence to be created, if
* empty then default indexed value is used
* @param queueIndex The queue to use from the available queues
* @return Weak pointer to the manager owned sequence resource
*/
std::shared_ptr<Sequence> createManagedSequence(
std::string sequenceName = "",
uint32_t queueIndex = 0);
std::shared_ptr<Sequence> sequence(
std::string sequenceName = KP_DEFAULT_SESSION,
uint32_t queueIndex = 0);
/**
* Function that evaluates operation against named sequence.
@ -1385,7 +1373,7 @@ class Manager
{
SPDLOG_DEBUG("Kompute Manager evalOp triggered");
std::shared_ptr<kp::Sequence> sq =
this->getOrCreateManagedSequence(sequenceName);
this->sequence(sequenceName);
SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
sq->begin();
@ -1415,10 +1403,8 @@ class Manager
{
SPDLOG_DEBUG("Kompute Manager evalOp Default triggered");
this->mCurrentSequenceIndex++;
this->evalOp<T>(tensors,
KP_DEFAULT_SESSION +
std::to_string(this->mCurrentSequenceIndex),
std::forward<TArgs>(params)...);
this->evalOp<T>(
tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
}
/**
@ -1437,7 +1423,7 @@ class Manager
SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered");
std::shared_ptr<kp::Sequence> sq =
this->getOrCreateManagedSequence(sequenceName);
this->sequence(sequenceName);
SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN");
sq->begin();
@ -1468,10 +1454,8 @@ class Manager
{
SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered");
this->mCurrentSequenceIndex++;
this->evalOpAsync<T>(tensors,
KP_DEFAULT_SESSION +
std::to_string(this->mCurrentSequenceIndex),
std::forward<TArgs>(params)...);
this->evalOpAsync<T>(
tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
}
/**
@ -1512,36 +1496,98 @@ class Manager
void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX)
{
SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered");
this->evalOpAwait(KP_DEFAULT_SESSION +
std::to_string(this->mCurrentSequenceIndex),
waitFor);
this->evalOpAwait(KP_DEFAULT_SESSION, waitFor);
}
/**
* Function that simplifies the common workflow of tensor creation and
* initialization. It will take the constructor parameters for a Tensor
* and will will us it to create a new Tensor and then create it using
* the OpCreateTensor command.
* and will use it to create a new Tensor and then create it. The
* tensor memory will then be managed and owned by the manager.
*
* @param data The data to initialize the tensor with
* @param tensorType The type of tensor to initialize
* @param syncDataToGPU Whether to sync the data to GPU memory
* @returns Initialized Tensor with memory Syncd to GPU device
*/
std::shared_ptr<Tensor> buildTensor(
std::shared_ptr<Tensor> tensor(
const std::vector<float>& data,
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice,
bool syncDataToGPU = true)
{
SPDLOG_DEBUG("Kompute Manager createInitTensor triggered");
SPDLOG_DEBUG("Kompute Manager tensor triggered");
SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr");
std::shared_ptr<Tensor> tensor =
std::make_shared<Tensor>(kp::Tensor(data, tensorType));
this->evalOpDefault<OpTensorCreate>({ tensor });
tensor->init(this->mPhysicalDevice, this->mDevice);
if (syncDataToGPU) {
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
}
this->mManagedTensors.insert(tensor);
return tensor;
}
/**
 * Rebuilds every tensor in the provided list by delegating to the
 * single-tensor rebuild overload, and optionally syncs their data to the
 * GPU afterwards.
 *
 * The per-tensor sync is always disabled in the delegated calls so that,
 * when requested, all tensors are synced together through one
 * OpTensorSyncDevice evaluation instead of one evaluation per tensor.
 *
 * @param tensors Array of tensors to rebuild
 * @param syncDataToGPU Whether to sync the data to GPU memory
 */
void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
             bool syncDataToGPU = true)
{
    SPDLOG_DEBUG("Kompute Manager rebuild triggered");

    // Bind by const reference: copying the shared_ptr on every iteration
    // only adds needless reference-count traffic.
    for (const std::shared_ptr<Tensor>& tensor : tensors) {
        // False syncData to run all tensors at once instead one by one
        this->rebuild(tensor, false);
    }

    if (syncDataToGPU) {
        this->evalOpDefault<OpTensorSyncDevice>(tensors);
    }
}
/**
 * Rebuilds a single tensor. If the tensor is already initialised, its
 * existing GPU resources are freed first. It is then initialised with this
 * manager's physical and logical device and added to the set of tensors
 * managed by this manager (if not already present).
 *
 * @param tensor Single tensor to rebuild
 * @param syncDataToGPU Whether to sync the data to GPU memory
 */
void rebuild(std::shared_ptr<kp::Tensor> tensor,
             bool syncDataToGPU = true)
{
    SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");

    // Release any previously created resources before re-initialising.
    if (tensor->isInit()) {
        tensor->freeMemoryDestroyGPUResources();
    }

    tensor->init(this->mPhysicalDevice, this->mDevice);

    // std::set::insert leaves the set untouched when the tensor is already
    // managed, so no prior lookup is required.
    this->mManagedTensors.insert(tensor);

    if (syncDataToGPU) {
        this->evalOpDefault<OpTensorSyncDevice>({ tensor });
    }
}
private:
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::Instance> mInstance = nullptr;
@ -1552,6 +1598,8 @@ class Manager
bool mFreeDevice = false;
// -------------- ALWAYS OWNED RESOURCES
std::set<std::shared_ptr<Tensor>> mManagedTensors;
std::unordered_map<std::string, std::shared_ptr<Sequence>>
mManagedSequences;
@ -1999,59 +2047,6 @@ class OpTensorCopy : public OpBase
namespace kp {
/**
 * Operation that syncs tensor's device by mapping local data into the device
 * memory. For TensorTypes::eDevice it will use a record operation for the
 * memory to be syncd into GPU memory, which means that the operation will be
 * done in sync with GPU commands. For TensorTypes::eStaging it will only map
 * the data into host memory, which will happen during preEval before the
 * recorded commands are dispatched.
 *
 * NOTE(review): this comment originally closed with "This operation won't
 * have any effect on TensorTypes::eStaging", which contradicts the sentence
 * above, and the preEval() documentation below states that no preEval
 * commands are performed. Presumably the tensor type without effect is
 * TensorTypes::eStorage - confirm against the implementation.
 */
class OpTensorSyncDevice : public OpBase
{
public:
// Constructor taking no resources; declared here, defined out of line.
OpTensorSyncDevice();
/**
 * Default constructor with parameters that provides the core vulkan
 * resources and the tensors that will be used in the operation. The tensors
 * provided cannot be of type TensorTypes::eStorage.
 *
 * @param physicalDevice Vulkan physical device used to find device queues
 * @param device Vulkan logical device for passing to Algorithm
 * @param commandBuffer Vulkan Command Buffer to record commands into
 * @param tensors Tensors that will be used to create in operation.
 */
OpTensorSyncDevice(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors);
/**
 * Default destructor. This class does not manage memory so it won't be
 * expecting the parent to perform a release.
 */
~OpTensorSyncDevice() override;
/**
 * Performs basic checks such as ensuring that there is at least one tensor
 * provided with min memory of 1 element.
 */
void init() override;
/**
 * For device tensors, it records the copy command for the tensor to copy the
 * data from its staging to device memory.
 */
void record() override;
/**
 * Does not perform any preEval commands.
 */
virtual void preEval() override;
/**
 * Does not perform any postEval commands.
 */
virtual void postEval() override;
private:
};
} // End namespace kp
namespace kp {
/**
Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
*/

60
src/Manager.cpp Executable file → Normal file
View file

@ -68,6 +68,18 @@ Manager::~Manager()
this->mManagedSequences.clear();
}
if (this->mManagedTensors.size()) {
SPDLOG_DEBUG("Kompute Manager explicitly freeing tensors");
for (const std::shared_ptr<Tensor>& tensor : this->mManagedTensors) {
if (!tensor->isInit()) {
SPDLOG_ERROR("Kompute Manager attempted to free managed tensor "
"but not tensor is not initialised");
}
tensor->freeMemoryDestroyGPUResources();
}
this->mManagedTensors.clear();
}
if (this->mFreeDevice) {
SPDLOG_INFO("Destroying device");
this->mDevice->destroy(
@ -99,48 +111,34 @@ Manager::~Manager()
}
std::shared_ptr<Sequence>
Manager::getOrCreateManagedSequence(std::string sequenceName)
Manager::sequence(std::string sequenceName, uint32_t queueIndex)
{
SPDLOG_DEBUG("Kompute Manager creating Sequence object");
SPDLOG_DEBUG("Kompute Manager sequence() with sequenceName: {} "
"and queueIndex: {}",
sequenceName,
queueIndex);
std::shared_ptr<Sequence> sq = nullptr;
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator found =
this->mManagedSequences.find(sequenceName);
if (found == this->mManagedSequences.end()) {
return this->createManagedSequence(sequenceName);
std::shared_ptr<Sequence> sq =
std::make_shared<Sequence>(this->mPhysicalDevice,
this->mDevice,
this->mComputeQueues[queueIndex],
this->mComputeQueueFamilyIndices[queueIndex]);
sq->init();
this->mManagedSequences.insert({ sequenceName, sq });
return sq;
} else {
return found->second;
}
}
// Creates a Sequence bound to the compute queue at queueIndex, initialises it
// and registers it in the manager's sequence map. An empty sequenceName is
// replaced by KP_DEFAULT_SESSION followed by the incremented sequence counter.
std::shared_ptr<Sequence>
Manager::createManagedSequence(std::string sequenceName, uint32_t queueIndex)
{
    SPDLOG_DEBUG("Kompute Manager createManagedSequence with sequenceName: {} "
                 "and queueIndex: {}",
                 sequenceName,
                 queueIndex);

    std::shared_ptr<Sequence> created =
      std::make_shared<Sequence>(this->mPhysicalDevice,
                                 this->mDevice,
                                 this->mComputeQueues[queueIndex],
                                 this->mComputeQueueFamilyIndices[queueIndex]);
    created->init();

    // Resolve the key first so that a single insert covers both the named
    // and the auto-numbered default case.
    if (sequenceName.empty()) {
        this->mCurrentSequenceIndex++;
        sequenceName =
          KP_DEFAULT_SESSION + std::to_string(this->mCurrentSequenceIndex);
    }

    // TODO: Check if sequence doesn't already exist
    this->mManagedSequences.insert({ sequenceName, created });

    return created;
}
void
Manager::createInstance()
{

View file

@ -14,7 +14,7 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
KomputeWorkgroup komputeWorkgroup)
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
: OpBase(physicalDevice, device, commandBuffer, tensors)
{
SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}",
tensors.size());

View file

@ -12,7 +12,7 @@ OpTensorCopy::OpTensorCopy(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors)
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
: OpBase(physicalDevice, device, commandBuffer, tensors)
{
SPDLOG_DEBUG("Kompute OpTensorCopy constructor with params");
}

View file

@ -1,76 +0,0 @@
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpTensorCreate.hpp"
namespace kp {

// Parameterless constructor: only emits a debug log entry.
OpTensorCreate::OpTensorCreate()
{
    SPDLOG_DEBUG("Kompute OpTensorCreate constructor base");
}

// Forwards the vulkan resources and tensors to OpBase, passing true as the
// final (freeTensors) argument.
OpTensorCreate::OpTensorCreate(
  std::shared_ptr<vk::PhysicalDevice> physicalDevice,
  std::shared_ptr<vk::Device> device,
  std::shared_ptr<vk::CommandBuffer> commandBuffer,
  std::vector<std::shared_ptr<Tensor>> tensors)
  : OpBase(physicalDevice, device, commandBuffer, tensors, true)
{
    SPDLOG_DEBUG("Kompute OpTensorCreate constructor with params");
}

// Destructor: only emits a debug log entry.
OpTensorCreate::~OpTensorCreate()
{
    SPDLOG_DEBUG("Kompute OpTensorCreate destructor started");
}

// Validates the tensor list, then initialises every non-storage tensor and
// maps its data into host memory. Throws std::runtime_error when no tensor
// was provided or when a tensor has already been initialised.
void
OpTensorCreate::init()
{
    SPDLOG_DEBUG("Kompute OpTensorCreate init called");

    if (this->mTensors.empty()) {
        throw std::runtime_error(
          "Kompute OpTensorCreate called with less than 1 tensor");
    }

    for (const std::shared_ptr<Tensor>& candidate : this->mTensors) {
        if (candidate->isInit()) {
            throw std::runtime_error(
              "Kompute OpTensorCreate: Tensor has already been initialized");
        }

        // Storage tensors are neither initialised nor mapped here.
        if (candidate->tensorType() == Tensor::TensorTypes::eStorage) {
            continue;
        }

        candidate->init(this->mPhysicalDevice, this->mDevice);
        candidate->mapDataIntoHostMemory();
    }
}

// Records a staging-to-device copy for every tensor of type eDevice.
void
OpTensorCreate::record()
{
    SPDLOG_DEBUG("Kompute OpTensorCreate record called");

    for (const std::shared_ptr<Tensor>& candidate : this->mTensors) {
        if (candidate->tensorType() != Tensor::TensorTypes::eDevice) {
            continue;
        }
        candidate->recordCopyFromStagingToDevice(this->mCommandBuffer, false);
    }
}

// No work besides a debug log entry.
void
OpTensorCreate::preEval()
{
    SPDLOG_DEBUG("Kompute OpTensorCreate preEval called");
}

// No work besides a debug log entry.
void
OpTensorCreate::postEval()
{
    SPDLOG_DEBUG("Kompute OpTensorCreate postEval called");
}

}

View file

@ -15,7 +15,7 @@ OpTensorSyncDevice::OpTensorSyncDevice(
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors)
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
: OpBase(physicalDevice, device, commandBuffer, tensors)
{
SPDLOG_DEBUG("Kompute OpTensorSyncDevice constructor with params");
}

View file

@ -15,7 +15,7 @@ OpTensorSyncLocal::OpTensorSyncLocal(
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors)
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
: OpBase(physicalDevice, device, commandBuffer, tensors)
{
SPDLOG_DEBUG("Kompute OpTensorSyncLocal constructor with params");
}

View file

@ -20,7 +20,7 @@ Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
this->mDevice = device;
this->mComputeQueue = computeQueue;
this->mQueueIndex = queueIndex;
this->mIsInit = true;
this->mIsInit = false;
}
Sequence::~Sequence()
@ -203,6 +203,8 @@ Sequence::isInit()
void
Sequence::freeMemoryDestroyGPUResources()
{
SPDLOG_DEBUG("Kompute Sequence freeMemoryDestroyGPUResources called");
if (!this->mIsInit) {
SPDLOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called "
"but Sequence is not initialized so there's no relevant "

View file

@ -229,8 +229,12 @@ Tensor::mapDataFromHostMemory()
if (this->mTensorType == TensorTypes::eHost) {
hostVisibleMemory = this->mPrimaryMemory;
} else {
} else if (this->mTensorType == TensorTypes::eDevice) {
hostVisibleMemory = this->mStagingMemory;
} else {
SPDLOG_WARN(
"Kompute Tensor mapping data not supported on storage tensor");
return;
}
vk::DeviceSize bufferSize = this->memorySize();
@ -252,8 +256,12 @@ Tensor::mapDataIntoHostMemory()
if (this->mTensorType == TensorTypes::eHost) {
hostVisibleMemory = this->mPrimaryMemory;
} else {
} else if (this->mTensorType == TensorTypes::eDevice) {
hostVisibleMemory = this->mStagingMemory;
} else {
SPDLOG_WARN(
"Kompute Tensor mapping data not supported on storage tensor");
return;
}
vk::DeviceSize bufferSize = this->memorySize();

View file

@ -1,12 +1,13 @@
#pragma once
#include <set>
#include <unordered_map>
#include "kompute/Core.hpp"
#include "kompute/Sequence.hpp"
#include "kompute/operations/OpTensorCreate.hpp"
#include "kompute/operations/OpTensorSyncDevice.hpp"
#define KP_DEFAULT_SESSION "DEFAULT"
@ -63,23 +64,12 @@ class Manager
*
* @param sequenceName The name for the named sequence to be retrieved or
* created
* @param queueIndex The queue to use from the available queues
* @return Shared pointer to the manager owned sequence resource
*/
std::shared_ptr<Sequence> getOrCreateManagedSequence(
std::string sequenceName);
/**
* Create a new managed Kompute sequence so it's available within the
* manager.
*
* @param sequenceName The name for the named sequence to be created, if
* empty then default indexed value is used
* @param queueIndex The queue to use from the available queues
* @return Weak pointer to the manager owned sequence resource
*/
std::shared_ptr<Sequence> createManagedSequence(
std::string sequenceName = "",
uint32_t queueIndex = 0);
std::shared_ptr<Sequence> sequence(
std::string sequenceName = KP_DEFAULT_SESSION,
uint32_t queueIndex = 0);
/**
* Function that evaluates operation against named sequence.
@ -96,7 +86,7 @@ class Manager
{
SPDLOG_DEBUG("Kompute Manager evalOp triggered");
std::shared_ptr<kp::Sequence> sq =
this->getOrCreateManagedSequence(sequenceName);
this->sequence(sequenceName);
SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
sq->begin();
@ -126,10 +116,8 @@ class Manager
{
SPDLOG_DEBUG("Kompute Manager evalOp Default triggered");
this->mCurrentSequenceIndex++;
this->evalOp<T>(tensors,
KP_DEFAULT_SESSION +
std::to_string(this->mCurrentSequenceIndex),
std::forward<TArgs>(params)...);
this->evalOp<T>(
tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
}
/**
@ -148,7 +136,7 @@ class Manager
SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered");
std::shared_ptr<kp::Sequence> sq =
this->getOrCreateManagedSequence(sequenceName);
this->sequence(sequenceName);
SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN");
sq->begin();
@ -179,10 +167,8 @@ class Manager
{
SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered");
this->mCurrentSequenceIndex++;
this->evalOpAsync<T>(tensors,
KP_DEFAULT_SESSION +
std::to_string(this->mCurrentSequenceIndex),
std::forward<TArgs>(params)...);
this->evalOpAsync<T>(
tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
}
/**
@ -223,36 +209,98 @@ class Manager
void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX)
{
SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered");
this->evalOpAwait(KP_DEFAULT_SESSION +
std::to_string(this->mCurrentSequenceIndex),
waitFor);
this->evalOpAwait(KP_DEFAULT_SESSION, waitFor);
}
/**
* Function that simplifies the common workflow of tensor creation and
* initialization. It will take the constructor parameters for a Tensor
* and will will us it to create a new Tensor and then create it using
* the OpCreateTensor command.
* and will will us it to create a new Tensor and then create it. The
* tensor memory will then be managed and owned by the manager.
*
* @param data The data to initialize the tensor with
* @param tensorType The type of tensor to initialize
* @param syncDataToGPU Whether to sync the data to GPU memory
* @returns Initialized Tensor with memory Syncd to GPU device
*/
std::shared_ptr<Tensor> buildTensor(
std::shared_ptr<Tensor> tensor(
const std::vector<float>& data,
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice,
bool syncDataToGPU = true)
{
SPDLOG_DEBUG("Kompute Manager createInitTensor triggered");
SPDLOG_DEBUG("Kompute Manager tensor triggered");
SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr");
std::shared_ptr<Tensor> tensor =
std::make_shared<Tensor>(kp::Tensor(data, tensorType));
this->evalOpDefault<OpTensorCreate>({ tensor });
tensor->init(this->mPhysicalDevice, this->mDevice);
if (syncDataToGPU) {
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
}
this->mManagedTensors.insert(tensor);
return tensor;
}
/**
 * Rebuilds a batch of already constructed tensors against this manager's
 * device. Every tensor goes through the single-tensor rebuild overload with
 * syncing disabled; when requested, one OpTensorSyncDevice operation is then
 * evaluated for the whole batch.
 *
 * @param tensors Array of tensors to rebuild
 * @param syncDataToGPU Whether to sync the data to GPU memory
 */
void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
             bool syncDataToGPU = true)
{
    SPDLOG_DEBUG("Kompute Manager rebuild triggered");

    // Syncing is deferred so that all tensors are synced at once below
    // instead of one by one.
    for (const std::shared_ptr<Tensor>& current : tensors) {
        this->rebuild(current, false);
    }

    if (!syncDataToGPU) {
        return;
    }
    this->evalOpDefault<OpTensorSyncDevice>(tensors);
}
/**
 * Rebuilds a single, already constructed tensor against this manager's
 * device. If the tensor is currently initialised its GPU resources are freed
 * first; it is then initialised again and registered in the set of tensors
 * managed by the manager.
 *
 * @param tensor Single tensor to rebuild
 * @param syncDataToGPU Whether to sync the data to GPU memory
 */
void rebuild(std::shared_ptr<kp::Tensor> tensor,
             bool syncDataToGPU = true)
{
    SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");

    const bool alreadyInitialised = tensor->isInit();
    if (alreadyInitialised) {
        tensor->freeMemoryDestroyGPUResources();
    }
    tensor->init(this->mPhysicalDevice, this->mDevice);

    // std::set::insert leaves the set unchanged when the element is already
    // present, so no separate lookup is required before inserting.
    this->mManagedTensors.insert(tensor);

    if (!syncDataToGPU) {
        return;
    }
    this->evalOpDefault<OpTensorSyncDevice>({ tensor });
}
private:
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::Instance> mInstance = nullptr;
@ -263,6 +311,8 @@ class Manager
bool mFreeDevice = false;
// -------------- ALWAYS OWNED RESOURCES
std::set<std::shared_ptr<Tensor>> mManagedTensors;
std::unordered_map<std::string, std::shared_ptr<Sequence>>
mManagedSequences;

View file

@ -31,13 +31,11 @@ class OpBase
* @param device Vulkan logical device for passing to Algorithm
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
* @param freeTensors Whether operation manages the memory of the Tensors
*/
OpBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
bool freeTensors)
std::vector<std::shared_ptr<Tensor>>& tensors)
{
SPDLOG_DEBUG("Compute OpBase constructor with params");
@ -45,14 +43,12 @@ class OpBase
this->mDevice = device;
this->mCommandBuffer = commandBuffer;
this->mTensors = tensors;
this->mFreeTensors = freeTensors;
}
/**
* Default destructor for OpBase class. This OpBase destructor class should
* always be called to destroy and free owned resources unless it is
* intended to destroy the resources in the parent class. This can be done
* by passing the mFreeTensors=false.
* intended to destroy the resources in the parent class.
*/
virtual ~OpBase()
{

View file

@ -1,74 +0,0 @@
#pragma once
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
    Operation that creates tensor and manages the memory of the components
    created
*/
class OpTensorCreate : public OpBase
{
public:
// Constructor taking no resources; declared here, defined out of line.
OpTensorCreate();
/**
 * Default constructor with parameters that provides the bare minimum
 * requirements for the operations to be able to create and manage their
 * sub-components.
 *
 * @param physicalDevice Vulkan physical device used to find device queues
 * @param device Vulkan logical device for passing to Algorithm
 * @param commandBuffer Vulkan Command Buffer to record commands into
 * @param tensors Tensors that will be used to create in operation.
 *
 * NOTE(review): an earlier revision of this comment also listed a
 * freeTensors parameter; this signature has no such parameter.
 */
OpTensorCreate(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors);
/**
 * Default destructor which in this case expects the parent class to free
 * the tensors
 */
~OpTensorCreate() override;
/**
 * In charge of initialising the tensors provided to the operation. The
 * implementation throws std::runtime_error when no tensor was provided or
 * when a tensor is already initialised; every tensor that is not of type
 * TensorTypes::eStorage is initialised and then has mapDataIntoHostMemory
 * called on it, which happens immediately as opposed to on sequence
 * eval/submission.
 */
void init() override;
/**
 * Record runs the core actions to create the tensors. For device tensors
 * it records a copyCommand to move the data from the staging tensor to the
 * device tensor. The mapping for staging tensors happens in the init function
 * not in the record function.
 */
void record() override;
/**
 * Does not perform any preEval commands.
 */
virtual void preEval() override;
/**
 * NOTE(review): previously documented as copying data back into the main
 * tensor, but the implementation only emits a debug log entry - confirm
 * which behaviour is intended.
 */
virtual void postEval() override;
private:
};
} // End namespace kp

View file

@ -49,7 +49,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
inputsSyncB.push_back(std::make_shared<kp::Tensor>(kp::Tensor(data)));
}
mgr.evalOpDefault<kp::OpTensorCreate>(inputsSyncB);
mgr.rebuild(inputsSyncB);
auto startSync = std::chrono::high_resolution_clock::now();
@ -77,10 +77,10 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
inputsAsyncB.push_back(std::make_shared<kp::Tensor>(kp::Tensor(data)));
}
mgrAsync.evalOpDefault<kp::OpTensorCreate>(inputsAsyncB);
mgrAsync.rebuild(inputsAsyncB);
for (uint32_t i = 0; i < numParallel; i++) {
mgrAsync.createManagedSequence("async" + std::to_string(i), i);
mgrAsync.sequence("async" + std::to_string(i), i);
}
auto startAsync = std::chrono::high_resolution_clock::now();
@ -146,10 +146,10 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(data) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(data) };
mgr.createManagedSequence("asyncOne");
mgr.createManagedSequence("asyncTwo");
mgr.sequence("asyncOne");
mgr.sequence("asyncTwo");
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
mgr.rebuild({ tensorA, tensorB });
mgr.evalOpAsync<kp::OpAlgoBase>(
{ tensorA }, "asyncOne", std::vector<char>(shader.begin(), shader.end()));

View file

@ -32,14 +32,9 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression)
{
kp::Manager mgr;
std::shared_ptr<kp::Sequence> sqTensor = mgr.createManagedSequence();
mgr.rebuild(params);
sqTensor->begin();
sqTensor->record<kp::OpTensorCreate>(params);
sqTensor->end();
sqTensor->eval();
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
// Record op algo base
sq->begin();
@ -122,14 +117,9 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy)
{
kp::Manager mgr;
std::shared_ptr<kp::Sequence> sqTensor = mgr.createManagedSequence();
mgr.rebuild(params);
sqTensor->begin();
sqTensor->record<kp::OpTensorCreate>(params);
sqTensor->end();
sqTensor->eval();
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
// Record op algo base
sq->begin();

View file

@ -8,14 +8,14 @@ TEST(TestManager, EndToEndOpMultFlow)
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorLHS{ new kp::Tensor({ 0, 1, 2 }) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorLHS });
mgr.rebuild({ tensorLHS });
std::shared_ptr<kp::Tensor> tensorRHS{ new kp::Tensor({ 2, 4, 6 }) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorRHS });
mgr.rebuild({ tensorRHS });
std::shared_ptr<kp::Tensor> tensorOutput{ new kp::Tensor({ 0, 0, 0 }) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorOutput });
mgr.rebuild({ tensorOutput });
mgr.evalOpDefault<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });
@ -36,15 +36,13 @@ TEST(TestManager, OpMultSequenceFlow)
kp::Manager mgr;
{
mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput });
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("newSequence");
mgr.sequence("newSequence");
sq->begin();
sq->record<kp::OpTensorCreate>({ tensorLHS });
sq->record<kp::OpTensorCreate>({ tensorRHS });
sq->record<kp::OpTensorCreate>({ tensorOutput });
sq->record<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });
sq->record<kp::OpTensorSyncLocal>({ tensorOutput });
@ -61,16 +59,16 @@ TEST(TestManager, TestMultipleSequences)
kp::Manager mgr;
std::shared_ptr<kp::Sequence> sqOne =
mgr.getOrCreateManagedSequence("sqOne");
mgr.sequence("sqOne");
std::shared_ptr<kp::Sequence> sqTwo =
mgr.getOrCreateManagedSequence("sqTwo");
mgr.sequence("sqTwo");
std::shared_ptr<kp::Sequence> sqOneRef =
mgr.getOrCreateManagedSequence("sqOne");
mgr.sequence("sqOne");
std::shared_ptr<kp::Sequence> sqTwoRef =
mgr.getOrCreateManagedSequence("sqTwo");
mgr.sequence("sqTwo");
EXPECT_EQ(sqOne, sqOneRef);
EXPECT_NE(sqTwo, sqOneRef);
@ -90,17 +88,17 @@ TEST(TestManager, TestMultipleTensorsAtOnce)
kp::Manager mgr;
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("newSequence");
mgr.sequence("newSequence");
{
sq->begin();
sq->record<kp::OpTensorCreate>({ tensorLHS, tensorRHS, tensorOutput });
mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput });
EXPECT_TRUE(tensorLHS->isInit());
EXPECT_TRUE(tensorRHS->isInit());
EXPECT_TRUE(tensorOutput->isInit());
sq->begin();
sq->record<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });
sq->record<kp::OpTensorSyncLocal>({ tensorOutput });
@ -116,8 +114,8 @@ TEST(TestManager, TestCreateInitTensor)
{
kp::Manager mgr;
std::shared_ptr<kp::Tensor> tensorA = mgr.buildTensor({ 0, 1, 2 });
std::shared_ptr<kp::Tensor> tensorB = mgr.buildTensor({ 0, 0, 0 });
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 1, 2 });
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorB });
@ -126,7 +124,7 @@ TEST(TestManager, TestCreateInitTensor)
EXPECT_EQ(tensorB->data(), std::vector<float>({ 0, 1, 2 }));
std::shared_ptr<kp::Tensor> tensorC =
mgr.buildTensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost);
mgr.tensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost);
mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorC });

View file

@ -19,14 +19,14 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
pa[index] = pa[index] + 1;
})");
mgr.rebuild({ tensorA });
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("newSequence");
mgr.sequence("newSequence");
{
sq->begin();
sq->record<kp::OpTensorCreate>({ tensorA });
sq->record<kp::OpAlgoBase>(
{ tensorA }, std::vector<char>(shader.begin(), shader.end()));
sq->record<kp::OpAlgoBase>(
@ -58,13 +58,15 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
pa[index] = pa[index] + 1;
})");
std::shared_ptr<kp::Sequence> sqTensor = mgr.createManagedSequence();
mgr.rebuild({ tensorA }, false);
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
std::shared_ptr<kp::Sequence> sqTensor = mgr.sequence();
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
// First create the tensor in a separate sequence
sqTensor->begin();
sqTensor->record<kp::OpTensorCreate>({ tensorA });
sqTensor->record<kp::OpTensorSyncDevice>({ tensorA });
sqTensor->end();
sqTensor->eval();
@ -111,24 +113,11 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
pa[index] = pa[index] + 1;
})");
{
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("newSequence");
sq->begin();
sq->record<kp::OpTensorCreate>({ tensorA });
sq->record<kp::OpAlgoBase>(
{ tensorA }, std::vector<char>(shader.begin(), shader.end()));
sq->end();
sq->eval();
}
mgr.rebuild({ tensorA });
{
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("newSequence2");
mgr.sequence("newSequence");
sq->begin();
@ -141,7 +130,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
{
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("newSequence3");
mgr.sequence("newSequence2");
sq->begin();
@ -154,7 +143,20 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
{
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("newSequence5");
mgr.sequence("newSequence3");
sq->begin();
sq->record<kp::OpAlgoBase>(
{ tensorA }, std::vector<char>(shader.begin(), shader.end()));
sq->end();
sq->eval();
}
{
std::shared_ptr<kp::Sequence> sq =
mgr.sequence("newSequence5");
sq->begin();
@ -183,13 +185,15 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
pa[index] = pa[index] + 1;
})");
mgr.rebuild({ tensorA }, false);
{
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("newSequence");
mgr.sequence("newSequence");
sq->begin();
sq->record<kp::OpTensorCreate>({ tensorA });
sq->record<kp::OpTensorSyncDevice>({ tensorA });
sq->end();
sq->eval();
@ -197,7 +201,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
{
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("newSequence2");
mgr.sequence("newSequence2");
sq->begin();
@ -213,7 +217,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
{
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("newSequence3");
mgr.sequence("newSequence3");
sq->begin();
@ -238,7 +242,7 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate)
std::shared_ptr<kp::Tensor> tensorInB{ new kp::Tensor({ 0.0, 1.0, 2.0 }) };
std::shared_ptr<kp::Tensor> tensorOut{ new kp::Tensor({ 0.0, 0.0, 0.0 }) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorInA, tensorInB, tensorOut });
mgr.rebuild({ tensorInA, tensorInB, tensorOut });
std::string shader(R"(
// The version to use
@ -273,9 +277,12 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate)
kp::Manager mgr;
auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 });
auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 });
auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 });
auto tensorInA = mgr.tensor(
{ 2.0, 4.0, 6.0 }, kp::Tensor::TensorTypes::eDevice, false);
auto tensorInB = mgr.tensor(
{ 0.0, 1.0, 2.0 }, kp::Tensor::TensorTypes::eDevice, false);
auto tensorOut = mgr.tensor(
{ 0.0, 0.0, 0.0 }, kp::Tensor::TensorTypes::eDevice, false);
std::string shader(R"(
// The version to use
@ -296,6 +303,9 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate)
}
)");
mgr.evalOpDefault<kp::OpTensorSyncDevice>(
{ tensorInA, tensorInB, tensorOut });
mgr.evalOpDefault<kp::OpAlgoBase>(
{ tensorInA, tensorInB, tensorOut },
std::vector<char>(shader.begin(), shader.end()));
@ -304,3 +314,39 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate)
EXPECT_EQ(tensorOut->data(), std::vector<float>({ 0.0, 4.0, 12.0 }));
}
// Verifies that a tensor created outside of the Manager's scope still exposes
// the computed data after both the Manager and the Sequence have gone out of
// scope.
TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope)
{
// The tensor is declared first so it outlives the manager and the sequence.
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
// Shader that adds 1 to every element of the bound buffer.
std::string shader(R"(
#version 450
layout (local_size_x = 1) in;
layout(set = 0, binding = 0) buffer a { float pa[]; };
void main() {
uint index = gl_GlobalInvocationID.x;
pa[index] = pa[index] + 1;
})");
{
// Declared in the outer scope so that the sequence handle goes out of
// scope only after the manager declared in the inner scope.
std::shared_ptr<kp::Sequence> sq = nullptr;
{
kp::Manager mgr;
// Initialise the tensor through the manager (default syncs data to GPU).
mgr.rebuild({ tensorA });
sq = mgr.sequence();
sq->begin();
sq->record<kp::OpAlgoBase>(
{ tensorA }, std::vector<char>(shader.begin(), shader.end()));
sq->end();
sq->eval();
// Bring the result back into local memory before the manager goes away.
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
}
}
// One shader run over { 0, 0, 0 } is expected to leave { 1, 1, 1 }.
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
}

View file

@ -30,13 +30,15 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies)
}
)");
mgr.rebuild({ tensorA, tensorB }, false);
{
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("default");
mgr.sequence("default");
sq->begin();
sq->record<kp::OpTensorCreate>({ tensorA, tensorB });
sq->record<kp::OpTensorSyncDevice>({ tensorA, tensorB });
sq->end();
@ -45,7 +47,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies)
{
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("run");
mgr.sequence("run");
sq->begin();
@ -63,7 +65,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies)
{
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("export");
mgr.sequence("export");
sq->begin();

View file

@ -11,7 +11,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor)
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
mgr.rebuild({ tensorA, tensorB });
std::string shader(R"(
#version 450
@ -43,7 +43,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor)
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
mgr.rebuild({ tensorA, tensorB });
mgr.evalOpDefault<kp::OpAlgoBase>(
{ tensorA, tensorB },
@ -65,7 +65,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromFile)
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
mgr.rebuild({ tensorA, tensorB });
mgr.evalOpDefault<kp::OpAlgoBase>(
{ tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp");
@ -82,7 +82,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile)
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
mgr.rebuild({ tensorA, tensorB });
mgr.evalOpDefault<kp::OpAlgoBase>(
{ tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv");

View file

@ -8,13 +8,13 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor)
kp::Manager mgr;
std::vector<float> testVecA{ 9, 8, 7 };
std::vector<float> testVecA{ 1, 2, 3 };
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
mgr.rebuild({ tensorA, tensorB });
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
@ -33,7 +33,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
kp::Manager mgr;
std::vector<float> testVecA{ 9, 8, 7 };
std::vector<float> testVecA{ 2, 3, 4 };
std::vector<float> testVecB{ 0, 0, 0 };
std::vector<float> testVecC{ 0, 0, 0 };
@ -41,7 +41,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
std::shared_ptr<kp::Tensor> tensorC{ new kp::Tensor(testVecC) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB, tensorC });
mgr.rebuild({ tensorA, tensorB, tensorC });
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
@ -63,14 +63,17 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
kp::Manager mgr;
std::vector<float> testVecA{ 9, 8, 7 };
std::vector<float> testVecA{ 3, 4, 5 };
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(
testVecB, kp::Tensor::TensorTypes::eHost) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
mgr.rebuild({ tensorA, tensorB }, false);
// Only calling sync on device type tensor
mgr.evalOpDefault<kp::OpTensorSyncDevice>({ tensorA });
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
@ -89,14 +92,20 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
kp::Manager mgr;
std::vector<float> testVecA{ 9, 8, 7 };
std::vector<float> testVecA{ 4, 5, 6 };
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
testVecA, kp::Tensor::TensorTypes::eHost) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
mgr.rebuild({ tensorA, tensorB }, false);
// Manually copy data into host memory of Tensor
tensorA->mapDataIntoHostMemory();
// Only calling sync on device type tensor
mgr.evalOpDefault<kp::OpTensorSyncDevice>({ tensorB });
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
@ -115,7 +124,7 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
kp::Manager mgr;
std::vector<float> testVecA{ 9, 8, 7 };
std::vector<float> testVecA{ 5, 6, 7 };
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
@ -123,7 +132,7 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(
testVecB, kp::Tensor::TensorTypes::eHost) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
mgr.rebuild({ tensorA, tensorB });
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
@ -142,12 +151,12 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail)
kp::Manager mgr;
std::vector<float> testVecA{ 9, 8, 7 };
std::vector<float> testVecA{ 6, 7, 8 };
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
testVecA, kp::Tensor::TensorTypes::eHost) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
mgr.rebuild({ tensorA }, false);
EXPECT_TRUE(tensorA->isInit());

View file

@ -5,20 +5,19 @@
// Single-tensor creation test: a tensor is expected to report isInit() and
// hold the original vector data after creation, and to report !isInit() by
// the end of the test.
// NOTE(review): this span comes from a diff hunk rendered without +/- markers,
// so lines from the old and the new test body appear together (two
// `kp::Manager mgr;` declarations, both an evalOpDefault<OpTensorCreate> and
// a rebuild call, repeated expectations). Confirm against the commit which
// lines belong to which version before relying on the statement order.
TEST(TestOpTensorCreate, CreateSingleTensorSingleOp)
{
kp::Manager mgr;
std::vector<float> testVecA{ 9, 8, 7 };
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
{
// This Manager lives only until the closing brace of this inner scope.
kp::Manager mgr;
EXPECT_TRUE(tensorA->isInit());
mgr.rebuild({ tensorA });
EXPECT_EQ(tensorA->data(), testVecA);
EXPECT_TRUE(tensorA->isInit());
EXPECT_EQ(tensorA->data(), testVecA);
}
tensorA->freeMemoryDestroyGPUResources();
// By this point the tensor must no longer report as initialised.
EXPECT_FALSE(tensorA->isInit());
}
@ -33,7 +32,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorSingleOp)
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
mgr.rebuild({ tensorA, tensorB });
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
@ -53,8 +52,8 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp)
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorB });
mgr.rebuild({ tensorA });
mgr.rebuild({ tensorB });
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
@ -63,7 +62,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp)
EXPECT_EQ(tensorB->data(), testVecB);
}
TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed)
TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerDestroyed)
{
std::vector<float> testVecA{ 9, 8, 7 };
@ -74,8 +73,8 @@ TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed)
{
kp::Manager mgr;
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorB });
mgr.rebuild({ tensorA });
mgr.rebuild({ tensorB });
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
@ -88,6 +87,32 @@ TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed)
EXPECT_FALSE(tensorB->isInit());
}
// Checks that tensors rebuilt through a Manager still report isInit() after
// the inner scope closes, because the Manager is declared outside that scope
// and is therefore still alive when the final expectations run.
TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerNOTDestroyed)
{
std::vector<float> testVecA{ 9, 8, 7 };
std::vector<float> testVecB{ 6, 5, 4 };
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
// Declared before the inner scope so it outlives it.
kp::Manager mgr;
{
// Each tensor is handed to the Manager in its own rebuild call.
mgr.rebuild({ tensorA });
mgr.rebuild({ tensorB });
// After rebuild both tensors are expected to be initialised and to
// return the same values they were constructed with.
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
EXPECT_EQ(tensorA->data(), testVecA);
EXPECT_EQ(tensorB->data(), testVecB);
}
// Leaving the inner scope destroys nothing declared above it, so the
// tensors must still be initialised here.
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
}
TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore)
{
@ -99,8 +124,8 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore)
kp::Manager mgr;
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorB });
mgr.rebuild({ tensorA });
mgr.rebuild({ tensorB });
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
@ -123,7 +148,7 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor)
kp::Manager mgr;
try {
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
mgr.rebuild({ tensorA });
} catch (const std::runtime_error& err) {
// check exception
ASSERT_TRUE(std::string(err.what()).find("zero-sized") !=

View file

@ -13,7 +13,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor)
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecPreA) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
mgr.rebuild({ tensorA }, false);
EXPECT_TRUE(tensorA->isInit());
@ -37,7 +37,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
std::shared_ptr<kp::Tensor> tensorC{ new kp::Tensor({ 0, 0, 0 }) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB, tensorC });
mgr.rebuild({ tensorA, tensorB, tensorC }, false);
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());

View file

@ -9,7 +9,7 @@ TEST(TestSequence, CmdBufSequenceBeginEnd)
{
std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("newSequence");
mgr.sequence("newSequence");
EXPECT_TRUE(sq->eval());
EXPECT_TRUE(!sq->isRecording());
@ -32,10 +32,11 @@ TEST(TestSequence, SequenceDestructorViaManager)
{
kp::Manager mgr;
sq = mgr.getOrCreateManagedSequence("newSequence");
sq = mgr.sequence("newSequence");
EXPECT_TRUE(sq->isInit());
}
EXPECT_FALSE(sq->isInit());
}

View file

@ -23,11 +23,11 @@ TEST(TestTensor, CopyFromHostData)
kp::Manager mgr;
if (std::shared_ptr<kp::Sequence> sq =
mgr.getOrCreateManagedSequence("new")) {
sq->begin();
mgr.rebuild({ tensorA, tensorB });
sq->record<kp::OpTensorCreate>({ tensorA, tensorB });
if (std::shared_ptr<kp::Sequence> sq =
mgr.sequence("new")) {
sq->begin();
sq->record<kp::OpTensorCopy>({ tensorA, tensorB });