Merge pull request #138 from EthicalML/136_memory_hierarchy_tensor_manager
Amend memory ownership hierarchy to have Tensor owned by Manager instead of OpCreateTensor / OpBase
This commit is contained in:
commit
d5df5c1f41
42 changed files with 507 additions and 576 deletions
1
.ccls
1
.ccls
|
|
@ -13,6 +13,7 @@
|
|||
-DDEBUG=1
|
||||
-DKOMPUTE_INCLUDE_FOR_SYNTAX
|
||||
|
||||
-I/usr/include/python3.6/
|
||||
-I./python/pybind11/include/
|
||||
-I./external/Vulkan-Headers/include/
|
||||
-I./external/googletest/googletest/include/
|
||||
|
|
|
|||
5
Makefile
5
Makefile
|
|
@ -156,6 +156,11 @@ vs_run_tests: vs_build_tests
|
|||
./build/test/$(VS_BUILD_TYPE)/test_kompute.exe --gtest_filter=$(FILTER_TESTS)
|
||||
|
||||
|
||||
#### PYTHON ####
|
||||
|
||||
test_python:
|
||||
python -m pytest -s --log-cli-level=DEBUG -v python/test/
|
||||
|
||||
####### Run CI Commands #######
|
||||
|
||||
# This command uses act to replicate github action
|
||||
|
|
|
|||
|
|
@ -54,9 +54,9 @@ int main() {
|
|||
kp::Manager mgr;
|
||||
|
||||
// 2. Create and initialise Kompute Tensors through manager
|
||||
auto tensorInA = mgr.buildTensor({ 2., 2., 2. });
|
||||
auto tensorInB = mgr.buildTensor({ 1., 2., 3. });
|
||||
auto tensorOut = mgr.buildTensor({ 0., 0., 0. });
|
||||
auto tensorInA = mgr.tensor({ 2., 2., 2. });
|
||||
auto tensorInB = mgr.tensor({ 1., 2., 3. });
|
||||
auto tensorOut = mgr.tensor({ 0., 0., 0. });
|
||||
|
||||
// 3. Specify "multiply shader" code (can also be raw string, spir-v bytes or file path)
|
||||
std::string shaderString = (R"(
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ Record commands in a single submit by using a Sequence to send in batch to GPU.
|
|||
mgr.evalOpDefault<kp::OpCreateTensor>({tensorLHS, tensorRHS, tensorOutput});
|
||||
|
||||
// Create a new sequence
|
||||
std::weak_ptr<kp::Sequence> sqWeakPtr = mgr.getOrCreateManagedSequence();
|
||||
std::weak_ptr<kp::Sequence> sqWeakPtr = mgr.sequence();
|
||||
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock())
|
||||
{
|
||||
|
|
@ -226,8 +226,8 @@ Back to `examples list <#simple-examples>`_.
|
|||
// We need to create explicit sequences with their respective queues
|
||||
// The second parameter is the index in the familyIndex array which is relative
|
||||
// to the vector we created the manager with.
|
||||
mgr.createManagedSequence("queueOne", 0);
|
||||
mgr.createManagedSequence("queueTwo", 1);
|
||||
mgr.sequence("queueOne", 0);
|
||||
mgr.sequence("queueTwo", 1);
|
||||
|
||||
// Creates tensor and initializes GPU memory (below we show more granularity)
|
||||
auto tensorA = std::make_shared<kp::Tensor>(kp::Tensor(std::vector<float>(10, 0.0)));
|
||||
|
|
@ -422,7 +422,7 @@ Now that we have the inputs and outputs we will be able to use them in the proce
|
|||
kp::Manager mgr;
|
||||
|
||||
if (std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("createTensors").lock())
|
||||
mgr.sequence("createTensors").lock())
|
||||
{
|
||||
// ...
|
||||
|
||||
|
|
|
|||
|
|
@ -208,8 +208,8 @@ It's worth mentioning you can have multiple sequences referencing the same queue
|
|||
// We need to create explicit sequences with their respective queues
|
||||
// The second parameter is the index in the familyIndex array which is relative
|
||||
// to the vector we created the manager with.
|
||||
mgr.createManagedSequence("queueOne", 0);
|
||||
mgr.createManagedSequence("queueTwo", 1);
|
||||
mgr.sequence("queueOne", 0);
|
||||
mgr.sequence("queueTwo", 1);
|
||||
|
||||
We create the tensors without modifications.
|
||||
|
||||
|
|
|
|||
|
|
@ -86,16 +86,6 @@ The kp::OpMult operation is a sample implementation of the kp::OpAlgoBase class.
|
|||
.. doxygenclass:: kp::OpMult
|
||||
:members:
|
||||
|
||||
OpTensorCreate
|
||||
-------
|
||||
|
||||
The kp::OpTensorCreate is a tensor only operations which initialises a kp::Tensor by creating the respective vk::Buffer and vk::Memory, as well as transferring the local data into the GPU.
|
||||
|
||||
.. image:: ../images/kompute-vulkan-architecture-opcreatetensor.jpg
|
||||
:width: 100%
|
||||
|
||||
.. doxygenclass:: kp::OpTensorCreate
|
||||
:members:
|
||||
|
||||
OpTensorCopy
|
||||
-------
|
||||
|
|
|
|||
|
|
@ -42,16 +42,9 @@ void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData,
|
|||
kp::Manager mgr;
|
||||
|
||||
{
|
||||
mgr.rebuild(params);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqTensor =
|
||||
mgr.createManagedSequence();
|
||||
|
||||
sqTensor->begin();
|
||||
sqTensor->record<kp::OpTensorCreate>(params);
|
||||
sqTensor->end();
|
||||
sqTensor->eval();
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
|
|
|
|||
|
|
@ -14,9 +14,9 @@ int main()
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 });
|
||||
auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 });
|
||||
auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 });
|
||||
auto tensorInA = mgr.tensor({ 2.0, 4.0, 6.0 });
|
||||
auto tensorInB = mgr.tensor({ 0.0, 1.0, 2.0 });
|
||||
auto tensorOut = mgr.tensor({ 0.0, 0.0, 0.0 });
|
||||
|
||||
#ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING
|
||||
std::string shader(R"(
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ void KomputeSummatorNode::add(float value) {
|
|||
// Set the new data in the local device
|
||||
this->mSecondaryTensor->setData({value});
|
||||
// Execute recorded sequence
|
||||
if (std::shared_ptr<kp::Sequence> sq = this->mSequence.lock()) {
|
||||
if (std::shared_ptr<kp::Sequence> sq = this->mSequence) {
|
||||
sq->eval();
|
||||
}
|
||||
else {
|
||||
|
|
@ -29,12 +29,12 @@ float KomputeSummatorNode::get_total() const {
|
|||
|
||||
void KomputeSummatorNode::_init() {
|
||||
std::cout << "CALLING INIT" << std::endl;
|
||||
this->mPrimaryTensor = this->mManager.buildTensor({ 0.0 });
|
||||
this->mSecondaryTensor = this->mManager.buildTensor({ 0.0 });
|
||||
this->mSequence = this->mManager.getOrCreateManagedSequence("AdditionSeq");
|
||||
this->mPrimaryTensor = this->mManager.tensor({ 0.0 });
|
||||
this->mSecondaryTensor = this->mManager.tensor({ 0.0 });
|
||||
this->mSequence = this->mManager.sequence("AdditionSeq");
|
||||
|
||||
// We now record the steps in the sequence
|
||||
if (std::shared_ptr<kp::Sequence> sq = this->mSequence.lock())
|
||||
if (std::shared_ptr<kp::Sequence> sq = this->mSequence)
|
||||
{
|
||||
|
||||
std::string shader(R"(
|
||||
|
|
@ -59,7 +59,7 @@ void KomputeSummatorNode::_init() {
|
|||
{ this->mSecondaryTensor });
|
||||
|
||||
// Then we run the operation with both tensors
|
||||
sq->record<kp::OpAlgoBase<>>(
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ this->mPrimaryTensor, this->mSecondaryTensor },
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
|
|
|
|||
|
|
@ -28,9 +28,9 @@ float KomputeSummator::get_total() const {
|
|||
|
||||
void KomputeSummator::_init() {
|
||||
std::cout << "CALLING INIT" << std::endl;
|
||||
this->mPrimaryTensor = this->mManager.buildTensor({ 0.0 });
|
||||
this->mSecondaryTensor = this->mManager.buildTensor({ 0.0 });
|
||||
this->mSequence = this->mManager.getOrCreateManagedSequence("AdditionSeq");
|
||||
this->mPrimaryTensor = this->mManager.tensor({ 0.0 });
|
||||
this->mSecondaryTensor = this->mManager.tensor({ 0.0 });
|
||||
this->mSequence = this->mManager.sequence("AdditionSeq");
|
||||
|
||||
// We now record the steps in the sequence
|
||||
{
|
||||
|
|
|
|||
|
|
@ -50,15 +50,10 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqTensor =
|
||||
mgr.createManagedSequence();
|
||||
mgr.rebuild(params);
|
||||
|
||||
sqTensor->begin();
|
||||
sqTensor->record<kp::OpTensorCreate>(params);
|
||||
sqTensor->end();
|
||||
sqTensor->eval();
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
|
|
|
|||
|
|
@ -55,15 +55,9 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
|
|||
kp::Manager mgr;
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sqTensor =
|
||||
mgr.createManagedSequence();
|
||||
mgr.rebuild(params);
|
||||
|
||||
sqTensor->begin();
|
||||
sqTensor->record<kp::OpTensorCreate>(params);
|
||||
sqTensor->end();
|
||||
sqTensor->eval();
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
|
|
|
|||
|
|
@ -35,15 +35,9 @@ int main()
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqTensor =
|
||||
mgr.createManagedSequence();
|
||||
mgr.rebuild(params);
|
||||
|
||||
sqTensor->begin();
|
||||
sqTensor->record<kp::OpTensorCreate>(params);
|
||||
sqTensor->end();
|
||||
sqTensor->eval();
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ integrate with the vulkan kompute use.
|
|||
@param device Vulkan logical device to use for all base resources
|
||||
@param physicalDeviceIndex Index for vulkan physical device used)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_buildTensor =
|
||||
static const char *__doc_kp_Manager_tensor =
|
||||
R"doc(Function that simplifies the common workflow of tensor creation and
|
||||
initialization. It will take the constructor parameters for a Tensor
|
||||
and will will us it to create a new Tensor and then create it using
|
||||
|
|
@ -133,15 +133,6 @@ static const char *__doc_kp_Manager_createDevice = R"doc()doc";
|
|||
|
||||
static const char *__doc_kp_Manager_createInstance = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_Manager_createManagedSequence =
|
||||
R"doc(Create a new managed Kompute sequence so it's available within the
|
||||
manager.
|
||||
|
||||
@param sequenceName The name for the named sequence to be created, if
|
||||
empty then default indexed value is used @param queueIndex The queue
|
||||
to use from the available queues @return Weak pointer to the manager
|
||||
owned sequence resource)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_evalOp =
|
||||
R"doc(Function that evaluates operation against named sequence.
|
||||
|
||||
|
|
@ -187,7 +178,7 @@ R"doc(Function that evaluates operation against a newly created sequence.
|
|||
TArgs Template parameters that will be used to initialise Operation to
|
||||
allow for extensible configurations on initialisation)doc";
|
||||
|
||||
static const char *__doc_kp_Manager_getOrCreateManagedSequence =
|
||||
static const char *__doc_kp_Manager_sequence =
|
||||
R"doc(Get or create a managed Sequence that will be contained by this
|
||||
manager. If the named sequence does not currently exist, it would be
|
||||
created and initialised.
|
||||
|
|
|
|||
|
|
@ -105,8 +105,6 @@ PYBIND11_MODULE(kp, m) {
|
|||
.def("is_init", &kp::Sequence::isInit, "Checks if the Sequence has been initialized")
|
||||
|
||||
// record
|
||||
.def("record_tensor_create", &kp::Sequence::record<kp::OpTensorCreate>,
|
||||
"Records operation to create and initialise tensor GPU memory and buffer")
|
||||
.def("record_tensor_copy", &kp::Sequence::record<kp::OpTensorCopy>,
|
||||
"Records operation to copy one tensor to one or many tensors")
|
||||
.def("record_tensor_sync_device", &kp::Sequence::record<kp::OpTensorSyncDevice>,
|
||||
|
|
@ -157,11 +155,16 @@ PYBIND11_MODULE(kp, m) {
|
|||
[](uint32_t physicalDeviceIndex, const std::vector<uint32_t>& familyQueueIndices) {
|
||||
return std::unique_ptr<kp::Manager>(new kp::Manager(physicalDeviceIndex, familyQueueIndices));
|
||||
}), "Manager initialiser can provide specified device and array of GPU queueFamilies to load.")
|
||||
.def("get_create_sequence", &kp::Manager::getOrCreateManagedSequence, "Get a Sequence or create a new one with given name")
|
||||
.def("create_sequence", &kp::Manager::createManagedSequence,
|
||||
py::arg("name") = "", py::arg("queueIndex") = 0, "Create a sequence with specific name and specified index of available queues")
|
||||
.def("build_tensor", &kp::Manager::buildTensor,
|
||||
py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice,
|
||||
.def("sequence", &kp::Manager::sequence,
|
||||
py::arg("name") = "", py::arg("queueIndex") = 0, "Get or create a sequence with specific name and specified index of available queues")
|
||||
.def("tensor", &kp::Manager::tensor,
|
||||
py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, py::arg("syncDataToGPU") = true,
|
||||
"Build and initialise tensor")
|
||||
.def("rebuild", py::overload_cast<std::vector<std::shared_ptr<kp::Tensor>>, bool>(&kp::Manager::rebuild),
|
||||
py::arg("tensors"), py::arg("syncDataToGPU") = true,
|
||||
"Build and initialise list of tensors")
|
||||
.def("rebuild", py::overload_cast<std::shared_ptr<kp::Tensor>, bool>(&kp::Manager::rebuild),
|
||||
py::arg("tensor"), py::arg("syncDataToGPU") = true,
|
||||
"Build and initialise tensor")
|
||||
|
||||
// Await functions
|
||||
|
|
@ -172,8 +175,6 @@ PYBIND11_MODULE(kp, m) {
|
|||
py::arg("waitFor") = UINT64_MAX, "Awaits for asynchronous operation on the last anonymous Sequence created")
|
||||
|
||||
// eval default
|
||||
.def("eval_tensor_create_def", &kp::Manager::evalOpDefault<kp::OpTensorCreate>,
|
||||
"Evaluates operation to create and initialise tensor GPU memory and buffer with new anonymous Sequence")
|
||||
.def("eval_tensor_copy_def", &kp::Manager::evalOpDefault<kp::OpTensorCopy>,
|
||||
"Evaluates operation to copy one tensor to one or many tensors with new anonymous Sequence")
|
||||
.def("eval_tensor_sync_device_def", &kp::Manager::evalOpDefault<kp::OpTensorSyncDevice>,
|
||||
|
|
@ -209,8 +210,6 @@ PYBIND11_MODULE(kp, m) {
|
|||
"Evaluates operation to run left right out operation with custom shader with new anonymous Sequence")
|
||||
|
||||
// eval
|
||||
.def("eval_tensor_create", &kp::Manager::evalOp<kp::OpTensorCreate>,
|
||||
"Evaluates operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence")
|
||||
.def("eval_tensor_copy", &kp::Manager::evalOp<kp::OpTensorCopy>,
|
||||
"Evaluates operation to copy one tensor to one or many tensors with explicitly named Sequence")
|
||||
.def("eval_tensor_sync_device", &kp::Manager::evalOp<kp::OpTensorSyncDevice>,
|
||||
|
|
@ -249,8 +248,6 @@ PYBIND11_MODULE(kp, m) {
|
|||
"Evaluates operation to run left right out operation with custom shader with explicitly named Sequence")
|
||||
|
||||
// eval async default
|
||||
.def("eval_async_tensor_create_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorCreate>,
|
||||
"Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with anonymous Sequence")
|
||||
.def("eval_async_tensor_copy_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorCopy>,
|
||||
"Evaluates asynchronously operation to copy one tensor to one or many tensors with anonymous Sequence")
|
||||
.def("eval_async_tensor_sync_device_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorSyncDevice>,
|
||||
|
|
@ -286,8 +283,6 @@ PYBIND11_MODULE(kp, m) {
|
|||
"Evaluates asynchronously operation to run left right out operation with custom shader with anonymous Sequence")
|
||||
|
||||
// eval async
|
||||
.def("eval_async_tensor_create", &kp::Manager::evalOpAsync<kp::OpTensorCreate>,
|
||||
"Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence")
|
||||
.def("eval_async_tensor_copy", &kp::Manager::evalOpAsync<kp::OpTensorCopy>,
|
||||
"Evaluates asynchronously operation to copy one tensor to one or many tensors with explicitly named Sequence")
|
||||
.def("eval_async_tensor_sync_device", &kp::Manager::evalOpAsync<kp::OpTensorSyncDevice>,
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ def test_array_multiplication():
|
|||
tensor_out = kp.Tensor([0, 0, 0])
|
||||
|
||||
# 3. Initialise the Kompute Tensors in the GPU
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
# 4. Define the multiplication shader code to run on the GPU
|
||||
@ps.python2shader
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import os
|
|||
|
||||
import kp
|
||||
import numpy as np
|
||||
import logging
|
||||
|
||||
DIRNAME = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
|
@ -16,7 +17,7 @@ def test_opmult():
|
|||
|
||||
mgr = kp.Manager()
|
||||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
mgr.eval_algo_mult_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
|
|
@ -41,7 +42,7 @@ def test_opalgobase_data():
|
|||
|
||||
layout (local_size_x = 1) in;
|
||||
|
||||
// The input tensors bind index is relative to index in parameter passed
|
||||
// The input rebuild bind index is relative to index in parameter passed
|
||||
layout(set = 0, binding = 0) buffer bina { float tina[]; };
|
||||
layout(set = 0, binding = 1) buffer binb { float tinb[]; };
|
||||
layout(set = 0, binding = 2) buffer bout { float tout[]; };
|
||||
|
|
@ -52,7 +53,7 @@ def test_opalgobase_data():
|
|||
}
|
||||
"""
|
||||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
mgr.eval_algo_str_def([tensor_in_a, tensor_in_b, tensor_out], shaderData)
|
||||
|
||||
|
|
@ -75,7 +76,7 @@ def test_opalgobase_file():
|
|||
|
||||
shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp")
|
||||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath)
|
||||
|
||||
|
|
@ -93,14 +94,14 @@ def test_sequence():
|
|||
tensor_in_b = kp.Tensor([1, 2, 3])
|
||||
tensor_out = kp.Tensor([0, 0, 0])
|
||||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp")
|
||||
mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath)
|
||||
|
||||
mgr.eval_await_def()
|
||||
|
||||
seq = mgr.create_sequence("op")
|
||||
seq = mgr.sequence("op")
|
||||
seq.begin()
|
||||
seq.record_tensor_sync_local([tensor_in_a])
|
||||
seq.record_tensor_sync_local([tensor_in_b])
|
||||
|
|
@ -118,32 +119,35 @@ def test_workgroup():
|
|||
|
||||
tensor_a = kp.Tensor(np.zeros([16,8]))
|
||||
tensor_b = kp.Tensor(np.zeros([16,8]))
|
||||
mgr.eval_tensor_create_def([tensor_a, tensor_b])
|
||||
|
||||
mgr.rebuild([tensor_a, tensor_b])
|
||||
|
||||
shader_src = """
|
||||
#version 450
|
||||
|
||||
layout (local_size_x = 1) in;
|
||||
|
||||
// The input tensors bind index is relative to index in parameter passed
|
||||
// The input rebuild bind index is relative to index in parameter passed
|
||||
layout(set = 0, binding = 0) writeonly buffer bout { float toutx[]; };
|
||||
layout(set = 0, binding = 1) writeonly buffer bout2 { float touty[]; };
|
||||
|
||||
void main() {
|
||||
uint index = gl_WorkGroupID.x*gl_NumWorkGroups.y + gl_WorkGroupID.y;
|
||||
|
||||
|
||||
toutx[index] = gl_GlobalInvocationID.x;
|
||||
touty[index] = gl_GlobalInvocationID.y;
|
||||
}
|
||||
"""
|
||||
shader_src = bytes(shader_src, encoding='utf8')
|
||||
|
||||
seq = mgr.create_sequence()
|
||||
seq = mgr.sequence("new")
|
||||
seq.begin()
|
||||
seq.record_algo_data([tensor_a, tensor_b], shader_src, (16,8,1))
|
||||
seq.end()
|
||||
seq.eval()
|
||||
|
||||
|
||||
mgr.eval_tensor_sync_local_def([tensor_a, tensor_b])
|
||||
|
||||
assert np.all(tensor_a.numpy() == np.stack([np.arange(16)]*8, axis=1).ravel())
|
||||
assert np.all(tensor_b.numpy() == np.stack([np.arange(8)]*16, axis=0).ravel())
|
||||
|
||||
|
|
|
|||
|
|
@ -66,10 +66,10 @@ def test_logistic_regression():
|
|||
params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
|
||||
tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m]
|
||||
|
||||
mgr.eval_tensor_create_def(params)
|
||||
mgr.rebuild(params)
|
||||
|
||||
# Create a managed sequence
|
||||
sq = mgr.create_sequence()
|
||||
sq = mgr.sequence()
|
||||
|
||||
# Clear previous operations and begin recording for new operations
|
||||
sq.begin()
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@
|
|||
#include "kompute/operations/OpAlgoBase.hpp"
|
||||
#include "kompute/operations/OpAlgoLhsRhsOut.hpp"
|
||||
#include "kompute/operations/OpMult.hpp"
|
||||
#include "kompute/operations/OpTensorCreate.hpp"
|
||||
#include "kompute/operations/OpTensorCopy.hpp"
|
||||
#include "kompute/operations/OpTensorSyncDevice.hpp"
|
||||
#include "kompute/operations/OpTensorSyncLocal.hpp"
|
||||
|
|
|
|||
|
|
@ -697,6 +697,7 @@ static const unsigned int shaders_glsl_logisticregression_comp_spv_len = 4920;
|
|||
}
|
||||
#endif // define SHADEROP_SHADERLOGISTICREGRESSION_HPP
|
||||
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
|
||||
#define KP_MAX_DIM_SIZE 1
|
||||
|
|
@ -723,7 +724,7 @@ class Tensor
|
|||
enum class TensorTypes
|
||||
{
|
||||
eDevice = 0, ///< Type is device memory, source and destination
|
||||
eHost = 1, ///< Type is host memory, source and destination
|
||||
eHost = 1, ///< Type is host memory, source and destination
|
||||
eStorage = 2, ///< Type is Device memory (only)
|
||||
};
|
||||
|
||||
|
|
@ -736,7 +737,8 @@ class Tensor
|
|||
* Default constructor with data provided which would be used to create the
|
||||
* respective vulkan buffer and memory.
|
||||
*
|
||||
* @param data Non-zero-sized vector of data that will be used by the tensor
|
||||
* @param data Non-zero-sized vector of data that will be used by the
|
||||
* tensor
|
||||
* @param tensorType Type for the tensor which is of type TensorTypes
|
||||
*/
|
||||
Tensor(const std::vector<float>& data,
|
||||
|
|
@ -829,24 +831,30 @@ class Tensor
|
|||
bool createBarrier);
|
||||
|
||||
/**
|
||||
* Records a copy from the internal staging memory to the device memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice.
|
||||
* Records a copy from the internal staging memory to the device memory
|
||||
* using an optional barrier to wait for the operation. This function would
|
||||
* only be relevant for kp::Tensors of type eDevice.
|
||||
*
|
||||
* @param commandBuffer Vulkan Command Buffer to record the commands into
|
||||
* @param createBarrier Whether to create a barrier that ensures the data is
|
||||
* copied before further operations. Default is true.
|
||||
*/
|
||||
void recordCopyFromStagingToDevice(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
bool createBarrier);
|
||||
void recordCopyFromStagingToDevice(
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
bool createBarrier);
|
||||
|
||||
/**
|
||||
* Records a copy from the internal device memory to the staging memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice.
|
||||
* Records a copy from the internal device memory to the staging memory
|
||||
* using an optional barrier to wait for the operation. This function would
|
||||
* only be relevant for kp::Tensors of type eDevice.
|
||||
*
|
||||
* @param commandBuffer Vulkan Command Buffer to record the commands into
|
||||
* @param createBarrier Whether to create a barrier that ensures the data is
|
||||
* copied before further operations. Default is true.
|
||||
*/
|
||||
void recordCopyFromDeviceToStaging(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
bool createBarrier);
|
||||
void recordCopyFromDeviceToStaging(
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
bool createBarrier);
|
||||
|
||||
/**
|
||||
* Records the buffer memory barrier into the command buffer which
|
||||
|
|
@ -908,9 +916,17 @@ class Tensor
|
|||
bool mIsInit = false;
|
||||
|
||||
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
|
||||
void createBuffer(std::shared_ptr<vk::Buffer> buffer, vk::BufferUsageFlags bufferUsageFlags);
|
||||
void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer, std::shared_ptr<vk::DeviceMemory> memory, vk::MemoryPropertyFlags memoryPropertyFlags);
|
||||
void copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer, std::shared_ptr<vk::Buffer> bufferFrom, std::shared_ptr<vk::Buffer> bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion, bool createBarrier);
|
||||
void createBuffer(std::shared_ptr<vk::Buffer> buffer,
|
||||
vk::BufferUsageFlags bufferUsageFlags);
|
||||
void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
|
||||
std::shared_ptr<vk::DeviceMemory> memory,
|
||||
vk::MemoryPropertyFlags memoryPropertyFlags);
|
||||
void copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::shared_ptr<vk::Buffer> bufferFrom,
|
||||
std::shared_ptr<vk::Buffer> bufferTo,
|
||||
vk::DeviceSize bufferSize,
|
||||
vk::BufferCopy copyRegion,
|
||||
bool createBarrier);
|
||||
|
||||
// Private util functions
|
||||
vk::BufferUsageFlags getPrimaryBufferUsageFlags();
|
||||
|
|
@ -949,13 +965,11 @@ class OpBase
|
|||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
*/
|
||||
OpBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors)
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Compute OpBase constructor with params");
|
||||
|
||||
|
|
@ -963,14 +977,12 @@ class OpBase
|
|||
this->mDevice = device;
|
||||
this->mCommandBuffer = commandBuffer;
|
||||
this->mTensors = tensors;
|
||||
this->mFreeTensors = freeTensors;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default destructor for OpBase class. This OpBase destructor class should
|
||||
* always be called to destroy and free owned resources unless it is
|
||||
* intended to destroy the resources in the parent class. This can be done
|
||||
* by passing the mFreeTensors=false.
|
||||
* intended to destroy the resources in the parent class.
|
||||
*/
|
||||
virtual ~OpBase()
|
||||
{
|
||||
|
|
@ -1234,50 +1246,38 @@ class Sequence
|
|||
namespace kp {
|
||||
|
||||
/**
|
||||
Operation that creates tensor and manages the memory of the components
|
||||
created
|
||||
Operation that syncs the tensor's device memory by mapping local data into it. For TensorTypes::eDevice it will use a record operation for the memory to be synced into GPU memory, which means that the operation will be done in sync with GPU commands. For TensorTypes::eHost it will only map the data into host memory, which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStorage.
|
||||
*/
|
||||
class OpTensorCreate : public OpBase
|
||||
class OpTensorSyncDevice : public OpBase
|
||||
{
|
||||
public:
|
||||
OpTensorCreate();
|
||||
OpTensorSyncDevice();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used to create in operation.
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
*/
|
||||
OpTensorCreate(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
OpTensorSyncDevice(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
|
||||
/**
|
||||
* Default destructor which in this case expects the parent class to free
|
||||
* the tensors
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorCreate() override;
|
||||
~OpTensorSyncDevice() override;
|
||||
|
||||
/**
|
||||
* In charge of initialising the primary Tensor as well as the staging
|
||||
* tensor as required. It will only initialise a staging tensor if the
|
||||
* Primary tensor is of type Device. For staging tensors it performs a
|
||||
* mapDataIntoHostMemory which would perform immediately as opposed to
|
||||
* on sequence eval/submission.
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* Record runs the core actions to create the tensors. For device tensors
|
||||
* it records a copyCommand to move the data from the staging tensor to the
|
||||
* device tensor. The mapping for staging tensors happens in the init function
|
||||
* not in the record function.
|
||||
* For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
|
|
@ -1287,8 +1287,7 @@ class OpTensorCreate : public OpBase
|
|||
virtual void preEval() override;
|
||||
|
||||
/**
|
||||
* Performs a copy back into the main tensor to ensure that the data
|
||||
* contained is the one that is now being stored in the GPU.
|
||||
* Does not perform any postEval commands.
|
||||
*/
|
||||
virtual void postEval() override;
|
||||
|
||||
|
|
@ -1352,23 +1351,12 @@ class Manager
|
|||
*
|
||||
* @param sequenceName The name for the named sequence to be retrieved or
|
||||
* created
|
||||
* @param queueIndex The queue to use from the available queues
|
||||
* @return Shared pointer to the manager owned sequence resource
|
||||
*/
|
||||
std::shared_ptr<Sequence> getOrCreateManagedSequence(
|
||||
std::string sequenceName);
|
||||
|
||||
/**
|
||||
* Create a new managed Kompute sequence so it's available within the
|
||||
* manager.
|
||||
*
|
||||
* @param sequenceName The name for the named sequence to be created, if
|
||||
* empty then default indexed value is used
|
||||
* @param queueIndex The queue to use from the available queues
|
||||
* @return Weak pointer to the manager owned sequence resource
|
||||
*/
|
||||
std::shared_ptr<Sequence> createManagedSequence(
|
||||
std::string sequenceName = "",
|
||||
uint32_t queueIndex = 0);
|
||||
std::shared_ptr<Sequence> sequence(
|
||||
std::string sequenceName = KP_DEFAULT_SESSION,
|
||||
uint32_t queueIndex = 0);
|
||||
|
||||
/**
|
||||
* Function that evaluates operation against named sequence.
|
||||
|
|
@ -1385,7 +1373,7 @@ class Manager
|
|||
{
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp triggered");
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
this->getOrCreateManagedSequence(sequenceName);
|
||||
this->sequence(sequenceName);
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
|
||||
sq->begin();
|
||||
|
|
@ -1415,10 +1403,8 @@ class Manager
|
|||
{
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp Default triggered");
|
||||
this->mCurrentSequenceIndex++;
|
||||
this->evalOp<T>(tensors,
|
||||
KP_DEFAULT_SESSION +
|
||||
std::to_string(this->mCurrentSequenceIndex),
|
||||
std::forward<TArgs>(params)...);
|
||||
this->evalOp<T>(
|
||||
tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1437,7 +1423,7 @@ class Manager
|
|||
SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered");
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
this->getOrCreateManagedSequence(sequenceName);
|
||||
this->sequence(sequenceName);
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN");
|
||||
sq->begin();
|
||||
|
|
@ -1468,10 +1454,8 @@ class Manager
|
|||
{
|
||||
SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered");
|
||||
this->mCurrentSequenceIndex++;
|
||||
this->evalOpAsync<T>(tensors,
|
||||
KP_DEFAULT_SESSION +
|
||||
std::to_string(this->mCurrentSequenceIndex),
|
||||
std::forward<TArgs>(params)...);
|
||||
this->evalOpAsync<T>(
|
||||
tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1512,36 +1496,98 @@ class Manager
|
|||
void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered");
|
||||
this->evalOpAwait(KP_DEFAULT_SESSION +
|
||||
std::to_string(this->mCurrentSequenceIndex),
|
||||
waitFor);
|
||||
this->evalOpAwait(KP_DEFAULT_SESSION, waitFor);
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that simplifies the common workflow of tensor creation and
|
||||
* initialization. It will take the constructor parameters for a Tensor
|
||||
* and will use it to create a new Tensor and then create it using
|
||||
* the OpCreateTensor command.
|
||||
* and will use it to create a new Tensor and then create it. The
|
||||
* tensor memory will then be managed and owned by the manager.
|
||||
*
|
||||
* @param data The data to initialize the tensor with
|
||||
* @param tensorType The type of tensor to initialize
|
||||
* @param syncDataToGPU Whether to sync the data to GPU memory
|
||||
* @returns Initialized Tensor with memory Syncd to GPU device
|
||||
*/
|
||||
std::shared_ptr<Tensor> buildTensor(
|
||||
std::shared_ptr<Tensor> tensor(
|
||||
const std::vector<float>& data,
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice,
|
||||
bool syncDataToGPU = true)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager createInitTensor triggered");
|
||||
SPDLOG_DEBUG("Kompute Manager tensor triggered");
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr");
|
||||
std::shared_ptr<Tensor> tensor =
|
||||
std::make_shared<Tensor>(kp::Tensor(data, tensorType));
|
||||
|
||||
this->evalOpDefault<OpTensorCreate>({ tensor });
|
||||
tensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
if (syncDataToGPU) {
|
||||
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
|
||||
}
|
||||
this->mManagedTensors.insert(tensor);
|
||||
|
||||
return tensor;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that simplifies the common workflow of tensor initialisation. It
|
||||
* will take the constructor parameters for a Tensor and will use it to
|
||||
* create a new Tensor. The tensor memory will then be managed and owned by
|
||||
* the manager.
|
||||
*
|
||||
* @param tensors Array of tensors to rebuild
|
||||
* @param syncDataToGPU Whether to sync the data to GPU memory
|
||||
* @returns Initialized Tensor with memory Syncd to GPU device
|
||||
*/
|
||||
void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
bool syncDataToGPU = true)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager rebuild triggered");
|
||||
for (std::shared_ptr<Tensor> tensor : tensors) {
|
||||
|
||||
// Pass syncDataToGPU as false so all tensors are synced at once instead of one by one
|
||||
this->rebuild(tensor, false);
|
||||
}
|
||||
|
||||
if (syncDataToGPU) {
|
||||
this->evalOpDefault<OpTensorSyncDevice>(tensors);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that simplifies the common workflow of tensor initialisation. It
|
||||
* will take the constructor parameters for a Tensor and will use it to
|
||||
* create a new Tensor. The tensor memory will then be managed and owned by
|
||||
* the manager.
|
||||
*
|
||||
* @param tensor Single tensor to rebuild
|
||||
* @param syncDataToGPU Whether to sync the data to GPU memory
|
||||
* @returns Initialized Tensor with memory Syncd to GPU device
|
||||
*/
|
||||
void rebuild(std::shared_ptr<kp::Tensor> tensor,
|
||||
bool syncDataToGPU = true)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");
|
||||
|
||||
if (tensor->isInit()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
|
||||
tensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
std::set<std::shared_ptr<Tensor>>::iterator it =
|
||||
this->mManagedTensors.find(tensor);
|
||||
if (it == this->mManagedTensors.end()) {
|
||||
this->mManagedTensors.insert(tensor);
|
||||
}
|
||||
|
||||
if (syncDataToGPU) {
|
||||
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::Instance> mInstance = nullptr;
|
||||
|
|
@ -1552,6 +1598,8 @@ class Manager
|
|||
bool mFreeDevice = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::set<std::shared_ptr<Tensor>> mManagedTensors;
|
||||
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>
|
||||
mManagedSequences;
|
||||
|
||||
|
|
@ -1999,59 +2047,6 @@ class OpTensorCopy : public OpBase
|
|||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
|
||||
*/
|
||||
class OpTensorSyncDevice : public OpBase
|
||||
{
|
||||
public:
|
||||
OpTensorSyncDevice();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used to create in operation.
|
||||
*/
|
||||
OpTensorSyncDevice(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorSyncDevice() override;
|
||||
|
||||
/**
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*/
|
||||
virtual void preEval() override;
|
||||
|
||||
/**
|
||||
* Does not perform any postEval commands.
|
||||
*/
|
||||
virtual void postEval() override;
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
|
||||
*/
|
||||
|
|
|
|||
60
src/Manager.cpp
Executable file → Normal file
60
src/Manager.cpp
Executable file → Normal file
|
|
@ -68,6 +68,18 @@ Manager::~Manager()
|
|||
this->mManagedSequences.clear();
|
||||
}
|
||||
|
||||
if (this->mManagedTensors.size()) {
|
||||
SPDLOG_DEBUG("Kompute Manager explicitly freeing tensors");
|
||||
for (const std::shared_ptr<Tensor>& tensor : this->mManagedTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
SPDLOG_ERROR("Kompute Manager attempted to free managed tensor "
|
||||
"but not tensor is not initialised");
|
||||
}
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
this->mManagedTensors.clear();
|
||||
}
|
||||
|
||||
if (this->mFreeDevice) {
|
||||
SPDLOG_INFO("Destroying device");
|
||||
this->mDevice->destroy(
|
||||
|
|
@ -99,48 +111,34 @@ Manager::~Manager()
|
|||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Manager::getOrCreateManagedSequence(std::string sequenceName)
|
||||
Manager::sequence(std::string sequenceName, uint32_t queueIndex)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager creating Sequence object");
|
||||
SPDLOG_DEBUG("Kompute Manager sequence() with sequenceName: {} "
|
||||
"and queueIndex: {}",
|
||||
sequenceName,
|
||||
queueIndex);
|
||||
|
||||
std::shared_ptr<Sequence> sq = nullptr;
|
||||
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator found =
|
||||
this->mManagedSequences.find(sequenceName);
|
||||
|
||||
if (found == this->mManagedSequences.end()) {
|
||||
return this->createManagedSequence(sequenceName);
|
||||
std::shared_ptr<Sequence> sq =
|
||||
std::make_shared<Sequence>(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueues[queueIndex],
|
||||
this->mComputeQueueFamilyIndices[queueIndex]);
|
||||
sq->init();
|
||||
|
||||
this->mManagedSequences.insert({ sequenceName, sq });
|
||||
|
||||
return sq;
|
||||
} else {
|
||||
return found->second;
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Manager::createManagedSequence(std::string sequenceName, uint32_t queueIndex)
|
||||
{
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager createManagedSequence with sequenceName: {} "
|
||||
"and queueIndex: {}",
|
||||
sequenceName,
|
||||
queueIndex);
|
||||
|
||||
std::shared_ptr<Sequence> sq =
|
||||
std::make_shared<Sequence>(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueues[queueIndex],
|
||||
this->mComputeQueueFamilyIndices[queueIndex]);
|
||||
sq->init();
|
||||
|
||||
if (sequenceName.empty()) {
|
||||
this->mCurrentSequenceIndex++;
|
||||
this->mManagedSequences.insert(
|
||||
{ KP_DEFAULT_SESSION + std::to_string(this->mCurrentSequenceIndex),
|
||||
sq });
|
||||
} else {
|
||||
// TODO: Check if sequence doesn't already exist
|
||||
this->mManagedSequences.insert({ sequenceName, sq });
|
||||
}
|
||||
return sq;
|
||||
}
|
||||
|
||||
void
|
||||
Manager::createInstance()
|
||||
{
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
|||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
KomputeWorkgroup komputeWorkgroup)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}",
|
||||
tensors.size());
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ OpTensorCopy::OpTensorCopy(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
|||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorCopy constructor with params");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,76 +0,0 @@
|
|||
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpTensorCreate.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpTensorCreate::OpTensorCreate()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorCreate constructor base");
|
||||
}
|
||||
|
||||
OpTensorCreate::OpTensorCreate(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, true)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorCreate constructor with params");
|
||||
}
|
||||
|
||||
OpTensorCreate::~OpTensorCreate()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorCreate destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCreate::init()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorCreate init called");
|
||||
|
||||
if (this->mTensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorCreate called with less than 1 tensor");
|
||||
}
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (tensor->isInit()) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorCreate: Tensor has already been initialized");
|
||||
}
|
||||
if (tensor->tensorType() != Tensor::TensorTypes::eStorage) {
|
||||
tensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
tensor->mapDataIntoHostMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCreate::record()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorCreate record called");
|
||||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensors[i]->recordCopyFromStagingToDevice(
|
||||
this->mCommandBuffer, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCreate::preEval()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorCreate preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCreate::postEval()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorCreate postEval called");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -15,7 +15,7 @@ OpTensorSyncDevice::OpTensorSyncDevice(
|
|||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorSyncDevice constructor with params");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ OpTensorSyncLocal::OpTensorSyncLocal(
|
|||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorSyncLocal constructor with params");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
|||
this->mDevice = device;
|
||||
this->mComputeQueue = computeQueue;
|
||||
this->mQueueIndex = queueIndex;
|
||||
this->mIsInit = true;
|
||||
this->mIsInit = false;
|
||||
}
|
||||
|
||||
Sequence::~Sequence()
|
||||
|
|
@ -203,6 +203,8 @@ Sequence::isInit()
|
|||
void
|
||||
Sequence::freeMemoryDestroyGPUResources()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Sequence freeMemoryDestroyGPUResources called");
|
||||
|
||||
if (!this->mIsInit) {
|
||||
SPDLOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called "
|
||||
"but Sequence is not initialized so there's no relevant "
|
||||
|
|
|
|||
|
|
@ -229,8 +229,12 @@ Tensor::mapDataFromHostMemory()
|
|||
|
||||
if (this->mTensorType == TensorTypes::eHost) {
|
||||
hostVisibleMemory = this->mPrimaryMemory;
|
||||
} else {
|
||||
} else if (this->mTensorType == TensorTypes::eDevice) {
|
||||
hostVisibleMemory = this->mStagingMemory;
|
||||
} else {
|
||||
SPDLOG_WARN(
|
||||
"Kompute Tensor mapping data not supported on storage tensor");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::DeviceSize bufferSize = this->memorySize();
|
||||
|
|
@ -252,8 +256,12 @@ Tensor::mapDataIntoHostMemory()
|
|||
|
||||
if (this->mTensorType == TensorTypes::eHost) {
|
||||
hostVisibleMemory = this->mPrimaryMemory;
|
||||
} else {
|
||||
} else if (this->mTensorType == TensorTypes::eDevice) {
|
||||
hostVisibleMemory = this->mStagingMemory;
|
||||
} else {
|
||||
SPDLOG_WARN(
|
||||
"Kompute Tensor mapping data not supported on storage tensor");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::DeviceSize bufferSize = this->memorySize();
|
||||
|
|
|
|||
|
|
@ -1,12 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/Sequence.hpp"
|
||||
|
||||
#include "kompute/operations/OpTensorCreate.hpp"
|
||||
#include "kompute/operations/OpTensorSyncDevice.hpp"
|
||||
|
||||
#define KP_DEFAULT_SESSION "DEFAULT"
|
||||
|
||||
|
|
@ -63,23 +64,12 @@ class Manager
|
|||
*
|
||||
* @param sequenceName The name for the named sequence to be retrieved or
|
||||
* created
|
||||
* @param queueIndex The queue to use from the available queues
|
||||
* @return Shared pointer to the manager owned sequence resource
|
||||
*/
|
||||
std::shared_ptr<Sequence> getOrCreateManagedSequence(
|
||||
std::string sequenceName);
|
||||
|
||||
/**
|
||||
* Create a new managed Kompute sequence so it's available within the
|
||||
* manager.
|
||||
*
|
||||
* @param sequenceName The name for the named sequence to be created, if
|
||||
* empty then default indexed value is used
|
||||
* @param queueIndex The queue to use from the available queues
|
||||
* @return Weak pointer to the manager owned sequence resource
|
||||
*/
|
||||
std::shared_ptr<Sequence> createManagedSequence(
|
||||
std::string sequenceName = "",
|
||||
uint32_t queueIndex = 0);
|
||||
std::shared_ptr<Sequence> sequence(
|
||||
std::string sequenceName = KP_DEFAULT_SESSION,
|
||||
uint32_t queueIndex = 0);
|
||||
|
||||
/**
|
||||
* Function that evaluates operation against named sequence.
|
||||
|
|
@ -96,7 +86,7 @@ class Manager
|
|||
{
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp triggered");
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
this->getOrCreateManagedSequence(sequenceName);
|
||||
this->sequence(sequenceName);
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
|
||||
sq->begin();
|
||||
|
|
@ -126,10 +116,8 @@ class Manager
|
|||
{
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp Default triggered");
|
||||
this->mCurrentSequenceIndex++;
|
||||
this->evalOp<T>(tensors,
|
||||
KP_DEFAULT_SESSION +
|
||||
std::to_string(this->mCurrentSequenceIndex),
|
||||
std::forward<TArgs>(params)...);
|
||||
this->evalOp<T>(
|
||||
tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -148,7 +136,7 @@ class Manager
|
|||
SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered");
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
this->getOrCreateManagedSequence(sequenceName);
|
||||
this->sequence(sequenceName);
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN");
|
||||
sq->begin();
|
||||
|
|
@ -179,10 +167,8 @@ class Manager
|
|||
{
|
||||
SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered");
|
||||
this->mCurrentSequenceIndex++;
|
||||
this->evalOpAsync<T>(tensors,
|
||||
KP_DEFAULT_SESSION +
|
||||
std::to_string(this->mCurrentSequenceIndex),
|
||||
std::forward<TArgs>(params)...);
|
||||
this->evalOpAsync<T>(
|
||||
tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -223,36 +209,98 @@ class Manager
|
|||
void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered");
|
||||
this->evalOpAwait(KP_DEFAULT_SESSION +
|
||||
std::to_string(this->mCurrentSequenceIndex),
|
||||
waitFor);
|
||||
this->evalOpAwait(KP_DEFAULT_SESSION, waitFor);
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that simplifies the common workflow of tensor creation and
|
||||
* initialization. It will take the constructor parameters for a Tensor
|
||||
* and will use it to create a new Tensor and then create it using
|
||||
* the OpCreateTensor command.
|
||||
* and will use it to create a new Tensor and then create it. The
|
||||
* tensor memory will then be managed and owned by the manager.
|
||||
*
|
||||
* @param data The data to initialize the tensor with
|
||||
* @param tensorType The type of tensor to initialize
|
||||
* @param syncDataToGPU Whether to sync the data to GPU memory
|
||||
* @returns Initialized Tensor with memory Syncd to GPU device
|
||||
*/
|
||||
std::shared_ptr<Tensor> buildTensor(
|
||||
std::shared_ptr<Tensor> tensor(
|
||||
const std::vector<float>& data,
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice,
|
||||
bool syncDataToGPU = true)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager createInitTensor triggered");
|
||||
SPDLOG_DEBUG("Kompute Manager tensor triggered");
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr");
|
||||
std::shared_ptr<Tensor> tensor =
|
||||
std::make_shared<Tensor>(kp::Tensor(data, tensorType));
|
||||
|
||||
this->evalOpDefault<OpTensorCreate>({ tensor });
|
||||
tensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
if (syncDataToGPU) {
|
||||
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
|
||||
}
|
||||
this->mManagedTensors.insert(tensor);
|
||||
|
||||
return tensor;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that simplifies the common workflow of tensor initialisation. It
|
||||
* will take the constructor parameters for a Tensor and will use it to
|
||||
* create a new Tensor. The tensor memory will then be managed and owned by
|
||||
* the manager.
|
||||
*
|
||||
* @param tensors Array of tensors to rebuild
|
||||
* @param syncDataToGPU Whether to sync the data to GPU memory
|
||||
* @returns Initialized Tensor with memory Syncd to GPU device
|
||||
*/
|
||||
void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
bool syncDataToGPU = true)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager rebuild triggered");
|
||||
for (std::shared_ptr<Tensor> tensor : tensors) {
|
||||
|
||||
// Pass syncDataToGPU as false so all tensors are synced at once instead of one by one
|
||||
this->rebuild(tensor, false);
|
||||
}
|
||||
|
||||
if (syncDataToGPU) {
|
||||
this->evalOpDefault<OpTensorSyncDevice>(tensors);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that simplifies the common workflow of tensor initialisation. It
|
||||
* will take the constructor parameters for a Tensor and will use it to
|
||||
* create a new Tensor. The tensor memory will then be managed and owned by
|
||||
* the manager.
|
||||
*
|
||||
* @param tensor Single tensor to rebuild
|
||||
* @param syncDataToGPU Whether to sync the data to GPU memory
|
||||
* @returns Initialized Tensor with memory Syncd to GPU device
|
||||
*/
|
||||
void rebuild(std::shared_ptr<kp::Tensor> tensor,
|
||||
bool syncDataToGPU = true)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");
|
||||
|
||||
if (tensor->isInit()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
|
||||
tensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
std::set<std::shared_ptr<Tensor>>::iterator it =
|
||||
this->mManagedTensors.find(tensor);
|
||||
if (it == this->mManagedTensors.end()) {
|
||||
this->mManagedTensors.insert(tensor);
|
||||
}
|
||||
|
||||
if (syncDataToGPU) {
|
||||
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::Instance> mInstance = nullptr;
|
||||
|
|
@ -263,6 +311,8 @@ class Manager
|
|||
bool mFreeDevice = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::set<std::shared_ptr<Tensor>> mManagedTensors;
|
||||
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>
|
||||
mManagedSequences;
|
||||
|
||||
|
|
|
|||
|
|
@ -31,13 +31,11 @@ class OpBase
|
|||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
*/
|
||||
OpBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors)
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Compute OpBase constructor with params");
|
||||
|
||||
|
|
@ -45,14 +43,12 @@ class OpBase
|
|||
this->mDevice = device;
|
||||
this->mCommandBuffer = commandBuffer;
|
||||
this->mTensors = tensors;
|
||||
this->mFreeTensors = freeTensors;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default destructor for OpBase class. This OpBase destructor class should
|
||||
* always be called to destroy and free owned resources unless it is
|
||||
* intended to destroy the resources in the parent class. This can be done
|
||||
* by passing the mFreeTensors=false.
|
||||
* intended to destroy the resources in the parent class.
|
||||
*/
|
||||
virtual ~OpBase()
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,74 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
Operation that creates tensor and manages the memory of the components
|
||||
created
|
||||
*/
|
||||
class OpTensorCreate : public OpBase
|
||||
{
|
||||
public:
|
||||
OpTensorCreate();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used to create in operation.
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
*/
|
||||
OpTensorCreate(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
|
||||
/**
|
||||
* Default destructor which in this case expects the parent class to free
|
||||
* the tensors
|
||||
*/
|
||||
~OpTensorCreate() override;
|
||||
|
||||
/**
|
||||
* In charge of initialising the primary Tensor as well as the staging
|
||||
* tensor as required. It will only initialise a staging tensor if the
|
||||
* Primary tensor is of type Device. For staging tensors it performs a
|
||||
* mapDataIntoHostMemory which would perform immediately as opposed to
|
||||
* on sequence eval/submission.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* Record runs the core actions to create the tensors. For device tensors
|
||||
* it records a copyCommand to move the data from the staging tensor to the
|
||||
* device tensor. The mapping for staging tensors happens in the init function
|
||||
* not in the record function.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*/
|
||||
virtual void preEval() override;
|
||||
|
||||
/**
|
||||
* Performs a copy back into the main tensor to ensure that the data
|
||||
* contained is the one that is now being stored in the GPU.
|
||||
*/
|
||||
virtual void postEval() override;
|
||||
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
@ -49,7 +49,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
inputsSyncB.push_back(std::make_shared<kp::Tensor>(kp::Tensor(data)));
|
||||
}
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>(inputsSyncB);
|
||||
mgr.rebuild(inputsSyncB);
|
||||
|
||||
auto startSync = std::chrono::high_resolution_clock::now();
|
||||
|
||||
|
|
@ -77,10 +77,10 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
inputsAsyncB.push_back(std::make_shared<kp::Tensor>(kp::Tensor(data)));
|
||||
}
|
||||
|
||||
mgrAsync.evalOpDefault<kp::OpTensorCreate>(inputsAsyncB);
|
||||
mgrAsync.rebuild(inputsAsyncB);
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
mgrAsync.createManagedSequence("async" + std::to_string(i), i);
|
||||
mgrAsync.sequence("async" + std::to_string(i), i);
|
||||
}
|
||||
|
||||
auto startAsync = std::chrono::high_resolution_clock::now();
|
||||
|
|
@ -146,10 +146,10 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
|
|||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(data) };
|
||||
|
||||
mgr.createManagedSequence("asyncOne");
|
||||
mgr.createManagedSequence("asyncTwo");
|
||||
mgr.sequence("asyncOne");
|
||||
mgr.sequence("asyncTwo");
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
mgr.evalOpAsync<kp::OpAlgoBase>(
|
||||
{ tensorA }, "asyncOne", std::vector<char>(shader.begin(), shader.end()));
|
||||
|
|
|
|||
|
|
@ -32,14 +32,9 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqTensor = mgr.createManagedSequence();
|
||||
mgr.rebuild(params);
|
||||
|
||||
sqTensor->begin();
|
||||
sqTensor->record<kp::OpTensorCreate>(params);
|
||||
sqTensor->end();
|
||||
sqTensor->eval();
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
|
|
@ -122,14 +117,9 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqTensor = mgr.createManagedSequence();
|
||||
mgr.rebuild(params);
|
||||
|
||||
sqTensor->begin();
|
||||
sqTensor->record<kp::OpTensorCreate>(params);
|
||||
sqTensor->end();
|
||||
sqTensor->eval();
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
|
|
|
|||
|
|
@ -8,14 +8,14 @@ TEST(TestManager, EndToEndOpMultFlow)
|
|||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorLHS{ new kp::Tensor({ 0, 1, 2 }) };
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorLHS });
|
||||
mgr.rebuild({ tensorLHS });
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorRHS{ new kp::Tensor({ 2, 4, 6 }) };
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorRHS });
|
||||
mgr.rebuild({ tensorRHS });
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorOutput{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorOutput });
|
||||
mgr.rebuild({ tensorOutput });
|
||||
|
||||
mgr.evalOpDefault<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });
|
||||
|
||||
|
|
@ -36,15 +36,13 @@ TEST(TestManager, OpMultSequenceFlow)
|
|||
kp::Manager mgr;
|
||||
|
||||
{
|
||||
mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput });
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("newSequence");
|
||||
mgr.sequence("newSequence");
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorCreate>({ tensorLHS });
|
||||
sq->record<kp::OpTensorCreate>({ tensorRHS });
|
||||
sq->record<kp::OpTensorCreate>({ tensorOutput });
|
||||
|
||||
sq->record<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorOutput });
|
||||
|
|
@ -61,16 +59,16 @@ TEST(TestManager, TestMultipleSequences)
|
|||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqOne =
|
||||
mgr.getOrCreateManagedSequence("sqOne");
|
||||
mgr.sequence("sqOne");
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqTwo =
|
||||
mgr.getOrCreateManagedSequence("sqTwo");
|
||||
mgr.sequence("sqTwo");
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqOneRef =
|
||||
mgr.getOrCreateManagedSequence("sqOne");
|
||||
mgr.sequence("sqOne");
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqTwoRef =
|
||||
mgr.getOrCreateManagedSequence("sqTwo");
|
||||
mgr.sequence("sqTwo");
|
||||
|
||||
EXPECT_EQ(sqOne, sqOneRef);
|
||||
EXPECT_NE(sqTwo, sqOneRef);
|
||||
|
|
@ -90,17 +88,17 @@ TEST(TestManager, TestMultipleTensorsAtOnce)
|
|||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("newSequence");
|
||||
mgr.sequence("newSequence");
|
||||
|
||||
{
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorCreate>({ tensorLHS, tensorRHS, tensorOutput });
|
||||
mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput });
|
||||
|
||||
EXPECT_TRUE(tensorLHS->isInit());
|
||||
EXPECT_TRUE(tensorRHS->isInit());
|
||||
EXPECT_TRUE(tensorOutput->isInit());
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorOutput });
|
||||
|
|
@ -116,8 +114,8 @@ TEST(TestManager, TestCreateInitTensor)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.buildTensor({ 0, 1, 2 });
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.buildTensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 1, 2 });
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
|
||||
|
|
@ -126,7 +124,7 @@ TEST(TestManager, TestCreateInitTensor)
|
|||
EXPECT_EQ(tensorB->data(), std::vector<float>({ 0, 1, 2 }));
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorC =
|
||||
mgr.buildTensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost);
|
||||
mgr.tensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost);
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorC });
|
||||
|
||||
|
|
|
|||
|
|
@ -19,14 +19,14 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("newSequence");
|
||||
mgr.sequence("newSequence");
|
||||
|
||||
{
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorCreate>({ tensorA });
|
||||
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, std::vector<char>(shader.begin(), shader.end()));
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
|
|
@ -58,13 +58,15 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqTensor = mgr.createManagedSequence();
|
||||
mgr.rebuild({ tensorA }, false);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
|
||||
std::shared_ptr<kp::Sequence> sqTensor = mgr.sequence();
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
// First create the tensor in a separate sequence
|
||||
sqTensor->begin();
|
||||
sqTensor->record<kp::OpTensorCreate>({ tensorA });
|
||||
sqTensor->record<kp::OpTensorSyncDevice>({ tensorA });
|
||||
sqTensor->end();
|
||||
sqTensor->eval();
|
||||
|
||||
|
|
@ -111,24 +113,11 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("newSequence");
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorCreate>({ tensorA });
|
||||
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("newSequence2");
|
||||
mgr.sequence("newSequence");
|
||||
|
||||
sq->begin();
|
||||
|
||||
|
|
@ -141,7 +130,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
|
|||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("newSequence3");
|
||||
mgr.sequence("newSequence2");
|
||||
|
||||
sq->begin();
|
||||
|
||||
|
|
@ -154,7 +143,20 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
|
|||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("newSequence5");
|
||||
mgr.sequence("newSequence3");
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("newSequence5");
|
||||
|
||||
sq->begin();
|
||||
|
||||
|
|
@ -183,13 +185,15 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
mgr.rebuild({ tensorA }, false);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("newSequence");
|
||||
mgr.sequence("newSequence");
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorCreate>({ tensorA });
|
||||
sq->record<kp::OpTensorSyncDevice>({ tensorA });
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
|
|
@ -197,7 +201,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
|
|||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("newSequence2");
|
||||
mgr.sequence("newSequence2");
|
||||
|
||||
sq->begin();
|
||||
|
||||
|
|
@ -213,7 +217,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
|
|||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("newSequence3");
|
||||
mgr.sequence("newSequence3");
|
||||
|
||||
sq->begin();
|
||||
|
||||
|
|
@ -238,7 +242,7 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate)
|
|||
std::shared_ptr<kp::Tensor> tensorInB{ new kp::Tensor({ 0.0, 1.0, 2.0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorOut{ new kp::Tensor({ 0.0, 0.0, 0.0 }) };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorInA, tensorInB, tensorOut });
|
||||
mgr.rebuild({ tensorInA, tensorInB, tensorOut });
|
||||
|
||||
std::string shader(R"(
|
||||
// The version to use
|
||||
|
|
@ -273,9 +277,12 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 });
|
||||
auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 });
|
||||
auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 });
|
||||
auto tensorInA = mgr.tensor(
|
||||
{ 2.0, 4.0, 6.0 }, kp::Tensor::TensorTypes::eDevice, false);
|
||||
auto tensorInB = mgr.tensor(
|
||||
{ 0.0, 1.0, 2.0 }, kp::Tensor::TensorTypes::eDevice, false);
|
||||
auto tensorOut = mgr.tensor(
|
||||
{ 0.0, 0.0, 0.0 }, kp::Tensor::TensorTypes::eDevice, false);
|
||||
|
||||
std::string shader(R"(
|
||||
// The version to use
|
||||
|
|
@ -296,6 +303,9 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate)
|
|||
}
|
||||
)");
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncDevice>(
|
||||
{ tensorInA, tensorInB, tensorOut });
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase>(
|
||||
{ tensorInA, tensorInB, tensorOut },
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
|
|
@ -304,3 +314,39 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate)
|
|||
|
||||
EXPECT_EQ(tensorOut->data(), std::vector<float>({ 0.0, 4.0, 12.0 }));
|
||||
}
|
||||
|
||||
TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
sq = mgr.sequence();
|
||||
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, std::vector<char>(shader.begin(), shader.end()));
|
||||
sq->end();
|
||||
|
||||
sq->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,13 +30,15 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies)
|
|||
}
|
||||
)");
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB }, false);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("default");
|
||||
mgr.sequence("default");
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorCreate>({ tensorA, tensorB });
|
||||
sq->record<kp::OpTensorSyncDevice>({ tensorA, tensorB });
|
||||
|
||||
sq->end();
|
||||
|
||||
|
|
@ -45,7 +47,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies)
|
|||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("run");
|
||||
mgr.sequence("run");
|
||||
|
||||
sq->begin();
|
||||
|
||||
|
|
@ -63,7 +65,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies)
|
|||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("export");
|
||||
mgr.sequence("export");
|
||||
|
||||
sq->begin();
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor)
|
|||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -43,7 +43,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor)
|
|||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase>(
|
||||
{ tensorA, tensorB },
|
||||
|
|
@ -65,7 +65,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromFile)
|
|||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase>(
|
||||
{ tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp");
|
||||
|
|
@ -82,7 +82,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile)
|
|||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase>(
|
||||
{ tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv");
|
||||
|
|
|
|||
|
|
@ -8,13 +8,13 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecA{ 1, 2, 3 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
@ -33,7 +33,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecA{ 2, 3, 4 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
std::vector<float> testVecC{ 0, 0, 0 };
|
||||
|
||||
|
|
@ -41,7 +41,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
|
|||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
std::shared_ptr<kp::Tensor> tensorC{ new kp::Tensor(testVecC) };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB, tensorC });
|
||||
mgr.rebuild({ tensorA, tensorB, tensorC });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
@ -63,14 +63,17 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecA{ 3, 4, 5 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(
|
||||
testVecB, kp::Tensor::TensorTypes::eHost) };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
|
||||
mgr.rebuild({ tensorA, tensorB }, false);
|
||||
|
||||
// Only calling sync on device type tensor
|
||||
mgr.evalOpDefault<kp::OpTensorSyncDevice>({ tensorA });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
@ -89,14 +92,20 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecA{ 4, 5, 6 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
|
||||
testVecA, kp::Tensor::TensorTypes::eHost) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
|
||||
mgr.rebuild({ tensorA, tensorB }, false);
|
||||
|
||||
// Manually copy data into host memory of Tensor
|
||||
tensorA->mapDataIntoHostMemory();
|
||||
|
||||
// Only calling sync on device type tensor
|
||||
mgr.evalOpDefault<kp::OpTensorSyncDevice>({ tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
@ -115,7 +124,7 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecA{ 5, 6, 7 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
|
||||
|
|
@ -123,7 +132,7 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
|
|||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(
|
||||
testVecB, kp::Tensor::TensorTypes::eHost) };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
@ -142,12 +151,12 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecA{ 6, 7, 8 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
|
||||
testVecA, kp::Tensor::TensorTypes::eHost) };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
|
||||
mgr.rebuild({ tensorA }, false);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
|
||||
|
|
|
|||
|
|
@ -5,20 +5,19 @@
|
|||
|
||||
TEST(TestOpTensorCreate, CreateSingleTensorSingleOp)
|
||||
{
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
}
|
||||
|
||||
tensorA->freeMemoryDestroyGPUResources();
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
}
|
||||
|
||||
|
|
@ -33,7 +32,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorSingleOp)
|
|||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
@ -53,8 +52,8 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp)
|
|||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorB });
|
||||
mgr.rebuild({ tensorA });
|
||||
mgr.rebuild({ tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
@ -63,7 +62,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp)
|
|||
EXPECT_EQ(tensorB->data(), testVecB);
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed)
|
||||
TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerDestroyed)
|
||||
{
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
|
|
@ -74,8 +73,8 @@ TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed)
|
|||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorB });
|
||||
mgr.rebuild({ tensorA });
|
||||
mgr.rebuild({ tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
@ -88,6 +87,32 @@ TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed)
|
|||
EXPECT_FALSE(tensorB->isInit());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerNOTDestroyed)
|
||||
{
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecB{ 6, 5, 4 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
{
|
||||
mgr.rebuild({ tensorA });
|
||||
mgr.rebuild({ tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
EXPECT_EQ(tensorB->data(), testVecB);
|
||||
}
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore)
|
||||
{
|
||||
|
||||
|
|
@ -99,8 +124,8 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorB });
|
||||
mgr.rebuild({ tensorA });
|
||||
mgr.rebuild({ tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
@ -123,7 +148,7 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor)
|
|||
kp::Manager mgr;
|
||||
|
||||
try {
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
|
||||
mgr.rebuild({ tensorA });
|
||||
} catch (const std::runtime_error& err) {
|
||||
// check exception
|
||||
ASSERT_TRUE(std::string(err.what()).find("zero-sized") !=
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor)
|
|||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecPreA) };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
|
||||
mgr.rebuild({ tensorA }, false);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
|
||||
|
|
@ -37,7 +37,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
|
|||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorC{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB, tensorC });
|
||||
mgr.rebuild({ tensorA, tensorB, tensorC }, false);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ TEST(TestSequence, CmdBufSequenceBeginEnd)
|
|||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("newSequence");
|
||||
mgr.sequence("newSequence");
|
||||
|
||||
EXPECT_TRUE(sq->eval());
|
||||
EXPECT_TRUE(!sq->isRecording());
|
||||
|
|
@ -32,10 +32,11 @@ TEST(TestSequence, SequenceDestructorViaManager)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
sq = mgr.getOrCreateManagedSequence("newSequence");
|
||||
sq = mgr.sequence("newSequence");
|
||||
|
||||
EXPECT_TRUE(sq->isInit());
|
||||
}
|
||||
|
||||
EXPECT_FALSE(sq->isInit());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,11 +23,11 @@ TEST(TestTensor, CopyFromHostData)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
if (std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.getOrCreateManagedSequence("new")) {
|
||||
sq->begin();
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
sq->record<kp::OpTensorCreate>({ tensorA, tensorB });
|
||||
if (std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("new")) {
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue