Initial checkpoint with reasonable workflow

This commit is contained in:
Alejandro Saucedo 2021-02-24 08:39:09 +00:00
parent 5db9abd06e
commit 9aae5d69db
46 changed files with 1158 additions and 695 deletions

1
.ccls
View file

@ -19,6 +19,7 @@
-I./external/googletest/googletest/include/
-I./external/glslang/
-I./external/spdlog/include/
-I./external/fmt/include/
-I./src/include/
-I./single_include/
-I./vk_ndk_wrapper_include/

View file

@ -52,7 +52,7 @@ void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData,
sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
// Newer versions of Android are able to use shaderc to read raw string
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
params, kp::Shader::compile_source(LR_SHADER));
sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });

View file

@ -37,7 +37,7 @@ int main()
}
)");
mgr.evalOpDefault<kp::OpAlgoBase>(
mgr.evalOpDefault<kp::OpAlgoCreate>(
{ tensorInA, tensorInB, tensorOut },
kp::Shader::compile_source(shader));

View file

@ -59,7 +59,7 @@ void KomputeSummatorNode::_init() {
{ this->mSecondaryTensor });
// Then we run the operation with both tensors
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ this->mPrimaryTensor, this->mSecondaryTensor },
kp::Shader::compile_source(shader));

View file

@ -56,7 +56,7 @@ void KomputeSummator::_init() {
{ this->mSecondaryTensor });
// Then we run the operation with both tensors
this->mSequence->record<kp::OpAlgoBase>(
this->mSequence->record<kp::OpAlgoCreate>(
{ this->mPrimaryTensor, this->mSecondaryTensor },
kp::Shader::compile_source(shader));

View file

@ -62,11 +62,11 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
#ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING
// Newer versions of Android are able to use shaderc to read raw string
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
params, std::vector<char>(LR_SHADER.begin(), LR_SHADER.end()));
#else
// Older versions of Android require the SPIRV binary directly
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
params, std::vector<char>(
kp::shader_data::shaders_glsl_logisticregression_comp_spv,
kp::shader_data::shaders_glsl_logisticregression_comp_spv

View file

@ -66,11 +66,11 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
#ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING
// Newer versions of Android are able to use shaderc to read raw string
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
params, std::vector<char>(LR_SHADER.begin(), LR_SHADER.end()));
#else
// Older versions of Android require the SPIRV binary directly
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
params, std::vector<char>(
kp::shader_data::shaders_glsl_logisticregression_comp_spv,
kp::shader_data::shaders_glsl_logisticregression_comp_spv

View file

@ -44,7 +44,7 @@ int main()
sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
params, std::vector<uint32_t>(
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv

View file

@ -266,23 +266,23 @@ The type of tensor to initialize @param syncDataToGPU Whether to sync
the data to GPU memory @returns Initialized Tensor with memory Syncd
to GPU device)doc";
static const char *__doc_kp_OpAlgoBase =
static const char *__doc_kp_OpAlgoCreate =
R"doc(Operation that provides a general abstraction that simplifies the use
of algorithm and parameter components which can be used with shaders.
By default it enables the user to provide a dynamic number of tensors
which are then passed as inputs.)doc";
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup = R"doc()doc";
static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup = R"doc()doc";
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_x = R"doc()doc";
static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_x = R"doc()doc";
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_y = R"doc()doc";
static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_y = R"doc()doc";
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_z = R"doc()doc";
static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_z = R"doc()doc";
static const char *__doc_kp_OpAlgoBase_OpAlgoBase = R"doc(Base constructor, should not be used unless explicitly intended.)doc";
static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate = R"doc(Base constructor, should not be used unless explicitly intended.)doc";
static const char *__doc_kp_OpAlgoBase_OpAlgoBase_2 =
static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_2 =
R"doc(Default constructor with parameters that provides the bare minimum
requirements for the operations to be able to create and manage their
sub-components.
@ -295,7 +295,7 @@ shaderFilePath Optional parameter to specify the shader to load
(either in spirv or raw format) @param komputeWorkgroup Optional
parameter to specify the layout for processing)doc";
static const char *__doc_kp_OpAlgoBase_OpAlgoBase_3 =
static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_3 =
R"doc(Constructor that enables a file to be passed to the operation with the
contents of the shader. This can be either in raw format or in
compiled SPIR-V binary format.
@ -308,7 +308,7 @@ shaderFilePath Parameter to specify the shader to load (either in
spirv or raw format) @param komputeWorkgroup Optional parameter to
specify the layout for processing)doc";
static const char *__doc_kp_OpAlgoBase_OpAlgoBase_4 =
static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_4 =
R"doc(Constructor that enables raw shader data to be passed to the main
operation which can be either in raw shader glsl code or in compiled
SPIR-V binary.
@ -321,37 +321,37 @@ shaderDataRaw Optional parameter to specify the shader data either in
binary or raw form @param komputeWorkgroup Optional parameter to
specify the layout for processing)doc";
static const char *__doc_kp_OpAlgoBase_fetchSpirvBinaryData = R"doc()doc";
static const char *__doc_kp_OpAlgoCreate_fetchSpirvBinaryData = R"doc()doc";
static const char *__doc_kp_OpAlgoBase_init =
static const char *__doc_kp_OpAlgoCreate_init =
R"doc(The init function is responsible for the initialisation of the
algorithm component based on the parameters specified, and allows for
extensibility on the options provided. Further dependent classes can
perform more specific checks such as ensuring tensors provided are
initialised, etc.)doc";
static const char *__doc_kp_OpAlgoBase_mAlgorithm = R"doc()doc";
static const char *__doc_kp_OpAlgoCreate_mAlgorithm = R"doc()doc";
static const char *__doc_kp_OpAlgoBase_mFreeAlgorithm = R"doc()doc";
static const char *__doc_kp_OpAlgoCreate_mFreeAlgorithm = R"doc()doc";
static const char *__doc_kp_OpAlgoBase_mKomputeWorkgroup = R"doc()doc";
static const char *__doc_kp_OpAlgoCreate_mKomputeWorkgroup = R"doc()doc";
static const char *__doc_kp_OpAlgoBase_mShaderDataRaw =
static const char *__doc_kp_OpAlgoCreate_mShaderDataRaw =
R"doc(< Optional member variable which can be provided to contain either the
raw shader content or the spirv binary content)doc";
static const char *__doc_kp_OpAlgoBase_mShaderFilePath =
R"doc(< Optional member variable which can be provided for the OpAlgoBase to
static const char *__doc_kp_OpAlgoCreate_mShaderFilePath =
R"doc(< Optional member variable which can be provided for the OpAlgoCreate to
find the data automatically and load for processing)doc";
static const char *__doc_kp_OpAlgoBase_postEval =
static const char *__doc_kp_OpAlgoCreate_postEval =
R"doc(Executes after the recorded commands are submitted, and performs a
copy of the GPU Device memory into the staging buffer so the output
data can be retrieved.)doc";
static const char *__doc_kp_OpAlgoBase_preEval = R"doc(Does not perform any preEval commands.)doc";
static const char *__doc_kp_OpAlgoCreate_preEval = R"doc(Does not perform any preEval commands.)doc";
static const char *__doc_kp_OpAlgoBase_record =
static const char *__doc_kp_OpAlgoCreate_record =
R"doc(This records the commands that are to be sent to the GPU. This
includes the barriers that ensure the memory has been copied before
going in and out of the shader, as well as the dispatch operation that

View file

@ -133,7 +133,7 @@ PYBIND11_MODULE(kp, m) {
.def("record_tensor_sync_local", &kp::Sequence::record<kp::OpTensorSyncLocal>,
"Records operation to sync tensor(s) from GPU memory to local memory")
.def("record_algo_file", &kp::Sequence::record<
kp::OpAlgoBase,
kp::OpAlgoCreate,
const std::string&,
kp::Workgroup,
kp::Constants>,
@ -148,7 +148,7 @@ PYBIND11_MODULE(kp, m) {
py::buffer_info info(py::buffer(bytes).request());
const char *data = reinterpret_cast<const char *>(info.ptr);
size_t length = static_cast<size_t>(info.size);
return self.record<kp::OpAlgoBase>(
return self.record<kp::OpAlgoCreate>(
tensors, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
},
"Records an operation using a custom shader provided as spirv bytes",
@ -211,7 +211,7 @@ PYBIND11_MODULE(kp, m) {
.def("eval_tensor_sync_local_def", &kp::Manager::evalOpDefault<kp::OpTensorSyncLocal>,
"Evaluates operation to sync tensor(s) from GPU memory to local memory with new anonymous Sequence")
.def("eval_algo_file_def", &kp::Manager::evalOpDefault<
kp::OpAlgoBase,
kp::OpAlgoCreate,
const std::string&,
kp::Workgroup,
kp::Constants>,
@ -226,7 +226,7 @@ PYBIND11_MODULE(kp, m) {
py::buffer_info info(py::buffer(bytes).request());
const char *data = reinterpret_cast<const char *>(info.ptr);
size_t length = static_cast<size_t>(info.size);
self.evalOpDefault<kp::OpAlgoBase>(
self.evalOpDefault<kp::OpAlgoCreate>(
tensors, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
},
"Evaluates an operation using a custom shader provided as spirv bytes with new anonymous Sequence",
@ -240,7 +240,7 @@ PYBIND11_MODULE(kp, m) {
.def("eval_tensor_sync_local", &kp::Manager::evalOp<kp::OpTensorSyncLocal>,
"Evaluates operation to sync tensor(s) from GPU memory to local memory with explicitly named Sequence")
.def("eval_algo_file", &kp::Manager::evalOp<
kp::OpAlgoBase,
kp::OpAlgoCreate,
const std::string&,
kp::Workgroup,
kp::Constants>,
@ -256,7 +256,7 @@ PYBIND11_MODULE(kp, m) {
py::buffer_info info(py::buffer(bytes).request());
const char *data = reinterpret_cast<const char *>(info.ptr);
size_t length = static_cast<size_t>(info.size);
self.evalOp<kp::OpAlgoBase>(
self.evalOp<kp::OpAlgoCreate>(
tensors, sequenceName, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
},
"Evaluates an operation using a custom shader provided as spirv bytes with explicitly named Sequence",
@ -270,7 +270,7 @@ PYBIND11_MODULE(kp, m) {
.def("eval_async_tensor_sync_local_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorSyncLocal>,
"Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory with anonymous Sequence")
.def("eval_async_algo_file_def", &kp::Manager::evalOpAsyncDefault<
kp::OpAlgoBase,
kp::OpAlgoCreate,
const std::string&,
kp::Workgroup,
kp::Constants>,
@ -285,7 +285,7 @@ PYBIND11_MODULE(kp, m) {
py::buffer_info info(py::buffer(bytes).request());
const char *data = reinterpret_cast<const char *>(info.ptr);
size_t length = static_cast<size_t>(info.size);
self.evalOpAsyncDefault<kp::OpAlgoBase>(
self.evalOpAsyncDefault<kp::OpAlgoCreate>(
tensors, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
},
"Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with anonymous Sequence",
@ -299,7 +299,7 @@ PYBIND11_MODULE(kp, m) {
.def("eval_async_tensor_sync_local", &kp::Manager::evalOpAsync<kp::OpTensorSyncLocal>,
"Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory with explicitly named Sequence")
.def("eval_async_algo_file", &kp::Manager::evalOpAsync<
kp::OpAlgoBase,
kp::OpAlgoCreate,
const std::string&,
kp::Workgroup,
kp::Constants>,
@ -315,7 +315,7 @@ PYBIND11_MODULE(kp, m) {
py::buffer_info info(py::buffer(bytes).request());
const char *data = reinterpret_cast<const char *>(info.ptr);
size_t length = static_cast<size_t>(info.size);
self.evalOpAsync<kp::OpAlgoBase>(
self.evalOpAsync<kp::OpAlgoCreate>(
tensors, sequenceName, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
},
"Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with explicitly named Sequence",

View file

@ -28,6 +28,424 @@ def test_opalgobase_file():
assert tensor_out.data() == [2.0, 4.0, 6.0]
params = [kp.Tensor([2, 2, 2]), kp.Tensor([1, 2, 3]), kp.Tensor([0, 0, 0])]
mgr = kp.Manager()
op_ct = kp.OpTensorCreate(params)
op_ct = mgr.rebuild(op_ct)
mgr.eval_op(op_ct)
algo = kp.Algo(params, spirv)
op_ac = kp.OpAlgoCreate(algo)
op_ac = mgr.rebuild(op_ac)
mgr.eval_op(op_ac)
op_ac = kp.OpAlgoCreate(kp.Algo(params, spirv))
mgr.eval_op(kp.OpAlgoCreate(algo))
mgr = kp.Manager()
op_ct = kp.OpTensorCreate(mgr, params) # This initialises operation
op_ct.eval()
algo = kp.Algo(params, spirv)
op_ac = kp.OpAlgoCreate(mgr, algo)
op_ct.eval()
op_tsd = kp.OpTensorSyncDevice(mgr, params)
op_ad = kp.OpAlgoDispatch(mgr, algo)
op_tsl = kp.OpTensorSyncLocal(mgr, params)
sq = kp.Sequence(mgr, "newSeq")
sq.record([op_tsd, op_ad, op_tsl])
sq.eval()
sq.destroy()
# Explore consistent interface:
op_tsd = kp.OpTensorSyncDevice(sq, params)
op_ad = kp.OpAlgoDispatch(sq, algo)
op_tsl = kp.OpTensorSyncLocal(sq, params)
op_tsd.record()
op_ad.record()
op_tsl.record()
sq.eval()
auto params = ...;
std::string shader = "...";
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
// Example passing mgr
kp::Manager mgr;
kp::OpTensorCreate op_tc(mgr, params);
op_tc.eval()
kp::Algorithm algo(params, spirv);
kp::OpAlgoCreate op_ac(mgr, algo);
op_ac.eval()
op_ac.destroy()
op_tc.destroy()
kp::OpTensorAlgoCreate op_c(mgr, params, algo);
op_c.eval()
kp::Sequence sq(mgr);
kp::OpTensorSyncDevice op_tsd(mgr, params);
kp::OpAlgoDispatch op_ad(mgr, algo);
kp::OpTensorSyncLocal op_tsl(mgr, params);
sq.record({op_tsd, op_ad, op_tsl})
for(...) {
sq.eval();
tensorA...
}
######
#######
#######
#######
#######
######
// Example not passing mgr
kp::Manager mgr;
std::shared_ptr<kp::OpTensorCreate> op_tc_1{ new kp::OpTensorCreate(params) };
auto sq_1 = mgr.eval(op_tc_1); // Initialises and stores op as part of new sequence
mgr.eval(op_tc_1); // Fails as this op can only be "initialised" once
mgr.destroy(op_tc_1);
mgr.eval(op_tc_1); // This works as it's a new setup
mgr.eval<kp::OpTensorCreate>(params); // Fails as tensors already created
// NOT ALLOWED TO DELETE JUST TENSORS ANYMORE - SEE BELOW
mgr.destroy(params); // Sends to inconsistent state as op_tc_1 will still destroy these parameters
mgr.destroy(op_tc_1, recursive=false); // Destroys only operation, which is useful when you need to ensure another operation owns the parameters
auto op_tc_2 = mgr.eval<kp::OpTensorCreate>(params);
std::shared_ptr<kp::OpTensorCreate> op_tc_2{ new kp::OpTensorCreate(params) }; // fails as tensors already created
op_tc_2.destroy(); // Manager still holds dangling reference so requires explicit termination in manager
mgr.destroy(op_tc_2);
auto op_tc_3 = mgr.eval({ new kp::OpTensorCreate(params) });
std::shared_ptr<kp::Algorithm> algo{ new kp::Algorithm(params, spirv, kp::Workgroup(), kp::SpecConst(), kp::PushConst()) };
std::shared_ptr<kp::OpAlgoCreate> op_ac_1{ new kp::OpAlgoCreate(algo) };
mgr.eval(op_ac_1); // Initialises and stores op as part of manager
mgr.eval(op_ac_1); // Fails as this op can only be "initialised" once
mgr.destroy(op_ac_1);
std::shared_ptr<kp::OpAlgoCreate> op_ac_2 =
mgr.eval({ new kp::OpAlgoCreate(params, { new kp::Algorithm(spirv) }) });
std::shared_ptr<kp::OpAlgoMultCreate> op_amc{ new kp::OpAlgoMultCreate(params) };
mgr.eval(op_amc);
std::shared_ptr<kp::Algorithm> algo_mult = op_amc.algorithm()
std::vector<std::shared_ptr<kp::Tensor>> params = op_amc.tensors()
auto op_tsd = std::make_shared<kp::OpTensorSyncDevice>(params);
auto op_ad = std::make_shared<kp::OpAlgoSetPushConst>(algo);
auto op_ad = std::make_shared<kp::OpAlgoDispatch>(algo);
auto op_tsl = std::make_shared<kp::OpTensorSyncLocal>(mgr, params);
op_params = {op_tsd, op_ad, op_tsl};
mgr.record(op_params);
mgr.eval(); // Runs recorded default sequence
mgr.record(op_params, clear=false); // Non-create ops ok if rerun
mgr.eval(); // Runs the recorded params twice
mgr.record("namedSeq", op_params);
mgr.eval("namedSeq");
kp::Manager mgrAsync(0, {0, 2});
mgr.sequence("namedSeq2", 0); // Create named sequence with queue in index 0
mgr.sequence("namedSeq3", 1);
mgr.eval_async("namedSeq2", op_params); // Clear, record params and eval
mgr.eval_async("namedSeq3", op_params); // Clear, record params and eval
mgr.eval_await("namedSeq2");
mgr.eval_await("namedSeq3");
mgr.destroy("namedSeq"); // Destroy named sequence
mgr.destroy({"namedSeq2", "namedSeq3"}); // Destroy multiple named sequences
mgr.destroy("namedSeq"); // Error
mgr = kp.Manager(0, [0, 2])
// Manager does not need to manage seq anymore
sq_1 = kp.Sequence(mgr, 0)
t1 = kp.Tensor(sq_1, [0, 0, 0])
t2 = kp.Tensor(sq_1, [0, 1, 2])
algo = kp.Algorithm(sq_1)
op_tc = kp.OpTensorCreate(sq_1, params)
op_tsd = kp.OpTensorSyncDevice(sq_1, params)
op_ac = kp.OpAlgoCreate(sq_1, algo)
op_ad = kp.OpAlgoDispatch(sq_1, algo)
sq_1.clear()
op_tc.record()
op_tsd.record()
op_ac.record()
op_ad.record()
op_ad.record()
op_ad.record()
sq_1.eval()
std::shared_ptr<kp::Manager> mgr = kp::ManagerSP(0, {0, 1});
std::shared_ptr<kp::Sequence> sq_2 = kp::SequenceSP(mgr, 1)
std::shared_ptr<kp::Tensor> t1 = kp::TensorSP(sq_2, {1, 2, 3});
std::shared_ptr<kp::Tensor> t2 = kp::TensorSP(sq_2, {2, 3, 4});
auto params = ...
std::shared_ptr<kp::Algorithm> algo2 = kp::AlgorithmSP(sq_2, params, spirv, workgroup);
// How do we deal with this?
{
auto op_1 = kp::OpTensorSyncDevice(sq_2, params)
auto op_2 = kp::OpAlgoDispatch(sq_2, algo)
}
sq_2.eval()
// HEAP ONLY - This would fail
kp::Manager mgr = kp::Manager(0, {0, 1});
kp::Sequence sq_2 = kp::Sequence(mgr, 1)
kp::Tensor t1 = kp::Tensor(sq_2, {1, 2, 3});
kp::Tensor t2 = kp::Tensor(sq_2, {2, 3, 4});
auto params = ...
kp::Algorithm algo2 = kp::AlgorithmSP(sq_2, params, spirv, workgroup);
// How do we deal with this?
{
auto op_1 = kp::OpTensorSyncDevice(sq_2, params)
auto op_2 = kp::OpAlgoDispatch(sq_2, algo)
}
sq_2.eval()
kp::Manager mgr = kp::Manager(0, {0, 1});
kp::Sequence sq_2 = kp::Sequence(mgr, 1)
kp::Tensor t1 = kp::Tensor(sq_2, {1, 2, 3});
kp::Tensor t2 = kp::Tensor(sq_2, {2, 3, 4});
auto params = ...
kp::Algorithm* algo2 = new kp::Algorithm(sq_2, params, spirv, workgroup);
// How do we deal with this?
{
auto op_1 = kp::OpTensorSyncDevice(sq_2, params)
auto op_2 = kp::OpAlgoDispatch(sq_2, algo)
}
sq_2.eval()
kp::Manager mgr = kp::Manager;
auto sq_2 = mgr.sequence()
{
// What if we want to use tensor in a different sequence?
auto t1 = sq_2.tensor({1, 2, 3});
auto t2 = sq_2.tensor({1, 2, 3});
auto algo2 = sq_2.algorithm();
sq_2.record(kp::OpTensorRebuild({ t1 }))
sq_2.record(kp::OpAlgoRebuild(params, algo2, spirv))
sq_2.record(kp::OpTensorSyncDevice(prams))
sq_2.record(kp::OpAlgoDispatch(prams, algo2))
}
sq_2.eval()
kp::Manager mgr = kp::Manager;
auto t1 = mgr.tensor({1, 2, 3}); // Held as weak ptr but passed as shared
auto t2 = mgr.tensor({1, 2, 3});
auto algo2 = mgr.algorithm();
{
auto sq_2 = mgr.sequence()
{
sq_2.record(kp::OpTensorRebuild({ t1 })) // record only supports move operator &&
sq_2.record(kp::OpAlgoRebuild(params, algo2, spirv))
sq_2.record(kp::OpTensorSyncDevice(prams))
sq_2.record(kp::OpAlgoDispatch(prams, algo2))
}
sq_2.eval()
}
// What about only tensors being init with it
{
kp::Manager mgr = kp::Manager;
auto t0 = mgr.tensor({0, 0, 0})
{
auto t1 = mgr.tensor({1, 2, 3}); // Held as weak ptr but passed as shared (refc 1)
{
auto sq_2 = mgr.sequence()
{
auto t2 = mgr.tensor({1, 2, 3}); // Held as weak ptr but passed as shared (refc 1)
auto algo2 = mgr.algorithm(); // Held as weak ptr but passed as shared (refc 1)
params = {t1, t2}
sq_2.record(kp::OpTensorRebuild(params, {1, 2, 3, 4})) // refc is now 2 for params
sq_2.record(kp::OpAlgoRebuild(params, algo2, spirv)) // refc is now 2 for algo2, 3 for params
sq_2.record(kp::OpTensorSyncDevice(prams)) // refc for params 4
sq_2.record(kp::OpAlgoDispatch(prams, algo2)) // refc for params 5, 3 for algo2
}
sq_2.eval() // all refcs still valid
} // seq destroyed so refc for algo2 and t2 drops to 0, gets destroyed, t1 has 1
} // t1 refc drops to 0, gets destroyed
// refc of t0 is still 1
mgr.gc() // Iterates through all tensor, sequence and algo weak_ptr and removes unused
// can we have something like
mgr.sequence()
.record(kp::OpTensorRebuild(params, {1, 2, 3, 4}))
.record(kp::OpAlgoDispatch(params, algo2))
.eval();
}// refc is destroyed by manager manually, the rest are empty shells so ignored
kp::Manager mgr = kp::Manager(0, {0, 1});
std::shared_ptr<kp::Tensor> t1 = mgr.tensor({1, 2, 3});
std::shared_ptr<kp::Tensor> t2 = mgr.tensor({1, 2, 3});
auto params = ...
std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm(params, spirv, workgroup);
sq_2.record<kp::OpTensorSyncDevice>(prams)
sq_2.record<kp::OpAlgoDispatch>(algo)
// WHY NO MORE DESTROY TENSORS:
* std::shared_ptr<kp::OpTensorCreate> op_tc1{ kp::OpTensorCreate(params) };
* {
* std::shared_ptr<kp::OpTensorCreate> op_tc2{ kp::OpTensorCreate(params) };
* mgr.eval(op_tc2);
* mgr.destroy(params);
*
* mgr.eval(op_tc1);
*
* } // op_tc1 is destroyed and all parameters are freed
// NO LONGER ALLOWED: Mainly as manager now needs to register ops
// If we still want it, then sequence will have to hold ref to manager
auto sq = mgr.sequence();
auto op_tsd = std::make_shared<kp::OpTensorSyncDevice>(params);
auto op_ad = std::make_shared<kp::OpAlgoDispatch>(algo);
auto op_tsl = std::make_shared<kp::OpTensorSyncLocal>(mgr, params);
sq.record({op_tsd, op_ad, op_tsl}); // Clear and record
sq.eval();
sq.record({op_tsd, op_ad, op_tsl}, clear=false); // record on top
sq.eval();
sq.clear(); // explicitly clear
mgr = kp.Manager()
op_ct = kp.OpTensorCreate(params)
mgr.eval(op_ct)
algo = kp.Algo(params, spirv)
op_ac = kp.OpAlgoCreate(algo)
mgr.eval(op_ac) # Runs init on operator function (below shows explicit steps)
op_tsd = kp.OpTensorSyncDevice(params)
op_ad = kp.OpAlgoDispatch(algo)
op_tsl = kp.OpTensorSyncLocal(params)
sq = mgr.sequence()
sq.record([op_tsd, op_ad, op_tsl])
sq.eval()
sq.eval()
sq.eval()
mgr.eval(op_ac) # Would fail as algo is initialised
mgr.destroy(op_ac) # Destroys Op and Algo owned object
mgr.eval(op_ac) # Succeeds with new
mgr.destroy(op_ac)
mgr.init(op_ac)
mgr.eval(op_ac, init=False)
def test_shader_str():
"""
Test basic OpAlgoBase operation

View file

@ -6,7 +6,7 @@
#include "kompute/Manager.hpp"
#include "kompute/Sequence.hpp"
#include "kompute/operations/OpBase.hpp"
#include "kompute/operations/OpAlgoBase.hpp"
#include "kompute/operations/OpAlgoCreate.hpp"
#include "kompute/operations/OpAlgoLhsRhsOut.hpp"
#include "kompute/operations/OpMult.hpp"
#include "kompute/operations/OpTensorCopy.hpp"

View file

@ -1893,14 +1893,14 @@ namespace kp {
* By default it enables the user to provide a dynamic number of tensors
* which are then passed as inputs.
*/
class OpAlgoBase : public OpBase
class OpAlgoCreate : public OpBase
{
public:
/**
* Base constructor, should not be used unless explicitly intended.
*/
OpAlgoBase();
OpAlgoCreate();
/**
* Default constructor with parameters that provides the bare minimum
@ -1914,7 +1914,7 @@ class OpAlgoBase : public OpBase
* @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format)
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
OpAlgoCreate(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
@ -1933,7 +1933,7 @@ class OpAlgoBase : public OpBase
* @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format)
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
OpAlgoCreate(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
@ -1952,7 +1952,7 @@ class OpAlgoBase : public OpBase
* @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
OpAlgoCreate(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
@ -1964,7 +1964,7 @@ class OpAlgoBase : public OpBase
* Default destructor, which is in charge of destroying the algorithm
* components but does not destroy the underlying tensors
*/
virtual ~OpAlgoBase() override;
virtual ~OpAlgoCreate() override;
/**
* The init function is responsible for the initialisation of the algorithm
@ -2005,9 +2005,9 @@ class OpAlgoBase : public OpBase
// -------------- ALWAYS OWNED RESOURCES
Workgroup mKomputeWorkgroup;
Workgroup mWorkgroup;
std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing
std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoCreate to find the data automatically and load for processing
std::vector<uint32_t> mShaderDataRaw; ///< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content
virtual std::vector<uint32_t> fetchSpirvBinaryData();
@ -2024,7 +2024,7 @@ namespace kp {
* right hand and left hand side datapoints together with a single output.
* The expected data passed is two input tensors and one output tensor.
*/
class OpAlgoLhsRhsOut : public OpAlgoBase
class OpAlgoLhsRhsOut : public OpAlgoCreate
{
public:
/**
@ -2102,7 +2102,7 @@ namespace kp {
* Operation that performs multiplication on two tensors and outputs on third
* tensor.
*/
class OpMult : public OpAlgoBase
class OpMult : public OpAlgoCreate
{
public:
/**
@ -2128,7 +2128,7 @@ class OpMult : public OpAlgoBase
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors,
const Workgroup& komputeWorkgroup = {})
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup)
: OpAlgoCreate(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup)
{
KP_LOG_DEBUG("Kompute OpMult constructor with params");

View file

@ -4,36 +4,62 @@
namespace kp {
// Base constructor: does nothing beyond emitting a debug log line.
// No device, tensors or shader data are supplied through this overload.
Algorithm::Algorithm()
{
KP_LOG_DEBUG("Kompute Algorithm base constructor");
}
Algorithm::Algorithm(std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
const Constants& specializationConstants)
Algorithm::Algorithm(
std::shared_ptr<vk::Device> device,
const std::vector<std::shared_ptr<Tensor>>& tensors,
const std::vector<uint32_t>& spirv,
const Workgroup& workgroup,
const Constants& specializationConstants,
const Constants& pushConstants)
{
KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
this->mDevice = device;
this->mCommandBuffer = commandBuffer;
this->mSpecializationConstants = specializationConstants;
this->setWorkgroup(workgroup);
this->mPushConstants = pushConstants;
this->rebuild(tensors, spirv, workgroup, specializationConstants, pushConstants);
}
// Destructor: logs that teardown has started and delegates the actual
// cleanup to freeMemoryDestroyGPUResources().
Algorithm::~Algorithm()
{
KP_LOG_DEBUG("Kompute Algorithm Destructor started");
this->freeMemoryDestroyGPUResources();
}
/**
 * Rebuilds the algorithm from the provided tensors and shader.
 *
 * If a descriptor pool was created by a previous build, the existing
 * resources are released first via freeMemoryDestroyGPUResources(). The
 * supplied configuration is then stored on the instance and the parameters,
 * shader module and pipeline are created again.
 *
 * @param tensors Tensors forwarded to createParameters()
 * @param spirv SPIR-V words stored in mSpirv, which createShaderModule() reads
 * @param workgroup Dispatch layout forwarded to setWorkgroup()
 * @param specializationConstants Stored in mSpecializationConstants
 * @param pushConstants Stored in mPushConstants
 */
void
Algorithm::rebuild(
  const std::vector<std::shared_ptr<Tensor>>& tensors,
  const std::vector<uint32_t>& spirv,
  const Workgroup& workgroup,
  const Constants& specializationConstants,
  const Constants& pushConstants)
{
    KP_LOG_DEBUG("Kompute Algorithm rebuild started");

    // Descriptor pool is created first so if available then destroy all
    // before rebuild
    if (this->mFreeDescriptorPool) {
        this->freeMemoryDestroyGPUResources();
    }

    // Persist the arguments before building: createShaderModule() takes no
    // parameters and reads this->mSpirv, so skipping this step would build
    // the module from stale (or empty) shader data and silently ignore the
    // configuration passed to this call.
    this->mSpirv = spirv;
    this->mSpecializationConstants = specializationConstants;
    this->mPushConstants = pushConstants;
    // Same single-argument form the constructor uses.
    this->setWorkgroup(workgroup);

    this->createParameters(tensors);
    this->createShaderModule();
    this->createPipeline();
}
void
Algorithm::freeMemoryDestroyGPUResources() {
if (!this->mDevice) {
KP_LOG_ERROR(
"Kompute Algorithm destructor reached with null Device pointer");
KP_LOG_WARN(
"Kompute Algorithm destroy function reached with null Device pointer");
return;
}
if (this->mFreePipeline) {
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline");
if (!this->mPipeline) {
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"pipeline but it is null");
}
this->mDevice->destroy(
@ -44,7 +70,7 @@ Algorithm::~Algorithm()
if (this->mFreePipelineCache) {
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache");
if (!this->mPipelineCache) {
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"pipeline cache but it is null");
}
this->mDevice->destroy(
@ -55,7 +81,7 @@ Algorithm::~Algorithm()
if (this->mFreePipelineLayout) {
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout");
if (!this->mPipelineLayout) {
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"pipeline layout but it is null");
}
this->mDevice->destroy(
@ -66,7 +92,7 @@ Algorithm::~Algorithm()
if (this->mFreeShaderModule) {
KP_LOG_DEBUG("Kompute Algorithm Destroying shader module");
if (!this->mShaderModule) {
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy shader "
KP_LOG_WARN("Kompute Algorithm Error requested to destroy shader "
"module but it is null");
}
this->mDevice->destroy(
@ -77,7 +103,7 @@ Algorithm::~Algorithm()
if (this->mFreeDescriptorSet) {
KP_LOG_DEBUG("Kompute Algorithm Freeing Descriptor Set");
if (!this->mDescriptorSet) {
KP_LOG_ERROR(
KP_LOG_WARN(
"Kompute Algorithm Error requested to free descriptor set");
}
this->mDevice->freeDescriptorSets(
@ -87,7 +113,7 @@ Algorithm::~Algorithm()
if (this->mFreeDescriptorSetLayout) {
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout");
if (!this->mDescriptorSetLayout) {
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"descriptor set layout but it is null");
}
this->mDevice->destroy(
@ -98,7 +124,7 @@ Algorithm::~Algorithm()
if (this->mFreeDescriptorPool) {
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Pool");
if (!this->mDescriptorPool) {
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"descriptor pool but it is null");
}
this->mDevice->destroy(
@ -108,27 +134,7 @@ Algorithm::~Algorithm()
}
void
Algorithm::init(const std::vector<uint32_t>& shaderFileData,
std::vector<std::shared_ptr<Tensor>> tensorParams)
{
KP_LOG_DEBUG("Kompute Algorithm init started");
this->createParameters(tensorParams);
this->createShaderModule(shaderFileData);
for (std::shared_ptr<Tensor> tensor : tensorParams) {
this->mSpecializationConstants.push_back(tensor->size());
}
this->createPipeline();
}
void
Algorithm::createDescriptorPool()
{}
void
Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
Algorithm::createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorParams)
{
KP_LOG_DEBUG("Kompute Algorithm createParameters started");
@ -207,17 +213,17 @@ Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
}
void
Algorithm::createShaderModule(const std::vector<uint32_t>& shaderFileData)
Algorithm::createShaderModule()
{
KP_LOG_DEBUG("Kompute Algorithm createShaderModule started");
vk::ShaderModuleCreateInfo shaderModuleInfo(
vk::ShaderModuleCreateFlags(),
sizeof(uint32_t) * shaderFileData.size(),
shaderFileData.data());
sizeof(uint32_t) * this->mSpirv.size(),
this->mSpirv.data());
KP_LOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}",
shaderFileData.size());
this->mSpirv.size());
this->mFreeShaderModule = true;
this->mShaderModule = std::make_shared<vk::ShaderModule>();
this->mDevice->createShaderModule(
@ -300,21 +306,42 @@ Algorithm::createPipeline()
}
void
Algorithm::recordDispatch(uint32_t x, uint32_t y, uint32_t z)
Algorithm::recordDispatch(std::shared_ptr<vk::CommandBuffer> commandBuffer)
{
KP_LOG_DEBUG("Kompute Algorithm calling record dispatch");
this->mCommandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute,
commandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute,
*this->mPipeline);
this->mCommandBuffer->bindDescriptorSets(vk::PipelineBindPoint::eCompute,
commandBuffer->bindDescriptorSets(vk::PipelineBindPoint::eCompute,
*this->mPipelineLayout,
0, // First set
*this->mDescriptorSet,
nullptr // Dispatcher
);
this->mCommandBuffer->dispatch(x, y, z);
commandBuffer->dispatch(this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]);
}
/**
 * Stores the workgroup (dispatch size) that recordDispatch passes to the
 * command buffer.
 *
 * @param workgroup Requested X/Y/Z dispatch size. An X value of 0 means "not
 * provided"; Y and Z values of 0 are replaced with 1.
 * @param minSize X dispatch size used when no workgroup is provided; the
 * resulting workgroup is then { minSize, 1, 1 }.
 */
void
Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize)
{
    if (workgroup[0] > 0) {
        // At least the X value was supplied, so the caller's values win and
        // only the unset trailing dimensions fall back to 1.
        this->mWorkgroup = { workgroup[0],
                             workgroup[1] > 0 ? workgroup[1] : 1,
                             workgroup[2] > 0 ? workgroup[2] : 1 };
    } else {
        // Nothing supplied: one-dimensional dispatch of minSize.
        this->mWorkgroup = { minSize, 1, 1 };
    }

    // The message previously named OpAlgoCreate, although this code lives in
    // Algorithm; it now names the component that actually emits it.
    KP_LOG_INFO("Kompute Algorithm dispatch size X: {}, Y: {}, Z: {}",
                this->mWorkgroup[0],
                this->mWorkgroup[1],
                this->mWorkgroup[2]);
}
}

View file

@ -61,21 +61,30 @@ Manager::~Manager()
if (this->mManagedSequences.size()) {
KP_LOG_DEBUG("Kompute Manager explicitly running destructor for "
"managed sequences");
for (const std::pair<std::string, std::shared_ptr<Sequence>>& sqPair :
this->mManagedSequences) {
sqPair.second->freeMemoryDestroyGPUResources();
for (const std::weak_ptr<Sequence>& weakSq : this->mManagedSequences) {
if (std::shared_ptr<Sequence> sq = weakSq.lock()) {
sq->freeMemoryDestroyGPUResources();
}
}
this->mManagedSequences.clear();
}
if (this->mManagedAlgorithms.size()) {
KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
for (const std::weak_ptr<Algorithm>& weakAlgorithm : this->mManagedAlgorithms) {
if (std::shared_ptr<Algorithm> algorithm = weakAlgorithm.lock()) {
algorithm->freeMemoryDestroyGPUResources();
}
}
this->mManagedTensors.clear();
}
if (this->mManagedTensors.size()) {
KP_LOG_DEBUG("Kompute Manager explicitly freeing tensors");
for (const std::shared_ptr<Tensor>& tensor : this->mManagedTensors) {
if (!tensor->isInit()) {
KP_LOG_ERROR("Kompute Manager attempted to free managed tensor "
"but not tensor is not initialised");
for (const std::weak_ptr<Tensor>& weakTensor : this->mManagedTensors) {
if (std::shared_ptr<Tensor> tensor = weakTensor.lock()) {
tensor->freeMemoryDestroyGPUResources();
}
tensor->freeMemoryDestroyGPUResources();
}
this->mManagedTensors.clear();
}
@ -111,32 +120,21 @@ Manager::~Manager()
}
std::shared_ptr<Sequence>
Manager::sequence(std::string sequenceName, uint32_t queueIndex)
Manager::sequence(uint32_t queueIndex)
{
KP_LOG_DEBUG("Kompute Manager sequence() with sequenceName: {} "
"and queueIndex: {}",
sequenceName,
queueIndex);
std::shared_ptr<Sequence> sq = nullptr;
std::shared_ptr<Sequence> sq =
std::make_shared<Sequence>(this->mPhysicalDevice,
this->mDevice,
this->mComputeQueues[queueIndex],
this->mComputeQueueFamilyIndices[queueIndex]);
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator found =
this->mManagedSequences.find(sequenceName);
this->mManagedSequences.insert(sq);
if (found == this->mManagedSequences.end()) {
std::shared_ptr<Sequence> sq =
std::make_shared<Sequence>(this->mPhysicalDevice,
this->mDevice,
this->mComputeQueues[queueIndex],
this->mComputeQueueFamilyIndices[queueIndex]);
sq->init();
this->mManagedSequences.insert({ sequenceName, sq });
return sq;
} else {
return found->second;
}
return sq;
}
void
@ -334,13 +332,10 @@ Manager::tensor(
Tensor::TensorTypes tensorType,
bool syncDataToGPU)
{
KP_LOG_DEBUG("Kompute Manager tensor triggered");
KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
KP_LOG_DEBUG("Kompute Manager creating new tensor shared ptr");
std::shared_ptr<Tensor> tensor =
std::make_shared<Tensor>(kp::Tensor(data, tensorType));
tensor->init(this->mPhysicalDevice, this->mDevice);
std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>(
kp::Tensor(this->mPhysicalDevice, this->mDevice, data, tensorType));
if (syncDataToGPU) {
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
@ -349,6 +344,29 @@ Manager::tensor(
return tensor;
}
/**
 * Creates an Algorithm on this manager's device and registers it with the
 * manager.
 *
 * @param tensors Tensors forwarded to the Algorithm constructor
 * @param spirv SPIR-V words forwarded to the Algorithm constructor
 * @param workgroup Dispatch size forwarded to the Algorithm constructor
 * @param specializationConstants Forwarded to the Algorithm constructor
 * @param pushConstants Forwarded to the Algorithm constructor
 * @return Shared pointer to the newly created algorithm
 */
std::shared_ptr<Algorithm>
Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
                   const std::vector<uint32_t>& spirv,
                   const Workgroup& workgroup,
                   const Constants& specializationConstants,
                   const Constants& pushConstants)
{
    KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");

    // Construct the Algorithm directly inside the shared_ptr control block.
    // Building a temporary kp::Algorithm and copying it would run the
    // temporary's destructor right after the copy; Algorithm::~Algorithm
    // destroys the Vulkan objects it is flagged as owning, so the stored copy
    // would be left referring to handles that may already be destroyed (and
    // would try to destroy them a second time later).
    std::shared_ptr<Algorithm> algorithm =
      std::make_shared<Algorithm>(this->mDevice,
                                  tensors,
                                  spirv,
                                  workgroup,
                                  specializationConstants,
                                  pushConstants);

    this->mManagedAlgorithms.insert(algorithm);

    return algorithm;
}
void
Manager::rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,

View file

@ -1,176 +0,0 @@
#pragma once
#include "kompute/operations/OpAlgoBase.hpp"
namespace kp {

/**
 * Base constructor. Only logs; no workgroup is resolved and no Algorithm
 * component is created.
 */
OpAlgoBase::OpAlgoBase()
{
    KP_LOG_DEBUG("Kompute OpAlgoBase constructor base");
}

/**
 * Main constructor: forwards the Vulkan handles and tensors to OpBase,
 * resolves the dispatch workgroup and creates the Algorithm component.
 *
 * @param physicalDevice Forwarded to OpBase
 * @param device Forwarded to OpBase and to the Algorithm
 * @param commandBuffer Forwarded to OpBase and to the Algorithm
 * @param tensors Tensors used by the operation
 * @param komputeWorkgroup Requested dispatch size; an X value of 0 means
 * "derive it from the first tensor", Y/Z values of 0 become 1
 * @param specializationConstants Forwarded to the Algorithm
 * @throws std::runtime_error if no workgroup is provided and tensors is empty
 */
OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                       std::shared_ptr<vk::Device> device,
                       std::shared_ptr<vk::CommandBuffer> commandBuffer,
                       std::vector<std::shared_ptr<Tensor>>& tensors,
                       const Workgroup& komputeWorkgroup,
                       const Constants& specializationConstants)
  : OpBase(physicalDevice, device, commandBuffer, tensors)
{
    KP_LOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}",
                 tensors.size());

    // The dispatch size is set up based on either explicitly provided
    // parameters or by default it takes the size of the first tensor
    if (komputeWorkgroup[0] > 0) {
        // If at least the x value is provided we use mainly the parameters
        // provided
        this->mKomputeWorkgroup = {
            komputeWorkgroup[0],
            komputeWorkgroup[1] > 0 ? komputeWorkgroup[1] : 1,
            komputeWorkgroup[2] > 0 ? komputeWorkgroup[2] : 1
        };
    } else {
        // Indexing tensors[0] on an empty vector is undefined behaviour, so
        // fail with the same error init() reports for this situation.
        if (tensors.empty()) {
            throw std::runtime_error(
              "Kompute OpAlgoBase called with less than 1 tensor");
        }
        this->mKomputeWorkgroup = { tensors[0]->size(), 1, 1 };
    }
    KP_LOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}",
                this->mKomputeWorkgroup[0],
                this->mKomputeWorkgroup[1],
                this->mKomputeWorkgroup[2]);

    this->mAlgorithm = std::make_shared<Algorithm>(
      device, commandBuffer, specializationConstants);
}

/**
 * Constructor that additionally records the path of a SPIR-V file, which
 * fetchSpirvBinaryData() reads later.
 *
 * @param shaderFilePath Path of the SPIR-V binary file
 */
OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                       std::shared_ptr<vk::Device> device,
                       std::shared_ptr<vk::CommandBuffer> commandBuffer,
                       std::vector<std::shared_ptr<Tensor>>& tensors,
                       std::string shaderFilePath,
                       const Workgroup& komputeWorkgroup,
                       const Constants& specializationConstants)
  : OpAlgoBase(physicalDevice,
               device,
               commandBuffer,
               tensors,
               komputeWorkgroup,
               specializationConstants)
{
    KP_LOG_DEBUG(
      "Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}",
      shaderFilePath);

    this->mShaderFilePath = shaderFilePath;
}

/**
 * Constructor that additionally stores already-loaded SPIR-V words, which
 * fetchSpirvBinaryData() returns later.
 *
 * @param shaderDataRaw SPIR-V words to use for the shader
 */
OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                       std::shared_ptr<vk::Device> device,
                       std::shared_ptr<vk::CommandBuffer> commandBuffer,
                       std::vector<std::shared_ptr<Tensor>>& tensors,
                       const std::vector<uint32_t>& shaderDataRaw,
                       const Workgroup& komputeWorkgroup,
                       const Constants& specializationConstants)
  : OpAlgoBase(physicalDevice,
               device,
               commandBuffer,
               tensors,
               komputeWorkgroup,
               specializationConstants)
{
    KP_LOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw "
                 "data length: {}",
                 shaderDataRaw.size());

    this->mShaderDataRaw = shaderDataRaw;
}

/**
 * Destructor. Only logs.
 */
OpAlgoBase::~OpAlgoBase()
{
    KP_LOG_DEBUG("Kompute OpAlgoBase destructor started");
}

/**
 * Validates the tensors, fetches the SPIR-V data and initialises the
 * Algorithm component with it.
 *
 * @throws std::runtime_error if there are no tensors, if any tensor is not
 * initialised, or if fetchSpirvBinaryData() fails
 */
void
OpAlgoBase::init()
{
    KP_LOG_DEBUG("Kompute OpAlgoBase init called");

    if (this->mTensors.size() < 1) {
        throw std::runtime_error(
          "Kompute OpAlgoBase called with less than 1 tensor");
    }

    // Bound by const reference: copying the shared_ptr per iteration only
    // adds reference-count traffic.
    for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
        if (!tensor->isInit()) {
            throw std::runtime_error(
              "Kompute OpAlgoBase validation failed; all tensor parameters "
              "must be initialised.");
        }
    }

    KP_LOG_DEBUG("Kompute OpAlgoBase fetching spirv data");

    std::vector<uint32_t> shaderFileData = this->fetchSpirvBinaryData();

    KP_LOG_DEBUG("Kompute OpAlgoBase Initialising algorithm component");

    this->mAlgorithm->init(shaderFileData, this->mTensors);
}

/**
 * Records a host-write to shader-read buffer barrier for every tensor and
 * then the algorithm dispatch with the resolved workgroup.
 */
void
OpAlgoBase::record()
{
    KP_LOG_DEBUG("Kompute OpAlgoBase record called");

    // Barrier to ensure the data is finished writing to buffer memory
    for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
        tensor->recordBufferMemoryBarrier(
          this->mCommandBuffer,
          vk::AccessFlagBits::eHostWrite,
          vk::AccessFlagBits::eShaderRead,
          vk::PipelineStageFlagBits::eHost,
          vk::PipelineStageFlagBits::eComputeShader);
    }

    this->mAlgorithm->recordDispatch(this->mKomputeWorkgroup[0],
                                     this->mKomputeWorkgroup[1],
                                     this->mKomputeWorkgroup[2]);
}

/**
 * Pre-evaluation hook. Only logs.
 */
void
OpAlgoBase::preEval()
{
    KP_LOG_DEBUG("Kompute OpAlgoBase preEval called");
}

/**
 * Post-evaluation hook. Only logs.
 */
void
OpAlgoBase::postEval()
{
    KP_LOG_DEBUG("Kompute OpAlgoBase postSubmit called");
}

/**
 * Returns the SPIR-V words for the shader: read from mShaderFilePath when a
 * path was provided, otherwise the raw data stored at construction.
 *
 * When reading from a file, trailing bytes that do not make up a whole
 * 32-bit word are dropped.
 *
 * @return SPIR-V words
 * @throws std::runtime_error if the file cannot be opened or read, or if
 * neither a file path nor raw data was provided
 */
std::vector<uint32_t>
OpAlgoBase::fetchSpirvBinaryData()
{
    KP_LOG_DEBUG("Kompute OpAlgoBase Running fetchSpirvBinaryData");

    if (this->mShaderFilePath.size()) {
        KP_LOG_DEBUG("Kompute OpAlgoBase Reading data from file path");

        // Opened at the end (ios::ate) so tellg() yields the file size.
        std::ifstream fileStream(this->mShaderFilePath,
                                 std::ios::binary | std::ios::in |
                                   std::ios::ate);

        if (!fileStream.good()) {
            throw std::runtime_error("Error reading file: " +
                                     this->mShaderFilePath);
        }

        // tellg() reports failure as -1; converting that straight to size_t
        // would request an enormous buffer.
        const std::streamoff endOffset = fileStream.tellg();
        if (endOffset < 0) {
            throw std::runtime_error("Error reading file: " +
                                     this->mShaderFilePath);
        }
        const size_t shaderFileSize = static_cast<size_t>(endOffset);
        fileStream.seekg(0, std::ios::beg);

        // Read straight into the result vector: no manually managed buffer
        // (the previous new[] allocation was never released) and no
        // reinterpretation of a char buffer as uint32_t.
        std::vector<uint32_t> spirvWords(shaderFileSize / sizeof(uint32_t));
        fileStream.read(
          reinterpret_cast<char*>(spirvWords.data()),
          static_cast<std::streamsize>(spirvWords.size() * sizeof(uint32_t)));
        if (!fileStream) {
            throw std::runtime_error("Error reading file: " +
                                     this->mShaderFilePath);
        }
        fileStream.close();

        KP_LOG_WARN("Kompute OpAlgoBase fetched {} bytes", shaderFileSize);

        return spirvWords;
    }

    if (this->mShaderDataRaw.size()) {
        KP_LOG_DEBUG("Kompute OpAlgoBase Reading data from data provided");
        return this->mShaderDataRaw;
    }

    throw std::runtime_error(
      "Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither "
      "filepath nor data provided");
}

}

51
src/OpAlgoCreate.cpp Normal file
View file

@ -0,0 +1,51 @@
#pragma once
#include "kompute/operations/OpAlgoCreate.hpp"
namespace kp {

/**
 * Builds the operation from the tensors and algorithm it works with, and
 * flags the algorithm (but not the tensors) as managed by this operation.
 *
 * @param tensors Tensors forwarded to OpBase
 * @param algorithm Algorithm forwarded to OpBase
 */
OpAlgoCreate::OpAlgoCreate(std::vector<std::shared_ptr<Tensor>> tensors,
                           std::shared_ptr<Algorithm> algorithm)
  : OpBase(tensors, algorithm)
{
    KP_LOG_DEBUG("Kompute OpAlgoCreate constructor");

    this->mManagesTensors = false;
    this->mManagesAlgorithm = true;
}

/**
 * Destructor. Only logs.
 */
OpAlgoCreate::~OpAlgoCreate()
{
    KP_LOG_DEBUG("Kompute OpAlgoCreate destructor started");
}

/**
 * Initialises the operation by delegating to OpBase::init.
 *
 * @param physicalDevice Forwarded to OpBase::init
 * @param device Forwarded to OpBase::init
 */
void
OpAlgoCreate::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                   std::shared_ptr<vk::Device> device)
{
    KP_LOG_DEBUG("Kompute OpAlgoCreate init started");

    // The base-class implementation is invoked explicitly; it is the one
    // expected to create the algorithm.
    OpBase::init(physicalDevice, device);
}

/**
 * Record hook. Only logs; nothing is written to the command buffer.
 *
 * @param commandBuffer Unused
 */
void
OpAlgoCreate::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
{
    KP_LOG_DEBUG("Kompute OpAlgoCreate record called");
}

/**
 * Pre-evaluation hook. Only logs.
 */
void
OpAlgoCreate::preEval()
{
    KP_LOG_DEBUG("Kompute OpAlgoCreate preEval called");
}

/**
 * Post-evaluation hook. Only logs.
 */
void
OpAlgoCreate::postEval()
{
    KP_LOG_DEBUG("Kompute OpAlgoCreate postSubmit called");
}

}

59
src/OpAlgoDispatch.cpp Normal file
View file

@ -0,0 +1,59 @@
#pragma once
#include "kompute/operations/OpAlgoDispatch.hpp"
namespace kp {

/**
 * Builds the operation from the tensors and algorithm it dispatches. Neither
 * the tensors nor the algorithm are flagged as managed by this operation.
 *
 * @param tensors Tensors forwarded to OpBase
 * @param algorithm Algorithm forwarded to OpBase
 */
OpAlgoDispatch::OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>> tensors,
                               std::shared_ptr<Algorithm> algorithm)
  : OpBase(tensors, algorithm)
{
    KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");

    this->mManagesTensors = false;
    this->mManagesAlgorithm = false;
}

/**
 * Destructor. Only logs.
 */
OpAlgoDispatch::~OpAlgoDispatch()
{
    KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started");
}

/**
 * Init hook. Only logs; both parameters are unused.
 *
 * @param physicalDevice Unused
 * @param device Unused
 */
void
OpAlgoDispatch::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                     std::shared_ptr<vk::Device> device)
{
    KP_LOG_DEBUG("Kompute OpAlgoDispatch init called");
}

/**
 * Records a host-write to shader-read buffer barrier for every tensor and
 * then the algorithm dispatch into the given command buffer.
 *
 * @param commandBuffer Command buffer the barriers and dispatch are recorded
 * into
 */
void
OpAlgoDispatch::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
{
    KP_LOG_DEBUG("Kompute OpAlgoDispatch record called");

    // Barrier to ensure the data is finished writing to buffer memory.
    // Tensors are bound by const reference: copying the shared_ptr on every
    // iteration only adds reference-count traffic.
    for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
        tensor->recordBufferMemoryBarrier(
          commandBuffer,
          vk::AccessFlagBits::eHostWrite,
          vk::AccessFlagBits::eShaderRead,
          vk::PipelineStageFlagBits::eHost,
          vk::PipelineStageFlagBits::eComputeShader);
    }

    this->mAlgorithm->recordDispatch(commandBuffer);
}

/**
 * Pre-evaluation hook. Only logs.
 */
void
OpAlgoDispatch::preEval()
{
    KP_LOG_DEBUG("Kompute OpAlgoDispatch preEval called");
}

/**
 * Post-evaluation hook. Only logs.
 */
void
OpAlgoDispatch::postEval()
{
    KP_LOG_DEBUG("Kompute OpAlgoDispatch postSubmit called");
}

}

View file

@ -10,15 +10,12 @@ OpAlgoLhsRhsOut::OpAlgoLhsRhsOut()
}
OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors,
const Workgroup& komputeWorkgroup)
std::vector<std::shared_ptr<Tensor>>& tensors,
std::shared_ptr<Algorithm> algorithm)
// The inheritance is initialised with the copyOutputData to false given that
// this dependent class handles the transfer of data via staging buffers in
// a granular way.
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup)
: OpAlgoCreate(tensors, algorithm)
{
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params");
}
@ -29,7 +26,8 @@ OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut()
}
void
OpAlgoLhsRhsOut::init()
OpAlgoLhsRhsOut::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device)
{
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut init called");
@ -70,12 +68,10 @@ OpAlgoLhsRhsOut::init()
std::vector<uint32_t> shaderFileData = this->fetchSpirvBinaryData();
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component");
this->mAlgorithm->init(shaderFileData, this->mTensors);
}
void
OpAlgoLhsRhsOut::record()
OpAlgoLhsRhsOut::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
{
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut record called");

View file

@ -3,18 +3,13 @@
namespace kp {
OpTensorCopy::OpTensorCopy()
{
KP_LOG_DEBUG("Kompute OpTensorCopy constructor base");
}
OpTensorCopy::OpTensorCopy(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors)
: OpBase(physicalDevice, device, commandBuffer, tensors)
OpTensorCopy::OpTensorCopy(std::vector<std::shared_ptr<Tensor>> tensors)
: OpBase(tensors, nullptr)
{
KP_LOG_DEBUG("Kompute OpTensorCopy constructor with params");
this->mManagesTensors = false;
this->mManagesAlgorithm = false;
}
OpTensorCopy::~OpTensorCopy()
@ -23,7 +18,8 @@ OpTensorCopy::~OpTensorCopy()
}
void
OpTensorCopy::init()
OpTensorCopy::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device)
{
KP_LOG_DEBUG("Kompute OpTensorCopy init called");
@ -46,14 +42,14 @@ OpTensorCopy::init()
}
void
OpTensorCopy::record()
OpTensorCopy::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
{
KP_LOG_DEBUG("Kompute OpTensorCopy record called");
// We iterate from the second tensor onwards and record a copy to all
for (size_t i = 1; i < this->mTensors.size(); i++) {
this->mTensors[i]->recordCopyFrom(
this->mCommandBuffer, this->mTensors[0], false);
commandBuffer, this->mTensors[0], false);
}
}

46
src/OpTensorCreate.cpp Normal file
View file

@ -0,0 +1,46 @@
#include "kompute/operations/OpTensorCreate.hpp"
namespace kp {

/**
 * Builds the operation from the tensors it works with (no algorithm) and
 * flags the tensors as managed by this operation.
 *
 * @param tensors Tensors forwarded to OpBase
 */
OpTensorCreate::OpTensorCreate(std::vector<std::shared_ptr<Tensor>>& tensors)
  : OpBase(tensors, nullptr)
{
    KP_LOG_DEBUG("Compute OpTensorCreate constructor with params");

    this->mManagesTensors = true;
}

/**
 * Destructor. Only logs.
 */
OpTensorCreate::~OpTensorCreate()
{
    KP_LOG_DEBUG("Kompute OpTensorCreate destructor started");
}

/**
 * Initialises the operation by delegating to OpBase::init.
 *
 * @param physicalDevice Forwarded to OpBase::init
 * @param device Forwarded to OpBase::init
 */
void
OpTensorCreate::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                     std::shared_ptr<vk::Device> device)
{
    KP_LOG_DEBUG("Kompute OpTensorCreate init called");

    OpBase::init(physicalDevice, device);
}

/**
 * Record hook. Only logs; nothing is written to the command buffer.
 *
 * @param commandBuffer Unused
 */
void
OpTensorCreate::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
{
    KP_LOG_DEBUG("Kompute OpTensorCreate record called");
}

/**
 * Pre-evaluation hook. Only logs.
 */
void
OpTensorCreate::preEval()
{
    KP_LOG_DEBUG("Kompute OpTensorCreate preEval called");
}

/**
 * Post-evaluation hook. Only logs.
 */
void
OpTensorCreate::postEval()
{
    KP_LOG_DEBUG("Kompute OpTensorCreate postEval called");
}

}

View file

@ -1,21 +1,11 @@
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpTensorSyncDevice.hpp"
namespace kp {
OpTensorSyncDevice::OpTensorSyncDevice()
{
KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor base");
}
OpTensorSyncDevice::OpTensorSyncDevice(
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors)
: OpBase(physicalDevice, device, commandBuffer, tensors)
: OpBase(tensors, nullptr)
{
KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor with params");
}
@ -26,7 +16,8 @@ OpTensorSyncDevice::~OpTensorSyncDevice()
}
void
OpTensorSyncDevice::init()
OpTensorSyncDevice::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device)
{
KP_LOG_DEBUG("Kompute OpTensorSyncDevice init called");
@ -50,14 +41,14 @@ OpTensorSyncDevice::init()
}
void
OpTensorSyncDevice::record()
OpTensorSyncDevice::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
{
KP_LOG_DEBUG("Kompute OpTensorSyncDevice record called");
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mTensors[i]->recordCopyFromStagingToDevice(
this->mCommandBuffer, false);
commandBuffer, false);
}
}
}

View file

@ -5,19 +5,14 @@
namespace kp {
OpTensorSyncLocal::OpTensorSyncLocal()
{
KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor base");
}
OpTensorSyncLocal::OpTensorSyncLocal(
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors)
: OpBase(physicalDevice, device, commandBuffer, tensors)
: OpBase(tensors, nullptr)
{
KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor with params");
this->mManagesTensors = false;
this->mManagesAlgorithm = false;
}
OpTensorSyncLocal::~OpTensorSyncLocal()
@ -26,7 +21,8 @@ OpTensorSyncLocal::~OpTensorSyncLocal()
}
void
OpTensorSyncLocal::init()
OpTensorSyncLocal::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device)
{
KP_LOG_DEBUG("Kompute OpTensorSyncLocal init called");
@ -40,24 +36,18 @@ OpTensorSyncLocal::init()
throw std::runtime_error(
"Kompute OpTensorSyncLocal: Tensor has not been initialized");
}
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
KP_LOG_WARN(
"Kompute OpTensorSyncLocal tensor parameter is of type "
"TensorTypes::eStorage and hence cannot be used to receive or "
"pass data.");
}
}
}
void
OpTensorSyncLocal::record()
OpTensorSyncLocal::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
{
KP_LOG_DEBUG("Kompute OpTensorSyncLocal record called");
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mTensors[i]->recordCopyFromDeviceToStaging(
this->mCommandBuffer, true);
commandBuffer, true);
}
}
}

View file

@ -3,12 +3,6 @@
namespace kp {
Sequence::Sequence()
{
KP_LOG_DEBUG("Kompute Sequence base constructor");
this->mIsInit = false;
}
Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::Queue> computeQueue,
@ -20,28 +14,16 @@ Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
this->mDevice = device;
this->mComputeQueue = computeQueue;
this->mQueueIndex = queueIndex;
this->mIsInit = false;
this->createCommandPool();
this->createCommandBuffer();
}
Sequence::~Sequence()
{
KP_LOG_DEBUG("Kompute Sequence Destructor started");
if (!this->mIsInit) {
KP_LOG_INFO("Kompute Sequence destructor called but sequence is not "
"initialized so no need to removing GPU resources.");
return;
} else {
this->freeMemoryDestroyGPUResources();
}
}
void
Sequence::init()
{
this->createCommandPool();
this->createCommandBuffer();
this->mIsInit = true;
this->freeMemoryDestroyGPUResources();
}
bool
@ -194,28 +176,14 @@ Sequence::isRecording()
return this->mRecording;
}
bool
Sequence::isInit()
{
return this->mIsInit;
}
void
Sequence::freeMemoryDestroyGPUResources()
{
KP_LOG_DEBUG("Kompute Sequence freeMemoryDestroyGPUResources called");
if (!this->mIsInit) {
KP_LOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called "
"but Sequence is not initialized so there's no relevant "
"GPU resources.");
return;
}
if (!this->mDevice) {
KP_LOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called "
"with null Device pointer");
this->mIsInit = false;
return;
}
@ -225,7 +193,6 @@ Sequence::freeMemoryDestroyGPUResources()
KP_LOG_ERROR(
"Kompute Sequence freeMemoryDestroyGPUResources called with null "
"CommandPool pointer");
this->mIsInit = false;
return;
}
this->mDevice->freeCommandBuffers(
@ -239,7 +206,6 @@ Sequence::freeMemoryDestroyGPUResources()
KP_LOG_ERROR(
"Kompute Sequence freeMemoryDestroyGPUResources called with null "
"CommandPool pointer");
this->mIsInit = false;
return;
}
this->mDevice->destroy(
@ -253,7 +219,6 @@ Sequence::freeMemoryDestroyGPUResources()
this->mOperations.clear();
}
this->mIsInit = false;
}
void

View file

@ -3,23 +3,19 @@
namespace kp {
Tensor::Tensor()
Tensor::Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
const std::vector<float>& data,
const TensorTypes& tensorType)
{
KP_LOG_DEBUG("Kompute Tensor base constructor");
this->mTensorType = TensorTypes::eDevice;
}
Tensor::Tensor(const std::vector<float>& data, TensorTypes tensorType)
{
#if DEBUG
KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}",
data.size(),
tensorType);
#endif
this->mData = data;
this->mShape = { static_cast<uint32_t>(data.size()) };
this->mTensorType = tensorType;
this->mPhysicalDevice = physicalDevice;
this->mDevice = device;
this->rebuild(data, tensorType);
}
Tensor::~Tensor()
@ -27,25 +23,25 @@ Tensor::~Tensor()
KP_LOG_DEBUG("Kompute Tensor destructor started. Type: {}",
this->tensorType());
if (this->isInit()) {
this->freeMemoryDestroyGPUResources();
}
this->freeMemoryDestroyGPUResources();
KP_LOG_DEBUG("Kompute Tensor destructor success");
}
void
Tensor::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device)
Tensor::rebuild(const std::vector<float>& data,
TensorTypes tensorType)
{
KP_LOG_DEBUG("Kompute Tensor running init with Vulkan params and num data "
"elementS: {}",
this->mData.size());
KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}",
data.size());
this->mPhysicalDevice = physicalDevice;
this->mDevice = device;
this->mData = data;
this->mTensorType = tensorType;
this->mIsInit = true;
if (this->mPrimaryBuffer || this->mPrimaryMemory) {
KP_LOG_DEBUG("Kompute Tensor destroying existing resources before rebuild");
this->freeMemoryDestroyGPUResources();
}
this->allocateMemoryCreateGPUResources();
}
@ -71,13 +67,7 @@ Tensor::memorySize()
uint32_t
Tensor::size()
{
return this->mShape[0];
}
std::array<uint32_t, KP_MAX_DIM_SIZE>
Tensor::shape()
{
return this->mShape;
return static_cast<uint32_t>(this->mData.size());
}
Tensor::TensorTypes
@ -86,12 +76,6 @@ Tensor::tensorType()
return this->mTensorType;
}
bool
Tensor::isInit()
{
return this->mIsInit && this->mPrimaryBuffer && this->mPrimaryMemory;
}
void
Tensor::setData(const std::vector<float>& data)
{
@ -166,11 +150,6 @@ Tensor::copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer,
bool createBarrier)
{
if (!this->mIsInit) {
throw std::runtime_error(
"Kompute Tensor attempted to run copyBuffer without init");
}
commandBuffer->copyBuffer(*bufferFrom, *bufferTo, copyRegion);
if (createBarrier) {
@ -344,11 +323,6 @@ Tensor::allocateMemoryCreateGPUResources()
{
KP_LOG_DEBUG("Kompute Tensor creating buffer");
if (!this->mIsInit) {
throw std::runtime_error(
"Kompute Tensor attempted to run createBuffer without init");
}
if (!this->mPhysicalDevice) {
throw std::runtime_error("Kompute Tensor phyisical device is null");
}
@ -457,9 +431,7 @@ Tensor::allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
void
Tensor::freeMemoryDestroyGPUResources()
{
KP_LOG_DEBUG("Kompute Tensor started freeMemoryDestroyGPUResources");
this->mIsInit = false;
KP_LOG_DEBUG("Kompute Tensor started freeMemoryDestroyGPUResources()");
if (!this->mDevice) {
KP_LOG_ERROR(
@ -519,7 +491,7 @@ Tensor::freeMemoryDestroyGPUResources()
}
}
KP_LOG_DEBUG("Kompute Tensor successful freeMemoryDestroyGPUResources");
KP_LOG_DEBUG("Kompute Tensor successful freeMemoryDestroyGPUResources()");
}
}

View file

@ -13,11 +13,6 @@ namespace kp {
class Algorithm
{
public:
/**
Base constructor for Algorithm. Should not be used unless explicit
intended.
*/
Algorithm();
/**
* Default constructor for Algorithm
@ -26,9 +21,13 @@ public:
* @param commandBuffer The vulkan command buffer to bind the pipeline and
* shaders
*/
Algorithm(std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
const Constants& specializationConstants = {});
Algorithm(
std::shared_ptr<vk::Device> device,
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {},
const Constants& pushConstants = {});
/**
* Initialiser for the shader data provided to the algorithm as well as
@ -39,8 +38,16 @@ public:
* @param specializationConstants The specialization parameters to pass to the function
* processing
*/
void init(const std::vector<uint32_t>& shaderFileData,
std::vector<std::shared_ptr<Tensor>> tensorParams);
void rebuild(
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {},
const Constants& pushConstants = {});
bool isInit();
void freeMemoryDestroyGPUResources();
/**
* Destructor for Algorithm which is responsible for freeing and destroying
@ -56,12 +63,13 @@ public:
* @param y Layout Y dispatch value
* @param z Layout Z dispatch value
*/
void recordDispatch(uint32_t x = 1, uint32_t y = 1, uint32_t z = 1);
void recordDispatch(std::shared_ptr<vk::CommandBuffer> commandBuffer);
void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);
private:
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::Device> mDevice;
std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
@ -80,15 +88,19 @@ private:
bool mFreePipeline = false;
// -------------- ALWAYS OWNED RESOURCES
std::vector<uint32_t> mSpirv;
Constants mSpecializationConstants;
Constants mPushConstants;
Workgroup mWorkgroup;
bool mIsInit;
// Create util functions
void createShaderModule(const std::vector<uint32_t>& shaderFileData);
void createShaderModule();
void createPipeline();
// Parameters
void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
void createDescriptorPool();
void createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorParams);
};
} // End namespace kp

View file

@ -67,9 +67,7 @@ class Manager
* @param queueIndex The queue to use from the available queues
* @return Shared pointer to the manager owned sequence resource
*/
std::shared_ptr<Sequence> sequence(
std::string sequenceName = KP_DEFAULT_SESSION,
uint32_t queueIndex = 0);
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0);
/**
* Function that evaluates operation against named sequence.
@ -228,6 +226,13 @@ class Manager
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice,
bool syncDataToGPU = true);
std::shared_ptr<Algorithm> algorithm(
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {},
const Constants& pushConstants = {});
/**
* Function that simplifies the common workflow of tensor initialisation. It
* will take the constructor parameters for a Tensor and will use it to
@ -312,10 +317,10 @@ class Manager
bool mFreeDevice = false;
// -------------- ALWAYS OWNED RESOURCES
std::set<std::shared_ptr<Tensor>> mManagedTensors;
std::unordered_map<std::string, std::shared_ptr<Sequence>>
mManagedSequences;
std::set<std::weak_ptr<Tensor>> mManagedTensors;
std::set<std::weak_ptr<Sequence>> mManagedSequences;
std::set<std::weak_ptr<Algorithm>> mManagedAlgorithms;
//std::unique_ptr<Sequence> mDefaultSequence;
std::vector<uint32_t> mComputeQueueFamilyIndices;
std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;

View file

@ -12,11 +12,6 @@ namespace kp {
class Sequence
{
public:
/**
* Base constructor for Sequence. Should not be used unless explicit
* intended.
*/
Sequence();
/**
* Main constructor for sequence which requires core vulkan components to
* generate all dependent resources.
@ -36,12 +31,6 @@ class Sequence
*/
~Sequence();
/**
* Initialises sequence including the creation of the command pool and the
* command buffer.
*/
void init();
/**
* Begins recording commands for commands to be submitted into the command
* buffer.
@ -99,13 +88,6 @@ class Sequence
*/
bool isRunning();
/**
* Returns true if the sequence has been successfully initialised.
*
* @return Boolean stating if sequence has been initialised.
*/
bool isInit();
/**
* Destroys and frees the GPU resources which include the buffer and memory
* and sets the sequence as init=False.
@ -179,7 +161,6 @@ class Sequence
std::vector<std::unique_ptr<OpBase>> mOperations;
// State
bool mIsInit = false;
bool mRecording = false;
bool mIsRunning = false;

View file

@ -2,8 +2,6 @@
#include "kompute/Core.hpp"
#define KP_MAX_DIM_SIZE 1
namespace kp {
/**
@ -30,11 +28,6 @@ class Tensor
eStorage = 2, ///< Type is Device memory (only)
};
/**
* Base constructor, should not be used unless explicitly intended.
*/
Tensor();
/**
* Default constructor with data provided which would be used to create the
* respective vulkan buffer and memory.
@ -43,8 +36,10 @@ class Tensor
* tensor
* @param tensorType Type for the tensor which is of type TensorTypes
*/
Tensor(const std::vector<float>& data,
TensorTypes tensorType = TensorTypes::eDevice);
Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
const std::vector<float>& data,
const TensorTypes& tensorType = TensorTypes::eDevice);
/**
* Destructor which is in charge of freeing vulkan resources unless they
@ -58,8 +53,8 @@ class Tensor
* would only be created for the tensors of type TensorType::eDevice as
* otherwise there is no need to copy from host memory.
*/
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device);
void rebuild(const std::vector<float>& data,
TensorTypes tensorType = TensorTypes::eDevice);
/**
* Destroys and frees the GPU resources which include the buffer and memory.
@ -91,26 +86,13 @@ class Tensor
* @return Unsigned integer representing the total number of elements
*/
uint32_t size();
/**
* Returns the shape of the tensor, which includes the number of dimensions
* and the size per dimension.
*
* @return Array containing the sizes for each dimension. Zero means
* respective dimension is not active.
*/
std::array<uint32_t, KP_MAX_DIM_SIZE> shape();
/**
* Retrieve the tensor type of the Tensor
*
* @return Tensor type of tensor
*/
TensorTypes tensorType();
/**
* Returns true if the tensor initialisation function has been carried out
* successful, which would mean that the buffer and memory will have been
* provisioned.
*/
bool isInit();
/**
* Sets / resets the vector data of the tensor. This function does not
@ -214,9 +196,6 @@ class Tensor
TensorTypes mTensorType = TensorTypes::eDevice;
std::array<uint32_t, KP_MAX_DIM_SIZE> mShape;
bool mIsInit = false;
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
void createBuffer(std::shared_ptr<vk::Buffer> buffer,
vk::BufferUsageFlags bufferUsageFlags);

View file

@ -0,0 +1,77 @@
#pragma once
#include <fstream>
#include "kompute/Core.hpp"
#include "kompute/shaders/shaderopmult.hpp"
#include "kompute/Algorithm.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
* Operation that provides a general abstraction that simplifies the use of
* algorithm and parameter components which can be used with shaders.
* By default it enables the user to provide a dynamic number of tensors
* which are then passed as inputs.
*/
class OpAlgoCreate : public OpBase
{
public:
/**
* Constructs the operation from the tensors it operates on and the
* algorithm it is associated with. No Vulkan device or command buffer is
* taken here; those are passed later to init() and record() respectively.
*
* @param tensors Tensors that are to be used in this operation
* @param algorithm Shared pointer to the kp::Algorithm used by this operation
*/
OpAlgoCreate(std::vector<std::shared_ptr<Tensor>> tensors,
std::shared_ptr<kp::Algorithm> algorithm);
/**
* Default destructor, which is in charge of destroying the algorithm
* components but does not destroy the underlying tensors
*/
virtual ~OpAlgoCreate() override;
/**
* Override of OpBase::init that receives the Vulkan devices the operation
* is to be initialised against.
*
* @param physicalDevice Vulkan physical device
* @param device Vulkan logical device
*/
virtual void init(
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* This records the commands that are to be sent to the GPU. This includes
* the barriers that ensure the memory has been copied before going in and
* out of the shader, as well as the dispatch operation that sends the
* shader processing to the gpu. This function also records the GPU memory
* copy of the output data for the staging buffer so it can be read by the
* host.
*
* @param commandBuffer Vulkan command buffer to record the commands into
*/
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
/**
* Does not perform any preEval commands.
*/
virtual void preEval() override;
/**
* Executes after the recorded commands are submitted, and performs a copy
* of the GPU Device memory into the staging buffer so the output data can
* be retrieved.
*/
virtual void postEval() override;
};
} // End namespace kp

View file

@ -1,14 +1,8 @@
#pragma once
#include <fstream>
#include "kompute/Core.hpp"
#include "kompute/shaders/shaderopmult.hpp"
#include "kompute/Algorithm.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
@ -19,15 +13,10 @@ namespace kp {
* By default it enables the user to provide a dynamic number of tensors
* which are then passed as inputs.
*/
class OpAlgoBase : public OpBase
class OpAlgoDispatch : public OpBase
{
public:
/**
* Base constructor, should not be used unless explicitly intended.
*/
OpAlgoBase();
/**
* Default constructor with parameters that provides the bare minimum
* requirements for the operations to be able to create and manage their
@ -40,12 +29,8 @@ class OpAlgoBase : public OpBase
* @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format)
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
const Workgroup& komputeWorkgroup = {},
const Constants& specializationConstants = {});
OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>> tensors,
std::shared_ptr<kp::Algorithm> algorithm);
/**
* Constructor that enables a file to be passed to the operation with
@ -59,13 +44,9 @@ class OpAlgoBase : public OpBase
* @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format)
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
std::string shaderFilePath,
const Workgroup& komputeWorkgroup = {},
const Constants& specializationConstants = {});
OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>>& tensors,
std::shared_ptr<kp::Algorithm>& algorithm,
std::string shaderFilePath);
/**
* Constructor that enables raw shader data to be passed to the main operation
@ -78,19 +59,15 @@ class OpAlgoBase : public OpBase
* @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
const std::vector<uint32_t>& shaderDataRaw,
const Workgroup& komputeWorkgroup = {},
const Constants& specializationConstants = {});
OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>>& tensors,
std::shared_ptr<kp::Algorithm>& algorithm,
const std::vector<uint32_t>& shaderDataRaw);
/**
* Default destructor, which is in charge of destroying the algorithm
* components but does not destroy the underlying tensors
*/
virtual ~OpAlgoBase() override;
virtual ~OpAlgoDispatch() override;
/**
* The init function is responsible for the initialisation of the algorithm
@ -98,7 +75,8 @@ class OpAlgoBase : public OpBase
* on the options provided. Further dependent classes can perform more
* specific checks such as ensuring tensors provided are initialised, etc.
*/
virtual void init() override;
virtual void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* This records the commands that are to be sent to the GPU. This includes
@ -108,7 +86,7 @@ class OpAlgoBase : public OpBase
* copy of the output data for the staging buffer so it can be read by the
* host.
*/
virtual void record() override;
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
/**
@ -123,21 +101,6 @@ class OpAlgoBase : public OpBase
*/
virtual void postEval() override;
protected:
// -------------- NEVER OWNED RESOURCES
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<Algorithm> mAlgorithm;
bool mFreeAlgorithm = false;
// -------------- ALWAYS OWNED RESOURCES
Workgroup mKomputeWorkgroup;
std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing
std::vector<uint32_t> mShaderDataRaw; ///< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content
virtual std::vector<uint32_t> fetchSpirvBinaryData();
};
} // End namespace kp

View file

@ -7,7 +7,7 @@
#include "kompute/Algorithm.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpAlgoBase.hpp"
#include "kompute/operations/OpAlgoCreate.hpp"
namespace kp {
@ -16,13 +16,9 @@ namespace kp {
* right hand and left hand side datapoints together with a single output.
* The expected data passed is two input tensors and one output tensor.
*/
class OpAlgoLhsRhsOut : public OpAlgoBase
class OpAlgoLhsRhsOut : public OpAlgoCreate
{
public:
/**
* Base constructor, should not be used unless explicitly intended.
*/
OpAlgoLhsRhsOut();
/**
* Default constructor with parameters that provides the bare minimum
@ -36,11 +32,8 @@ class OpAlgoLhsRhsOut : public OpAlgoBase
* @param freeTensors Whether operation manages the memory of the Tensors
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors,
const Workgroup& komputeWorkgroup = {});
OpAlgoLhsRhsOut(std::vector<std::shared_ptr<Tensor>>& tensors,
std::shared_ptr<Algorithm> algorithm);
/**
* Default destructor, which is in charge of destroying the algorithm
@ -54,7 +47,8 @@ class OpAlgoLhsRhsOut : public OpAlgoBase
* tensors, and creates the algorithm component which processes the
* computation.
*/
virtual void init() override;
virtual void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* This records the commands that are to be sent to the GPU. This includes
@ -64,7 +58,7 @@ class OpAlgoLhsRhsOut : public OpAlgoBase
* copy of the output data for the staging buffer so it can be read by the
* host.
*/
virtual void record() override;
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
/**
* Executes after the recorded commands are submitted, and performs a copy

View file

@ -3,6 +3,7 @@
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/Algorithm.hpp"
namespace kp {
@ -17,10 +18,6 @@ namespace kp {
class OpBase
{
public:
/**
* Base constructor, should not be used unless explicitly intended.
*/
OpBase() { KP_LOG_DEBUG("Compute OpBase base constructor"); }
/**
* Default constructor with parameters that provides the bare minimum
@ -32,17 +29,13 @@ class OpBase
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
*/
OpBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors)
OpBase(std::vector<std::shared_ptr<Tensor>>& tensors,
std::shared_ptr<Algorithm> algorithm)
{
KP_LOG_DEBUG("Compute OpBase constructor with params");
this->mPhysicalDevice = physicalDevice;
this->mDevice = device;
this->mCommandBuffer = commandBuffer;
this->mTensors = tensors;
this->mAlgorithm = algorithm;
this->mIsInit = false;
}
/**
@ -53,37 +46,89 @@ class OpBase
virtual ~OpBase()
{
KP_LOG_DEBUG("Kompute OpBase destructor started");
this->destroy();
}
if (!this->mDevice) {
KP_LOG_WARN("Kompute OpBase destructor called with empty device");
return;
}
// Returns the shared pointer stored in mAlgorithm, i.e. the algorithm that
// was handed to this operation's constructor.
virtual std::shared_ptr<kp::Algorithm> algorithm() {
return this->mAlgorithm;
}
if (this->mFreeTensors) {
KP_LOG_DEBUG("Kompute OpBase freeing tensors");
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
if (tensor && tensor->isInit()) {
tensor->freeMemoryDestroyGPUResources();
} else {
KP_LOG_WARN("Kompute OpBase expected to free "
"tensor but has already been freed.");
}
}
}
// Returns a copy of the vector of tensor pointers held in mTensors (the
// vector is returned by value, the tensors themselves are shared).
virtual std::vector<std::shared_ptr<kp::Tensor>> tensors() {
return this->mTensors;
}
// Returns the current value of the mIsInit flag.
virtual bool isInit() {
return this->mIsInit;
}
/**
* The init function is responsible for setting up all the resources and
* should be called after the Operation has been created.
*/
virtual void init() = 0;
// TODO: Potentially remove physicalDevice in favour of memoryProperties (for tensor)
// Initialises the resources this operation manages:
//  - throws std::runtime_error if the operation holds no tensors;
//  - when mManagesTensors is set, initialises every tensor against the given
//    devices, throwing if a tensor is null or has already been initialised;
//  - when mManagesAlgorithm is set, initialises the algorithm with the
//    logical device and the operation's tensors, throwing if no algorithm
//    was provided.
// NOTE(review): mIsInit is not updated here; presumably derived classes set
// it after calling this base implementation - confirm.
virtual void init(
  std::shared_ptr<vk::PhysicalDevice> physicalDevice,
  std::shared_ptr<vk::Device> device) {
    if (this->mTensors.empty()) {
        throw std::runtime_error("Kompute OpBase init called with 0 tensors");
    }

    if (this->mManagesTensors) {
        // Iterate by const reference: copying a shared_ptr per element only
        // adds atomic reference count traffic.
        for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
            // A null entry would otherwise be dereferenced below.
            if (!tensor) {
                throw std::runtime_error(
                  "Kompute OpBase init called with a null tensor");
            }
            if (tensor->isInit()) {
                // TODO: Evaluate whether throwing runtime error or just writing error log
                throw std::runtime_error(
                  "Kompute OpTensorCreate: Tensor has already been initialized");
            }
            tensor->init(physicalDevice, device);
        }
    }

    if (this->mManagesAlgorithm) {
        // Operations constructed without an algorithm hold an empty pointer.
        if (!this->mAlgorithm) {
            throw std::runtime_error(
              "Kompute OpBase init called to manage algorithm but no "
              "algorithm was provided");
        }
        this->mAlgorithm->init(device, this->mTensors);
    }
}
// Releases the resources this operation manages and marks it as not
// initialised:
//  - logs a warning (but still proceeds) if the operation was not initialised;
//  - when mManagesTensors is set, frees the GPU resources of every
//    initialised tensor and then clears the tensor list;
//  - when mManagesAlgorithm is set, frees the GPU resources of the algorithm
//    if it exists and is initialised.
virtual void destroy() {
    if (!this->mIsInit) {
        KP_LOG_WARN("Kompute OpBase destroy called but not initialised");
    }

    if (this->mManagesTensors) {
        for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
            // Guard against null entries as well as uninitialised tensors so
            // that a null pointer is never dereferenced during teardown.
            if (!tensor || !tensor->isInit()) {
                KP_LOG_WARN("Kompute OpBase attempted to free managed tensor "
                            "but tensor is not initialised");
            } else {
                KP_LOG_DEBUG("Kompute OpBase freeing tensor");
                tensor->freeMemoryDestroyGPUResources();
            }
        }
        this->mTensors.clear();
    }

    if (this->mManagesAlgorithm) {
        if (this->mAlgorithm && this->mAlgorithm->isInit()) {
            KP_LOG_DEBUG("Kompute OpBase freeing algorithm");
            this->mAlgorithm->freeMemoryDestroyGPUResources();
        } else {
            // Trailing space keeps the two adjacent literals from fusing
            // into "algorithmbut".
            KP_LOG_WARN("Kompute OpBase attempted to free managed algorithm "
                        "but algorithm is not initialised");
        }
    }

    this->mIsInit = false;
}
/**
* The record function is intended to only send a record command or run
* commands that are expected to record operations that are to be submitted
* as a batch into the GPU.
*/
virtual void record() = 0;
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) = 0;
/**
* Pre eval is called before the Sequence has called eval and submitted the commands to
@ -106,19 +151,14 @@ class OpBase
virtual void postEval() = 0;
protected:
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::PhysicalDevice>
mPhysicalDevice; ///< Vulkan Physical Device
std::shared_ptr<vk::Device> mDevice; ///< Vulkan Logical Device
std::shared_ptr<vk::CommandBuffer>
mCommandBuffer; ///< Vulkan Command Buffer
// -------------- OPTIONALLY OWNED RESOURCES
std::vector<std::shared_ptr<Tensor>>
mTensors; ///< Tensors referenced by operation that can be managed
///< optionally by operation
bool mFreeTensors = false; ///< Explicit boolean that specifies whether the
///< tensors are freed (if they are managed)
std::vector<std::shared_ptr<Tensor>> mTensors;
bool mManagesTensors = false;
std::shared_ptr<kp::Algorithm> mAlgorithm;
bool mManagesAlgorithm = false;
// -------------- ALWAYS OWNED RESOURCES
bool mIsInit;
};
} // End namespace kp

View file

@ -11,7 +11,7 @@
#include "kompute/Algorithm.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpAlgoBase.hpp"
#include "kompute/operations/OpAlgoCreate.hpp"
namespace kp {
@ -19,7 +19,7 @@ namespace kp {
* Operation that performs multiplication on two tensors and outputs the
* result on a third tensor.
*/
class OpMult : public OpAlgoBase
class OpMult : public OpAlgoCreate
{
public:
/**
@ -45,7 +45,7 @@ class OpMult : public OpAlgoBase
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors,
const Workgroup& komputeWorkgroup = {})
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup)
: OpAlgoCreate(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup)
{
KP_LOG_DEBUG("Kompute OpMult constructor with params");

View file

@ -14,8 +14,6 @@ namespace kp {
class OpTensorCopy : public OpBase
{
public:
OpTensorCopy();
/**
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation.
*
@ -24,10 +22,7 @@ class OpTensorCopy : public OpBase
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that will be used in the operation.
*/
OpTensorCopy(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors);
OpTensorCopy(std::vector<std::shared_ptr<Tensor>> tensors);
/**
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
@ -37,12 +32,13 @@ class OpTensorCopy : public OpBase
/**
* Performs basic checks such as ensuring there are at least two tensors provided, that they are initialised and that they are not of type TensorTypes::eStorage.
*/
void init() override;
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* Records the copy commands from the first tensor into all the other tensors provided. Also optionally records a barrier.
*/
void record() override;
void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
/**
* Does not perform any preEval commands.

View file

@ -0,0 +1,71 @@
#pragma once
#include "kompute/Core.hpp"
#include "kompute/operations/OpBase.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/Algorithm.hpp"
namespace kp {
/**
* Operation that is given a set of tensors and creates them through the
* kp::OpBase interface (init, record, preEval and postEval).
*
* NOTE(review): whether this operation also owns and frees the tensors is
* decided in the implementation, which is not visible from this header -
* confirm against the source file.
*/
class OpTensorCreate : public OpBase
{
public:
/**
* Constructs the operation from the tensors it is to create. No Vulkan
* device or command buffer is taken here; those are passed later to init()
* and record() respectively.
*
* @param tensors Tensors that are to be used in this operation
*/
OpTensorCreate(std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor for OpTensorCreate class. This OpTensorCreate destructor class should
* always be called to destroy and free owned resources unless it is
* intended to destroy the resources in the parent class.
*/
virtual ~OpTensorCreate() override;
/**
* The init function is responsible for setting up all the resources and
* should be called after the Operation has been created.
*
* @param physicalDevice Vulkan physical device
* @param device Vulkan logical device
*/
virtual void init(
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* Record runs the core actions to create the tensors. For device tensors
* it records a copyCommand to move the data from the staging tensor to the
* device tensor. The mapping for staging tensors happens in the init function
* not in the record function.
*
* @param commandBuffer Vulkan command buffer to record the commands into
*/
void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
/**
* Does not perform any preEval commands.
*/
virtual void preEval() override;
/**
* Performs a copy back into the main tensor to ensure that the data
* contained is the one that is now being stored in the GPU.
*/
virtual void postEval() override;
};
} // End namespace kp

View file

@ -1,9 +1,8 @@
#pragma once
#include "kompute/Core.hpp"
#include "kompute/operations/OpBase.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
@ -14,8 +13,6 @@ namespace kp {
class OpTensorSyncDevice : public OpBase
{
public:
OpTensorSyncDevice();
/**
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
*
@ -24,10 +21,7 @@ class OpTensorSyncDevice : public OpBase
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that will be used in the operation.
*/
OpTensorSyncDevice(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors);
OpTensorSyncDevice(std::vector<std::shared_ptr<Tensor>> tensors);
/**
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
@ -37,12 +31,13 @@ class OpTensorSyncDevice : public OpBase
/**
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
*/
void init() override;
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
*/
void record() override;
void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
/**
* Does not perform any preEval commands.

View file

@ -14,8 +14,6 @@ namespace kp {
class OpTensorSyncLocal : public OpBase
{
public:
OpTensorSyncLocal();
/**
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
*
@ -24,10 +22,7 @@ class OpTensorSyncLocal : public OpBase
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that will be used in the operation.
*/
OpTensorSyncLocal(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors);
OpTensorSyncLocal(std::vector<std::shared_ptr<Tensor>> tensors);
/**
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
@ -37,12 +32,13 @@ class OpTensorSyncLocal : public OpBase
/**
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
*/
void init() override;
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory.
*/
void record() override;
void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
/**
* Does not perform any preEval commands.

View file

@ -54,7 +54,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
auto startSync = std::chrono::high_resolution_clock::now();
for (uint32_t i = 0; i < numParallel; i++) {
mgr.evalOpDefault<kp::OpAlgoBase>(
mgr.evalOpDefault<kp::OpAlgoCreate>(
{ inputsSyncB[i] }, kp::Shader::compile_source(shader));
}
@ -86,7 +86,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
auto startAsync = std::chrono::high_resolution_clock::now();
for (uint32_t i = 0; i < numParallel; i++) {
mgrAsync.evalOpAsync<kp::OpAlgoBase>(
mgrAsync.evalOpAsync<kp::OpAlgoCreate>(
{ inputsAsyncB[i] },
"async" + std::to_string(i),
kp::Shader::compile_source(shader));
@ -153,10 +153,10 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
std::vector<uint32_t> result = kp::Shader::compile_source(shader);
mgr.evalOpAsync<kp::OpAlgoBase>(
mgr.evalOpAsync<kp::OpAlgoCreate>(
{ tensorA }, "asyncOne", kp::Shader::compile_source(shader));
mgr.evalOpAsync<kp::OpAlgoBase>(
mgr.evalOpAsync<kp::OpAlgoCreate>(
{ tensorB }, "asyncTwo", kp::Shader::compile_source(shader));
mgr.evalOpAwait("asyncOne");

View file

@ -27,7 +27,7 @@ TEST(TestDestroy, TestDestroyTensorSingle)
sq = mgr.sequence();
sq->begin();
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA }, kp::Shader::compile_source(shader));
sq->end();
@ -70,7 +70,7 @@ TEST(TestDestroy, TestDestroyTensorVector)
sq = mgr.sequence();
sq->begin();
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA, tensorB }, kp::Shader::compile_source(shader));
sq->end();
@ -135,7 +135,7 @@ TEST(TestDestroy, TestDestroySequenceSingle)
sq = mgr.sequence();
sq->begin();
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA }, kp::Shader::compile_source(shader));
sq->end();
@ -175,14 +175,14 @@ TEST(TestDestroy, TestDestroySequenceVector)
sq1 = mgr.sequence("One");
sq1->begin();
sq1->record<kp::OpAlgoBase>(
sq1->record<kp::OpAlgoCreate>(
{ tensorA }, kp::Shader::compile_source(shader));
sq1->end();
sq1->eval();
sq2 = mgr.sequence("Two");
sq2->begin();
sq2->record<kp::OpAlgoBase>(
sq2->record<kp::OpAlgoCreate>(
{ tensorA }, kp::Shader::compile_source(shader));
sq2->end();
sq2->eval();
@ -216,11 +216,11 @@ TEST(TestDestroy, TestDestroySequenceNameSingleInsideManager)
{
mgr.rebuild({ tensorA });
mgr.evalOp<kp::OpAlgoBase>(
mgr.evalOp<kp::OpAlgoCreate>(
{ tensorA }, "one",
kp::Shader::compile_source(shader));
mgr.evalOp<kp::OpAlgoBase>(
mgr.evalOp<kp::OpAlgoCreate>(
{ tensorA }, "two",
kp::Shader::compile_source(shader));
@ -256,7 +256,7 @@ TEST(TestDestroy, TestDestroySequenceNameSingleOutsideManager)
sq1 = mgr.sequence("One");
sq1->begin();
sq1->record<kp::OpAlgoBase>(
sq1->record<kp::OpAlgoCreate>(
{ tensorA }, kp::Shader::compile_source(shader));
sq1->end();
sq1->eval();
@ -289,11 +289,11 @@ TEST(TestDestroy, TestDestroySequenceNameVectorInsideManager)
{
mgr.rebuild({ tensorA });
mgr.evalOp<kp::OpAlgoBase>(
mgr.evalOp<kp::OpAlgoCreate>(
{ tensorA }, "one",
kp::Shader::compile_source(shader));
mgr.evalOp<kp::OpAlgoBase>(
mgr.evalOp<kp::OpAlgoCreate>(
{ tensorA }, "two",
kp::Shader::compile_source(shader));
@ -323,11 +323,11 @@ TEST(TestDestroy, TestDestroySequenceNameVectorOutsideManager)
{
mgr.rebuild({ tensorA });
mgr.evalOp<kp::OpAlgoBase>(
mgr.evalOp<kp::OpAlgoCreate>(
{ tensorA }, "one",
kp::Shader::compile_source(shader));
mgr.evalOp<kp::OpAlgoBase>(
mgr.evalOp<kp::OpAlgoCreate>(
{ tensorA }, "two",
kp::Shader::compile_source(shader));
@ -357,7 +357,7 @@ TEST(TestDestroy, TestDestroySequenceNameDefaultOutsideManager)
{
mgr.rebuild({ tensorA });
mgr.evalOpDefault<kp::OpAlgoBase>(
mgr.evalOpDefault<kp::OpAlgoCreate>(
{ tensorA },
kp::Shader::compile_source(shader));

View file

@ -41,7 +41,7 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
params,
std::vector<uint32_t>(
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
@ -120,7 +120,7 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
// Record op algo base
sq->begin();
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
params,
std::vector<uint32_t>(
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,

View file

@ -27,11 +27,11 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
{
sq->begin();
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA }, kp::Shader::compile_source(shader));
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA }, kp::Shader::compile_source(shader));
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA }, kp::Shader::compile_source(shader));
sq->record<kp::OpTensorSyncLocal>({ tensorA });
@ -72,19 +72,19 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
// Then perform the computations
sq->begin();
sq->record<kp::OpAlgoBase>({ tensorA },
sq->record<kp::OpAlgoCreate>({ tensorA },
kp::Shader::compile_source(shader));
sq->end();
sq->eval();
sq->begin();
sq->record<kp::OpAlgoBase>({ tensorA },
sq->record<kp::OpAlgoCreate>({ tensorA },
kp::Shader::compile_source(shader));
sq->end();
sq->eval();
sq->begin();
sq->record<kp::OpAlgoBase>({ tensorA },
sq->record<kp::OpAlgoCreate>({ tensorA },
kp::Shader::compile_source(shader));
sq->end();
sq->eval();
@ -121,7 +121,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
sq->begin();
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA }, kp::Shader::compile_source(shader));
sq->end();
@ -134,7 +134,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
sq->begin();
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA }, kp::Shader::compile_source(shader));
sq->end();
@ -147,7 +147,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
sq->begin();
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA }, kp::Shader::compile_source(shader));
sq->end();
@ -205,7 +205,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
sq->begin();
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA }, kp::Shader::compile_source(shader));
sq->end();
@ -263,7 +263,7 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate)
}
)");
mgr.evalOpDefault<kp::OpAlgoBase>(
mgr.evalOpDefault<kp::OpAlgoCreate>(
{ tensorInA, tensorInB, tensorOut },
kp::Shader::compile_source(shader));
@ -306,7 +306,7 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate)
mgr.evalOpDefault<kp::OpTensorSyncDevice>(
{ tensorInA, tensorInB, tensorOut });
mgr.evalOpDefault<kp::OpAlgoBase>(
mgr.evalOpDefault<kp::OpAlgoCreate>(
{ tensorInA, tensorInB, tensorOut },
kp::Shader::compile_source(shader));
@ -339,7 +339,7 @@ TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope)
sq = mgr.sequence();
sq->begin();
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA }, kp::Shader::compile_source(shader));
sq->end();

View file

@ -51,7 +51,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies)
sq->begin();
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA, tensorB },
kp::Shader::compile_source(shader));

View file

@ -5,7 +5,7 @@
#include "kompute_test/shaders/shadertest_op_custom_shader.hpp"
TEST(TestOpAlgoBase, ShaderRawDataFromConstructor)
TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor)
{
kp::Manager mgr;
@ -28,7 +28,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor)
}
)");
mgr.evalOpDefault<kp::OpAlgoBase>(
mgr.evalOpDefault<kp::OpAlgoCreate>(
{ tensorA, tensorB }, kp::Shader::compile_source(shader));
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
@ -37,7 +37,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor)
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
}
TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor)
TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
{
kp::Manager mgr;
@ -45,7 +45,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor)
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
mgr.rebuild({ tensorA, tensorB });
mgr.evalOpDefault<kp::OpAlgoBase>(
mgr.evalOpDefault<kp::OpAlgoCreate>(
{ tensorA, tensorB },
std::vector<uint32_t>(
(uint32_t*)kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv,
@ -59,7 +59,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor)
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
}
TEST(TestOpAlgoBase, ShaderCompiledDataFromFile)
TEST(TestOpAlgoCreate, ShaderCompiledDataFromFile)
{
kp::Manager mgr;
@ -67,7 +67,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile)
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
mgr.rebuild({ tensorA, tensorB });
mgr.evalOpDefault<kp::OpAlgoBase>(
mgr.evalOpDefault<kp::OpAlgoCreate>(
{ tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv");
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });

View file

@ -33,7 +33,7 @@ TEST(TestSpecializationConstants, TestTwoConstants)
auto spec = kp::Constants({5.0, 0.3});
sq->begin();
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA, tensorB },
kp::Shader::compile_source(shader),
kp::Workgroup(), spec);

View file

@ -23,7 +23,7 @@ TEST(TestWorkgroup, TestSimpleWorkgroup)
sq = mgr.sequence();
sq->begin();
sq->record<kp::OpAlgoBase>(
sq->record<kp::OpAlgoCreate>(
{ tensorA, tensorB },
std::vector<uint32_t>(
(uint32_t*)kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv,