Initial checkpoint with reasonable workflow
This commit is contained in:
parent
5db9abd06e
commit
9aae5d69db
46 changed files with 1158 additions and 695 deletions
|
|
@ -266,23 +266,23 @@ The type of tensor to initialize @param syncDataToGPU Whether to sync
|
|||
the data to GPU memory @returns Initialized Tensor with memory Syncd
|
||||
to GPU device)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase =
|
||||
static const char *__doc_kp_OpAlgoCreate =
|
||||
R"doc(Operation that provides a general abstraction that simplifies the use
|
||||
of algorithm and parameter components which can be used with shaders.
|
||||
By default it enables the user to provide a dynamic number of tensors
|
||||
which are then passed as inputs.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_x = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_x = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_y = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_y = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_z = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_z = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_OpAlgoBase = R"doc(Base constructor, should not be used unless explicitly intended.)doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate = R"doc(Base constructor, should not be used unless explicitly intended.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_OpAlgoBase_2 =
|
||||
static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_2 =
|
||||
R"doc(Default constructor with parameters that provides the bare minimum
|
||||
requirements for the operations to be able to create and manage their
|
||||
sub-components.
|
||||
|
|
@ -295,7 +295,7 @@ shaderFilePath Optional parameter to specify the shader to load
|
|||
(either in spirv or raw format) @param komputeWorkgroup Optional
|
||||
parameter to specify the layout for processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_OpAlgoBase_3 =
|
||||
static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_3 =
|
||||
R"doc(Constructor that enables a file to be passed to the operation with the
|
||||
contents of the shader. This can be either in raw format or in
|
||||
compiled SPIR-V binary format.
|
||||
|
|
@ -308,7 +308,7 @@ shaderFilePath Parameter to specify the shader to load (either in
|
|||
spirv or raw format) @param komputeWorkgroup Optional parameter to
|
||||
specify the layout for processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_OpAlgoBase_4 =
|
||||
static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_4 =
|
||||
R"doc(Constructor that enables raw shader data to be passed to the main
|
||||
operation which can be either in raw shader glsl code or in compiled
|
||||
SPIR-V binary.
|
||||
|
|
@ -321,37 +321,37 @@ shaderDataRaw Optional parameter to specify the shader data either in
|
|||
binary or raw form @param komputeWorkgroup Optional parameter to
|
||||
specify the layout for processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_fetchSpirvBinaryData = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_fetchSpirvBinaryData = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_init =
|
||||
static const char *__doc_kp_OpAlgoCreate_init =
|
||||
R"doc(The init function is responsible for the initialisation of the
|
||||
algorithm component based on the parameters specified, and allows for
|
||||
extensibility on the options provided. Further dependent classes can
|
||||
perform more specific checks such as ensuring tensors provided are
|
||||
initialised, etc.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mAlgorithm = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_mAlgorithm = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mFreeAlgorithm = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_mFreeAlgorithm = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mKomputeWorkgroup = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_mKomputeWorkgroup = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mShaderDataRaw =
|
||||
static const char *__doc_kp_OpAlgoCreate_mShaderDataRaw =
|
||||
R"doc(< Optional member variable which can be provided to contain either the
|
||||
raw shader content or the spirv binary content)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mShaderFilePath =
|
||||
R"doc(< Optional member variable which can be provided for the OpAlgoBase to
|
||||
static const char *__doc_kp_OpAlgoCreate_mShaderFilePath =
|
||||
R"doc(< Optional member variable which can be provided for the OpAlgoCreate to
|
||||
find the data automatically and load for processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_postEval =
|
||||
static const char *__doc_kp_OpAlgoCreate_postEval =
|
||||
R"doc(Executes after the recorded commands are submitted, and performs a
|
||||
copy of the GPU Device memory into the staging buffer so the output
|
||||
data can be retrieved.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_preEval = R"doc(Does not perform any preEval commands.)doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_preEval = R"doc(Does not perform any preEval commands.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_record =
|
||||
static const char *__doc_kp_OpAlgoCreate_record =
|
||||
R"doc(This records the commands that are to be sent to the GPU. This
|
||||
includes the barriers that ensure the memory has been copied before
|
||||
going in and out of the shader, as well as the dispatch operation that
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ PYBIND11_MODULE(kp, m) {
|
|||
.def("record_tensor_sync_local", &kp::Sequence::record<kp::OpTensorSyncLocal>,
|
||||
"Records operation to sync tensor(s) from GPU memory to local memory")
|
||||
.def("record_algo_file", &kp::Sequence::record<
|
||||
kp::OpAlgoBase,
|
||||
kp::OpAlgoCreate,
|
||||
const std::string&,
|
||||
kp::Workgroup,
|
||||
kp::Constants>,
|
||||
|
|
@ -148,7 +148,7 @@ PYBIND11_MODULE(kp, m) {
|
|||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
return self.record<kp::OpAlgoBase>(
|
||||
return self.record<kp::OpAlgoCreate>(
|
||||
tensors, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
|
||||
},
|
||||
"Records an operation using a custom shader provided as spirv bytes",
|
||||
|
|
@ -211,7 +211,7 @@ PYBIND11_MODULE(kp, m) {
|
|||
.def("eval_tensor_sync_local_def", &kp::Manager::evalOpDefault<kp::OpTensorSyncLocal>,
|
||||
"Evaluates operation to sync tensor(s) from GPU memory to local memory with new anonymous Sequence")
|
||||
.def("eval_algo_file_def", &kp::Manager::evalOpDefault<
|
||||
kp::OpAlgoBase,
|
||||
kp::OpAlgoCreate,
|
||||
const std::string&,
|
||||
kp::Workgroup,
|
||||
kp::Constants>,
|
||||
|
|
@ -226,7 +226,7 @@ PYBIND11_MODULE(kp, m) {
|
|||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
self.evalOpDefault<kp::OpAlgoBase>(
|
||||
self.evalOpDefault<kp::OpAlgoCreate>(
|
||||
tensors, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
|
||||
},
|
||||
"Evaluates an operation using a custom shader provided as spirv bytes with new anonymous Sequence",
|
||||
|
|
@ -240,7 +240,7 @@ PYBIND11_MODULE(kp, m) {
|
|||
.def("eval_tensor_sync_local", &kp::Manager::evalOp<kp::OpTensorSyncLocal>,
|
||||
"Evaluates operation to sync tensor(s) from GPU memory to local memory with explicitly named Sequence")
|
||||
.def("eval_algo_file", &kp::Manager::evalOp<
|
||||
kp::OpAlgoBase,
|
||||
kp::OpAlgoCreate,
|
||||
const std::string&,
|
||||
kp::Workgroup,
|
||||
kp::Constants>,
|
||||
|
|
@ -256,7 +256,7 @@ PYBIND11_MODULE(kp, m) {
|
|||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
self.evalOp<kp::OpAlgoBase>(
|
||||
self.evalOp<kp::OpAlgoCreate>(
|
||||
tensors, sequenceName, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
|
||||
},
|
||||
"Evaluates an operation using a custom shader provided as spirv bytes with explicitly named Sequence",
|
||||
|
|
@ -270,7 +270,7 @@ PYBIND11_MODULE(kp, m) {
|
|||
.def("eval_async_tensor_sync_local_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorSyncLocal>,
|
||||
"Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory with anonymous Sequence")
|
||||
.def("eval_async_algo_file_def", &kp::Manager::evalOpAsyncDefault<
|
||||
kp::OpAlgoBase,
|
||||
kp::OpAlgoCreate,
|
||||
const std::string&,
|
||||
kp::Workgroup,
|
||||
kp::Constants>,
|
||||
|
|
@ -285,7 +285,7 @@ PYBIND11_MODULE(kp, m) {
|
|||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
self.evalOpAsyncDefault<kp::OpAlgoBase>(
|
||||
self.evalOpAsyncDefault<kp::OpAlgoCreate>(
|
||||
tensors, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
|
||||
},
|
||||
"Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with anonymous Sequence",
|
||||
|
|
@ -299,7 +299,7 @@ PYBIND11_MODULE(kp, m) {
|
|||
.def("eval_async_tensor_sync_local", &kp::Manager::evalOpAsync<kp::OpTensorSyncLocal>,
|
||||
"Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory with explicitly named Sequence")
|
||||
.def("eval_async_algo_file", &kp::Manager::evalOpAsync<
|
||||
kp::OpAlgoBase,
|
||||
kp::OpAlgoCreate,
|
||||
const std::string&,
|
||||
kp::Workgroup,
|
||||
kp::Constants>,
|
||||
|
|
@ -315,7 +315,7 @@ PYBIND11_MODULE(kp, m) {
|
|||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
self.evalOpAsync<kp::OpAlgoBase>(
|
||||
self.evalOpAsync<kp::OpAlgoCreate>(
|
||||
tensors, sequenceName, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
|
||||
},
|
||||
"Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with explicitly named Sequence",
|
||||
|
|
|
|||
|
|
@ -28,6 +28,424 @@ def test_opalgobase_file():
|
|||
assert tensor_out.data() == [2.0, 4.0, 6.0]
|
||||
|
||||
|
||||
params = [kp.Tensor([2, 2, 2]), kp.Tensor([1, 2, 3]), kp.Tensor([0, 0, 0])]
|
||||
|
||||
mgr = kp.Manager()
|
||||
op_ct = kp.OpTensorCreate(params)
|
||||
op_ct = mgr.rebuild(op_ct)
|
||||
mgr.eval_op(op_ct)
|
||||
|
||||
algo = kp.Algo(params, spirv)
|
||||
op_ac = kp.OpAlgoCreate(algo)
|
||||
op_ac = mgr.rebuild(op_ac)
|
||||
mgr.eval_op(op_ac)
|
||||
|
||||
op_ac = kp.OpAlgoCreate(kp.Algo(params, spirv))
|
||||
mgr.eval_op(kp.OpAlgoCreate(algo))
|
||||
|
||||
|
||||
mgr = kp.Manager()
|
||||
|
||||
op_ct = kp.OpTensorCreate(mgr, params) # This initialises operation
|
||||
op_ct.eval()
|
||||
|
||||
algo = kp.Algo(params, spirv)
|
||||
op_ac = kp.OpAlgoCreate(mgr, algo)
|
||||
op_ct.eval()
|
||||
|
||||
op_tsd = kp.OpTensorSyncDevice(mgr, params)
|
||||
op_ad = kp.OpAlgoDispatch(mgr, algo)
|
||||
op_tsl = kp.OpTensorSyncLocal(mgr, params)
|
||||
|
||||
sq = kp.Sequence(mgr, "newSeq")
|
||||
sq.record([op_tsd, op_ad, op_tsl])
|
||||
sq.eval()
|
||||
sq.destroy()
|
||||
|
||||
# Explore consistent interface:
|
||||
op_tsd = kp.OpTensorSyncDevice(sq, params)
|
||||
op_ad = kp.OpAlgoDispatch(sq, algo)
|
||||
op_tsl = kp.OpTensorSyncLocal(sq, params)
|
||||
|
||||
op_tsd.record()
|
||||
op_ad.record()
|
||||
op_tsl.record()
|
||||
|
||||
sq.eval()
|
||||
|
||||
|
||||
|
||||
auto params = ...;
|
||||
std::string shader = "...";
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
// Example passing mgr
|
||||
kp::Manager mgr;
|
||||
|
||||
kp::OpTensorCreate op_tc(mgr, params);
|
||||
op_tc.eval()
|
||||
|
||||
kp::Algorithm algo(params, spirv);
|
||||
kp::OpAlgoCreate op_ac(mgr, algo);
|
||||
op_ac.eval()
|
||||
|
||||
op_ac.destroy()
|
||||
op_tc.destroy()
|
||||
|
||||
kp::OpTensorAlgoCreate op_c(mgr, params, algo);
|
||||
op_c.eval()
|
||||
|
||||
kp::Sequence sq(mgr);
|
||||
|
||||
kp::OpTensorSyncDevice op_tsd(mgr, params);
|
||||
kp::OpAlgoDispatch op_ad(mgr, algo);
|
||||
kp::OpTensorSyncLocal op_tsl(mgr, params);
|
||||
|
||||
sq.record({op_tsd, op_ad, op_tsl})
|
||||
|
||||
for(...) {
|
||||
sq.eval();
|
||||
|
||||
tensorA...
|
||||
}
|
||||
|
||||
######
|
||||
#######
|
||||
#######
|
||||
#######
|
||||
#######
|
||||
######
|
||||
// Example not passing mgr
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::OpTensorCreate> op_tc_1{ new kp::OpTensorCreate(params) };
|
||||
auto sq_1 = mgr.eval(op_tc_1); // Initialises and stores op as part of new sequence
|
||||
mgr.eval(op_tc_1); // Fails as this op can only be "initialised" once
|
||||
mgr.destroy(op_tc_1);
|
||||
mgr.eval(op_tc_1); // This works as it's a new setup
|
||||
mgr.eval<kp::OpTensorCreate>(params); // Fails as tensors already created
|
||||
// NOT ALLOWED TO DELETE JUST TENSORS ANYMORE - SEE BELOW
|
||||
mgr.destroy(params); // Sends to inconsistent state as op_tc_1 will still destroy these parameters
|
||||
mgr.destroy(op_tc_1, recursive=false); // Destroys only operation, which is useful when you need to ensure another operation owns the parameters
|
||||
auto op_tc_2 = mgr.eval<kp::OpTensorCreate>(params);
|
||||
std::shared_ptr<kp::OpTensorCreate> op_tc_2{ new kp::OpTensorCreate(params) }; // fails as tensors already created
|
||||
op_tc_2.destroy(); // Manager still holds dangling reference so requires explicit termination in manager
|
||||
mgr.destroy(op_tc_2);
|
||||
auto op_tc_3 = mgr.eval({ new kp::OpTensorCreate(params) });
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo{ new kp::Algorithm(params, spirv, kp::Workgroup(), kp::SpecConst(), kp::PushConst()) };
|
||||
std::shared_ptr<kp::OpAlgoCreate> op_ac_1{ new kp::OpAlgoCreate(algo) };
|
||||
mgr.eval(op_ac_1); // Initialises and stores op as part of manager
|
||||
mgr.eval(op_ac_1); // Fails as this op can only be "initialised" once
|
||||
mgr.destroy(op_ac_1);
|
||||
|
||||
std::shared_ptr<kp::OpAlgoCreate> op_ac_2 =
|
||||
mgr.eval({ new kp::OpAlgoCreate(params, { new kp::Algorithm(spirv) }) });
|
||||
|
||||
std::shared_ptr<kp::OpAlgoMultCreate> op_amc{ new kp::OpAlgoMultCreate(params) };
|
||||
mgr.eval(op_amc);
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo_mult = op_amc.algorithm()
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = op_amc.tensors()
|
||||
|
||||
auto op_tsd = std::make_shared<kp::OpTensorSyncDevice>(params);
|
||||
auto op_ad = std::make_shared<kp::OpAlgoSetPushConst>(algo);
|
||||
auto op_ad = std::make_shared<kp::OpAlgoDispatch>(algo);
|
||||
auto op_tsl = std::make_shared<kp::OpTensorSyncLocal>(mgr, params);
|
||||
|
||||
op_params = {op_tsd, op_ad, op_tsl};
|
||||
|
||||
mgr.record(op_params);
|
||||
mgr.eval(); // Runs recorded default sequence
|
||||
|
||||
mgr.record(op_params, clear=false); // Non-create ops ok if rerun
|
||||
mgr.eval(); // Runs the recorded params twice
|
||||
|
||||
mgr.record("namedSeq", op_params);
|
||||
mgr.eval("namedSeq");
|
||||
|
||||
kp::Manager mgrAsync(0, {0, 2});
|
||||
mgr.sequence("namedSeq2", 0); // Create named sequence with queue in index 0
|
||||
mgr.sequence("namedSeq3", 1);
|
||||
|
||||
mgr.eval_async("namedSeq2", op_params); // Clear, record params and eval
|
||||
mgr.eval_async("namedSeq3", op_params); // Clear, record params and eval
|
||||
|
||||
mgr.eval_await("namedSeq2");
|
||||
mgr.eval_await("namedSeq3");
|
||||
|
||||
mgr.destroy("namedSeq"); // Destroy named sequence
|
||||
mgr.destroy({"namedSeq2", "namedSeq3"}); // Destroy multiple named sequences
|
||||
mgr.destroy("namedSeq"); // Error
|
||||
|
||||
|
||||
|
||||
|
||||
mgr = kp.Manager(0, [0, 2])
|
||||
|
||||
// Manager does not need to manage seq anymore
|
||||
sq_1 = kp.Sequence(mgr, 0)
|
||||
|
||||
t1 = kp.Tensor(sq_1, [0, 0, 0])
|
||||
t2 = kp.Tensor(sq_1, [0, 1, 2])
|
||||
|
||||
algo = kp.Algorithm(sq_1)
|
||||
|
||||
op_tc = kp.OpTensorCreate(sq_1, params)
|
||||
op_tsd = kp.OpTensorSyncDevice(sq_1, params)
|
||||
op_ac = kp.OpAlgoCreate(sq_1, algo)
|
||||
op_ad = kp.OpAlgoDispatch(sq_1, algo)
|
||||
|
||||
sq_1.clear()
|
||||
|
||||
op_tc.record()
|
||||
op_tsd.record()
|
||||
op_ac.record()
|
||||
op_ad.record()
|
||||
op_ad.record()
|
||||
op_ad.record()
|
||||
|
||||
sq_1.eval()
|
||||
|
||||
|
||||
std::shared_ptr<kp::Manager> mgr = kp::ManagerSP(0, {0, 1});
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq_2 = kp::SequenceSP(mgr, 1)
|
||||
|
||||
std::shared_ptr<kp::Tensor> t1 = kp::TensorSP(sq_2, {1, 2, 3});
|
||||
std::shared_ptr<kp::Tensor> t2 = kp::TensorSP(sq_2, {2, 3, 4});
|
||||
|
||||
auto params = ...
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo2 = kp::AlgorithmSP(sq_2, params, spirv, workgroup);
|
||||
|
||||
// How do we deal with this?
|
||||
{
|
||||
auto op_1 = kp::OpTensorSyncDevice(sq_2, params)
|
||||
auto op_2 = kp::OpAlgoDispatch(sq_2, algo)
|
||||
}
|
||||
|
||||
sq_2.eval()
|
||||
|
||||
|
||||
// HEAP ONLY - This would fail
|
||||
|
||||
kp::Manager mgr = kp::Manager(0, {0, 1});
|
||||
|
||||
kp::Sequence sq_2 = kp::Sequence(mgr, 1)
|
||||
|
||||
kp::Tensor t1 = kp::Tensor(sq_2, {1, 2, 3});
|
||||
kp::Tensor t2 = kp::Tensor(sq_2, {2, 3, 4});
|
||||
|
||||
auto params = ...
|
||||
|
||||
kp::Algorithm algo2 = kp::AlgorithmSP(sq_2, params, spirv, workgroup);
|
||||
|
||||
// How do we deal with this?
|
||||
{
|
||||
auto op_1 = kp::OpTensorSyncDevice(sq_2, params)
|
||||
auto op_2 = kp::OpAlgoDispatch(sq_2, algo)
|
||||
}
|
||||
|
||||
sq_2.eval()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
kp::Manager mgr = kp::Manager(0, {0, 1});
|
||||
|
||||
kp::Sequence sq_2 = kp::Sequence(mgr, 1)
|
||||
|
||||
kp::Tensor t1 = kp::Tensor(sq_2, {1, 2, 3});
|
||||
kp::Tensor t2 = kp::Tensor(sq_2, {2, 3, 4});
|
||||
|
||||
auto params = ...
|
||||
|
||||
kp::Algorithm* algo2 = new kp::Algorithm(sq_2, params, spirv, workgroup);
|
||||
|
||||
// How do we deal with this?
|
||||
{
|
||||
auto op_1 = kp::OpTensorSyncDevice(sq_2, params)
|
||||
auto op_2 = kp::OpAlgoDispatch(sq_2, algo)
|
||||
}
|
||||
|
||||
sq_2.eval()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
kp::Manager mgr = kp::Manager;
|
||||
|
||||
auto sq_2 = mgr.sequence()
|
||||
|
||||
{
|
||||
// What if we want to use tensor in a different sequence?
|
||||
auto t1 = sq_2.tensor({1, 2, 3});
|
||||
auto t2 = sq_2.tensor({1, 2, 3});
|
||||
|
||||
auto algo2 = sq_2.algorithm();
|
||||
|
||||
sq_2.record(kp::OpTensorRebuild({ t1 }))
|
||||
sq_2.record(kp::OpAlgoRebuild(params, algo2, spirv))
|
||||
sq_2.record(kp::OpTensorSyncDevice(prams))
|
||||
sq_2.record(kp::OpAlgoDispatch(prams, algo2))
|
||||
}
|
||||
|
||||
sq_2.eval()
|
||||
|
||||
|
||||
|
||||
kp::Manager mgr = kp::Manager;
|
||||
|
||||
auto t1 = mgr.tensor({1, 2, 3}); // Held as weak ptr but passed as shared
|
||||
auto t2 = mgr.tensor({1, 2, 3});
|
||||
|
||||
auto algo2 = mgr.algorithm();
|
||||
|
||||
{
|
||||
auto sq_2 = mgr.sequence()
|
||||
|
||||
{
|
||||
sq_2.record(kp::OpTensorRebuild({ t1 })) // record only supports move operator &&
|
||||
sq_2.record(kp::OpAlgoRebuild(params, algo2, spirv))
|
||||
sq_2.record(kp::OpTensorSyncDevice(prams))
|
||||
sq_2.record(kp::OpAlgoDispatch(prams, algo2))
|
||||
}
|
||||
|
||||
sq_2.eval()
|
||||
}
|
||||
|
||||
|
||||
|
||||
// What about only tensors being init with it
|
||||
|
||||
|
||||
{
|
||||
kp::Manager mgr = kp::Manager;
|
||||
|
||||
auto t0 = mgr.tensor({0, 0, 0})
|
||||
|
||||
{
|
||||
auto t1 = mgr.tensor({1, 2, 3}); // Held as weak ptr but passed as shared (refc 1)
|
||||
|
||||
{
|
||||
auto sq_2 = mgr.sequence()
|
||||
|
||||
{
|
||||
|
||||
auto t2 = mgr.tensor({1, 2, 3}); // Held as weak ptr but passed as shared (refc 1)
|
||||
auto algo2 = mgr.algorithm(); // Held as weak ptr but passed as shared (refc 1)
|
||||
|
||||
params = {t1, t2}
|
||||
|
||||
sq_2.record(kp::OpTensorRebuild(params, {1, 2, 3, 4})) // Refc is now 2 for 3 for params
|
||||
sq_2.record(kp::OpAlgoRebuild(params, algo2, spirv)) // refc is now 2 for algo2, 3 for params
|
||||
sq_2.record(kp::OpTensorSyncDevice(prams)) // refc for params 4
|
||||
sq_2.record(kp::OpAlgoDispatch(prams, algo2)) // refc for params 5, 3 for algo2
|
||||
}
|
||||
|
||||
sq_2.eval() // all refcs still valid
|
||||
} // seq destroyed so refc for algo2 and t2 drops to 0, gets destroyed, t1 has 1
|
||||
} // t1 refc drops to 0, gets destroyed
|
||||
// refc of t0 is still 1
|
||||
|
||||
mgr.gc() // Iterates through all tensor, sequence and algo weak_ptr and removes unused
|
||||
|
||||
// can we have something like
|
||||
mgr.sequence()
|
||||
.record(kp::OpTensorRebuild(params, {1, 2, 3, 4}))
|
||||
.record(kp::OpAlgoDispatch(params, algo2))
|
||||
.eval();
|
||||
|
||||
}// refc is destroyed by manager manually, the rest are empty shells so ignored
|
||||
|
||||
|
||||
|
||||
|
||||
kp::Manager mgr = kp::Manager(0, {0, 1});
|
||||
|
||||
std::shared_ptr<kp::Tensor> t1 = mgr.tensor({1, 2, 3});
|
||||
std::shared_ptr<kp::Tensor> t2 = mgr.tensor({1, 2, 3});
|
||||
|
||||
auto params = ...
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm(params, spirv, workgroup);
|
||||
|
||||
sq_2.record<kp::OpTensorSyncDevice>(prams)
|
||||
sq_2.record<kp::OpAlgoDispatch>(algo)
|
||||
|
||||
|
||||
// WHY NO MORE DESTROY TENSORS:
|
||||
|
||||
* std::shared_ptr<kp::OpTensorCreate> op_tc1{ kp::OpTensorCreate(params) };
|
||||
* {
|
||||
* std::shared_ptr<kp::OpTensorCreate> op_tc2{ kp::OpTensorCreate(params) };
|
||||
* mgr.eval(op_tc2);
|
||||
* mgr.destroy(params);
|
||||
*
|
||||
* mgr.eval(op_tc1);
|
||||
*
|
||||
* } // op_tc1 is destroyed and all parameters are freed
|
||||
|
||||
|
||||
|
||||
// NO LONGER ALLOWED: Mainly as manager now needs to register ops
|
||||
// If we still want it, then sequence will have to hold ref to manager
|
||||
auto sq = mgr.sequence();
|
||||
|
||||
auto op_tsd = std::make_shared<kp::OpTensorSyncDevice>(params);
|
||||
auto op_ad = std::make_shared<kp::OpAlgoDispatch>(algo);
|
||||
auto op_tsl = std::make_shared<kp::OpTensorSyncLocal>(mgr, params);
|
||||
|
||||
sq.record({op_tsd, op_ad, op_tsl}); // Clear and record
|
||||
sq.eval();
|
||||
sq.record({op_tsd, op_ad, op_tsl}, clear=false); // record on top
|
||||
sq.eval();
|
||||
sq.clear(); // explicitly clear
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
mgr = kp.Manager()
|
||||
|
||||
op_ct = kp.OpTensorCreate(params)
|
||||
mgr.eval(op_ct)
|
||||
|
||||
algo = kp.Algo(params, spirv)
|
||||
op_ac = kp.OpAlgoCreate(algo)
|
||||
mgr.eval(op_ac) # Runs init on operator function (below shows explicit steps)
|
||||
|
||||
op_tsd = kp.OpTensorSyncDevice(params)
|
||||
op_ad = kp.OpAlgoDispatch(algo)
|
||||
op_tsl = kp.OpTensorSyncLocal(params)
|
||||
|
||||
sq = mgr.sequence()
|
||||
sq.record([op_tsd, op_ad, op_tsl])
|
||||
sq.eval()
|
||||
sq.eval()
|
||||
sq.eval()
|
||||
|
||||
mgr.eval(op_ac) # Would fail as algo is initialised
|
||||
mgr.destroy(op_ac) # Destroys Op and Algo owned object
|
||||
mgr.eval(op_ac) # Succeeds with new
|
||||
mgr.destroy(op_ac)
|
||||
mgr.init(op_ac)
|
||||
mgr.eval(op_ac, init=False)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def test_shader_str():
|
||||
"""
|
||||
Test basic OpAlgoBase operation
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue