Merge pull request #164 from EthicalML/160_op_memory
Amend memory hierarchy to enable push constants and a functional interface for more flexible operations
This commit is contained in:
commit
672cf22bc1
61 changed files with 3128 additions and 4852 deletions
1
.ccls
1
.ccls
|
|
@ -19,6 +19,7 @@
|
|||
-I./external/googletest/googletest/include/
|
||||
-I./external/glslang/
|
||||
-I./external/spdlog/include/
|
||||
-I./external/fmt/include/
|
||||
-I./src/include/
|
||||
-I./single_include/
|
||||
-I./vk_ndk_wrapper_include/
|
||||
|
|
|
|||
2
Makefile
2
Makefile
|
|
@ -57,7 +57,6 @@ MK_KOMPUTE_EXTRA_CXX_FLAGS ?= ""
|
|||
mk_cmake:
|
||||
cmake \
|
||||
-Bbuild \
|
||||
$(MK_CMAKE_EXTRA_FLAGS) \
|
||||
-DKOMPUTE_EXTRA_CXX_FLAGS=$(MK_KOMPUTE_EXTRA_CXX_FLAGS) \
|
||||
-DCMAKE_BUILD_TYPE=$(MK_BUILD_TYPE) \
|
||||
-DCMAKE_INSTALL_PREFIX=$(MK_INSTALL_PATH) \
|
||||
|
|
@ -69,6 +68,7 @@ mk_cmake:
|
|||
-DKOMPUTE_OPT_BUILD_SINGLE_HEADER=1 \
|
||||
-DKOMPUTE_OPT_ENABLE_SPDLOG=1 \
|
||||
-DKOMPUTE_OPT_CODE_COVERAGE=1 \
|
||||
$(MK_CMAKE_EXTRA_FLAGS) \
|
||||
-G "Unix Makefiles"
|
||||
|
||||
mk_build_all:
|
||||
|
|
|
|||
142
README.md
142
README.md
|
|
@ -56,35 +56,65 @@ int main() {
|
|||
// 2. Create and initialise Kompute Tensors through manager
|
||||
auto tensorInA = mgr.tensor({ 2., 2., 2. });
|
||||
auto tensorInB = mgr.tensor({ 1., 2., 3. });
|
||||
auto tensorOut = mgr.tensor({ 0., 0., 0. });
|
||||
auto tensorOutA = mgr.tensor({ 0., 0., 0. });
|
||||
auto tensorOutB = mgr.tensor({ 0., 0., 0. });
|
||||
|
||||
// 3. Specify "multiply shader" code (can also be raw string, spir-v bytes or file path)
|
||||
std::string shaderString = (R"(
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = {tensorInA, tensorInB, tensorOutA, tensorOutB};
|
||||
|
||||
// 3. Create algorithm based on shader (supports buffers & push/spec constants)
|
||||
std::string shader = (R"(
|
||||
#version 450
|
||||
|
||||
layout (local_size_x = 1) in;
|
||||
|
||||
// The binding index of each input tensor corresponds to its index in the parameters passed
|
||||
layout(set = 0, binding = 0) buffer bina { float tina[]; };
|
||||
layout(set = 0, binding = 1) buffer binb { float tinb[]; };
|
||||
layout(set = 0, binding = 2) buffer bout { float tout[]; };
|
||||
layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
|
||||
layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
|
||||
layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
|
||||
layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
|
||||
|
||||
// Kompute supports push constants updated on dispatch
|
||||
layout(push_constant) uniform PushConstants {
|
||||
float val;
|
||||
} push_const;
|
||||
|
||||
// Kompute also supports spec constants on initialization
|
||||
layout(constant_id = 0) const float const_one = 0;
|
||||
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
tout[index] = tina[index] * tinb[index];
|
||||
out_a[index] += in_a[index] * in_b[index];
|
||||
out_b[index] += const_one * push_const.val;
|
||||
}
|
||||
)");
|
||||
|
||||
// 3. Run operation with string shader synchronously
|
||||
mgr.evalOpDefault<kp::OpAlgoBase>(
|
||||
{ tensorInA, tensorInB, tensorOut },
|
||||
kp::Shader::compile_source(shaderString));
|
||||
kp::Workgroup workgroup({3, 1, 1});
|
||||
kp::Constants specConsts({ 2 });
|
||||
|
||||
// 4. Map results back from GPU memory to print the results
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorInA, tensorInB, tensorOut });
|
||||
auto algorithm = mgr.algorithm(params, kp::Shader::compile_source(shader), workgroup, specConsts);
|
||||
|
||||
// Prints the output which is Output: { 2, 4, 6 }
|
||||
for (const float& elem : tensorOut->data()) std::cout << elem << " ";
|
||||
kp::Constants pushConstsA({ 2.0 });
|
||||
kp::Constants pushConstsB({ 3.0 });
|
||||
|
||||
// 4. Run operation synchronously using sequence
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>(params)
|
||||
->record<kp::OpAlgoDispatch>(algorithm, pushConstsA)
|
||||
->record<kp::OpAlgoDispatch>(algorithm, pushConstsB)
|
||||
->eval();
|
||||
|
||||
// 5. Sync results from the GPU asynchronously
|
||||
sq = mgr.sequence()
|
||||
sq->evalAsync<kp::OpTensorSyncLocal>(params);
|
||||
|
||||
// ... Do other work asynchronously whilst GPU finishes
|
||||
|
||||
sq->evalAwait();
|
||||
|
||||
// Prints the first output which is: { 4, 8, 12 }
|
||||
for (const float& elem : tensorOutA->data()) std::cout << elem << " ";
|
||||
// Prints the second output which is: { 10, 10, 10 }
|
||||
for (const float& elem : tensorOutB->data()) std::cout << elem << " ";
|
||||
}
|
||||
|
||||
```
|
||||
|
|
@ -94,34 +124,72 @@ int main() {
|
|||
The [Python package](https://kompute.cc/overview/python-package.html) provides a [high level interactive interface](https://kompute.cc/overview/python-reference.html) that enables experimentation whilst ensuring high performance and fast development workflows.
|
||||
|
||||
```python
|
||||
|
||||
# 1. Create Kompute Manager with default settings (device 0 and first compute compatible queue)
|
||||
mgr = Manager()
|
||||
mgr = kp.Manager()
|
||||
|
||||
# 2. Create and initialise Kompute Tensors (can be initialized with List[] or np.Array)
|
||||
tensor_in_a = Tensor([2, 2, 2])
|
||||
tensor_in_b = Tensor([1, 2, 3])
|
||||
tensor_out = Tensor([0, 0, 0])
|
||||
# 2. Create and initialise Kompute Tensors through manager
|
||||
tensor_in_a = mgr.tensor([2, 2, 2])
|
||||
tensor_in_b = mgr.tensor([1, 2, 3])
|
||||
tensor_out_a = mgr.tensor([0, 0, 0])
|
||||
tensor_out_b = mgr.tensor([0, 0, 0])
|
||||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b]
|
||||
|
||||
# 3. Specify "multiply shader" code (can also be raw string, spir-v bytes or file path)
|
||||
@python2shader
|
||||
def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
|
||||
data1=("buffer", 0, Array(f32)),
|
||||
data2=("buffer", 1, Array(f32)),
|
||||
data3=("buffer", 2, Array(f32))):
|
||||
i = index.x
|
||||
data3[i] = data1[i] * data2[i]
|
||||
# 3. Create algorithm based on shader (supports buffers & push/spec constants)
|
||||
shader = """
|
||||
#version 450
|
||||
|
||||
# 4. Run multiplication operation synchronously
|
||||
mgr.eval_algo_data_def(
|
||||
[tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
|
||||
layout (local_size_x = 1) in;
|
||||
|
||||
# 5. Map results back from GPU memory to print the results
|
||||
mgr.eval_tensor_sync_local_def([tensor_out])
|
||||
// The binding index of each input tensor corresponds to its index in the parameters passed
|
||||
layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
|
||||
layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
|
||||
layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
|
||||
layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
|
||||
|
||||
// Kompute supports push constants updated on dispatch
|
||||
layout(push_constant) uniform PushConstants {
|
||||
float val;
|
||||
} push_const;
|
||||
|
||||
// Kompute also supports spec constants on initialization
|
||||
layout(constant_id = 0) const float const_one = 0;
|
||||
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
out_a[index] += in_a[index] * in_b[index];
|
||||
out_b[index] += const_one * push_const.val;
|
||||
}
|
||||
"""
|
||||
|
||||
workgroup = (3, 1, 1)
|
||||
spec_consts = [2]
|
||||
push_consts_a = [2]
|
||||
push_consts_b = [3]
|
||||
|
||||
algo = mgr.algorithm(params, kp.Shader.compile_source(shader), workgroup, spec_consts)
|
||||
|
||||
# 4. Run operation synchronously using sequence
|
||||
(mgr.sequence()
|
||||
.record(kp.OpTensorSyncDevice(params))
|
||||
.record(kp.OpAlgoDispatch(algo, push_consts_a))
|
||||
.record(kp.OpAlgoDispatch(algo, push_consts_b))
|
||||
.eval())
|
||||
|
||||
# 5. Sync results from the GPU asynchronously
|
||||
sq = mgr.sequence()
|
||||
sq.eval_async(kp.OpTensorSyncLocal(params))
|
||||
|
||||
# ... Do other work asynchronously whilst GPU finishes
|
||||
|
||||
sq.eval_await()
|
||||
|
||||
# Prints the first output which is: { 4, 8, 12 }
|
||||
print(tensor_out_a)
|
||||
# Prints the second output which is: { 10, 10, 10 }
|
||||
print(tensor_out_b)
|
||||
|
||||
# Prints [2.0, 4.0, 6.0]
|
||||
print(tensor_out.data())
|
||||
```
|
||||
|
||||
### Interactive Notebooks & Hands on Videos
|
||||
|
|
@ -199,7 +267,7 @@ The core architecture of Kompute includes the following:
|
|||
* [Kompute Sequence](https://kompute.cc/overview/reference.html#sequence) - Container of operations that can be sent to GPU as batch
|
||||
* [Kompute Operation (Base)](https://kompute.cc/overview/reference.html#algorithm) - Base class from which all operations inherit
|
||||
* [Kompute Tensor](https://kompute.cc/overview/reference.html#tensor) - Tensor structured data used in GPU operations
|
||||
* [Kompute Algorithm](https://kompute.cc/overview/reference.html#algorithm) - Abstraction for (shader) code executed in the GPU
|
||||
* [Kompute Algorithm](https://kompute.cc/overview/reference.html#algorithm) - Abstraction for (shader) logic executed in the GPU
|
||||
|
||||
To see a full breakdown you can read further in the [C++ Class Reference](https://kompute.cc/overview/reference.html).
|
||||
|
||||
|
|
|
|||
Binary file not shown.
|
Before Width: | Height: | Size: 262 KiB After Width: | Height: | Size: 214 KiB |
|
|
@ -20,61 +20,62 @@ void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData,
|
|||
uint32_t ITERATIONS = 100;
|
||||
float learningRate = 0.1;
|
||||
|
||||
std::shared_ptr<kp::Tensor> xI{ new kp::Tensor(xIData) };
|
||||
std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor(xJData) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> y{ new kp::Tensor(yData) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor({ 0.001, 0.001 }) };
|
||||
std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor(zerosData) };
|
||||
std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor(zerosData) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor({ 0 }) };
|
||||
std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor(zerosData) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor(zerosData) };
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
{
|
||||
mgr.rebuild(params);
|
||||
std::shared_ptr<kp::Tensor> xI = mgr.tensor(xIData);
|
||||
std::shared_ptr<kp::Tensor> xJ = mgr.tensor(xJData);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
std::shared_ptr<kp::Tensor> y = mgr.tensor(yData);
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
|
||||
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor(zerosData);
|
||||
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor(zerosData);
|
||||
|
||||
sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
|
||||
std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
|
||||
std::shared_ptr<kp::Tensor> bOut = mgr.tensor(zerosData);
|
||||
|
||||
// Newer versions of Android are able to use shaderc to read raw string
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
params, kp::Shader::compile_source(LR_SHADER));
|
||||
std::shared_ptr<kp::Tensor> lOut = mgr.tensor(zerosData);
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
sq->end();
|
||||
std::vector<uint32_t> spirv(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
|
||||
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
// Iterate across all expected iterations
|
||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm(params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 }));
|
||||
|
||||
sq->eval();
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
|
||||
for (size_t j = 0; j < bOut->size(); j++) {
|
||||
wIn->data()[0] -= learningRate * wOutI->data()[j];
|
||||
wIn->data()[1] -= learningRate * wOutJ->data()[j];
|
||||
bIn->data()[0] -= learningRate * bOut->data()[j];
|
||||
}
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
||||
// Iterate across all expected iterations
|
||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||
|
||||
sq->eval();
|
||||
|
||||
for (size_t j = 0; j < bOut->size(); j++) {
|
||||
wIn->data()[0] -= learningRate * wOutI->data()[j];
|
||||
wIn->data()[1] -= learningRate * wOutJ->data()[j];
|
||||
bIn->data()[0] -= learningRate * bOut->data()[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->mWeights = kp::Tensor(wIn->data());
|
||||
this->mBias = kp::Tensor(bIn->data());
|
||||
KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<");
|
||||
KP_LOG_INFO("{}", wIn->data()[0]);
|
||||
KP_LOG_INFO("{}", wIn->data()[1]);
|
||||
KP_LOG_INFO("{}", bIn->data()[0]);
|
||||
|
||||
this->mWeights = wIn;
|
||||
this->mBias = bIn;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<float> KomputeModelML::predict(std::vector<float> xI, std::vector<float> xJ) {
|
||||
|
|
@ -88,9 +89,9 @@ std::vector<float> KomputeModelML::predict(std::vector<float> xI, std::vector<fl
|
|||
for (size_t i = 0; i < xI.size(); i++) {
|
||||
float xIVal = xI[i];
|
||||
float xJVal = xJ[i];
|
||||
float result = (xIVal * this->mWeights.data()[0]
|
||||
+ xJVal * this->mWeights.data()[1]
|
||||
+ this->mBias.data()[0]);
|
||||
float result = (xIVal * this->mWeights->data()[0]
|
||||
+ xJVal * this->mWeights->data()[1]
|
||||
+ this->mBias->data()[0]);
|
||||
|
||||
// Instead of using sigmoid we'll just return full numbers
|
||||
float var = result > 0 ? 1 : 0;
|
||||
|
|
@ -103,13 +104,13 @@ std::vector<float> KomputeModelML::predict(std::vector<float> xI, std::vector<fl
|
|||
std::vector<float> KomputeModelML::get_params() {
|
||||
std::vector<float> retVector;
|
||||
|
||||
if(this->mWeights.size() + this->mBias.size() == 0) {
|
||||
if(this->mWeights->size() + this->mBias->size() == 0) {
|
||||
return retVector;
|
||||
}
|
||||
|
||||
retVector.push_back(this->mWeights.data()[0]);
|
||||
retVector.push_back(this->mWeights.data()[1]);
|
||||
retVector.push_back(this->mBias.data()[0]);
|
||||
retVector.push_back(this->mWeights->data()[0]);
|
||||
retVector.push_back(this->mWeights->data()[1]);
|
||||
retVector.push_back(this->mBias->data()[0]);
|
||||
retVector.push_back(99.0);
|
||||
|
||||
return retVector;
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
#include "kompute/Kompute.hpp"
|
||||
|
||||
|
|
@ -20,8 +21,8 @@ public:
|
|||
std::vector<float> get_params();
|
||||
|
||||
private:
|
||||
kp::Tensor mWeights;
|
||||
kp::Tensor mBias;
|
||||
std::shared_ptr<kp::Tensor> mWeights;
|
||||
std::shared_ptr<kp::Tensor> mBias;
|
||||
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -37,11 +37,14 @@ int main()
|
|||
}
|
||||
)");
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase>(
|
||||
{ tensorInA, tensorInB, tensorOut },
|
||||
kp::Shader::compile_source(shader));
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorInA, tensorInB, tensorOut };
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({tensorOut});
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>(params)
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>(params);
|
||||
|
||||
// prints "Output { 0 4 12 }"
|
||||
std::cout<< "Output: { ";
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ void KomputeSummatorNode::_init() {
|
|||
std::cout << "CALLING INIT" << std::endl;
|
||||
this->mPrimaryTensor = this->mManager.tensor({ 0.0 });
|
||||
this->mSecondaryTensor = this->mManager.tensor({ 0.0 });
|
||||
this->mSequence = this->mManager.sequence("AdditionSeq");
|
||||
this->mSequence = this->mManager.sequence();
|
||||
|
||||
// We now record the steps in the sequence
|
||||
if (std::shared_ptr<kp::Sequence> sq = this->mSequence)
|
||||
|
|
@ -51,7 +51,11 @@ void KomputeSummatorNode::_init() {
|
|||
}
|
||||
)");
|
||||
|
||||
sq->begin();
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm(
|
||||
{ this->mPrimaryTensor, this->mSecondaryTensor },
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
|
||||
// First we ensure secondary tensor loads to GPU
|
||||
// No need to sync the primary tensor as it should not be changed
|
||||
|
|
@ -59,15 +63,12 @@ void KomputeSummatorNode::_init() {
|
|||
{ this->mSecondaryTensor });
|
||||
|
||||
// Then we run the operation with both tensors
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ this->mPrimaryTensor, this->mSecondaryTensor },
|
||||
kp::Shader::compile_source(shader));
|
||||
sq->record<kp::OpAlgoDispatch>(algo)
|
||||
|
||||
// We map the result back to local
|
||||
sq->record<kp::OpTensorSyncLocal>(
|
||||
{ this->mPrimaryTensor });
|
||||
|
||||
sq->end();
|
||||
}
|
||||
else {
|
||||
throw std::runtime_error("Sequence pointer no longer available");
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ void KomputeSummator::_init() {
|
|||
{ this->mSecondaryTensor });
|
||||
|
||||
// Then we run the operation with both tensors
|
||||
this->mSequence->record<kp::OpAlgoBase>(
|
||||
this->mSequence->record<kp::OpAlgoCreate>(
|
||||
{ this->mPrimaryTensor, this->mSecondaryTensor },
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
|
|
|
|||
|
|
@ -29,54 +29,41 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
|
|||
uint32_t ITERATIONS = 100;
|
||||
float learningRate = 0.1;
|
||||
|
||||
std::shared_ptr<kp::Tensor> xI{ new kp::Tensor(xIData) };
|
||||
std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor(xJData) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> y{ new kp::Tensor(yData) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor({ 0.001, 0.001 }) };
|
||||
std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor(zerosData) };
|
||||
std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor(zerosData) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor({ 0 }) };
|
||||
std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor(zerosData) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor(zerosData) };
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild(params);
|
||||
std::shared_ptr<kp::Tensor> xI = mgr.tensor(xIData);
|
||||
std::shared_ptr<kp::Tensor> xJ = mgr.tensor(xJData);
|
||||
|
||||
std::shared_ptr<kp::Tensor> y = mgr.tensor(yData);
|
||||
|
||||
std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
|
||||
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor(zerosData);
|
||||
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor(zerosData);
|
||||
|
||||
std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
|
||||
std::shared_ptr<kp::Tensor> bOut = mgr.tensor(zerosData);
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut = mgr.tensor(zerosData);
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
std::vector<uint32_t> spirv(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
|
||||
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv);
|
||||
|
||||
sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
|
||||
#ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING
|
||||
// Newer versions of Android are able to use shaderc to read raw string
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
params, std::vector<char>(LR_SHADER.begin(), LR_SHADER.end()));
|
||||
#else
|
||||
// Older versions of Android require the SPIRV binary directly
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
params, std::vector<char>(
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv
|
||||
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len
|
||||
));
|
||||
#endif
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
||||
sq->end();
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
||||
// Iterate across all expected iterations
|
||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||
|
|
@ -90,15 +77,15 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<");
|
||||
KP_LOG_INFO(wIn->data()[0]);
|
||||
KP_LOG_INFO(wIn->data()[1]);
|
||||
KP_LOG_INFO(bIn->data()[0]);
|
||||
|
||||
this->mWeights = kp::Tensor(wIn->data());
|
||||
this->mBias = kp::Tensor(bIn->data());
|
||||
}
|
||||
|
||||
KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<");
|
||||
KP_LOG_INFO(wIn->data()[0]);
|
||||
KP_LOG_INFO(wIn->data()[1]);
|
||||
KP_LOG_INFO(bIn->data()[0]);
|
||||
|
||||
this->mWeights = kp::Tensor(wIn->data());
|
||||
this->mBias = kp::Tensor(bIn->data());
|
||||
}
|
||||
|
||||
Array KomputeModelMLNode::predict(Array xI, Array xJ) {
|
||||
|
|
|
|||
|
|
@ -33,54 +33,41 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
|
|||
uint32_t ITERATIONS = 100;
|
||||
float learningRate = 0.1;
|
||||
|
||||
std::shared_ptr<kp::Tensor> xI{ new kp::Tensor(xIData) };
|
||||
std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor(xJData) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> y{ new kp::Tensor(yData) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor({ 0.001, 0.001 }) };
|
||||
std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor(zerosData) };
|
||||
std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor(zerosData) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor({ 0 }) };
|
||||
std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor(zerosData) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor(zerosData) };
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> xI = mgr.tensor(xIData);
|
||||
std::shared_ptr<kp::Tensor> xJ = mgr.tensor(xJData);
|
||||
|
||||
std::shared_ptr<kp::Tensor> y = mgr.tensor(yData);
|
||||
|
||||
std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
|
||||
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor(zerosData);
|
||||
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor(zerosData);
|
||||
|
||||
std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
|
||||
std::shared_ptr<kp::Tensor> bOut = mgr.tensor(zerosData);
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut = mgr.tensor(zerosData);
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
{
|
||||
mgr.rebuild(params);
|
||||
std::vector<uint32_t> spirv(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
|
||||
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv);
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
|
||||
sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
|
||||
|
||||
#ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING
|
||||
// Newer versions of Android are able to use shaderc to read raw string
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
params, std::vector<char>(LR_SHADER.begin(), LR_SHADER.end()));
|
||||
#else
|
||||
// Older versions of Android require the SPIRV binary directly
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
params, std::vector<char>(
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv
|
||||
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len
|
||||
));
|
||||
#endif
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
||||
sq->end();
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
||||
// Iterate across all expected iterations
|
||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||
|
|
@ -94,15 +81,15 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<");
|
||||
KP_LOG_INFO(wIn->data()[0]);
|
||||
KP_LOG_INFO(wIn->data()[1]);
|
||||
KP_LOG_INFO(bIn->data()[0]);
|
||||
|
||||
this->mWeights = wIn;
|
||||
this->mBias = bIn;
|
||||
}
|
||||
|
||||
KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<");
|
||||
KP_LOG_INFO(wIn->data()[0]);
|
||||
KP_LOG_INFO(wIn->data()[1]);
|
||||
KP_LOG_INFO(bIn->data()[0]);
|
||||
|
||||
this->mWeights = kp::Tensor(wIn->data());
|
||||
this->mBias = kp::Tensor(bIn->data());
|
||||
}
|
||||
|
||||
Array KomputeModelML::predict(Array xI, Array xJ) {
|
||||
|
|
@ -116,9 +103,9 @@ Array KomputeModelML::predict(Array xI, Array xJ) {
|
|||
for (size_t i = 0; i < xI.size(); i++) {
|
||||
float xIVal = xI[i];
|
||||
float xJVal = xJ[i];
|
||||
float result = (xIVal * this->mWeights.data()[0]
|
||||
+ xJVal * this->mWeights.data()[1]
|
||||
+ this->mBias.data()[0]);
|
||||
float result = (xIVal * this->mWeights->data()[0]
|
||||
+ xJVal * this->mWeights->data()[1]
|
||||
+ this->mBias->data()[0]);
|
||||
|
||||
// Instead of using sigmoid we'll just return full numbers
|
||||
Variant var = result > 0 ? 1 : 0;
|
||||
|
|
@ -131,15 +118,15 @@ Array KomputeModelML::predict(Array xI, Array xJ) {
|
|||
Array KomputeModelML::get_params() {
|
||||
Array retArray;
|
||||
|
||||
KP_LOG_INFO(this->mWeights.size() + this->mBias.size());
|
||||
KP_LOG_INFO(this->mWeights->size() + this->mBias->size());
|
||||
|
||||
if(this->mWeights.size() + this->mBias.size() == 0) {
|
||||
if(this->mWeights->size() + this->mBias->size() == 0) {
|
||||
return retArray;
|
||||
}
|
||||
|
||||
retArray.push_back(this->mWeights.data()[0]);
|
||||
retArray.push_back(this->mWeights.data()[1]);
|
||||
retArray.push_back(this->mBias.data()[0]);
|
||||
retArray.push_back(this->mWeights->data()[0]);
|
||||
retArray.push_back(this->mWeights->data()[1]);
|
||||
retArray.push_back(this->mBias->data()[0]);
|
||||
retArray.push_back(99.0);
|
||||
|
||||
return retArray;
|
||||
|
|
|
|||
|
|
@ -28,8 +28,8 @@ public:
|
|||
static void _register_methods();
|
||||
|
||||
private:
|
||||
kp::Tensor mWeights;
|
||||
kp::Tensor mBias;
|
||||
std::shared_ptr<kp::Tensor> mWeights;
|
||||
std::shared_ptr<kp::Tensor> mBias;
|
||||
};
|
||||
|
||||
static std::string LR_SHADER = R"(
|
||||
|
|
|
|||
|
|
@ -15,44 +15,39 @@ int main()
|
|||
uint32_t ITERATIONS = 100;
|
||||
float learningRate = 0.1;
|
||||
|
||||
std::shared_ptr<kp::Tensor> xI{ new kp::Tensor({ 0, 1, 1, 1, 1 }) };
|
||||
std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> y{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
|
||||
std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
|
||||
std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor({ 0.001, 0.001 }) };
|
||||
std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor({ 0 }) };
|
||||
std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
|
||||
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
|
||||
std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild(params);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
|
||||
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
params, std::vector<uint32_t>(
|
||||
std::vector<uint32_t> spirv(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
|
||||
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)));
|
||||
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv);
|
||||
|
||||
sq->end();
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
||||
// Iterate across all expected iterations
|
||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||
|
|
|
|||
|
|
@ -266,23 +266,23 @@ The type of tensor to initialize @param syncDataToGPU Whether to sync
|
|||
the data to GPU memory @returns Initialized Tensor with memory Syncd
|
||||
to GPU device)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase =
|
||||
static const char *__doc_kp_OpAlgoCreate =
|
||||
R"doc(Operation that provides a general abstraction that simplifies the use
|
||||
of algorithm and parameter components which can be used with shaders.
|
||||
By default it enables the user to provide a dynamic number of tensors
|
||||
which are then passed as inputs.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_x = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_x = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_y = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_y = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_z = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_z = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_OpAlgoBase = R"doc(Base constructor, should not be used unless explicitly intended.)doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate = R"doc(Base constructor, should not be used unless explicitly intended.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_OpAlgoBase_2 =
|
||||
static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_2 =
|
||||
R"doc(Default constructor with parameters that provides the bare minimum
|
||||
requirements for the operations to be able to create and manage their
|
||||
sub-components.
|
||||
|
|
@ -295,7 +295,7 @@ shaderFilePath Optional parameter to specify the shader to load
|
|||
(either in spirv or raw format) @param komputeWorkgroup Optional
|
||||
parameter to specify the layout for processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_OpAlgoBase_3 =
|
||||
static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_3 =
|
||||
R"doc(Constructor that enables a file to be passed to the operation with the
|
||||
contents of the shader. This can be either in raw format or in
|
||||
compiled SPIR-V binary format.
|
||||
|
|
@ -308,7 +308,7 @@ shaderFilePath Parameter to specify the shader to load (either in
|
|||
spirv or raw format) @param komputeWorkgroup Optional parameter to
|
||||
specify the layout for processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_OpAlgoBase_4 =
|
||||
static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_4 =
|
||||
R"doc(Constructor that enables raw shader data to be passed to the main
|
||||
operation which can be either in raw shader glsl code or in compiled
|
||||
SPIR-V binary.
|
||||
|
|
@ -321,37 +321,37 @@ shaderDataRaw Optional parameter to specify the shader data either in
|
|||
binary or raw form @param komputeWorkgroup Optional parameter to
|
||||
specify the layout for processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_fetchSpirvBinaryData = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_fetchSpirvBinaryData = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_init =
|
||||
static const char *__doc_kp_OpAlgoCreate_init =
|
||||
R"doc(The init function is responsible for the initialisation of the
|
||||
algorithm component based on the parameters specified, and allows for
|
||||
extensibility on the options provided. Further dependent classes can
|
||||
perform more specific checks such as ensuring tensors provided are
|
||||
initialised, etc.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mAlgorithm = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_mAlgorithm = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mFreeAlgorithm = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_mFreeAlgorithm = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mKomputeWorkgroup = R"doc()doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_mKomputeWorkgroup = R"doc()doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mShaderDataRaw =
|
||||
static const char *__doc_kp_OpAlgoCreate_mShaderDataRaw =
|
||||
R"doc(< Optional member variable which can be provided to contain either the
|
||||
raw shader content or the spirv binary content)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_mShaderFilePath =
|
||||
R"doc(< Optional member variable which can be provided for the OpAlgoBase to
|
||||
static const char *__doc_kp_OpAlgoCreate_mShaderFilePath =
|
||||
R"doc(< Optional member variable which can be provided for the OpAlgoCreate to
|
||||
find the data automatically and load for processing)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_postEval =
|
||||
static const char *__doc_kp_OpAlgoCreate_postEval =
|
||||
R"doc(Executes after the recorded commands are submitted, and performs a
|
||||
copy of the GPU Device memory into the staging buffer so the output
|
||||
data can be retrieved.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_preEval = R"doc(Does not perform any preEval commands.)doc";
|
||||
static const char *__doc_kp_OpAlgoCreate_preEval = R"doc(Does not perform any preEval commands.)doc";
|
||||
|
||||
static const char *__doc_kp_OpAlgoBase_record =
|
||||
static const char *__doc_kp_OpAlgoCreate_record =
|
||||
R"doc(This records the commands that are to be sent to the GPU. This
|
||||
includes the barriers that ensure the memory has been copied before
|
||||
going in and out of the shader, as well as the dispatch operation that
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
#include <kompute/Kompute.hpp>
|
||||
|
||||
#include "fmt/ranges.h"
|
||||
|
||||
#include "docstrings.hpp"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
|
@ -23,8 +25,7 @@ PYBIND11_MODULE(kp, m) {
|
|||
|
||||
py::module_ np = py::module_::import("numpy");
|
||||
|
||||
|
||||
py::enum_<kp::Tensor::TensorTypes>(m, "TensorTypes", DOC(kp, Tensor, TensorTypes))
|
||||
py::enum_<kp::Tensor::TensorTypes>(m, "TensorTypes")
|
||||
.value("device", kp::Tensor::TensorTypes::eDevice, "Tensor holding data in GPU memory.")
|
||||
.value("host", kp::Tensor::TensorTypes::eHost, "Tensor used for CPU visible GPU data.")
|
||||
.value("storage", kp::Tensor::TensorTypes::eStorage, "Tensor with host visible gpu memory.")
|
||||
|
|
@ -53,22 +54,32 @@ PYBIND11_MODULE(kp, m) {
|
|||
py::arg("sources"), py::arg("files") = std::vector<std::string>(), py::arg("entryPoint") = "main", py::arg("definitions") = std::vector<std::pair<std::string,std::string>>() );
|
||||
#endif // KOMPUTE_DISABLE_SHADER_UTILS
|
||||
|
||||
py::class_<kp::OpBase, std::shared_ptr<kp::OpBase>>(m, "OpBase");
|
||||
|
||||
py::class_<kp::OpTensorSyncDevice, std::shared_ptr<kp::OpTensorSyncDevice>>(m, "OpTensorSyncDevice", py::base<kp::OpBase>())
|
||||
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>());
|
||||
|
||||
py::class_<kp::OpTensorSyncLocal, std::shared_ptr<kp::OpTensorSyncLocal>>(m, "OpTensorSyncLocal", py::base<kp::OpBase>())
|
||||
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>());
|
||||
|
||||
py::class_<kp::OpTensorCopy, std::shared_ptr<kp::OpTensorCopy>>(m, "OpTensorCopy", py::base<kp::OpBase>())
|
||||
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>());
|
||||
|
||||
py::class_<kp::OpAlgoDispatch, std::shared_ptr<kp::OpAlgoDispatch>>(m, "OpAlgoDispatch", py::base<kp::OpBase>())
|
||||
.def(py::init<const std::shared_ptr<kp::Algorithm>&,const kp::Constants&>(),
|
||||
py::arg("algorithm"), py::arg("push_consts") = kp::Constants());
|
||||
|
||||
py::class_<kp::OpMult, std::shared_ptr<kp::OpMult>>(m, "OpMult", py::base<kp::OpBase>())
|
||||
.def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&,const std::shared_ptr<kp::Algorithm>&>());
|
||||
|
||||
py::class_<kp::Algorithm, std::shared_ptr<kp::Algorithm>>(m, "Algorithm")
|
||||
.def("get_tensors", &kp::Algorithm::getTensors)
|
||||
.def("destroy", &kp::Algorithm::destroy)
|
||||
.def("get_spec_consts", &kp::Algorithm::getSpecializationConstants)
|
||||
.def("is_init", &kp::Algorithm::isInit);
|
||||
|
||||
py::class_<kp::Tensor, std::shared_ptr<kp::Tensor>>(m, "Tensor", DOC(kp, Tensor))
|
||||
.def(py::init(
|
||||
[np](const py::array_t<float> data, kp::Tensor::TensorTypes tensor_type) {
|
||||
const py::array_t<float> flatdata = np.attr("ravel")(data);
|
||||
const py::buffer_info info = flatdata.request();
|
||||
const float* ptr = (float*) info.ptr;
|
||||
return std::unique_ptr<kp::Tensor>(
|
||||
new kp::Tensor(std::vector<float>(ptr, ptr+flatdata.size()), tensor_type)
|
||||
);
|
||||
}),
|
||||
"Construct Tensor with an array as initial data and an optional kp.TensorType (default:device).",
|
||||
py::arg("data"),
|
||||
py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice
|
||||
)
|
||||
.def("data", &kp::Tensor::data, DOC(kp, Tensor, data))
|
||||
.def("numpy", [](kp::Tensor& self) {
|
||||
.def("data", [](kp::Tensor& self) {
|
||||
return py::array(self.data().size(), self.data().data());
|
||||
}, "Returns stored data as a new numpy array.")
|
||||
.def("__getitem__", [](kp::Tensor &self, size_t index) -> float { return self.data()[index]; },
|
||||
|
|
@ -105,221 +116,50 @@ PYBIND11_MODULE(kp, m) {
|
|||
.def("__len__", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.")
|
||||
.def("tensor_type", &kp::Tensor::tensorType, "Retreves the memory type of the tensor.")
|
||||
.def("is_init", &kp::Tensor::isInit, "Checks whether the tensor GPU memory has been initialised.")
|
||||
.def("map_data_from_host", &kp::Tensor::mapDataFromHostMemory, "Maps data into GPU memory from tensor local data.")
|
||||
.def("map_data_into_host", &kp::Tensor::mapDataIntoHostMemory, "Maps data from GPU memory into tensor local data.");
|
||||
|
||||
.def("destroy", &kp::Tensor::destroy, "Destroy tensor GPU resources.");
|
||||
|
||||
py::class_<kp::Sequence, std::shared_ptr<kp::Sequence>>(m, "Sequence")
|
||||
.def("init", &kp::Sequence::init, DOC(kp, Sequence, init))
|
||||
|
||||
// record
|
||||
.def("begin", &kp::Sequence::begin, DOC(kp, Sequence, begin))
|
||||
.def("end", &kp::Sequence::end, DOC(kp, Sequence, end))
|
||||
|
||||
// eval
|
||||
.def("eval", &kp::Sequence::eval, DOC(kp, Sequence, eval))
|
||||
.def("eval_async", &kp::Sequence::evalAsync, DOC(kp, Sequence, evalAsync))
|
||||
.def("eval_await", &kp::Sequence::evalAwait, DOC(kp, Sequence, evalAwait))
|
||||
|
||||
// status
|
||||
.def("is_running", &kp::Sequence::isRunning, DOC(kp, Sequence, isRunning))
|
||||
.def("is_rec", &kp::Sequence::isRecording, DOC(kp, Sequence, isRecording))
|
||||
.def("is_init", &kp::Sequence::isInit, DOC(kp, Sequence, isInit))
|
||||
|
||||
// record
|
||||
.def("record_tensor_copy", &kp::Sequence::record<kp::OpTensorCopy>, DOC(kp, Sequence, record))
|
||||
.def("record_tensor_sync_device", &kp::Sequence::record<kp::OpTensorSyncDevice>,
|
||||
"Records operation to sync tensor from local memory to GPU memory")
|
||||
.def("record_tensor_sync_local", &kp::Sequence::record<kp::OpTensorSyncLocal>,
|
||||
"Records operation to sync tensor(s) from GPU memory to local memory")
|
||||
.def("record_algo_file", &kp::Sequence::record<
|
||||
kp::OpAlgoBase,
|
||||
const std::string&,
|
||||
kp::Workgroup,
|
||||
kp::Constants>,
|
||||
"Records an operation using a custom shader provided from a shader path",
|
||||
py::arg("tensors"), py::arg("data"), py::arg("workgroup") = kp::Workgroup(), py::arg("constants") = kp::Constants() )
|
||||
.def("record_algo_data", [](kp::Sequence &self,
|
||||
std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
py::bytes &bytes,
|
||||
kp::Workgroup workgroup,
|
||||
kp::Constants constants) -> bool {
|
||||
// Bytes have to be converted into std::vector
|
||||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
return self.record<kp::OpAlgoBase>(
|
||||
tensors, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
|
||||
.def("record", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.record(op); })
|
||||
.def("eval", [](kp::Sequence& self) { return self.eval(); })
|
||||
.def("eval", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.eval(op); })
|
||||
// Zero-argument overload of eval_async: forwards to Sequence::evalAsync(),
// matching the op-taking eval_async overload which forwards to evalAsync(op).
// (Previously this forwarded to eval(), making it a duplicate of "eval".)
.def("eval_async", [](kp::Sequence& self) { return self.evalAsync(); })
|
||||
.def("eval_async", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.evalAsync(op); })
|
||||
.def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); })
|
||||
.def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); })
|
||||
.def("is_recording", &kp::Sequence::isRecording)
|
||||
.def("is_running", &kp::Sequence::isRunning)
|
||||
.def("is_init", &kp::Sequence::isInit)
|
||||
.def("clear", &kp::Sequence::clear)
|
||||
.def("destroy", &kp::Sequence::destroy);
|
||||
|
||||
py::class_<kp::Manager, std::shared_ptr<kp::Manager>>(m, "Manager")
|
||||
.def(py::init())
|
||||
.def(py::init<uint32_t>())
|
||||
.def(py::init<uint32_t,const std::vector<uint32_t>&>())
|
||||
.def("sequence", &kp::Manager::sequence, py::arg("queueIndex") = 0)
|
||||
.def("tensor", [np](kp::Manager& self,
|
||||
const py::array_t<float> data,
|
||||
kp::Tensor::TensorTypes tensor_type) {
|
||||
const py::array_t<float> flatdata = np.attr("ravel")(data);
|
||||
const py::buffer_info info = flatdata.request();
|
||||
const float* ptr = (float*) info.ptr;
|
||||
return self.tensor(std::vector<float>(ptr, ptr+flatdata.size()), tensor_type);
|
||||
},
|
||||
"Records an operation using a custom shader provided as spirv bytes",
|
||||
py::arg("tensors"), py::arg("bytes"), py::arg("workgroup") = kp::Workgroup(), py::arg("constants") = kp::Constants() );
|
||||
|
||||
|
||||
py::class_<kp::Manager>(m, "Manager")
|
||||
.def(py::init(), "Default initializer uses device 0 and first compute compatible GPU queueFamily")
|
||||
.def(py::init(
|
||||
[](uint32_t physicalDeviceIndex) {
|
||||
return std::unique_ptr<kp::Manager>(new kp::Manager(physicalDeviceIndex));
|
||||
}), "Manager initialiser can provide specified device index but will use first compute compatible GPU queueFamily")
|
||||
.def(py::init(
|
||||
[](uint32_t physicalDeviceIndex, const std::vector<uint32_t>& familyQueueIndices) {
|
||||
return std::unique_ptr<kp::Manager>(new kp::Manager(physicalDeviceIndex, familyQueueIndices));
|
||||
}), "Manager initialiser can provide specified device and array of GPU queueFamilies to load.")
|
||||
.def("sequence", &kp::Manager::sequence,
|
||||
py::arg("name") = "", py::arg("queueIndex") = 0, "Get or create a sequence with specific name and specified index of available queues")
|
||||
.def("tensor", &kp::Manager::tensor,
|
||||
py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, py::arg("syncDataToGPU") = true,
|
||||
"Build and initialise tensor")
|
||||
.def("rebuild", py::overload_cast<std::vector<std::shared_ptr<kp::Tensor>>, bool>(&kp::Manager::rebuild),
|
||||
py::arg("tensors"), py::arg("syncDataToGPU") = true,
|
||||
"Build and initialise list of tensors")
|
||||
.def("rebuild", py::overload_cast<std::shared_ptr<kp::Tensor>, bool>(&kp::Manager::rebuild),
|
||||
py::arg("tensor"), py::arg("syncDataToGPU") = true,
|
||||
"Build and initialise tensor")
|
||||
.def("destroy", py::overload_cast<std::shared_ptr<kp::Tensor>>(&kp::Manager::destroy),
|
||||
py::arg("tensor"), DOC(kp, Manager, destroy))
|
||||
.def("destroy", py::overload_cast<std::vector<std::shared_ptr<kp::Tensor>>>(&kp::Manager::destroy),
|
||||
py::arg("tensors"), DOC(kp, Manager, destroy, 2))
|
||||
.def("destroy", py::overload_cast<std::vector<std::shared_ptr<kp::Sequence>>>(&kp::Manager::destroy),
|
||||
py::arg("sequences"), DOC(kp, Manager, destroy, 3))
|
||||
.def("destroy", py::overload_cast<std::shared_ptr<kp::Sequence>>(&kp::Manager::destroy),
|
||||
py::arg("sequence"), DOC(kp, Manager, destroy, 4))
|
||||
.def("destroy", py::overload_cast<const std::string &>(&kp::Manager::destroy),
|
||||
py::arg("sequenceName"), DOC(kp, Manager, destroy, 5))
|
||||
.def("destroy", py::overload_cast<const std::vector<std::string>&>(&kp::Manager::destroy),
|
||||
py::arg("sequenceNames"), DOC(kp, Manager, destroy, 6))
|
||||
// temporary backwards compatibility
|
||||
.def("eval_tensor_create_def",[](kp::Manager& self, std::vector<std::shared_ptr<kp::Tensor>> tensors, bool syncDataToGPU) -> void {
|
||||
kp_error("IMPORTANT: eval_tensor_create_def is depricated! Please use Manager.rebuild instead as function will be removed soon.");
|
||||
self.rebuild(tensors, syncDataToGPU);
|
||||
"Tensor initialisation function with data and tensor type",
|
||||
py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice)
|
||||
.def("algorithm", [](kp::Manager& self,
|
||||
const std::vector<std::shared_ptr<kp::Tensor>>& tensors,
|
||||
const py::bytes& spirv,
|
||||
const kp::Workgroup& workgroup,
|
||||
const kp::Constants& spec_consts) {
|
||||
py::buffer_info info(py::buffer(spirv).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
std::vector<uint32_t> spirvVec((uint32_t*)data, (uint32_t*)(data + length));
|
||||
return self.algorithm(tensors, spirvVec, workgroup, spec_consts);
|
||||
},
|
||||
py::arg("tensors"), py::arg("syncDataToGPU") = true,
|
||||
"Temporary backwards compatibility for tensor creation function which will be removed in the next version.")
|
||||
|
||||
// Await functions
|
||||
.def("eval_await", &kp::Manager::evalOpAwait,
|
||||
py::arg("sequenceName"), py::arg("waitFor") = UINT64_MAX,
|
||||
"Awaits for asynchronous operation on a named Sequence")
|
||||
.def("eval_await_def", &kp::Manager::evalOpAwaitDefault,
|
||||
py::arg("waitFor") = UINT64_MAX, "Awaits for asynchronous operation on the last anonymous Sequence created")
|
||||
|
||||
// eval default
|
||||
.def("eval_tensor_copy_def", &kp::Manager::evalOpDefault<kp::OpTensorCopy>,
|
||||
"Evaluates operation to copy one tensor to one or many tensors with new anonymous Sequence")
|
||||
.def("eval_tensor_sync_device_def", &kp::Manager::evalOpDefault<kp::OpTensorSyncDevice>,
|
||||
"Evaluates operation to sync tensor from local memory to GPU memory with new anonymous Sequence")
|
||||
.def("eval_tensor_sync_local_def", &kp::Manager::evalOpDefault<kp::OpTensorSyncLocal>,
|
||||
"Evaluates operation to sync tensor(s) from GPU memory to local memory with new anonymous Sequence")
|
||||
.def("eval_algo_file_def", &kp::Manager::evalOpDefault<
|
||||
kp::OpAlgoBase,
|
||||
const std::string&,
|
||||
kp::Workgroup,
|
||||
kp::Constants>,
|
||||
"Evaluates an operation using a custom shader provided from a shader path with new anonymous Sequence",
|
||||
py::arg("tensors"), py::arg("data"), py::arg("workgroup") = kp::Workgroup(), py::arg("constants") = kp::Constants() )
|
||||
.def("eval_algo_data_def", [](kp::Manager &self,
|
||||
std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
py::bytes &bytes,
|
||||
kp::Workgroup workgroup,
|
||||
kp::Constants constants) {
|
||||
// Bytes have to be converted into std::vector
|
||||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
self.evalOpDefault<kp::OpAlgoBase>(
|
||||
tensors, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
|
||||
},
|
||||
"Evaluates an operation using a custom shader provided as spirv bytes with new anonymous Sequence",
|
||||
py::arg("tensors"), py::arg("bytes"), py::arg("workgroup") = kp::Workgroup(), py::arg("constants") = kp::Constants() )
|
||||
|
||||
// eval
|
||||
.def("eval_tensor_copy", &kp::Manager::evalOp<kp::OpTensorCopy>,
|
||||
"Evaluates operation to copy one tensor to one or many tensors with explicitly named Sequence")
|
||||
.def("eval_tensor_sync_device", &kp::Manager::evalOp<kp::OpTensorSyncDevice>,
|
||||
"Evaluates operation to sync tensor from local memory to GPU memory with explicitly named Sequence")
|
||||
.def("eval_tensor_sync_local", &kp::Manager::evalOp<kp::OpTensorSyncLocal>,
|
||||
"Evaluates operation to sync tensor(s) from GPU memory to local memory with explicitly named Sequence")
|
||||
.def("eval_algo_file", &kp::Manager::evalOp<
|
||||
kp::OpAlgoBase,
|
||||
const std::string&,
|
||||
kp::Workgroup,
|
||||
kp::Constants>,
|
||||
"Evaluates an operation using a custom shader provided from a shader path with explicitly named Sequence",
|
||||
py::arg("tensors"), py::arg("sequence_name"), py::arg("data"),py::arg("workgroup") = kp::Workgroup(), py::arg("constants") = kp::Constants() )
|
||||
.def("eval_algo_data", [](kp::Manager &self,
|
||||
std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
std::string sequenceName,
|
||||
py::bytes &bytes,
|
||||
kp::Workgroup workgroup,
|
||||
kp::Constants constants) {
|
||||
// Bytes have to be converted into std::vector
|
||||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
self.evalOp<kp::OpAlgoBase>(
|
||||
tensors, sequenceName, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
|
||||
},
|
||||
"Evaluates an operation using a custom shader provided as spirv bytes with explicitly named Sequence",
|
||||
py::arg("tensors"), py::arg("sequence_name"), py::arg("bytes"), py::arg("workgroup") = kp::Workgroup(), py::arg("constants") = kp::Constants() )
|
||||
|
||||
// eval async default
|
||||
.def("eval_async_tensor_copy_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorCopy>,
|
||||
"Evaluates asynchronously operation to copy one tensor to one or many tensors with anonymous Sequence")
|
||||
.def("eval_async_tensor_sync_device_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorSyncDevice>,
|
||||
"Evaluates asynchronously operation to sync tensor from local memory to GPU memory with anonymous Sequence")
|
||||
.def("eval_async_tensor_sync_local_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorSyncLocal>,
|
||||
"Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory with anonymous Sequence")
|
||||
.def("eval_async_algo_file_def", &kp::Manager::evalOpAsyncDefault<
|
||||
kp::OpAlgoBase,
|
||||
const std::string&,
|
||||
kp::Workgroup,
|
||||
kp::Constants>,
|
||||
"Evaluates asynchronously an operation using a custom shader provided from a shader path with anonymous Sequence",
|
||||
py::arg("tensors"), py::arg("data"), py::arg("workgroup") = kp::Workgroup(), py::arg("constants") = kp::Constants() )
|
||||
.def("eval_async_algo_data_def", [](kp::Manager &self,
|
||||
std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
py::bytes &bytes,
|
||||
kp::Workgroup workgroup,
|
||||
kp::Constants constants) {
|
||||
// Bytes have to be converted into std::vector
|
||||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
self.evalOpAsyncDefault<kp::OpAlgoBase>(
|
||||
tensors, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
|
||||
},
|
||||
"Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with anonymous Sequence",
|
||||
py::arg("tensors"), py::arg("bytes"), py::arg("workgroup") = kp::Workgroup(), py::arg("constants") = kp::Constants() )
|
||||
|
||||
// eval async
|
||||
.def("eval_async_tensor_copy", &kp::Manager::evalOpAsync<kp::OpTensorCopy>,
|
||||
"Evaluates asynchronously operation to copy one tensor to one or many tensors with explicitly named Sequence")
|
||||
.def("eval_async_tensor_sync_device", &kp::Manager::evalOpAsync<kp::OpTensorSyncDevice>,
|
||||
"Evaluates asynchronously operation to sync tensor from local memory to GPU memory with explicitly named Sequence")
|
||||
.def("eval_async_tensor_sync_local", &kp::Manager::evalOpAsync<kp::OpTensorSyncLocal>,
|
||||
"Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory with explicitly named Sequence")
|
||||
.def("eval_async_algo_file", &kp::Manager::evalOpAsync<
|
||||
kp::OpAlgoBase,
|
||||
const std::string&,
|
||||
kp::Workgroup,
|
||||
kp::Constants>,
|
||||
"Evaluates asynchronously an operation using a custom shader provided from a shader path with explicitly named Sequence",
|
||||
py::arg("tensors"), py::arg("sequence_name"), py::arg("data"), py::arg("workgroup") = kp::Workgroup(), py::arg("constants") = kp::Constants() )
|
||||
.def("eval_async_algo_data", [](kp::Manager &self,
|
||||
std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
std::string sequenceName,
|
||||
py::bytes &bytes,
|
||||
kp::Workgroup workgroup,
|
||||
kp::Constants constants) {
|
||||
// Bytes have to be converted into std::vector
|
||||
py::buffer_info info(py::buffer(bytes).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
self.evalOpAsync<kp::OpAlgoBase>(
|
||||
tensors, sequenceName, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
|
||||
},
|
||||
"Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with explicitly named Sequence",
|
||||
py::arg("tensors"), py::arg("sequence_name"), py::arg("bytes"), py::arg("workgroup") = kp::Workgroup(), py::arg("constants") = kp::Constants() );
|
||||
"Algorithm initialisation function",
|
||||
py::arg("tensors"), py::arg("spirv"), py::arg("workgroup") = kp::Workgroup(), py::arg("spec_consts") = kp::Constants());
|
||||
|
||||
#ifdef VERSION_INFO
|
||||
m.attr("__version__") = VERSION_INFO;
|
||||
|
|
|
|||
|
|
@ -9,29 +9,26 @@ def test_array_multiplication():
|
|||
mgr = kp.Manager()
|
||||
|
||||
# 2. Create Kompute Tensors to hold data
|
||||
tensor_in_a = kp.Tensor([2, 2, 2])
|
||||
tensor_in_b = kp.Tensor([1, 2, 3])
|
||||
tensor_out = kp.Tensor([0, 0, 0])
|
||||
tensor_in_a = mgr.tensor([2, 2, 2])
|
||||
tensor_in_b = mgr.tensor([1, 2, 3])
|
||||
tensor_out = mgr.tensor([0, 0, 0])
|
||||
|
||||
# 3. Initialise the Kompute Tensors in the GPU
|
||||
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
|
||||
params = [tensor_in_a, tensor_in_b, tensor_out]
|
||||
|
||||
# 4. Define the multiplication shader code to run on the GPU
|
||||
@ps.python2shader
|
||||
def compute_shader_multiply(index=("input", "GlobalInvocationId", ps.ivec3),
|
||||
def compute_mult(index=("input", "GlobalInvocationId", ps.ivec3),
|
||||
data1=("buffer", 0, ps.Array(ps.f32)),
|
||||
data2=("buffer", 1, ps.Array(ps.f32)),
|
||||
data3=("buffer", 2, ps.Array(ps.f32))):
|
||||
i = index.x
|
||||
data3[i] = data1[i] * data2[i]
|
||||
|
||||
# 5. Run shader code against our previously defined tensors
|
||||
mgr.eval_algo_data_def(
|
||||
[tensor_in_a, tensor_in_b, tensor_out],
|
||||
compute_shader_multiply.to_spirv())
|
||||
(mgr.sequence()
|
||||
.record(kp.OpTensorSyncDevice(params))
|
||||
.record(kp.OpAlgoDispatch(mgr.algorithm(params, compute_mult.to_spirv())))
|
||||
.record(kp.OpTensorSyncLocal([tensor_out]))
|
||||
.eval())
|
||||
|
||||
# 6. Sync tensor data from GPU back to local
|
||||
mgr.eval_tensor_sync_local_def([tensor_out])
|
||||
|
||||
assert tensor_out.data() == [2.0, 4.0, 6.0]
|
||||
assert np.all(tensor_out.numpy() == [2.0, 4.0, 6.0])
|
||||
assert tensor_out.data().tolist() == [2.0, 4.0, 6.0]
|
||||
assert np.all(tensor_out.data() == [2.0, 4.0, 6.0])
|
||||
|
|
|
|||
|
|
@ -7,25 +7,86 @@ import pyshader as ps
|
|||
|
||||
DIRNAME = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
def test_opalgobase_file():
|
||||
"""
|
||||
Test basic OpMult operation
|
||||
"""
|
||||
kp_log = logging.getLogger("kp")
|
||||
|
||||
tensor_in_a = kp.Tensor([2, 2, 2])
|
||||
tensor_in_b = kp.Tensor([1, 2, 3])
|
||||
tensor_out = kp.Tensor([0, 0, 0])
|
||||
# TODO: Add example with file
|
||||
#def test_opalgobase_file():
|
||||
# """
|
||||
# Test basic OpMult operation
|
||||
# """
|
||||
#
|
||||
# tensor_in_a = kp.Tensor([2, 2, 2])
|
||||
# tensor_in_b = kp.Tensor([1, 2, 3])
|
||||
# tensor_out = kp.Tensor([0, 0, 0])
|
||||
#
|
||||
# mgr = kp.Manager()
|
||||
# mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
|
||||
#
|
||||
# shader_path = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp.spv")
|
||||
#
|
||||
# mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shader_path)
|
||||
#
|
||||
# mgr.eval_tensor_sync_local_def([tensor_out])
|
||||
#
|
||||
# assert tensor_out.data() == [2.0, 4.0, 6.0]
|
||||
|
||||
def test_end_to_end():
|
||||
|
||||
mgr = kp.Manager()
|
||||
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
shader_path = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp.spv")
|
||||
tensor_in_a = mgr.tensor([2, 2, 2])
|
||||
tensor_in_b = mgr.tensor([1, 2, 3])
|
||||
tensor_out_a = mgr.tensor([0, 0, 0])
|
||||
tensor_out_b = mgr.tensor([0, 0, 0])
|
||||
|
||||
mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shader_path)
|
||||
params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b]
|
||||
|
||||
mgr.eval_tensor_sync_local_def([tensor_out])
|
||||
shader = """
|
||||
#version 450
|
||||
|
||||
assert tensor_out.data() == [2.0, 4.0, 6.0]
|
||||
layout (local_size_x = 1) in;
|
||||
|
||||
// The input tensors bind index is relative to index in parameter passed
|
||||
layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
|
||||
layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
|
||||
layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
|
||||
layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
|
||||
|
||||
// Kompute supports push constants updated on dispatch
|
||||
layout(push_constant) uniform PushConstants {
|
||||
float val;
|
||||
} push_const;
|
||||
|
||||
// Kompute also supports spec constants on initalization
|
||||
layout(constant_id = 0) const float const_one = 0;
|
||||
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
out_a[index] += in_a[index] * in_b[index];
|
||||
out_b[index] += const_one * push_const.val;
|
||||
}
|
||||
"""
|
||||
|
||||
workgroup = (3, 1, 1)
|
||||
spec_consts = [2]
|
||||
push_consts_a = [2]
|
||||
push_consts_b = [3]
|
||||
|
||||
algo = mgr.algorithm(params, kp.Shader.compile_source(shader), workgroup, spec_consts)
|
||||
|
||||
(mgr.sequence()
|
||||
.record(kp.OpTensorSyncDevice(params))
|
||||
.record(kp.OpAlgoDispatch(algo, push_consts_a))
|
||||
.record(kp.OpAlgoDispatch(algo, push_consts_b))
|
||||
.eval())
|
||||
|
||||
sq = mgr.sequence()
|
||||
sq.eval_async(kp.OpTensorSyncLocal(params))
|
||||
|
||||
sq.eval_await()
|
||||
|
||||
assert tensor_out_a.data().tolist() == [4, 8, 12]
|
||||
assert tensor_out_b.data().tolist() == [10, 10, 10]
|
||||
|
||||
|
||||
def test_shader_str():
|
||||
|
|
@ -47,67 +108,120 @@ void main()
|
|||
}
|
||||
"""
|
||||
|
||||
tensor_in_a = kp.Tensor([2, 2, 2])
|
||||
tensor_in_b = kp.Tensor([1, 2, 3])
|
||||
tensor_out = kp.Tensor([0, 0, 0])
|
||||
|
||||
mgr = kp.Manager()
|
||||
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
spirv = kp.Shader.compile_source(shader)
|
||||
|
||||
mgr.eval_algo_data_def([tensor_in_a, tensor_in_b, tensor_out], spirv)
|
||||
mgr = kp.Manager()
|
||||
|
||||
mgr.eval_tensor_sync_local_def([tensor_out])
|
||||
tensor_in_a = mgr.tensor([2, 2, 2])
|
||||
tensor_in_b = mgr.tensor([1, 2, 3])
|
||||
tensor_out = mgr.tensor([0, 0, 0])
|
||||
|
||||
assert tensor_out.data() == [2.0, 4.0, 6.0]
|
||||
params = [tensor_in_a, tensor_in_b, tensor_out]
|
||||
|
||||
algo = mgr.algorithm(params, spirv)
|
||||
|
||||
(mgr.sequence()
|
||||
.record(kp.OpTensorSyncDevice(params))
|
||||
.record(kp.OpAlgoDispatch(algo))
|
||||
.record(kp.OpTensorSyncLocal(params))
|
||||
.eval())
|
||||
|
||||
assert tensor_out.data().tolist() == [2.0, 4.0, 6.0]
|
||||
|
||||
def test_sequence():
|
||||
"""
|
||||
Test basic OpAlgoBase operation
|
||||
"""
|
||||
mgr = kp.Manager(0, [2])
|
||||
|
||||
tensor_in_a = kp.Tensor([2, 2, 2])
|
||||
tensor_in_b = kp.Tensor([1, 2, 3])
|
||||
tensor_out = kp.Tensor([0, 0, 0])
|
||||
shader = """
|
||||
#version 450
|
||||
layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];};
|
||||
layout(set = 0, binding = 1) buffer tensorRhs {float valuesRhs[];};
|
||||
layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];};
|
||||
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
|
||||
void main()
|
||||
{
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
|
||||
}
|
||||
"""
|
||||
|
||||
shader_path = os.path.abspath(os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp.spv"))
|
||||
mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shader_path)
|
||||
spirv = kp.Shader.compile_source(shader)
|
||||
|
||||
mgr.eval_await_def()
|
||||
mgr = kp.Manager(0)
|
||||
|
||||
seq = mgr.sequence("op")
|
||||
seq.begin()
|
||||
seq.record_tensor_sync_local([tensor_in_a])
|
||||
seq.record_tensor_sync_local([tensor_in_b])
|
||||
seq.record_tensor_sync_local([tensor_out])
|
||||
seq.end()
|
||||
seq.eval()
|
||||
tensor_in_a = mgr.tensor([2, 2, 2])
|
||||
tensor_in_b = mgr.tensor([1, 2, 3])
|
||||
tensor_out = mgr.tensor([0, 0, 0])
|
||||
|
||||
mgr.destroy("op")
|
||||
params = [tensor_in_a, tensor_in_b, tensor_out]
|
||||
|
||||
assert seq.is_init() == False
|
||||
algo = mgr.algorithm(params, spirv)
|
||||
|
||||
assert tensor_out.data() == [2.0, 4.0, 6.0]
|
||||
assert np.all(tensor_out.numpy() == [2.0, 4.0, 6.0])
|
||||
sq = mgr.sequence()
|
||||
|
||||
mgr.destroy(tensor_in_a)
|
||||
mgr.destroy([tensor_in_b, tensor_out])
|
||||
sq.record(kp.OpTensorSyncDevice(params))
|
||||
sq.record(kp.OpAlgoDispatch(algo))
|
||||
sq.record(kp.OpTensorSyncLocal(params))
|
||||
|
||||
sq.eval()
|
||||
|
||||
assert sq.is_init() == True
|
||||
|
||||
sq.destroy()
|
||||
|
||||
assert sq.is_init() == False
|
||||
|
||||
assert tensor_out.data().tolist() == [2.0, 4.0, 6.0]
|
||||
assert np.all(tensor_out.data() == [2.0, 4.0, 6.0])
|
||||
|
||||
tensor_in_a.destroy()
|
||||
tensor_in_b.destroy()
|
||||
tensor_out.destroy()
|
||||
|
||||
assert tensor_in_a.is_init() == False
|
||||
assert tensor_in_b.is_init() == False
|
||||
assert tensor_out.is_init() == False
|
||||
|
||||
def test_pushconsts():
    """
    Test that push constants passed to OpAlgoDispatch reach the shader.

    The shader adds the three push-constant floats to the three elements of
    the bound buffer. The same algorithm is dispatched twice with different
    push constants, so each element should end up as the sum of both values.
    """

    spirv = kp.Shader.compile_source("""
          #version 450
          layout(push_constant) uniform PushConstants {
            float x;
            float y;
            float z;
          } pcs;
          layout (local_size_x = 1) in;
          layout(set = 0, binding = 0) buffer a { float pa[]; };
          void main() {
              pa[0] += pcs.x;
              pa[1] += pcs.y;
              pa[2] += pcs.z;
          }
    """)

    mgr = kp.Manager()

    tensor = mgr.tensor([0, 0, 0])

    algo = mgr.algorithm([tensor], spirv, (1, 1, 1))

    (mgr.sequence()
        .record(kp.OpTensorSyncDevice([tensor]))
        .record(kp.OpAlgoDispatch(algo, [0.1, 0.2, 0.3]))
        .record(kp.OpAlgoDispatch(algo, [0.3, 0.2, 0.1]))
        .record(kp.OpTensorSyncLocal([tensor]))
        .eval())

    # The sums are accumulated in single precision on the device, so compare
    # with a tolerance rather than relying on bit-exact float equality.
    expected = np.array([0.4, 0.4, 0.4], dtype=np.float32)
    assert np.allclose(tensor.data(), expected)
|
||||
|
||||
def test_workgroup():
|
||||
mgr = kp.Manager(0)
|
||||
|
||||
tensor_a = kp.Tensor(np.zeros([16,8]))
|
||||
tensor_b = kp.Tensor(np.zeros([16,8]))
|
||||
|
||||
mgr.rebuild([tensor_a, tensor_b])
|
||||
tensor_a = mgr.tensor(np.zeros([16,8]))
|
||||
tensor_b = mgr.tensor(np.zeros([16,8]))
|
||||
|
||||
@ps.python2shader
|
||||
def compute_shader_wg(gl_idx=("input", "GlobalInvocationId", ps.ivec3),
|
||||
|
|
@ -119,50 +233,17 @@ def test_workgroup():
|
|||
data1[i] = f32(gl_idx.x)
|
||||
data2[i] = f32(gl_idx.y)
|
||||
|
||||
seq = mgr.sequence("new")
|
||||
seq.begin()
|
||||
seq.record_algo_data([tensor_a, tensor_b], compute_shader_wg.to_spirv(), workgroup=(16,8,1))
|
||||
seq.end()
|
||||
seq.eval()
|
||||
algo = mgr.algorithm([tensor_a, tensor_b], compute_shader_wg.to_spirv(), (16,8,1))
|
||||
|
||||
mgr.destroy(seq)
|
||||
(mgr.sequence()
|
||||
.record(kp.OpTensorSyncDevice([tensor_a, tensor_b]))
|
||||
.record(kp.OpAlgoDispatch(algo))
|
||||
.record(kp.OpTensorSyncLocal([tensor_a, tensor_b]))
|
||||
.eval())
|
||||
|
||||
assert seq.is_init() == False
|
||||
|
||||
mgr.eval_tensor_sync_local_def([tensor_a, tensor_b])
|
||||
|
||||
print(tensor_a.numpy())
|
||||
print(tensor_b.numpy())
|
||||
|
||||
assert np.all(tensor_a.numpy() == np.stack([np.arange(16)]*8, axis=1).ravel())
|
||||
assert np.all(tensor_b.numpy() == np.stack([np.arange(8)]*16, axis=0).ravel())
|
||||
|
||||
mgr.destroy([tensor_a, tensor_b])
|
||||
|
||||
assert tensor_a.is_init() == False
|
||||
assert tensor_b.is_init() == False
|
||||
|
||||
|
||||
def test_tensor_rebuild_backwards_compat():
|
||||
"""
|
||||
Test basic OpMult operation
|
||||
"""
|
||||
|
||||
tensor_in_a = kp.Tensor([2, 2, 2])
|
||||
tensor_in_b = kp.Tensor([1, 2, 3])
|
||||
tensor_out = kp.Tensor([0, 0, 0])
|
||||
|
||||
mgr = kp.Manager()
|
||||
|
||||
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
|
||||
|
||||
shader_path = os.path.abspath(os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp.spv"))
|
||||
mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shader_path)
|
||||
mgr.eval_await_def()
|
||||
|
||||
mgr.eval_tensor_sync_local_def([tensor_out])
|
||||
|
||||
assert tensor_out.data() == [2.0, 4.0, 6.0]
|
||||
assert np.all(tensor_out.numpy() == [2.0, 4.0, 6.0])
|
||||
print(tensor_a.data())
|
||||
print(tensor_b.data())
|
||||
|
||||
assert np.all(tensor_a.data() == np.stack([np.arange(16)]*8, axis=1).ravel())
|
||||
assert np.all(tensor_b.data() == np.stack([np.arange(8)]*16, axis=0).ravel())
|
||||
|
||||
|
|
|
|||
|
|
@ -46,45 +46,39 @@ def test_logistic_regression():
|
|||
mgr = kp.Manager(0)
|
||||
|
||||
# First we create input and output tensors for shader
|
||||
tensor_x_i = kp.Tensor([0.0, 1.0, 1.0, 1.0, 1.0])
|
||||
tensor_x_j = kp.Tensor([0.0, 0.0, 0.0, 1.0, 1.0])
|
||||
tensor_x_i = mgr.tensor([0.0, 1.0, 1.0, 1.0, 1.0])
|
||||
tensor_x_j = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0])
|
||||
|
||||
tensor_y = kp.Tensor([0.0, 0.0, 0.0, 1.0, 1.0])
|
||||
tensor_y = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0])
|
||||
|
||||
tensor_w_in = kp.Tensor([0.001, 0.001])
|
||||
tensor_w_out_i = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
tensor_w_out_j = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
tensor_w_in = mgr.tensor([0.001, 0.001])
|
||||
tensor_w_out_i = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
tensor_w_out_j = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
|
||||
tensor_b_in = kp.Tensor([0.0])
|
||||
tensor_b_out = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
tensor_b_in = mgr.tensor([0.0])
|
||||
tensor_b_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
|
||||
tensor_l_out = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
tensor_l_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
|
||||
tensor_m = kp.Tensor([ tensor_y.size() ])
|
||||
tensor_m = mgr.tensor([ tensor_y.size() ])
|
||||
|
||||
# We store them in an array for easier interaction
|
||||
params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
|
||||
tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m]
|
||||
|
||||
mgr.rebuild(params)
|
||||
mgr.sequence().eval(kp.OpTensorSyncDevice(params))
|
||||
|
||||
# Create a managed sequence
|
||||
sq = mgr.sequence()
|
||||
|
||||
# Clear previous operations and begin recording for new operations
|
||||
sq.begin()
|
||||
|
||||
# Record operation to sync memory from local to GPU memory
|
||||
sq.record_tensor_sync_device([tensor_w_in, tensor_b_in])
|
||||
sq.record(kp.OpTensorSyncDevice([tensor_w_in, tensor_b_in]))
|
||||
|
||||
# Record operation to execute GPU shader against all our parameters
|
||||
sq.record_algo_data(params, compute_shader.to_spirv())
|
||||
sq.record(kp.OpAlgoDispatch(mgr.algorithm(params, compute_shader.to_spirv())))
|
||||
|
||||
# Record operation to sync memory from GPU to local memory
|
||||
sq.record_tensor_sync_local([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out])
|
||||
|
||||
# Stop recording operations
|
||||
sq.end()
|
||||
sq.record(kp.OpTensorSyncLocal([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out]))
|
||||
|
||||
ITERATIONS = 100
|
||||
learning_rate = 0.1
|
||||
|
|
|
|||
|
|
@ -1,16 +1,15 @@
|
|||
#pragma once
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Shader.hpp"
|
||||
#include "kompute/shaders/shaderopmult.hpp"
|
||||
#include "kompute/shaders/shaderlogisticregression.hpp"
|
||||
#include "kompute/Manager.hpp"
|
||||
#include "kompute/Sequence.hpp"
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Shader.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
#include "kompute/operations/OpAlgoBase.hpp"
|
||||
#include "kompute/operations/OpAlgoLhsRhsOut.hpp"
|
||||
#include "kompute/operations/OpMult.hpp"
|
||||
#include "kompute/operations/OpTensorCopy.hpp"
|
||||
#include "kompute/operations/OpTensorSyncDevice.hpp"
|
||||
#include "kompute/operations/OpTensorSyncLocal.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/operations/OpAlgoDispatch.hpp"
|
||||
#include "kompute/operations/OpMult.hpp"
|
||||
#include "kompute/Sequence.hpp"
|
||||
#include "kompute/Manager.hpp"
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -4,138 +4,174 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
/**
 * Default constructor: only emits a debug log entry and performs no other
 * work in its body.
 */
Algorithm::Algorithm()
{
    KP_LOG_DEBUG("Kompute Algorithm base constructor");
}
|
||||
|
||||
/**
 * Constructs an Algorithm attached to a device and command buffer.
 *
 * The device, command buffer and specialization constants are always stored.
 * The Vulkan components are only built (through rebuild()) when both a
 * non-empty tensor list and non-empty SPIR-V are supplied.
 *
 * @param device Device stored in mDevice
 * @param commandBuffer Command buffer stored in mCommandBuffer
 * @param tensors Tensors forwarded to rebuild()
 * @param spirv SPIR-V words forwarded to rebuild()
 * @param workgroup Workgroup forwarded to rebuild()
 * @param specializationConstants Constants stored and forwarded to rebuild()
 */
Algorithm::Algorithm(std::shared_ptr<vk::Device> device,
                     std::shared_ptr<vk::CommandBuffer> commandBuffer,
                     const std::vector<std::shared_ptr<Tensor>>& tensors,
                     const std::vector<uint32_t>& spirv,
                     const Workgroup& workgroup,
                     const Constants& specializationConstants)
{
    KP_LOG_DEBUG("Kompute Algorithm Constructor with device");

    this->mDevice = device;
    this->mCommandBuffer = commandBuffer;
    this->mSpecializationConstants = specializationConstants;

    const bool canBuild = !tensors.empty() && !spirv.empty();
    if (!canBuild) {
        // Nothing to build from yet; rebuild() can be called later.
        KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or "
                    "spirv so not rebuilding vulkan components");
        return;
    }

    KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and "
                "spirv size: {}",
                tensors.size(),
                spirv.size());
    this->rebuild(tensors, spirv, workgroup, specializationConstants);
}
|
||||
|
||||
/**
 * Destructor: logs and then delegates all cleanup to destroy().
 */
Algorithm::~Algorithm()
{
    KP_LOG_DEBUG("Kompute Algorithm Destructor started");

    destroy();
}
|
||||
|
||||
void
|
||||
Algorithm::rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm rebuild started");
|
||||
|
||||
this->mTensors = tensors;
|
||||
this->mSpirv = spirv;
|
||||
this->mSpecializationConstants = specializationConstants;
|
||||
this->setWorkgroup(workgroup,
|
||||
this->mTensors.size() ? this->mTensors[0]->size() : 1);
|
||||
|
||||
// Descriptor pool is created first so if available then destroy all before
|
||||
// rebuild
|
||||
if (this->isInit()) {
|
||||
this->destroy();
|
||||
}
|
||||
|
||||
this->createParameters();
|
||||
this->createShaderModule();
|
||||
this->createPipeline();
|
||||
}
|
||||
|
||||
bool
|
||||
Algorithm::isInit()
|
||||
{
|
||||
return this->mPipeline && this->mPipelineCache && this->mPipelineLayout &&
|
||||
this->mDescriptorPool && this->mDescriptorSet &&
|
||||
this->mDescriptorSetLayout && this->mShaderModule;
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::destroy()
|
||||
{
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_ERROR(
|
||||
"Kompute Algorithm destructor reached with null Device pointer");
|
||||
KP_LOG_WARN("Kompute Algorithm destroy function reached with null "
|
||||
"Device pointer");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->mFreePipeline) {
|
||||
if (this->mFreePipeline && this->mPipeline) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline");
|
||||
if (!this->mPipeline) {
|
||||
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline but it is null");
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mPipeline,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPipeline = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreePipelineCache) {
|
||||
if (this->mFreePipelineCache && this->mPipelineCache) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache");
|
||||
if (!this->mPipelineCache) {
|
||||
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline cache but it is null");
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline cache but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mPipelineCache,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPipelineCache = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreePipelineLayout) {
|
||||
if (this->mFreePipelineLayout && this->mPipelineLayout) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout");
|
||||
if (!this->mPipelineLayout) {
|
||||
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline layout but it is null");
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline layout but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mPipelineLayout,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPipelineLayout = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreeShaderModule) {
|
||||
if (this->mFreeShaderModule && this->mShaderModule) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying shader module");
|
||||
if (!this->mShaderModule) {
|
||||
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy shader "
|
||||
"module but it is null");
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy shader "
|
||||
"module but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mShaderModule,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mShaderModule = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreeDescriptorSet) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Freeing Descriptor Set");
|
||||
if (!this->mDescriptorSet) {
|
||||
KP_LOG_ERROR(
|
||||
"Kompute Algorithm Error requested to free descriptor set");
|
||||
}
|
||||
this->mDevice->freeDescriptorSets(
|
||||
*this->mDescriptorPool, 1, this->mDescriptorSet.get());
|
||||
}
|
||||
// We don't call freeDescriptorSets because the descriptor pool is not created
|
||||
// with VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; see
|
||||
// https://www.khronos.org/registry/vulkan/specs/1.0/html/vkspec.html#VUID-vkFreeDescriptorSets-descriptorPool-00312
|
||||
// if (this->mFreeDescriptorSet && this->mDescriptorSet) {
|
||||
// KP_LOG_DEBUG("Kompute Algorithm Freeing Descriptor Set");
|
||||
// if (!this->mDescriptorSet) {
|
||||
// KP_LOG_WARN(
|
||||
// "Kompute Algorithm Error requested to free descriptor set");
|
||||
// }
|
||||
// this->mDevice->freeDescriptorSets(
|
||||
// *this->mDescriptorPool, 1, this->mDescriptorSet.get());
|
||||
// this->mDescriptorSet = nullptr;
|
||||
//}
|
||||
|
||||
if (this->mFreeDescriptorSetLayout) {
|
||||
if (this->mFreeDescriptorSetLayout && this->mDescriptorSetLayout) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout");
|
||||
if (!this->mDescriptorSetLayout) {
|
||||
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
|
||||
"descriptor set layout but it is null");
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"descriptor set layout but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mDescriptorSetLayout,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mDescriptorSetLayout = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreeDescriptorPool) {
|
||||
if (this->mFreeDescriptorPool && this->mDescriptorPool) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Pool");
|
||||
if (!this->mDescriptorPool) {
|
||||
KP_LOG_ERROR("Kompute Algorithm Error requested to destroy "
|
||||
"descriptor pool but it is null");
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"descriptor pool but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mDescriptorPool,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mDescriptorPool = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::init(const std::vector<uint32_t>& shaderFileData,
|
||||
std::vector<std::shared_ptr<Tensor>> tensorParams)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm init started");
|
||||
|
||||
this->createParameters(tensorParams);
|
||||
this->createShaderModule(shaderFileData);
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : tensorParams) {
|
||||
this->mSpecializationConstants.push_back(tensor->size());
|
||||
}
|
||||
|
||||
this->createPipeline();
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::createDescriptorPool()
|
||||
{}
|
||||
|
||||
void
|
||||
Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
|
||||
Algorithm::createParameters()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm createParameters started");
|
||||
|
||||
std::vector<vk::DescriptorPoolSize> descriptorPoolSizes = {
|
||||
vk::DescriptorPoolSize(
|
||||
vk::DescriptorType::eStorageBuffer,
|
||||
static_cast<uint32_t>(tensorParams.size()) // Descriptor count
|
||||
static_cast<uint32_t>(this->mTensors.size()) // Descriptor count
|
||||
)
|
||||
};
|
||||
|
||||
|
|
@ -152,7 +188,7 @@ Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
|
|||
this->mFreeDescriptorPool = true;
|
||||
|
||||
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetBindings;
|
||||
for (size_t i = 0; i < tensorParams.size(); i++) {
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
descriptorSetBindings.push_back(
|
||||
vk::DescriptorSetLayoutBinding(i, // Binding index
|
||||
vk::DescriptorType::eStorageBuffer,
|
||||
|
|
@ -184,11 +220,11 @@ Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
|
|||
this->mFreeDescriptorSet = true;
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets");
|
||||
for (size_t i = 0; i < tensorParams.size(); i++) {
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
|
||||
|
||||
vk::DescriptorBufferInfo descriptorBufferInfo =
|
||||
tensorParams[i]->constructDescriptorBufferInfo();
|
||||
this->mTensors[i]->constructDescriptorBufferInfo();
|
||||
|
||||
computeWriteDescriptorSets.push_back(
|
||||
vk::WriteDescriptorSet(*this->mDescriptorSet,
|
||||
|
|
@ -207,17 +243,17 @@ Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
|
|||
}
|
||||
|
||||
void
|
||||
Algorithm::createShaderModule(const std::vector<uint32_t>& shaderFileData)
|
||||
Algorithm::createShaderModule()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm createShaderModule started");
|
||||
|
||||
vk::ShaderModuleCreateInfo shaderModuleInfo(
|
||||
vk::ShaderModuleCreateFlags(),
|
||||
sizeof(uint32_t) * shaderFileData.size(),
|
||||
shaderFileData.data());
|
||||
vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(),
|
||||
sizeof(uint32_t) *
|
||||
this->mSpirv.size(),
|
||||
this->mSpirv.data());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}",
|
||||
shaderFileData.size());
|
||||
this->mSpirv.size());
|
||||
this->mFreeShaderModule = true;
|
||||
this->mShaderModule = std::make_shared<vk::ShaderModule>();
|
||||
this->mDevice->createShaderModule(
|
||||
|
|
@ -246,14 +282,14 @@ Algorithm::createPipeline()
|
|||
|
||||
for (uint32_t i = 0; i < this->mSpecializationConstants.size(); i++) {
|
||||
vk::SpecializationMapEntry specializationEntry(
|
||||
static_cast<uint32_t>(i),
|
||||
static_cast<uint32_t>(sizeof(float) * i),
|
||||
sizeof(float));
|
||||
static_cast<uint32_t>(i),
|
||||
static_cast<uint32_t>(sizeof(float) * i),
|
||||
sizeof(float));
|
||||
|
||||
specializationEntries.push_back(specializationEntry);
|
||||
}
|
||||
|
||||
// This passes ownership of the memory so we remove ownership from
|
||||
// This passes ownership of the memory so we remove ownership from
|
||||
// specialization container by using "transferDataOwnership"
|
||||
vk::SpecializationInfo specializationInfo(
|
||||
static_cast<uint32_t>(specializationEntries.size()),
|
||||
|
|
@ -289,32 +325,109 @@ Algorithm::createPipeline()
|
|||
throw std::runtime_error("Failed to create pipeline result: " +
|
||||
vk::to_string(pipelineResult.result));
|
||||
}
|
||||
|
||||
vk::Pipeline& pipeline = pipelineResult.value;
|
||||
this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
|
||||
this->mFreePipeline = true;
|
||||
#else
|
||||
vk::Pipeline pipelineResult =
|
||||
vk::Pipeline pipeline =
|
||||
this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo);
|
||||
this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
|
||||
this->mFreePipeline = true;
|
||||
#endif
|
||||
|
||||
this->mFreePipeline = true;
|
||||
this->mPipeline = std::make_shared<vk::Pipeline>(pipelineResult);
|
||||
// TODO: Update to consistent
|
||||
// this->mPipeline = std::make_shared<vk::Pipeline>();
|
||||
// this->mDevice->createComputePipelines(
|
||||
// *this->mPipelineCache, 1, &pipelineInfo, nullptr,
|
||||
// this->mPipeline.get());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm Create Pipeline Success");
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::recordDispatch(uint32_t x, uint32_t y, uint32_t z)
|
||||
Algorithm::bindCore(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm calling record dispatch");
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding pipeline");
|
||||
|
||||
this->mCommandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute,
|
||||
*this->mPipeline);
|
||||
commandBuffer.bindPipeline(vk::PipelineBindPoint::eCompute,
|
||||
*this->mPipeline);
|
||||
|
||||
this->mCommandBuffer->bindDescriptorSets(vk::PipelineBindPoint::eCompute,
|
||||
*this->mPipelineLayout,
|
||||
0, // First set
|
||||
*this->mDescriptorSet,
|
||||
nullptr // Dispatcher
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding descriptor sets");
|
||||
|
||||
commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute,
|
||||
*this->mPipelineLayout,
|
||||
0, // First set
|
||||
*this->mDescriptorSet,
|
||||
nullptr // Dispatcher
|
||||
);
|
||||
}
|
||||
|
||||
this->mCommandBuffer->dispatch(x, y, z);
|
||||
void
|
||||
Algorithm::bindPush(const vk::CommandBuffer& commandBuffer,
|
||||
const Constants& pushConstants)
|
||||
{
|
||||
if (pushConstants.size()) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}",
|
||||
pushConstants.size());
|
||||
|
||||
commandBuffer.pushConstants(*this->mPipelineLayout,
|
||||
vk::ShaderStageFlagBits::eCompute,
|
||||
0,
|
||||
pushConstants.size() * sizeof(float),
|
||||
pushConstants.data());
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::recordDispatch(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm recording dispatch");
|
||||
|
||||
commandBuffer.dispatch(
|
||||
this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]);
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize)
|
||||
{
|
||||
|
||||
KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size");
|
||||
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
if (workgroup[0] > 0) {
|
||||
// If at least the x value is provided we use mainly the parameters
|
||||
// provided
|
||||
this->mWorkgroup = { workgroup[0],
|
||||
workgroup[1] > 0 ? workgroup[1] : 1,
|
||||
workgroup[2] > 0 ? workgroup[2] : 1 };
|
||||
} else {
|
||||
this->mWorkgroup = { minSize, 1, 1 };
|
||||
}
|
||||
|
||||
KP_LOG_INFO("Kompute OpAlgoCreate set dispatch size X: {}, Y: {}, Z: {}",
|
||||
this->mWorkgroup[0],
|
||||
this->mWorkgroup[1],
|
||||
this->mWorkgroup[2]);
|
||||
}
|
||||
|
||||
/**
 * @return Reference to the workgroup currently stored in this algorithm.
 */
const Workgroup&
Algorithm::getWorkgroup()
{
    return mWorkgroup;
}
|
||||
|
||||
/**
 * @return Reference to the specialization constants currently stored in this
 * algorithm.
 */
const Constants&
Algorithm::getSpecializationConstants()
{
    return mSpecializationConstants;
}
|
||||
|
||||
const std::vector<std::shared_ptr<Tensor>>&
|
||||
Algorithm::getTensors()
|
||||
{
|
||||
return this->mTensors;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -151,8 +151,7 @@ if(NOT KOMPUTE_OPT_DISABLE_SHADER_UTILS)
|
|||
# HLSL
|
||||
# glslang includes OGLCompiler, OSDependent, MachineIndependent
|
||||
glslang
|
||||
SPIRV
|
||||
glslang-default-resource-limits)
|
||||
SPIRV)
|
||||
else()
|
||||
find_package(glslang CONFIG REQUIRED)
|
||||
|
||||
|
|
@ -164,9 +163,8 @@ if(NOT KOMPUTE_OPT_DISABLE_SHADER_UTILS)
|
|||
# Not including hlsl support
|
||||
# glslang::HLSL
|
||||
# Adding explicit dependencies to match above
|
||||
glslang
|
||||
SPIRV
|
||||
glslang-default-resource-limits)
|
||||
glslang::glslang
|
||||
glslang::SPIRV)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
|
|
|||
271
src/Manager.cpp
271
src/Manager.cpp
|
|
@ -31,26 +31,34 @@ Manager::Manager()
|
|||
Manager::Manager(uint32_t physicalDeviceIndex,
|
||||
const std::vector<uint32_t>& familyQueueIndices)
|
||||
{
|
||||
this->mPhysicalDeviceIndex = physicalDeviceIndex;
|
||||
this->mManageResources = true;
|
||||
|
||||
this->createInstance();
|
||||
this->createDevice(familyQueueIndices);
|
||||
this->createDevice(familyQueueIndices, physicalDeviceIndex);
|
||||
}
|
||||
|
||||
Manager::Manager(std::shared_ptr<vk::Instance> instance,
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
uint32_t physicalDeviceIndex)
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
this->mManageResources = false;
|
||||
|
||||
this->mInstance = instance;
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
this->mPhysicalDeviceIndex = physicalDeviceIndex;
|
||||
}
|
||||
|
||||
/**
 * Destructor: logs and then delegates all cleanup to destroy().
 */
Manager::~Manager()
{
    KP_LOG_DEBUG("Kompute Manager Destructor started");
    destroy();
}
|
||||
|
||||
void
|
||||
Manager::destroy()
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager destroy() started");
|
||||
|
||||
if (this->mDevice == nullptr) {
|
||||
KP_LOG_ERROR(
|
||||
|
|
@ -58,24 +66,34 @@ Manager::~Manager()
|
|||
return;
|
||||
}
|
||||
|
||||
if (this->mManagedSequences.size()) {
|
||||
if (this->mManageResources && this->mManagedSequences.size()) {
|
||||
KP_LOG_DEBUG("Kompute Manager explicitly running destructor for "
|
||||
"managed sequences");
|
||||
for (const std::pair<std::string, std::shared_ptr<Sequence>>& sqPair :
|
||||
this->mManagedSequences) {
|
||||
sqPair.second->freeMemoryDestroyGPUResources();
|
||||
for (const std::weak_ptr<Sequence>& weakSq : this->mManagedSequences) {
|
||||
if (std::shared_ptr<Sequence> sq = weakSq.lock()) {
|
||||
sq->destroy();
|
||||
}
|
||||
}
|
||||
this->mManagedSequences.clear();
|
||||
}
|
||||
|
||||
if (this->mManagedTensors.size()) {
|
||||
KP_LOG_DEBUG("Kompute Manager explicitly freeing tensors");
|
||||
for (const std::shared_ptr<Tensor>& tensor : this->mManagedTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
KP_LOG_ERROR("Kompute Manager attempted to free managed tensor "
|
||||
"but not tensor is not initialised");
|
||||
if (this->mManageResources && this->mManagedAlgorithms.size()) {
|
||||
KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
|
||||
for (const std::weak_ptr<Algorithm>& weakAlgorithm :
|
||||
this->mManagedAlgorithms) {
|
||||
if (std::shared_ptr<Algorithm> algorithm = weakAlgorithm.lock()) {
|
||||
algorithm->destroy();
|
||||
}
|
||||
}
|
||||
this->mManagedAlgorithms.clear();
|
||||
}
|
||||
|
||||
if (this->mManageResources && this->mManagedTensors.size()) {
|
||||
KP_LOG_DEBUG("Kompute Manager explicitly freeing tensors");
|
||||
for (const std::weak_ptr<Tensor>& weakTensor : this->mManagedTensors) {
|
||||
if (std::shared_ptr<Tensor> tensor = weakTensor.lock()) {
|
||||
tensor->destroy();
|
||||
}
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
this->mManagedTensors.clear();
|
||||
}
|
||||
|
|
@ -84,6 +102,7 @@ Manager::~Manager()
|
|||
KP_LOG_INFO("Destroying device");
|
||||
this->mDevice->destroy(
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mDevice = nullptr;
|
||||
KP_LOG_DEBUG("Kompute Manager Destroyed Device");
|
||||
}
|
||||
|
||||
|
|
@ -106,39 +125,11 @@ Manager::~Manager()
|
|||
if (this->mFreeInstance) {
|
||||
this->mInstance->destroy(
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mInstance = nullptr;
|
||||
KP_LOG_DEBUG("Kompute Manager Destroyed Instance");
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Manager::sequence(std::string sequenceName, uint32_t queueIndex)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager sequence() with sequenceName: {} "
|
||||
"and queueIndex: {}",
|
||||
sequenceName,
|
||||
queueIndex);
|
||||
|
||||
std::shared_ptr<Sequence> sq = nullptr;
|
||||
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator found =
|
||||
this->mManagedSequences.find(sequenceName);
|
||||
|
||||
if (found == this->mManagedSequences.end()) {
|
||||
std::shared_ptr<Sequence> sq =
|
||||
std::make_shared<Sequence>(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueues[queueIndex],
|
||||
this->mComputeQueueFamilyIndices[queueIndex]);
|
||||
sq->init();
|
||||
|
||||
this->mManagedSequences.insert({ sequenceName, sq });
|
||||
|
||||
return sq;
|
||||
} else {
|
||||
return found->second;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::createInstance()
|
||||
{
|
||||
|
|
@ -225,7 +216,31 @@ Manager::createInstance()
|
|||
}
|
||||
|
||||
void
|
||||
Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
|
||||
Manager::clear()
|
||||
{
|
||||
if (this->mManageResources) {
|
||||
this->mManagedTensors.erase(
|
||||
std::remove_if(begin(this->mManagedTensors),
|
||||
end(this->mManagedTensors),
|
||||
[](std::weak_ptr<Tensor> t) { return t.expired(); }),
|
||||
end(this->mManagedTensors));
|
||||
this->mManagedAlgorithms.erase(
|
||||
std::remove_if(
|
||||
begin(this->mManagedAlgorithms),
|
||||
end(this->mManagedAlgorithms),
|
||||
[](std::weak_ptr<Algorithm> t) { return t.expired(); }),
|
||||
end(this->mManagedAlgorithms));
|
||||
this->mManagedSequences.erase(
|
||||
std::remove_if(begin(this->mManagedSequences),
|
||||
end(this->mManagedSequences),
|
||||
[](std::weak_ptr<Sequence> t) { return t.expired(); }),
|
||||
end(this->mManagedSequences));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
|
||||
uint32_t physicalDeviceIndex)
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager creating Device");
|
||||
|
|
@ -233,7 +248,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
|
|||
if (this->mInstance == nullptr) {
|
||||
throw std::runtime_error("Kompute Manager instance is null");
|
||||
}
|
||||
if (this->mPhysicalDeviceIndex < 0) {
|
||||
if (physicalDeviceIndex < 0) {
|
||||
throw std::runtime_error(
|
||||
"Kompute Manager physical device index not provided");
|
||||
}
|
||||
|
|
@ -243,8 +258,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
|
|||
std::vector<vk::PhysicalDevice> physicalDevices =
|
||||
this->mInstance->enumeratePhysicalDevices();
|
||||
|
||||
vk::PhysicalDevice physicalDevice =
|
||||
physicalDevices[this->mPhysicalDeviceIndex];
|
||||
vk::PhysicalDevice physicalDevice = physicalDevices[physicalDeviceIndex];
|
||||
|
||||
this->mPhysicalDevice =
|
||||
std::make_shared<vk::PhysicalDevice>(physicalDevice);
|
||||
|
|
@ -253,7 +267,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
|
|||
physicalDevice.getProperties();
|
||||
|
||||
KP_LOG_INFO("Using physical device index {} found {}",
|
||||
this->mPhysicalDeviceIndex,
|
||||
physicalDeviceIndex,
|
||||
physicalDeviceProperties.deviceName);
|
||||
|
||||
if (!familyQueueIndices.size()) {
|
||||
|
|
@ -329,150 +343,55 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
|
|||
}
|
||||
|
||||
std::shared_ptr<Tensor>
|
||||
Manager::tensor(
|
||||
const std::vector<float>& data,
|
||||
Tensor::TensorTypes tensorType,
|
||||
bool syncDataToGPU)
|
||||
Manager::tensor(const std::vector<float>& data, Tensor::TensorTypes tensorType)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager tensor triggered");
|
||||
KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager creating new tensor shared ptr");
|
||||
std::shared_ptr<Tensor> tensor =
|
||||
std::make_shared<Tensor>(kp::Tensor(data, tensorType));
|
||||
std::shared_ptr<Tensor> tensor{ new kp::Tensor(
|
||||
this->mPhysicalDevice, this->mDevice, data, tensorType) };
|
||||
|
||||
tensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
if (syncDataToGPU) {
|
||||
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
|
||||
if (this->mManageResources) {
|
||||
this->mManagedTensors.push_back(tensor);
|
||||
}
|
||||
this->mManagedTensors.insert(tensor);
|
||||
|
||||
return tensor;
|
||||
}
|
||||
|
||||
void
|
||||
Manager::rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
bool syncDataToGPU)
|
||||
std::shared_ptr<Algorithm>
|
||||
Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild triggered");
|
||||
for (std::shared_ptr<Tensor> tensor : tensors) {
|
||||
|
||||
// False syncData to run all tensors at once instead one by one
|
||||
this->rebuild(tensor, false);
|
||||
KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
|
||||
|
||||
std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
|
||||
this->mDevice, tensors, spirv, workgroup, specializationConstants) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
this->mManagedAlgorithms.push_back(algorithm);
|
||||
}
|
||||
|
||||
if (syncDataToGPU) {
|
||||
this->evalOpDefault<OpTensorSyncDevice>(tensors);
|
||||
}
|
||||
return algorithm;
|
||||
}
|
||||
|
||||
void
|
||||
Manager::rebuild(std::shared_ptr<kp::Tensor> tensor,
|
||||
bool syncDataToGPU)
|
||||
std::shared_ptr<Sequence>
|
||||
Manager::sequence(uint32_t queueIndex)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Tensor triggered");
|
||||
KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", queueIndex);
|
||||
|
||||
if (tensor->isInit()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
std::shared_ptr<Sequence> sq{ new kp::Sequence(
|
||||
this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueues[queueIndex],
|
||||
this->mComputeQueueFamilyIndices[queueIndex]) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
this->mManagedSequences.push_back(sq);
|
||||
}
|
||||
|
||||
tensor->init(this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
std::set<std::shared_ptr<Tensor>>::iterator it =
|
||||
this->mManagedTensors.find(tensor);
|
||||
if (it == this->mManagedTensors.end()) {
|
||||
this->mManagedTensors.insert(tensor);
|
||||
}
|
||||
|
||||
if (syncDataToGPU) {
|
||||
this->evalOpDefault<OpTensorSyncDevice>({ tensor });
|
||||
}
|
||||
return sq;
|
||||
}
|
||||
|
||||
void
|
||||
Manager::destroy(std::shared_ptr<kp::Tensor> tensor)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Tensor triggered");
|
||||
|
||||
if (tensor->isInit()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
|
||||
// TODO: Confirm not limiting destroying tensors owned by this manager allowed
|
||||
std::set<std::shared_ptr<Tensor>>::iterator it =
|
||||
this->mManagedTensors.find(tensor);
|
||||
|
||||
if (it != this->mManagedTensors.end()) {
|
||||
this->mManagedTensors.erase(tensor);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::destroy(std::vector<std::shared_ptr<kp::Tensor>> tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Tensor triggered");
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : tensors) {
|
||||
this->destroy(tensor);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::destroy(std::vector<std::shared_ptr<kp::Sequence>> sequences)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Sequence triggered");
|
||||
|
||||
for (std::shared_ptr<kp::Sequence> sequence : sequences) {
|
||||
this->destroy(sequence);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::destroy(std::shared_ptr<kp::Sequence> sequence)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Sequence triggered");
|
||||
|
||||
// Inefficient but required to delete by value
|
||||
// Depending on the amount of named sequences created may be worth creating
|
||||
// a set to ensure efficient delete.
|
||||
for (std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator it = this->mManagedSequences.begin(); it != this->mManagedSequences.end(); it++) {
|
||||
if (it->second == sequence) {
|
||||
this->mManagedSequences.erase(it);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (sequence->isInit()) {
|
||||
sequence->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::destroy(const std::string& sequenceName)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Sequence triggered");
|
||||
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator
|
||||
found = this->mManagedSequences.find(sequenceName);
|
||||
|
||||
if (found != this->mManagedSequences.end()) {
|
||||
// We don't call destroy(sequence) as erasing sequence by name more efficient
|
||||
if (found->second->isInit()) {
|
||||
found->second->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
this->mManagedSequences.erase(sequenceName);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::destroy(const std::vector<std::string>& sequenceNames)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager rebuild Sequence triggered");
|
||||
|
||||
for (const std::string& sequenceName : sequenceNames) {
|
||||
this->destroy(sequenceName);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,176 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/operations/OpAlgoBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpAlgoBase::OpAlgoBase()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase constructor base");
|
||||
}
|
||||
|
||||
OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const Workgroup& komputeWorkgroup,
|
||||
const Constants& specializationConstants)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}",
|
||||
tensors.size());
|
||||
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
if (komputeWorkgroup[0] > 0) {
|
||||
// If at least the x value is provided we use mainly the parameters
|
||||
// provided
|
||||
this->mKomputeWorkgroup = {
|
||||
komputeWorkgroup[0],
|
||||
komputeWorkgroup[1] > 0 ? komputeWorkgroup[1] : 1,
|
||||
komputeWorkgroup[2] > 0 ? komputeWorkgroup[2] : 1
|
||||
};
|
||||
} else {
|
||||
this->mKomputeWorkgroup = { tensors[0]->size(), 1, 1 };
|
||||
}
|
||||
KP_LOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}",
|
||||
this->mKomputeWorkgroup[0],
|
||||
this->mKomputeWorkgroup[1],
|
||||
this->mKomputeWorkgroup[2]);
|
||||
|
||||
this->mAlgorithm = std::make_shared<Algorithm>(device, commandBuffer, specializationConstants);
|
||||
}
|
||||
|
||||
OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::string shaderFilePath,
|
||||
const Workgroup& komputeWorkgroup,
|
||||
const Constants& specializationConstants)
|
||||
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup, specializationConstants)
|
||||
{
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}",
|
||||
shaderFilePath);
|
||||
|
||||
this->mShaderFilePath = shaderFilePath;
|
||||
}
|
||||
|
||||
OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& shaderDataRaw,
|
||||
const Workgroup& komputeWorkgroup,
|
||||
const Constants& specializationConstants)
|
||||
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup, specializationConstants)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw "
|
||||
"data length: {}",
|
||||
shaderDataRaw.size());
|
||||
|
||||
this->mShaderDataRaw = shaderDataRaw;
|
||||
}
|
||||
|
||||
OpAlgoBase::~OpAlgoBase()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoBase::init()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase init called");
|
||||
|
||||
if (this->mTensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoBase called with less than 1 tensor");
|
||||
}
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoBase validation failed; all tensor parameters "
|
||||
"must be initialised.");
|
||||
}
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase fetching spirv data");
|
||||
|
||||
std::vector<uint32_t> shaderFileData = this->fetchSpirvBinaryData();
|
||||
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase Initialising algorithm component");
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoBase::record()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
tensor->recordBufferMemoryBarrier(
|
||||
this->mCommandBuffer,
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
}
|
||||
|
||||
this->mAlgorithm->recordDispatch(this->mKomputeWorkgroup[0],
|
||||
this->mKomputeWorkgroup[1],
|
||||
this->mKomputeWorkgroup[2]);
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoBase::preEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoBase::postEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase postSubmit called");
|
||||
}
|
||||
|
||||
std::vector<uint32_t>
|
||||
OpAlgoBase::fetchSpirvBinaryData()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase Running fetchSpirvBinaryData");
|
||||
|
||||
if (this->mShaderFilePath.size()) {
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase Reading data from file path");
|
||||
|
||||
std::ifstream fileStream(this->mShaderFilePath,
|
||||
std::ios::binary | std::ios::in |
|
||||
std::ios::ate);
|
||||
|
||||
if (!fileStream.good()) {
|
||||
throw std::runtime_error("Error reading file: " +
|
||||
this->mShaderFilePath);
|
||||
}
|
||||
|
||||
size_t shaderFileSize = fileStream.tellg();
|
||||
fileStream.seekg(0, std::ios::beg);
|
||||
char* shaderDataRaw = new char[shaderFileSize];
|
||||
fileStream.read(shaderDataRaw, shaderFileSize);
|
||||
fileStream.close();
|
||||
|
||||
KP_LOG_WARN("Kompute OpAlgoBase fetched {} bytes", shaderFileSize);
|
||||
|
||||
return std::vector<uint32_t>((uint32_t*)shaderDataRaw, (uint32_t*)(shaderDataRaw + shaderFileSize));
|
||||
} else if (this->mShaderDataRaw.size()) {
|
||||
KP_LOG_DEBUG("Kompute OpAlgoBase Reading data from data provided");
|
||||
return this->mShaderDataRaw;
|
||||
} else {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither "
|
||||
"filepath nor data provided");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
54
src/OpAlgoDispatch.cpp
Normal file
54
src/OpAlgoDispatch.cpp
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/operations/OpAlgoDispatch.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
|
||||
const kp::Constants& pushConstants)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
|
||||
|
||||
this->mAlgorithm = algorithm;
|
||||
this->mPushConstants = pushConstants;
|
||||
}
|
||||
|
||||
OpAlgoDispatch::~OpAlgoDispatch()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
for (const std::shared_ptr<Tensor>& tensor :
|
||||
this->mAlgorithm->getTensors()) {
|
||||
tensor->recordBufferMemoryBarrier(
|
||||
commandBuffer,
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
}
|
||||
|
||||
this->mAlgorithm->bindCore(commandBuffer);
|
||||
this->mAlgorithm->bindPush(commandBuffer, this->mPushConstants);
|
||||
this->mAlgorithm->recordDispatch(commandBuffer);
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoDispatch::preEval(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoDispatch::postEval(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch postSubmit called");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,122 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/operations/OpAlgoLhsRhsOut.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpAlgoLhsRhsOut::OpAlgoLhsRhsOut()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base");
|
||||
}
|
||||
|
||||
OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
const Workgroup& komputeWorkgroup)
|
||||
// The inheritance is initialised with the copyOutputData to false given that
|
||||
// this depencendant class handles the transfer of data via staging buffers in
|
||||
// a granular way.
|
||||
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params");
|
||||
}
|
||||
|
||||
OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoLhsRhsOut::init()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut init called");
|
||||
|
||||
if (this->mTensors.size() < 3) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoLhsRhsOut called with less than 1 tensor");
|
||||
} else if (this->mTensors.size() > 3) {
|
||||
KP_LOG_WARN(
|
||||
"Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors");
|
||||
}
|
||||
|
||||
this->mTensorLHS = this->mTensors[0];
|
||||
this->mTensorRHS = this->mTensors[1];
|
||||
this->mTensorOutput = this->mTensors[2];
|
||||
|
||||
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
|
||||
this->mTensorOutput->isInit())) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. "
|
||||
"LHS: " +
|
||||
std::to_string(this->mTensorLHS->isInit()) +
|
||||
" RHS: " + std::to_string(this->mTensorRHS->isInit()) +
|
||||
" Output: " + std::to_string(this->mTensorOutput->isInit()));
|
||||
}
|
||||
|
||||
if (!(this->mTensorLHS->size() == this->mTensorRHS->size() &&
|
||||
this->mTensorRHS->size() == this->mTensorOutput->size())) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size "
|
||||
"LHS: " +
|
||||
std::to_string(this->mTensorLHS->size()) +
|
||||
" RHS: " + std::to_string(this->mTensorRHS->size()) +
|
||||
" Output: " + std::to_string(this->mTensorOutput->size()));
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data");
|
||||
|
||||
std::vector<uint32_t> shaderFileData = this->fetchSpirvBinaryData();
|
||||
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component");
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoLhsRhsOut::record()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
this->mTensorLHS->recordBufferMemoryBarrier(
|
||||
this->mCommandBuffer,
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
this->mTensorRHS->recordBufferMemoryBarrier(
|
||||
this->mCommandBuffer,
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
|
||||
this->mAlgorithm->recordDispatch(this->mKomputeWorkgroup[0],
|
||||
this->mKomputeWorkgroup[1],
|
||||
this->mKomputeWorkgroup[2]);
|
||||
|
||||
// Barrier to ensure the shader code is executed before buffer read
|
||||
this->mTensorOutput->recordBufferMemoryBarrier(
|
||||
this->mCommandBuffer,
|
||||
vk::AccessFlagBits::eShaderWrite,
|
||||
vk::AccessFlagBits::eTransferRead,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eTransfer);
|
||||
|
||||
if (this->mTensorOutput->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensorOutput->recordCopyFromDeviceToStaging(this->mCommandBuffer,
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoLhsRhsOut::postEval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called");
|
||||
|
||||
this->mTensorOutput->mapDataFromHostMemory();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -3,18 +3,16 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
OpTensorCopy::OpTensorCopy()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy constructor base");
|
||||
}
|
||||
|
||||
OpTensorCopy::OpTensorCopy(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors)
|
||||
OpTensorCopy::OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy constructor with params");
|
||||
|
||||
this->mTensors = tensors;
|
||||
|
||||
if (this->mTensors.size() < 2) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorCopy called with less than 2 tensor");
|
||||
}
|
||||
}
|
||||
|
||||
OpTensorCopy::~OpTensorCopy()
|
||||
|
|
@ -23,48 +21,25 @@ OpTensorCopy::~OpTensorCopy()
|
|||
}
|
||||
|
||||
void
|
||||
OpTensorCopy::init()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy init called");
|
||||
|
||||
if (this->mTensors.size() < 2) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorCopy called with less than 2 tensor");
|
||||
}
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorCopy tensor parameter has not been initialized");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
|
||||
throw std::runtime_error("Kompute OpTensorCopy tensor parameter is "
|
||||
"of TensorTypes::eStorage and hence "
|
||||
"cannot be used to receive or pass data.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCopy::record()
|
||||
OpTensorCopy::record(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy record called");
|
||||
|
||||
// We iterate from the second tensor onwards and record a copy to all
|
||||
for (size_t i = 1; i < this->mTensors.size(); i++) {
|
||||
this->mTensors[i]->recordCopyFrom(
|
||||
this->mCommandBuffer, this->mTensors[0], false);
|
||||
commandBuffer, this->mTensors[0], false);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCopy::preEval()
|
||||
OpTensorCopy::preEval(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCopy::postEval()
|
||||
OpTensorCopy::postEval(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy postEval called");
|
||||
|
||||
|
|
|
|||
|
|
@ -1,69 +1,43 @@
|
|||
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpTensorSyncDevice.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpTensorSyncDevice::OpTensorSyncDevice()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor base");
|
||||
}
|
||||
|
||||
OpTensorSyncDevice::OpTensorSyncDevice(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors)
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor with params");
|
||||
|
||||
if (tensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncDevice called with less than 1 tensor");
|
||||
}
|
||||
|
||||
this->mTensors = tensors;
|
||||
}
|
||||
|
||||
OpTensorSyncDevice::~OpTensorSyncDevice()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice destructor started");
|
||||
|
||||
this->mTensors.clear();
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncDevice::init()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice init called");
|
||||
|
||||
if (this->mTensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncDevice called with less than 1 tensor");
|
||||
}
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
throw std::runtime_error("Kompute OpTensorSyncDevice: Tensor param "
|
||||
"has not been initialized");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
|
||||
KP_LOG_WARN(
|
||||
"Kompute OpTensorSyncLocal tensor parameter is of type "
|
||||
"TensorTypes::eStorage and hence cannot be used to receive or "
|
||||
"pass data.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncDevice::record()
|
||||
OpTensorSyncDevice::record(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice record called");
|
||||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensors[i]->recordCopyFromStagingToDevice(
|
||||
this->mCommandBuffer, false);
|
||||
this->mTensors[i]->recordCopyFromStagingToDevice(commandBuffer,
|
||||
false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncDevice::preEval()
|
||||
OpTensorSyncDevice::preEval(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice preEval called");
|
||||
|
||||
|
|
@ -76,7 +50,7 @@ OpTensorSyncDevice::preEval()
|
|||
}
|
||||
|
||||
void
|
||||
OpTensorSyncDevice::postEval()
|
||||
OpTensorSyncDevice::postEval(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice postEval called");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,19 +5,17 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
OpTensorSyncLocal::OpTensorSyncLocal()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor base");
|
||||
}
|
||||
|
||||
OpTensorSyncLocal::OpTensorSyncLocal(
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors)
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor with params");
|
||||
|
||||
if (tensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncLocal called with less than 1 tensor");
|
||||
}
|
||||
|
||||
this->mTensors = tensors;
|
||||
}
|
||||
|
||||
OpTensorSyncLocal::~OpTensorSyncLocal()
|
||||
|
|
@ -26,50 +24,26 @@ OpTensorSyncLocal::~OpTensorSyncLocal()
|
|||
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::init()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal init called");
|
||||
|
||||
if (this->mTensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncLocal called with less than 1 tensor");
|
||||
}
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncLocal: Tensor has not been initialized");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
|
||||
KP_LOG_WARN(
|
||||
"Kompute OpTensorSyncLocal tensor parameter is of type "
|
||||
"TensorTypes::eStorage and hence cannot be used to receive or "
|
||||
"pass data.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::record()
|
||||
OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal record called");
|
||||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensors[i]->recordCopyFromDeviceToStaging(
|
||||
this->mCommandBuffer, true);
|
||||
this->mTensors[i]->recordCopyFromDeviceToStaging(commandBuffer,
|
||||
true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::preEval()
|
||||
OpTensorSyncLocal::preEval(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::postEval()
|
||||
OpTensorSyncLocal::postEval(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal postEval called");
|
||||
|
||||
|
|
|
|||
202
src/Sequence.cpp
202
src/Sequence.cpp
|
|
@ -3,12 +3,6 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
Sequence::Sequence()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence base constructor");
|
||||
this->mIsInit = false;
|
||||
}
|
||||
|
||||
Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::Queue> computeQueue,
|
||||
|
|
@ -20,126 +14,92 @@ Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
|||
this->mDevice = device;
|
||||
this->mComputeQueue = computeQueue;
|
||||
this->mQueueIndex = queueIndex;
|
||||
this->mIsInit = false;
|
||||
|
||||
this->createCommandPool();
|
||||
this->createCommandBuffer();
|
||||
}
|
||||
|
||||
Sequence::~Sequence()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence Destructor started");
|
||||
|
||||
if (!this->mIsInit) {
|
||||
KP_LOG_INFO("Kompute Sequence destructor called but sequence is not "
|
||||
"initialized so no need to removing GPU resources.");
|
||||
return;
|
||||
} else {
|
||||
this->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
this->destroy();
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::init()
|
||||
{
|
||||
this->createCommandPool();
|
||||
this->createCommandBuffer();
|
||||
this->mIsInit = true;
|
||||
}
|
||||
|
||||
bool
|
||||
Sequence::begin()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute sequence called BEGIN");
|
||||
|
||||
if (this->isRecording()) {
|
||||
KP_LOG_WARN("Kompute Sequence begin called when already recording");
|
||||
return false;
|
||||
KP_LOG_DEBUG("Kompute Sequence begin called when already recording");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->isRunning()) {
|
||||
KP_LOG_WARN(
|
||||
throw std::runtime_error(
|
||||
"Kompute Sequence begin called when sequence still running");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!this->mCommandPool) {
|
||||
throw std::runtime_error("Kompute Sequence command pool is null");
|
||||
}
|
||||
|
||||
if (this->mOperations.size()) {
|
||||
KP_LOG_INFO("Kompute Sequence clearing previous operations");
|
||||
this->mOperations.clear();
|
||||
}
|
||||
|
||||
if (!this->mRecording) {
|
||||
KP_LOG_INFO("Kompute Sequence command recording BEGIN");
|
||||
this->mCommandBuffer->begin(vk::CommandBufferBeginInfo());
|
||||
this->mRecording = true;
|
||||
} else {
|
||||
KP_LOG_WARN("Kompute Sequence attempted to start command recording "
|
||||
"but recording already started");
|
||||
}
|
||||
return true;
|
||||
KP_LOG_INFO("Kompute Sequence command now started recording");
|
||||
this->mCommandBuffer->begin(vk::CommandBufferBeginInfo());
|
||||
this->mRecording = true;
|
||||
}
|
||||
|
||||
bool
|
||||
void
|
||||
Sequence::end()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence calling END");
|
||||
|
||||
if (!this->isRecording()) {
|
||||
KP_LOG_WARN("Kompute Sequence end called when not recording");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!this->mCommandPool) {
|
||||
throw std::runtime_error("Kompute Sequence command pool is null");
|
||||
}
|
||||
|
||||
if (this->mRecording) {
|
||||
return;
|
||||
} else {
|
||||
KP_LOG_INFO("Kompute Sequence command recording END");
|
||||
this->mCommandBuffer->end();
|
||||
this->mRecording = false;
|
||||
} else {
|
||||
KP_LOG_WARN("Kompute Sequence attempted to end command recording but "
|
||||
"recording not started");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
void
|
||||
Sequence::clear()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence calling clear");
|
||||
this->end();
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::eval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute sequence EVAL BEGIN");
|
||||
|
||||
bool evalResult = this->evalAsync();
|
||||
if (!evalResult) {
|
||||
KP_LOG_DEBUG("Kompute sequence EVAL FAILURE");
|
||||
return false;
|
||||
}
|
||||
|
||||
evalResult = this->evalAwait();
|
||||
|
||||
KP_LOG_DEBUG("Kompute sequence EVAL SUCCESS");
|
||||
|
||||
return evalResult;
|
||||
return this->evalAsync()->evalAwait();
|
||||
}
|
||||
|
||||
bool
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::eval(std::shared_ptr<OpBase> op)
|
||||
{
|
||||
this->clear();
|
||||
return this->record(op)->eval();
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::evalAsync()
|
||||
{
|
||||
if (this->isRecording()) {
|
||||
KP_LOG_WARN("Kompute Sequence evalAsync called when still recording");
|
||||
return false;
|
||||
this->end();
|
||||
}
|
||||
|
||||
if (this->mIsRunning) {
|
||||
KP_LOG_WARN("Kompute Sequence evalAsync called when an eval async was "
|
||||
"called without successful wait");
|
||||
return false;
|
||||
throw std::runtime_error(
|
||||
"Kompute Sequence evalAsync called when an eval async was "
|
||||
"called without successful wait");
|
||||
}
|
||||
|
||||
this->mIsRunning = true;
|
||||
|
||||
for (size_t i = 0; i < this->mOperations.size(); i++) {
|
||||
this->mOperations[i]->preEval();
|
||||
this->mOperations[i]->preEval(*this->mCommandBuffer);
|
||||
}
|
||||
|
||||
vk::SubmitInfo submitInfo(
|
||||
|
|
@ -152,15 +112,24 @@ Sequence::evalAsync()
|
|||
|
||||
this->mComputeQueue->submit(1, &submitInfo, this->mFence);
|
||||
|
||||
return true;
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
bool
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::evalAsync(std::shared_ptr<OpBase> op)
|
||||
{
|
||||
this->clear();
|
||||
this->record(op);
|
||||
this->evalAsync();
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::evalAwait(uint64_t waitFor)
|
||||
{
|
||||
if (!this->mIsRunning) {
|
||||
KP_LOG_WARN("Kompute Sequence evalAwait called without existing eval");
|
||||
return false;
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
vk::Result result =
|
||||
|
|
@ -171,15 +140,16 @@ Sequence::evalAwait(uint64_t waitFor)
|
|||
this->mIsRunning = false;
|
||||
|
||||
if (result == vk::Result::eTimeout) {
|
||||
KP_LOG_WARN("Kompute Sequence evalAwait timed out");
|
||||
return false;
|
||||
KP_LOG_WARN("Kompute Sequence evalAwait reached timeout of {}",
|
||||
waitFor);
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < this->mOperations.size(); i++) {
|
||||
this->mOperations[i]->postEval();
|
||||
this->mOperations[i]->postEval(*this->mCommandBuffer);
|
||||
}
|
||||
|
||||
return true;
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -197,54 +167,51 @@ Sequence::isRecording()
|
|||
bool
|
||||
Sequence::isInit()
|
||||
{
|
||||
return this->mIsInit;
|
||||
return this->mDevice && this->mCommandPool && this->mCommandBuffer &&
|
||||
this->mComputeQueue;
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::freeMemoryDestroyGPUResources()
|
||||
Sequence::destroy()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence freeMemoryDestroyGPUResources called");
|
||||
|
||||
if (!this->mIsInit) {
|
||||
KP_LOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called "
|
||||
"but Sequence is not initialized so there's no relevant "
|
||||
"GPU resources.");
|
||||
return;
|
||||
}
|
||||
KP_LOG_DEBUG("Kompute Sequence destroy called");
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called "
|
||||
"with null Device pointer");
|
||||
this->mIsInit = false;
|
||||
KP_LOG_WARN("Kompute Sequence destroy called "
|
||||
"with null Device pointer");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->mFreeCommandBuffer) {
|
||||
KP_LOG_INFO("Freeing CommandBuffer");
|
||||
if (!this->mCommandBuffer) {
|
||||
KP_LOG_ERROR(
|
||||
"Kompute Sequence freeMemoryDestroyGPUResources called with null "
|
||||
"CommandPool pointer");
|
||||
this->mIsInit = false;
|
||||
KP_LOG_WARN("Kompute Sequence destroy called with null "
|
||||
"CommandPool pointer");
|
||||
return;
|
||||
}
|
||||
this->mDevice->freeCommandBuffers(
|
||||
*this->mCommandPool, 1, this->mCommandBuffer.get());
|
||||
|
||||
this->mCommandBuffer = nullptr;
|
||||
this->mFreeCommandBuffer = false;
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence Freed CommandBuffer");
|
||||
}
|
||||
|
||||
if (this->mFreeCommandPool) {
|
||||
KP_LOG_INFO("Destroying CommandPool");
|
||||
if (this->mCommandPool == nullptr) {
|
||||
KP_LOG_ERROR(
|
||||
"Kompute Sequence freeMemoryDestroyGPUResources called with null "
|
||||
"CommandPool pointer");
|
||||
this->mIsInit = false;
|
||||
KP_LOG_WARN("Kompute Sequence destroy called with null "
|
||||
"CommandPool pointer");
|
||||
return;
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mCommandPool,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
|
||||
this->mCommandPool = nullptr;
|
||||
this->mFreeCommandPool = false;
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence Destroyed CommandPool");
|
||||
}
|
||||
|
||||
|
|
@ -253,7 +220,32 @@ Sequence::freeMemoryDestroyGPUResources()
|
|||
this->mOperations.clear();
|
||||
}
|
||||
|
||||
this->mIsInit = false;
|
||||
if (this->mDevice) {
|
||||
this->mDevice = nullptr;
|
||||
}
|
||||
if (this->mPhysicalDevice) {
|
||||
this->mPhysicalDevice = nullptr;
|
||||
}
|
||||
if (this->mComputeQueue) {
|
||||
this->mComputeQueue = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::record(std::shared_ptr<OpBase> op)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
this->begin();
|
||||
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute Sequence running record on OpBase derived class instance");
|
||||
|
||||
op->record(*this->mCommandBuffer);
|
||||
|
||||
this->mOperations.push_back(op);
|
||||
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -5,11 +5,13 @@
|
|||
namespace kp {
|
||||
|
||||
std::vector<uint32_t>
|
||||
Shader::compile_sources(const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& files,
|
||||
const std::string& entryPoint,
|
||||
std::vector<std::pair<std::string,std::string>> definitions,
|
||||
const TBuiltInResource& resources) {
|
||||
Shader::compile_sources(
|
||||
const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& files,
|
||||
const std::string& entryPoint,
|
||||
std::vector<std::pair<std::string, std::string>> definitions,
|
||||
const TBuiltInResource& resources)
|
||||
{
|
||||
|
||||
// Initialize glslang library.
|
||||
glslang::InitializeProcess();
|
||||
|
|
@ -18,27 +20,32 @@ Shader::compile_sources(const std::vector<std::string>& sources,
|
|||
const EShLanguage language = EShLangCompute;
|
||||
glslang::TShader shader(language);
|
||||
|
||||
std::vector<const char*> filesCStr(files.size()), sourcesCStr(sources.size());
|
||||
for (size_t i = 0; i < sources.size(); i++) sourcesCStr[i] = sources[i].c_str();
|
||||
std::vector<const char*> filesCStr(files.size()),
|
||||
sourcesCStr(sources.size());
|
||||
for (size_t i = 0; i < sources.size(); i++)
|
||||
sourcesCStr[i] = sources[i].c_str();
|
||||
|
||||
if (files.size() > 1) {
|
||||
assert(files.size() == sources.size());
|
||||
for (size_t i = 0; i < files.size(); i++) filesCStr[i] = files[i].c_str();
|
||||
shader.setStringsWithLengthsAndNames(sourcesCStr.data(), nullptr, filesCStr.data(), filesCStr.size());
|
||||
}
|
||||
else {
|
||||
filesCStr = {""};
|
||||
shader.setStringsWithLengthsAndNames(sourcesCStr.data(), nullptr, filesCStr.data(), sourcesCStr.size());
|
||||
for (size_t i = 0; i < files.size(); i++)
|
||||
filesCStr[i] = files[i].c_str();
|
||||
shader.setStringsWithLengthsAndNames(
|
||||
sourcesCStr.data(), nullptr, filesCStr.data(), filesCStr.size());
|
||||
} else {
|
||||
filesCStr = { "" };
|
||||
shader.setStringsWithLengthsAndNames(
|
||||
sourcesCStr.data(), nullptr, filesCStr.data(), sourcesCStr.size());
|
||||
}
|
||||
|
||||
shader.setEntryPoint(entryPoint.c_str());
|
||||
shader.setSourceEntryPoint(entryPoint.c_str());
|
||||
|
||||
std::string info_log = "";
|
||||
const EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgVulkanRules | EShMsgSpvRules);
|
||||
if (!shader.parse(&resources, 100, false, messages))
|
||||
{
|
||||
info_log = std::string(shader.getInfoLog()) + "\n" + std::string(shader.getInfoDebugLog());
|
||||
const EShMessages messages = static_cast<EShMessages>(
|
||||
EShMsgDefault | EShMsgVulkanRules | EShMsgSpvRules);
|
||||
if (!shader.parse(&resources, 100, false, messages)) {
|
||||
info_log = std::string(shader.getInfoLog()) + "\n" +
|
||||
std::string(shader.getInfoDebugLog());
|
||||
KP_LOG_ERROR("Kompute Shader Error: {}", info_log);
|
||||
throw std::runtime_error(info_log);
|
||||
}
|
||||
|
|
@ -47,24 +54,23 @@ Shader::compile_sources(const std::vector<std::string>& sources,
|
|||
glslang::TProgram program;
|
||||
program.addShader(&shader);
|
||||
// Link program.
|
||||
if (!program.link(messages))
|
||||
{
|
||||
info_log = std::string(program.getInfoLog()) + "\n" + std::string(program.getInfoDebugLog());
|
||||
if (!program.link(messages)) {
|
||||
info_log = std::string(program.getInfoLog()) + "\n" +
|
||||
std::string(program.getInfoDebugLog());
|
||||
KP_LOG_ERROR("Kompute Shader Error: {}", info_log);
|
||||
throw std::runtime_error(info_log);
|
||||
}
|
||||
|
||||
// Save any info log that was generated.
|
||||
if (shader.getInfoLog())
|
||||
{
|
||||
info_log += std::string(shader.getInfoLog()) + "\n" + std::string(shader.getInfoDebugLog()) + "\n";
|
||||
if (shader.getInfoLog()) {
|
||||
info_log += std::string(shader.getInfoLog()) + "\n" +
|
||||
std::string(shader.getInfoDebugLog()) + "\n";
|
||||
KP_LOG_INFO("Kompute Shader Information: {}", info_log);
|
||||
}
|
||||
|
||||
glslang::TIntermediate *intermediate = program.getIntermediate(language);
|
||||
glslang::TIntermediate* intermediate = program.getIntermediate(language);
|
||||
// Translate to SPIRV.
|
||||
if (!intermediate)
|
||||
{
|
||||
if (!intermediate) {
|
||||
info_log += "Failed to get shared intermediate code.\n";
|
||||
KP_LOG_ERROR("Kompute Shader Error: {}", info_log);
|
||||
throw std::runtime_error(info_log);
|
||||
|
|
@ -74,8 +80,7 @@ Shader::compile_sources(const std::vector<std::string>& sources,
|
|||
std::vector<std::uint32_t> spirv;
|
||||
glslang::GlslangToSpv(*intermediate, spirv, &logger);
|
||||
|
||||
if (shader.getInfoLog())
|
||||
{
|
||||
if (shader.getInfoLog()) {
|
||||
info_log += logger.getAllMessages() + "\n";
|
||||
KP_LOG_DEBUG("Kompute Shader all result messages: {}", info_log);
|
||||
}
|
||||
|
|
@ -87,11 +92,17 @@ Shader::compile_sources(const std::vector<std::string>& sources,
|
|||
}
|
||||
|
||||
std::vector<uint32_t>
|
||||
Shader::compile_source(const std::string& source,
|
||||
const std::string& entryPoint,
|
||||
std::vector<std::pair<std::string,std::string>> definitions,
|
||||
const TBuiltInResource& resource) {
|
||||
return compile_sources({source}, std::vector<std::string>({}), entryPoint, definitions, resource);
|
||||
Shader::compile_source(
|
||||
const std::string& source,
|
||||
const std::string& entryPoint,
|
||||
std::vector<std::pair<std::string, std::string>> definitions,
|
||||
const TBuiltInResource& resource)
|
||||
{
|
||||
return compile_sources({ source },
|
||||
std::vector<std::string>({}),
|
||||
entryPoint,
|
||||
definitions,
|
||||
resource);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
180
src/Tensor.cpp
180
src/Tensor.cpp
|
|
@ -3,23 +3,19 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
Tensor::Tensor()
|
||||
Tensor::Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<float>& data,
|
||||
const TensorTypes& tensorType)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor base constructor");
|
||||
this->mTensorType = TensorTypes::eDevice;
|
||||
}
|
||||
|
||||
Tensor::Tensor(const std::vector<float>& data, TensorTypes tensorType)
|
||||
{
|
||||
#if DEBUG
|
||||
KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}",
|
||||
data.size(),
|
||||
tensorType);
|
||||
#endif
|
||||
|
||||
this->mData = data;
|
||||
this->mShape = { static_cast<uint32_t>(data.size()) };
|
||||
this->mTensorType = tensorType;
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
|
||||
this->rebuild(data, tensorType);
|
||||
}
|
||||
|
||||
Tensor::~Tensor()
|
||||
|
|
@ -27,25 +23,24 @@ Tensor::~Tensor()
|
|||
KP_LOG_DEBUG("Kompute Tensor destructor started. Type: {}",
|
||||
this->tensorType());
|
||||
|
||||
if (this->isInit()) {
|
||||
this->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
this->destroy();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor destructor success");
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device)
|
||||
Tensor::rebuild(const std::vector<float>& data, TensorTypes tensorType)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor running init with Vulkan params and num data "
|
||||
"elementS: {}",
|
||||
this->mData.size());
|
||||
KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", data.size());
|
||||
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
this->mData = data;
|
||||
this->mTensorType = tensorType;
|
||||
|
||||
this->mIsInit = true;
|
||||
if (this->mPrimaryBuffer || this->mPrimaryMemory) {
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute Tensor destroying existing resources before rebuild");
|
||||
this->destroy();
|
||||
}
|
||||
|
||||
this->allocateMemoryCreateGPUResources();
|
||||
}
|
||||
|
|
@ -71,13 +66,7 @@ Tensor::memorySize()
|
|||
uint32_t
|
||||
Tensor::size()
|
||||
{
|
||||
return this->mShape[0];
|
||||
}
|
||||
|
||||
std::array<uint32_t, KP_MAX_DIM_SIZE>
|
||||
Tensor::shape()
|
||||
{
|
||||
return this->mShape;
|
||||
return static_cast<uint32_t>(this->mData.size());
|
||||
}
|
||||
|
||||
Tensor::TensorTypes
|
||||
|
|
@ -89,7 +78,7 @@ Tensor::tensorType()
|
|||
bool
|
||||
Tensor::isInit()
|
||||
{
|
||||
return this->mIsInit && this->mPrimaryBuffer && this->mPrimaryMemory;
|
||||
return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -103,7 +92,7 @@ Tensor::setData(const std::vector<float>& data)
|
|||
}
|
||||
|
||||
void
|
||||
Tensor::recordCopyFrom(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
|
||||
std::shared_ptr<Tensor> copyFromTensor,
|
||||
bool createBarrier)
|
||||
{
|
||||
|
|
@ -113,65 +102,58 @@ Tensor::recordCopyFrom(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
|||
|
||||
KP_LOG_DEBUG("Kompute Tensor recordCopyFrom data size {}.", bufferSize);
|
||||
|
||||
this->copyBuffer(commandBuffer,
|
||||
copyFromTensor->mPrimaryBuffer,
|
||||
this->mPrimaryBuffer,
|
||||
bufferSize,
|
||||
copyRegion,
|
||||
createBarrier);
|
||||
this->recordCopyBuffer(commandBuffer,
|
||||
copyFromTensor->mPrimaryBuffer,
|
||||
this->mPrimaryBuffer,
|
||||
bufferSize,
|
||||
copyRegion,
|
||||
createBarrier);
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::recordCopyFromStagingToDevice(
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
bool createBarrier)
|
||||
Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer,
|
||||
bool createBarrier)
|
||||
{
|
||||
vk::DeviceSize bufferSize(this->memorySize());
|
||||
vk::BufferCopy copyRegion(0, 0, bufferSize);
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize);
|
||||
|
||||
this->copyBuffer(commandBuffer,
|
||||
this->mStagingBuffer,
|
||||
this->mPrimaryBuffer,
|
||||
bufferSize,
|
||||
copyRegion,
|
||||
createBarrier);
|
||||
this->recordCopyBuffer(commandBuffer,
|
||||
this->mStagingBuffer,
|
||||
this->mPrimaryBuffer,
|
||||
bufferSize,
|
||||
copyRegion,
|
||||
createBarrier);
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::recordCopyFromDeviceToStaging(
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
bool createBarrier)
|
||||
Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer,
|
||||
bool createBarrier)
|
||||
{
|
||||
vk::DeviceSize bufferSize(this->memorySize());
|
||||
vk::BufferCopy copyRegion(0, 0, bufferSize);
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize);
|
||||
|
||||
this->copyBuffer(commandBuffer,
|
||||
this->mPrimaryBuffer,
|
||||
this->mStagingBuffer,
|
||||
bufferSize,
|
||||
copyRegion,
|
||||
createBarrier);
|
||||
this->recordCopyBuffer(commandBuffer,
|
||||
this->mPrimaryBuffer,
|
||||
this->mStagingBuffer,
|
||||
bufferSize,
|
||||
copyRegion,
|
||||
createBarrier);
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::shared_ptr<vk::Buffer> bufferFrom,
|
||||
std::shared_ptr<vk::Buffer> bufferTo,
|
||||
vk::DeviceSize bufferSize,
|
||||
vk::BufferCopy copyRegion,
|
||||
bool createBarrier)
|
||||
Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
|
||||
std::shared_ptr<vk::Buffer> bufferFrom,
|
||||
std::shared_ptr<vk::Buffer> bufferTo,
|
||||
vk::DeviceSize bufferSize,
|
||||
vk::BufferCopy copyRegion,
|
||||
bool createBarrier)
|
||||
{
|
||||
|
||||
if (!this->mIsInit) {
|
||||
throw std::runtime_error(
|
||||
"Kompute Tensor attempted to run copyBuffer without init");
|
||||
}
|
||||
|
||||
commandBuffer->copyBuffer(*bufferFrom, *bufferTo, copyRegion);
|
||||
commandBuffer.copyBuffer(*bufferFrom, *bufferTo, copyRegion);
|
||||
|
||||
if (createBarrier) {
|
||||
// Buffer to ensure wait until data is copied to staging buffer
|
||||
|
|
@ -184,12 +166,11 @@ Tensor::copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
|||
}
|
||||
|
||||
void
|
||||
Tensor::recordBufferMemoryBarrier(
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
vk::AccessFlagBits srcAccessMask,
|
||||
vk::AccessFlagBits dstAccessMask,
|
||||
vk::PipelineStageFlagBits srcStageMask,
|
||||
vk::PipelineStageFlagBits dstStageMask)
|
||||
Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
|
||||
vk::AccessFlagBits srcAccessMask,
|
||||
vk::AccessFlagBits dstAccessMask,
|
||||
vk::PipelineStageFlagBits srcStageMask,
|
||||
vk::PipelineStageFlagBits dstStageMask)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor recording buffer memory barrier");
|
||||
|
||||
|
|
@ -203,12 +184,12 @@ Tensor::recordBufferMemoryBarrier(
|
|||
bufferMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
bufferMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
|
||||
commandBuffer->pipelineBarrier(srcStageMask,
|
||||
dstStageMask,
|
||||
vk::DependencyFlags(),
|
||||
nullptr,
|
||||
bufferMemoryBarrier,
|
||||
nullptr);
|
||||
commandBuffer.pipelineBarrier(srcStageMask,
|
||||
dstStageMask,
|
||||
vk::DependencyFlags(),
|
||||
nullptr,
|
||||
bufferMemoryBarrier,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
vk::DescriptorBufferInfo
|
||||
|
|
@ -344,11 +325,6 @@ Tensor::allocateMemoryCreateGPUResources()
|
|||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor creating buffer");
|
||||
|
||||
if (!this->mIsInit) {
|
||||
throw std::runtime_error(
|
||||
"Kompute Tensor attempted to run createBuffer without init");
|
||||
}
|
||||
|
||||
if (!this->mPhysicalDevice) {
|
||||
throw std::runtime_error("Kompute Tensor phyisical device is null");
|
||||
}
|
||||
|
|
@ -455,71 +431,77 @@ Tensor::allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
|
|||
}
|
||||
|
||||
void
|
||||
Tensor::freeMemoryDestroyGPUResources()
|
||||
Tensor::destroy()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor started freeMemoryDestroyGPUResources");
|
||||
|
||||
this->mIsInit = false;
|
||||
KP_LOG_DEBUG("Kompute Tensor started destroy()");
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_ERROR(
|
||||
KP_LOG_WARN(
|
||||
"Kompute Tensor destructor reached with null Device pointer");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->mFreePrimaryBuffer) {
|
||||
if (!this->mPrimaryBuffer) {
|
||||
KP_LOG_ERROR("Kompose Tensor expected to destroy primary buffer "
|
||||
"but got null buffer");
|
||||
KP_LOG_WARN("Kompose Tensor expected to destroy primary buffer "
|
||||
"but got null buffer");
|
||||
} else {
|
||||
KP_LOG_DEBUG("Kompose Tensor destroying primary buffer");
|
||||
this->mDevice->destroy(
|
||||
*this->mPrimaryBuffer,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPrimaryBuffer = nullptr;
|
||||
this->mFreePrimaryBuffer = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (this->mFreeStagingBuffer) {
|
||||
if (!this->mStagingBuffer) {
|
||||
KP_LOG_ERROR("Kompose Tensor expected to destroy staging buffer "
|
||||
"but got null buffer");
|
||||
KP_LOG_WARN("Kompose Tensor expected to destroy staging buffer "
|
||||
"but got null buffer");
|
||||
} else {
|
||||
KP_LOG_DEBUG("Kompose Tensor destroying staging buffer");
|
||||
this->mDevice->destroy(
|
||||
*this->mStagingBuffer,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mStagingBuffer = nullptr;
|
||||
this->mFreeStagingBuffer = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (this->mFreePrimaryMemory) {
|
||||
if (!this->mPrimaryMemory) {
|
||||
KP_LOG_ERROR("Kompose Tensor expected to free primary memory but "
|
||||
"got null memory");
|
||||
KP_LOG_WARN("Kompose Tensor expected to free primary memory but "
|
||||
"got null memory");
|
||||
} else {
|
||||
KP_LOG_DEBUG("Kompose Tensor freeing primary memory");
|
||||
this->mDevice->freeMemory(
|
||||
*this->mPrimaryMemory,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPrimaryMemory = nullptr;
|
||||
this->mFreePrimaryMemory = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (this->mFreeStagingMemory) {
|
||||
if (!this->mStagingMemory) {
|
||||
KP_LOG_ERROR("Kompose Tensor expected to free staging memory but "
|
||||
"got null memory");
|
||||
KP_LOG_WARN("Kompose Tensor expected to free staging memory but "
|
||||
"got null memory");
|
||||
} else {
|
||||
KP_LOG_DEBUG("Kompose Tensor freeing staging memory");
|
||||
this->mDevice->freeMemory(
|
||||
*this->mStagingMemory,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mStagingMemory = nullptr;
|
||||
this->mFreeStagingMemory = false;
|
||||
}
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor successful freeMemoryDestroyGPUResources");
|
||||
if (this->mDevice) {
|
||||
this->mDevice = nullptr;
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor successful destroy()");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,13 +12,7 @@ namespace kp {
|
|||
*/
|
||||
class Algorithm
|
||||
{
|
||||
public:
|
||||
/**
|
||||
Base constructor for Algorithm. Should not be used unless explicit
|
||||
intended.
|
||||
*/
|
||||
Algorithm();
|
||||
|
||||
public:
|
||||
/**
|
||||
* Default constructor for Algorithm
|
||||
*
|
||||
|
|
@ -27,7 +21,9 @@ public:
|
|||
* shaders
|
||||
*/
|
||||
Algorithm(std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
|
||||
/**
|
||||
|
|
@ -36,11 +32,13 @@ public:
|
|||
*
|
||||
* @param shaderFileData The bytes in spir-v format of the shader
|
||||
* @tensorParams The Tensors to be used in the Algorithm / shader for
|
||||
* @specalizationInstalces The specialization parameters to pass to the function
|
||||
* processing
|
||||
* @specalizationInstalces The specialization parameters to pass to the
|
||||
* function processing
|
||||
*/
|
||||
void init(const std::vector<uint32_t>& shaderFileData,
|
||||
std::vector<std::shared_ptr<Tensor>> tensorParams);
|
||||
void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
|
||||
/**
|
||||
* Destructor for Algorithm which is responsible for freeing and destroying
|
||||
|
|
@ -56,12 +54,27 @@ public:
|
|||
* @param y Layout Y dispatch value
|
||||
* @param z Layout Z dispatch value
|
||||
*/
|
||||
void recordDispatch(uint32_t x = 1, uint32_t y = 1, uint32_t z = 1);
|
||||
void recordDispatch(const vk::CommandBuffer& commandBuffer);
|
||||
|
||||
private:
|
||||
void bindCore(const vk::CommandBuffer& commandBuffer);
|
||||
|
||||
void bindPush(const vk::CommandBuffer& commandBuffer,
|
||||
const Constants& pushConstants);
|
||||
|
||||
bool isInit();
|
||||
|
||||
void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);
|
||||
|
||||
const Workgroup& getWorkgroup();
|
||||
const Constants& getSpecializationConstants();
|
||||
const std::vector<std::shared_ptr<Tensor>>& getTensors();
|
||||
|
||||
void destroy();
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
|
||||
|
|
@ -80,15 +93,18 @@ private:
|
|||
bool mFreePipeline = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<uint32_t> mSpirv;
|
||||
Constants mSpecializationConstants;
|
||||
Workgroup mWorkgroup;
|
||||
|
||||
bool mIsInit;
|
||||
|
||||
// Create util functions
|
||||
void createShaderModule(const std::vector<uint32_t>& shaderFileData);
|
||||
void createShaderModule();
|
||||
void createPipeline();
|
||||
|
||||
// Parameters
|
||||
void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
|
||||
void createDescriptorPool();
|
||||
void createParameters();
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -60,12 +60,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
|
|||
#define KP_LOG_DEBUG(...)
|
||||
#else
|
||||
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
|
||||
#define KP_LOG_DEBUG(...) \
|
||||
((void)__android_log_print(ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
|
||||
#define KP_LOG_DEBUG(...) \
|
||||
((void)__android_log_write( \
|
||||
ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
|
||||
#elif defined(KOMPUTE_BUILD_PYTHON)
|
||||
#define KP_LOG_DEBUG(...) kp_debug(fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#define KP_LOG_DEBUG(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
|
||||
#define KP_LOG_DEBUG(...) \
|
||||
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#endif // SPDLOG_ACTIVE_LEVEL > 1
|
||||
|
||||
|
|
@ -73,12 +80,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
|
|||
#define KP_LOG_INFO(...)
|
||||
#else
|
||||
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
|
||||
#define KP_LOG_INFO(...) \
|
||||
((void)__android_log_print(ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
|
||||
#define KP_LOG_INFO(...) \
|
||||
((void)__android_log_write( \
|
||||
ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
|
||||
#elif defined(KOMPUTE_BUILD_PYTHON)
|
||||
#define KP_LOG_INFO(...) kp_info(fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#define KP_LOG_INFO(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
|
||||
#define KP_LOG_INFO(...) \
|
||||
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#endif // SPDLOG_ACTIVE_LEVEL > 2
|
||||
|
||||
|
|
@ -86,12 +100,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
|
|||
#define KP_LOG_WARN(...)
|
||||
#else
|
||||
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
|
||||
#define KP_LOG_WARN(...) \
|
||||
((void)__android_log_print(ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
|
||||
#define KP_LOG_WARN(...) \
|
||||
((void)__android_log_write( \
|
||||
ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
|
||||
#elif defined(KOMPUTE_BUILD_PYTHON)
|
||||
#define KP_LOG_WARN(...) kp_warning(fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#define KP_LOG_WARN(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
|
||||
#define KP_LOG_WARN(...) \
|
||||
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#endif // SPDLOG_ACTIVE_LEVEL > 3
|
||||
|
||||
|
|
@ -99,12 +120,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
|
|||
#define KP_LOG_ERROR(...)
|
||||
#else
|
||||
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
|
||||
#define KP_LOG_ERROR(...) \
|
||||
((void)__android_log_print(ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
|
||||
#define KP_LOG_ERROR(...) \
|
||||
((void)__android_log_write( \
|
||||
ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
|
||||
#elif defined(KOMPUTE_BUILD_PYTHON)
|
||||
#define KP_LOG_ERROR(...) kp_error(fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#define KP_LOG_ERROR(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
|
||||
#define KP_LOG_ERROR(...) \
|
||||
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#endif // SPDLOG_ACTIVE_LEVEL > 4
|
||||
#endif // KOMPUTE_SPDLOG_ENABLED
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
|
||||
#include "kompute/Sequence.hpp"
|
||||
|
||||
#include "kompute/operations/OpTensorSyncDevice.hpp"
|
||||
|
||||
#define KP_DEFAULT_SESSION "DEFAULT"
|
||||
|
||||
namespace kp {
|
||||
|
|
@ -30,6 +28,8 @@ class Manager
|
|||
* they would like to create the resources on.
|
||||
*
|
||||
* @param physicalDeviceIndex The index of the physical device to use
|
||||
* @param manageResources (Optional) Whether to manage the memory of the
|
||||
* resources created and destroy when the manager is destroyed.
|
||||
* @param familyQueueIndices (Optional) List of queue indices to add for
|
||||
* explicit allocation
|
||||
* @param totalQueues The total number of compute queues to create.
|
||||
|
|
@ -48,8 +48,7 @@ class Manager
|
|||
*/
|
||||
Manager(std::shared_ptr<vk::Instance> instance,
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
uint32_t physicalDeviceIndex);
|
||||
std::shared_ptr<vk::Device> device);
|
||||
|
||||
/**
|
||||
* Manager destructor which would ensure all owned resources are destroyed
|
||||
|
|
@ -67,150 +66,7 @@ class Manager
|
|||
* @param queueIndex The queue to use from the available queues
|
||||
* @return Shared pointer to the manager owned sequence resource
|
||||
*/
|
||||
std::shared_ptr<Sequence> sequence(
|
||||
std::string sequenceName = KP_DEFAULT_SESSION,
|
||||
uint32_t queueIndex = 0);
|
||||
|
||||
/**
|
||||
* Function that evaluates operation against named sequence.
|
||||
*
|
||||
* @param tensors The tensors to be used in the operation recorded
|
||||
* @param sequenceName The name of the sequence to be retrieved or created
|
||||
* @param TArgs Template parameters that will be used to initialise
|
||||
* Operation to allow for extensible configurations on initialisation
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
void evalOp(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::string sequenceName,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp triggered");
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
this->sequence(sequenceName);
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
|
||||
sq->begin();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp running sequence RECORD");
|
||||
sq->record<T>(tensors, std::forward<TArgs>(params)...);
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp running sequence END");
|
||||
sq->end();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp running sequence EVAL");
|
||||
sq->eval();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp running sequence SUCCESS");
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that evaluates operation against a newly created sequence.
|
||||
*
|
||||
* @param tensors The tensors to be used in the operation recorded
|
||||
* @param TArgs Template parameters that will be used to initialise
|
||||
* Operation to allow for extensible configurations on initialisation
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
void evalOpDefault(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager evalOp Default triggered");
|
||||
this->mCurrentSequenceIndex++;
|
||||
this->evalOp<T>(
|
||||
tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that evaluates operation against named sequence asynchronously.
|
||||
*
|
||||
* @param tensors The tensors to be used in the operation recorded
|
||||
* @param sequenceName The name of the sequence to be retrieved or created
|
||||
* @param params Template parameters that will be used to initialise
|
||||
* Operation to allow for extensible configurations on initialisation
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
void evalOpAsync(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::string sequenceName,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsync triggered");
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
this->sequence(sequenceName);
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN");
|
||||
sq->begin();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence RECORD");
|
||||
sq->record<T>(tensors, std::forward<TArgs>(params)...);
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence END");
|
||||
sq->end();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence EVAL");
|
||||
sq->evalAsync();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence SUCCESS");
|
||||
}
|
||||
|
||||
/**
|
||||
* Operation that evaluates operation against default sequence
|
||||
* asynchronously.
|
||||
*
|
||||
* @param tensors The tensors to be used in the operation recorded
|
||||
* @param params Template parameters that will be used to initialise
|
||||
* Operation to allow for extensible configurations on initialisation
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
void evalOpAsyncDefault(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered");
|
||||
this->mCurrentSequenceIndex++;
|
||||
this->evalOpAsync<T>(
|
||||
tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
|
||||
}
|
||||
|
||||
/**
|
||||
* Operation that awaits for named sequence to finish.
|
||||
*
|
||||
* @param sequenceName The name of the sequence to wait for termination
|
||||
* @param waitFor The amount of time to wait before timing out
|
||||
*/
|
||||
void evalOpAwait(std::string sequenceName, uint64_t waitFor = UINT64_MAX)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAwait triggered with sequence {}",
|
||||
sequenceName);
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator
|
||||
found = this->mManagedSequences.find(sequenceName);
|
||||
|
||||
if (found != this->mManagedSequences.end()) {
|
||||
if (std::shared_ptr<kp::Sequence> sq = found->second) {
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAwait running sequence "
|
||||
"Sequence EVAL AWAIT");
|
||||
if (sq->isRunning()) {
|
||||
sq->evalAwait(waitFor);
|
||||
}
|
||||
}
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute Manager evalOpAwait running sequence SUCCESS");
|
||||
} else {
|
||||
KP_LOG_ERROR("Kompute Manager evalOpAwait Sequence not found");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Operation that awaits for default sequence to finish.
|
||||
*
|
||||
* @param tensors The tensors to be used in the operation recorded
|
||||
* @param params Template parameters that will be used to initialise
|
||||
* Operation to allow for extensible configurations on initialisation
|
||||
*/
|
||||
void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered");
|
||||
this->evalOpAwait(KP_DEFAULT_SESSION, waitFor);
|
||||
}
|
||||
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0);
|
||||
|
||||
/**
|
||||
* Function that simplifies the common workflow of tensor creation and
|
||||
|
|
@ -225,102 +81,34 @@ class Manager
|
|||
*/
|
||||
std::shared_ptr<Tensor> tensor(
|
||||
const std::vector<float>& data,
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice,
|
||||
bool syncDataToGPU = true);
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice);
|
||||
|
||||
/**
|
||||
* Function that simplifies the common workflow of tensor initialisation. It
|
||||
* will take the constructor parameters for a Tensor and will use them to
|
||||
* create a new Tensor. The tensor memory will then be managed and owned by
|
||||
* the manager.
|
||||
*
|
||||
* @param tensors Array of tensors to rebuild
|
||||
* @param syncDataToGPU Whether to sync the data to GPU memory
|
||||
*/
|
||||
void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
|
||||
bool syncDataToGPU = true);
|
||||
std::shared_ptr<Algorithm> algorithm(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
|
||||
/**
|
||||
* Function that simplifies the common workflow of tensor initialisation. It
|
||||
* will take the constructor parameters for a Tensor and will use them to
|
||||
* create a new Tensor. The tensor memory will then be managed and owned by
|
||||
* the manager.
|
||||
*
|
||||
* @param tensor Single tensor to rebuild
|
||||
* @param syncDataToGPU Whether to sync the data to GPU memory
|
||||
*/
|
||||
void rebuild(std::shared_ptr<kp::Tensor> tensor,
|
||||
bool syncDataToGPU = true);
|
||||
|
||||
/**
|
||||
* Destroy owned Vulkan GPU resources and free GPU memory for
|
||||
* single tensor.
|
||||
*
|
||||
* @param tensor Single tensor to destroy
|
||||
*/
|
||||
void destroy(std::shared_ptr<kp::Tensor> tensor);
|
||||
|
||||
/**
|
||||
* Destroy owned Vulkan GPU resources and free GPU memory for
|
||||
* vector of tensors.
|
||||
*
|
||||
* @param tensors Vector of tensors to destroy
|
||||
*/
|
||||
void destroy(std::vector<std::shared_ptr<kp::Tensor>> tensors);
|
||||
|
||||
/**
|
||||
* Destroy owned Vulkan GPU resources and free GPU memory for
|
||||
* vector of sequences. Destroying by sequence name is more efficient
|
||||
* and hence recommended instead of by object.
|
||||
*
|
||||
* @param sequences Vector for shared ptrs with sequences to destroy
|
||||
*/
|
||||
void destroy(std::vector<std::shared_ptr<kp::Sequence>> sequences);
|
||||
|
||||
/**
|
||||
* Destroy owned Vulkan GPU resources and free GPU memory for
|
||||
* single sequence. Destroying by sequence name is more efficient
|
||||
* and hence recommended instead of by object.
|
||||
*
|
||||
* @param sequence Single sequence to destroy
|
||||
*/
|
||||
void destroy(std::shared_ptr<kp::Sequence> sequence);
|
||||
|
||||
/**
|
||||
* Destroy owned Vulkan GPU resources and free GPU memory for
|
||||
* sequence by name.
|
||||
*
|
||||
* @param sequenceName Single name of named sequence to destroy
|
||||
*/
|
||||
void destroy(const std::string& sequenceName);
|
||||
|
||||
/**
|
||||
* Destroy owned Vulkan GPU resources and free GPU memory for
|
||||
* sequences using vector of named sequence names.
|
||||
*
|
||||
* @param sequenceNames Vector of sequence names to destroy
|
||||
*/
|
||||
void destroy(const std::vector<std::string>& sequenceNames);
|
||||
void destroy();
|
||||
void clear();
|
||||
|
||||
private:
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::Instance> mInstance = nullptr;
|
||||
bool mFreeInstance = false;
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
|
||||
uint32_t mPhysicalDeviceIndex = -1;
|
||||
std::shared_ptr<vk::Device> mDevice = nullptr;
|
||||
bool mFreeDevice = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::set<std::shared_ptr<Tensor>> mManagedTensors;
|
||||
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>
|
||||
mManagedSequences;
|
||||
std::vector<std::weak_ptr<Tensor>> mManagedTensors;
|
||||
std::vector<std::weak_ptr<Sequence>> mManagedSequences;
|
||||
std::vector<std::weak_ptr<Algorithm>> mManagedAlgorithms;
|
||||
|
||||
std::vector<uint32_t> mComputeQueueFamilyIndices;
|
||||
std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
|
||||
|
||||
uint32_t mCurrentSequenceIndex = -1;
|
||||
bool mManageResources = false;
|
||||
|
||||
#if DEBUG
|
||||
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
|
||||
|
|
@ -331,7 +119,8 @@ class Manager
|
|||
|
||||
// Create functions
|
||||
void createInstance();
|
||||
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {});
|
||||
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {},
|
||||
uint32_t hysicalDeviceIndex = 0);
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -9,14 +9,9 @@ namespace kp {
|
|||
/**
|
||||
* Container of operations that can be sent to GPU as batch
|
||||
*/
|
||||
class Sequence
|
||||
class Sequence : public std::enable_shared_from_this<Sequence>
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Base constructor for Sequence. Should not be used unless explicit
|
||||
* intended.
|
||||
*/
|
||||
Sequence();
|
||||
/**
|
||||
* Main constructor for sequence which requires core vulkan components to
|
||||
* generate all dependent resources.
|
||||
|
|
@ -37,80 +32,8 @@ class Sequence
|
|||
~Sequence();
|
||||
|
||||
/**
|
||||
* Initialises sequence including the creation of the command pool and the
|
||||
* command buffer.
|
||||
*/
|
||||
void init();
|
||||
|
||||
/**
|
||||
* Begins recording commands for commands to be submitted into the command
|
||||
* buffer.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
bool begin();
|
||||
|
||||
/**
|
||||
* Ends the recording and stops recording commands when the record command
|
||||
* is sent.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
bool end();
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
bool eval();
|
||||
|
||||
/**
|
||||
* Eval Async sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier. EvalAwait() must
|
||||
* be called after to ensure the sequence is terminated correctly.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
bool evalAsync();
|
||||
|
||||
/**
|
||||
* Eval Await waits for the fence to finish processing and then once it
|
||||
* finishes, it runs the postEval of all operations.
|
||||
*
|
||||
* @param waitFor Number of milliseconds to wait before timing out.
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
bool evalAwait(uint64_t waitFor = UINT64_MAX);
|
||||
|
||||
/**
|
||||
* Returns true if the sequence is currently recording.
|
||||
*
|
||||
* @return Boolean stating if recording ongoing.
|
||||
*/
|
||||
bool isRecording();
|
||||
|
||||
/**
|
||||
* Returns true if the sequence is currently running - mostly used for async
|
||||
* workloads.
|
||||
*
|
||||
* @return Boolean stating if currently running.
|
||||
*/
|
||||
bool isRunning();
|
||||
|
||||
/**
|
||||
* Returns true if the sequence has been successfully initialised.
|
||||
*
|
||||
* @return Boolean stating if sequence has been initialised.
|
||||
*/
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Destroys and frees the GPU resources which include the buffer and memory
|
||||
* and sets the sequence as init=False.
|
||||
*/
|
||||
void freeMemoryDestroyGPUResources();
|
||||
std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
|
||||
|
||||
/**
|
||||
* Record function for operation to be added to the GPU queue in batch. This
|
||||
|
|
@ -123,44 +46,194 @@ class Sequence
|
|||
* which allows for extensible configurations on initialisation.
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
bool record(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
std::shared_ptr<Sequence> record(
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->record(op);
|
||||
}
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence> record(std::shared_ptr<Algorithm> algorithm,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
if (!this->isRecording()) {
|
||||
KP_LOG_ERROR(
|
||||
"Kompute sequence record attempted when not record BEGIN");
|
||||
return false;
|
||||
}
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
T* op = new T(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mCommandBuffer,
|
||||
tensors,
|
||||
std::forward<TArgs>(params)...);
|
||||
std::shared_ptr<T> op{ new T(algorithm,
|
||||
std::forward<TArgs>(params)...) };
|
||||
|
||||
OpBase* baseOp = dynamic_cast<OpBase*>(op);
|
||||
|
||||
std::unique_ptr<OpBase> baseOpPtr{ baseOp };
|
||||
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute Sequence running init on OpBase derived class instance");
|
||||
baseOpPtr->init();
|
||||
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute Sequence running record on OpBase derived class instance");
|
||||
baseOpPtr->record();
|
||||
|
||||
mOperations.push_back(std::move(baseOpPtr));
|
||||
|
||||
return true;
|
||||
return this->record(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> eval();
|
||||
|
||||
std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
// TODO: Aim to have only a single function with tensors/algorithm
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
// TODO: Aim to be able to handle errors when returning without throw
|
||||
// except
|
||||
return this->eval(op);
|
||||
}
|
||||
// Needed as otherwise it's not possible to use initializer lists
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence> eval(std::shared_ptr<Algorithm> algorithm,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{ new T(algorithm,
|
||||
std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->eval(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval Async sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier. EvalAwait() must
|
||||
* be called after to ensure the sequence is terminated correctly.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAsync();
|
||||
std::shared_ptr<Sequence> evalAsync(std::shared_ptr<OpBase> op);
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence> evalAsync(
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->evalAsync(op);
|
||||
}
|
||||
// Needed as otherwise it's not possible to use initializer lists
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence> evalAsync(std::shared_ptr<Algorithm> algorithm,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{ new T(algorithm,
|
||||
std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->evalAsync(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval Await waits for the fence to finish processing and then once it
|
||||
* finishes, it runs the postEval of all operations.
|
||||
*
|
||||
* @param waitFor Number of milliseconds to wait before timing out.
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
|
||||
|
||||
/**
|
||||
* Clear function clears all operations currently recorded and starts
|
||||
* recording again.
|
||||
*/
|
||||
void clear();
|
||||
|
||||
/**
|
||||
* Begins recording commands for commands to be submitted into the command
|
||||
* buffer.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
void begin();
|
||||
|
||||
/**
|
||||
* Ends the recording and stops recording commands when the record command
|
||||
* is sent.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
void end();
|
||||
|
||||
/**
|
||||
* Returns true if the sequence is currently recording.
|
||||
*
|
||||
* @return Boolean stating if recording ongoing.
|
||||
*/
|
||||
bool isRecording();
|
||||
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Returns true if the sequence is currently running - mostly used for async
|
||||
* workloads.
|
||||
*
|
||||
* @return Boolean stating if currently running.
|
||||
*/
|
||||
bool isRunning();
|
||||
|
||||
/**
|
||||
* Destroys and frees the GPU resources which include the buffer and memory
|
||||
* and sets the sequence as init=False.
|
||||
*/
|
||||
void destroy();
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
|
||||
|
|
@ -176,10 +249,9 @@ class Sequence
|
|||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
vk::Fence mFence;
|
||||
std::vector<std::unique_ptr<OpBase>> mOperations;
|
||||
std::vector<std::shared_ptr<OpBase>> mOperations;
|
||||
|
||||
// State
|
||||
bool mIsInit = false;
|
||||
bool mRecording = false;
|
||||
bool mIsRunning = false;
|
||||
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@
|
|||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include <SPIRV/GlslangToSpv.h>
|
||||
#include <glslang/Include/ResourceLimits.h>
|
||||
#include <glslang/Public/ShaderLang.h>
|
||||
#include <SPIRV/GlslangToSpv.h>
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
|
|
@ -16,161 +16,162 @@ namespace kp {
|
|||
// Has been adopted from:
|
||||
// https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp
|
||||
const TBuiltInResource defaultResource = {
|
||||
/* .MaxLights = */ 0,
|
||||
/* .MaxClipPlanes = */ 0,
|
||||
/* .MaxTextureUnits = */ 0,
|
||||
/* .MaxTextureCoords = */ 0,
|
||||
/* .MaxVertexAttribs = */ 64,
|
||||
/* .MaxVertexUniformComponents = */ 4096,
|
||||
/* .MaxVaryingFloats = */ 64,
|
||||
/* .MaxVertexTextureImageUnits = */ 0,
|
||||
/* .MaxCombinedTextureImageUnits = */ 0,
|
||||
/* .MaxTextureImageUnits = */ 0,
|
||||
/* .MaxFragmentUniformComponents = */ 0,
|
||||
/* .MaxDrawBuffers = */ 0,
|
||||
/* .MaxVertexUniformVectors = */ 128,
|
||||
/* .MaxVaryingVectors = */ 8,
|
||||
/* .MaxFragmentUniformVectors = */ 0,
|
||||
/* .MaxVertexOutputVectors = */ 16,
|
||||
/* .MaxFragmentInputVectors = */ 0,
|
||||
/* .MinProgramTexelOffset = */ -8,
|
||||
/* .MaxProgramTexelOffset = */ 7,
|
||||
/* .MaxClipDistances = */ 8,
|
||||
/* .MaxComputeWorkGroupCountX = */ 65535,
|
||||
/* .MaxComputeWorkGroupCountY = */ 65535,
|
||||
/* .MaxComputeWorkGroupCountZ = */ 65535,
|
||||
/* .MaxComputeWorkGroupSizeX = */ 1024,
|
||||
/* .MaxComputeWorkGroupSizeY = */ 1024,
|
||||
/* .MaxComputeWorkGroupSizeZ = */ 64,
|
||||
/* .MaxComputeUniformComponents = */ 1024,
|
||||
/* .MaxComputeTextureImageUnits = */ 16,
|
||||
/* .MaxComputeImageUniforms = */ 8,
|
||||
/* .MaxComputeAtomicCounters = */ 8,
|
||||
/* .MaxComputeAtomicCounterBuffers = */ 1,
|
||||
/* .MaxVaryingComponents = */ 60,
|
||||
/* .MaxVertexOutputComponents = */ 64,
|
||||
/* .MaxGeometryInputComponents = */ 64,
|
||||
/* .MaxGeometryOutputComponents = */ 128,
|
||||
/* .MaxFragmentInputComponents = */ 0,
|
||||
/* .MaxImageUnits = */ 0,
|
||||
/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0,
|
||||
/* .MaxCombinedShaderOutputResources = */ 8,
|
||||
/* .MaxImageSamples = */ 0,
|
||||
/* .MaxVertexImageUniforms = */ 0,
|
||||
/* .MaxTessControlImageUniforms = */ 0,
|
||||
/* .MaxTessEvaluationImageUniforms = */ 0,
|
||||
/* .MaxGeometryImageUniforms = */ 0,
|
||||
/* .MaxFragmentImageUniforms = */ 0,
|
||||
/* .MaxCombinedImageUniforms = */ 0,
|
||||
/* .MaxGeometryTextureImageUnits = */ 0,
|
||||
/* .MaxGeometryOutputVertices = */ 256,
|
||||
/* .MaxGeometryTotalOutputComponents = */ 1024,
|
||||
/* .MaxGeometryUniformComponents = */ 1024,
|
||||
/* .MaxGeometryVaryingComponents = */ 64,
|
||||
/* .MaxTessControlInputComponents = */ 128,
|
||||
/* .MaxTessControlOutputComponents = */ 128,
|
||||
/* .MaxTessControlTextureImageUnits = */ 0,
|
||||
/* .MaxTessControlUniformComponents = */ 1024,
|
||||
/* .MaxTessControlTotalOutputComponents = */ 4096,
|
||||
/* .MaxTessEvaluationInputComponents = */ 128,
|
||||
/* .MaxTessEvaluationOutputComponents = */ 128,
|
||||
/* .MaxTessEvaluationTextureImageUnits = */ 16,
|
||||
/* .MaxTessEvaluationUniformComponents = */ 1024,
|
||||
/* .MaxTessPatchComponents = */ 120,
|
||||
/* .MaxPatchVertices = */ 32,
|
||||
/* .MaxTessGenLevel = */ 64,
|
||||
/* .MaxViewports = */ 16,
|
||||
/* .MaxVertexAtomicCounters = */ 0,
|
||||
/* .MaxTessControlAtomicCounters = */ 0,
|
||||
/* .MaxTessEvaluationAtomicCounters = */ 0,
|
||||
/* .MaxGeometryAtomicCounters = */ 0,
|
||||
/* .MaxFragmentAtomicCounters = */ 0,
|
||||
/* .MaxCombinedAtomicCounters = */ 8,
|
||||
/* .MaxAtomicCounterBindings = */ 1,
|
||||
/* .MaxVertexAtomicCounterBuffers = */ 0,
|
||||
/* .MaxTessControlAtomicCounterBuffers = */ 0,
|
||||
/* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
|
||||
/* .MaxGeometryAtomicCounterBuffers = */ 0,
|
||||
/* .MaxFragmentAtomicCounterBuffers = */ 0,
|
||||
/* .MaxCombinedAtomicCounterBuffers = */ 1,
|
||||
/* .MaxAtomicCounterBufferSize = */ 16384,
|
||||
/* .MaxTransformFeedbackBuffers = */ 4,
|
||||
/* .MaxTransformFeedbackInterleavedComponents = */ 64,
|
||||
/* .MaxCullDistances = */ 8,
|
||||
/* .MaxCombinedClipAndCullDistances = */ 8,
|
||||
/* .MaxSamples = */ 4,
|
||||
/* .maxMeshOutputVerticesNV = */ 256,
|
||||
/* .maxMeshOutputPrimitivesNV = */ 512,
|
||||
/* .maxMeshWorkGroupSizeX_NV = */ 32,
|
||||
/* .maxMeshWorkGroupSizeY_NV = */ 1,
|
||||
/* .maxMeshWorkGroupSizeZ_NV = */ 1,
|
||||
/* .maxTaskWorkGroupSizeX_NV = */ 32,
|
||||
/* .maxTaskWorkGroupSizeY_NV = */ 1,
|
||||
/* .maxTaskWorkGroupSizeZ_NV = */ 1,
|
||||
/* .maxMeshViewCountNV = */ 4,
|
||||
/* .maxDualSourceDrawBuffersEXT = */ 1,
|
||||
/* .MaxLights = */ 0,
|
||||
/* .MaxClipPlanes = */ 0,
|
||||
/* .MaxTextureUnits = */ 0,
|
||||
/* .MaxTextureCoords = */ 0,
|
||||
/* .MaxVertexAttribs = */ 64,
|
||||
/* .MaxVertexUniformComponents = */ 4096,
|
||||
/* .MaxVaryingFloats = */ 64,
|
||||
/* .MaxVertexTextureImageUnits = */ 0,
|
||||
/* .MaxCombinedTextureImageUnits = */ 0,
|
||||
/* .MaxTextureImageUnits = */ 0,
|
||||
/* .MaxFragmentUniformComponents = */ 0,
|
||||
/* .MaxDrawBuffers = */ 0,
|
||||
/* .MaxVertexUniformVectors = */ 128,
|
||||
/* .MaxVaryingVectors = */ 8,
|
||||
/* .MaxFragmentUniformVectors = */ 0,
|
||||
/* .MaxVertexOutputVectors = */ 16,
|
||||
/* .MaxFragmentInputVectors = */ 0,
|
||||
/* .MinProgramTexelOffset = */ -8,
|
||||
/* .MaxProgramTexelOffset = */ 7,
|
||||
/* .MaxClipDistances = */ 8,
|
||||
/* .MaxComputeWorkGroupCountX = */ 65535,
|
||||
/* .MaxComputeWorkGroupCountY = */ 65535,
|
||||
/* .MaxComputeWorkGroupCountZ = */ 65535,
|
||||
/* .MaxComputeWorkGroupSizeX = */ 1024,
|
||||
/* .MaxComputeWorkGroupSizeY = */ 1024,
|
||||
/* .MaxComputeWorkGroupSizeZ = */ 64,
|
||||
/* .MaxComputeUniformComponents = */ 1024,
|
||||
/* .MaxComputeTextureImageUnits = */ 16,
|
||||
/* .MaxComputeImageUniforms = */ 8,
|
||||
/* .MaxComputeAtomicCounters = */ 8,
|
||||
/* .MaxComputeAtomicCounterBuffers = */ 1,
|
||||
/* .MaxVaryingComponents = */ 60,
|
||||
/* .MaxVertexOutputComponents = */ 64,
|
||||
/* .MaxGeometryInputComponents = */ 64,
|
||||
/* .MaxGeometryOutputComponents = */ 128,
|
||||
/* .MaxFragmentInputComponents = */ 0,
|
||||
/* .MaxImageUnits = */ 0,
|
||||
/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0,
|
||||
/* .MaxCombinedShaderOutputResources = */ 8,
|
||||
/* .MaxImageSamples = */ 0,
|
||||
/* .MaxVertexImageUniforms = */ 0,
|
||||
/* .MaxTessControlImageUniforms = */ 0,
|
||||
/* .MaxTessEvaluationImageUniforms = */ 0,
|
||||
/* .MaxGeometryImageUniforms = */ 0,
|
||||
/* .MaxFragmentImageUniforms = */ 0,
|
||||
/* .MaxCombinedImageUniforms = */ 0,
|
||||
/* .MaxGeometryTextureImageUnits = */ 0,
|
||||
/* .MaxGeometryOutputVertices = */ 256,
|
||||
/* .MaxGeometryTotalOutputComponents = */ 1024,
|
||||
/* .MaxGeometryUniformComponents = */ 1024,
|
||||
/* .MaxGeometryVaryingComponents = */ 64,
|
||||
/* .MaxTessControlInputComponents = */ 128,
|
||||
/* .MaxTessControlOutputComponents = */ 128,
|
||||
/* .MaxTessControlTextureImageUnits = */ 0,
|
||||
/* .MaxTessControlUniformComponents = */ 1024,
|
||||
/* .MaxTessControlTotalOutputComponents = */ 4096,
|
||||
/* .MaxTessEvaluationInputComponents = */ 128,
|
||||
/* .MaxTessEvaluationOutputComponents = */ 128,
|
||||
/* .MaxTessEvaluationTextureImageUnits = */ 16,
|
||||
/* .MaxTessEvaluationUniformComponents = */ 1024,
|
||||
/* .MaxTessPatchComponents = */ 120,
|
||||
/* .MaxPatchVertices = */ 32,
|
||||
/* .MaxTessGenLevel = */ 64,
|
||||
/* .MaxViewports = */ 16,
|
||||
/* .MaxVertexAtomicCounters = */ 0,
|
||||
/* .MaxTessControlAtomicCounters = */ 0,
|
||||
/* .MaxTessEvaluationAtomicCounters = */ 0,
|
||||
/* .MaxGeometryAtomicCounters = */ 0,
|
||||
/* .MaxFragmentAtomicCounters = */ 0,
|
||||
/* .MaxCombinedAtomicCounters = */ 8,
|
||||
/* .MaxAtomicCounterBindings = */ 1,
|
||||
/* .MaxVertexAtomicCounterBuffers = */ 0,
|
||||
/* .MaxTessControlAtomicCounterBuffers = */ 0,
|
||||
/* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
|
||||
/* .MaxGeometryAtomicCounterBuffers = */ 0,
|
||||
/* .MaxFragmentAtomicCounterBuffers = */ 0,
|
||||
/* .MaxCombinedAtomicCounterBuffers = */ 1,
|
||||
/* .MaxAtomicCounterBufferSize = */ 16384,
|
||||
/* .MaxTransformFeedbackBuffers = */ 4,
|
||||
/* .MaxTransformFeedbackInterleavedComponents = */ 64,
|
||||
/* .MaxCullDistances = */ 8,
|
||||
/* .MaxCombinedClipAndCullDistances = */ 8,
|
||||
/* .MaxSamples = */ 4,
|
||||
/* .maxMeshOutputVerticesNV = */ 256,
|
||||
/* .maxMeshOutputPrimitivesNV = */ 512,
|
||||
/* .maxMeshWorkGroupSizeX_NV = */ 32,
|
||||
/* .maxMeshWorkGroupSizeY_NV = */ 1,
|
||||
/* .maxMeshWorkGroupSizeZ_NV = */ 1,
|
||||
/* .maxTaskWorkGroupSizeX_NV = */ 32,
|
||||
/* .maxTaskWorkGroupSizeY_NV = */ 1,
|
||||
/* .maxTaskWorkGroupSizeZ_NV = */ 1,
|
||||
/* .maxMeshViewCountNV = */ 4,
|
||||
/* .maxDualSourceDrawBuffersEXT = */ 1,
|
||||
|
||||
/* .limits = */
|
||||
{
|
||||
/* .nonInductiveForLoops = */ 1,
|
||||
/* .whileLoops = */ 1,
|
||||
/* .doWhileLoops = */ 1,
|
||||
/* .generalUniformIndexing = */ 1,
|
||||
/* .generalAttributeMatrixVectorIndexing = */ 1,
|
||||
/* .generalVaryingIndexing = */ 1,
|
||||
/* .generalSamplerIndexing = */ 1,
|
||||
/* .generalVariableIndexing = */ 1,
|
||||
/* .generalConstantMatrixVectorIndexing = */ 1,
|
||||
}
|
||||
};
|
||||
|
||||
/* .limits = */ {
|
||||
/* .nonInductiveForLoops = */ 1,
|
||||
/* .whileLoops = */ 1,
|
||||
/* .doWhileLoops = */ 1,
|
||||
/* .generalUniformIndexing = */ 1,
|
||||
/* .generalAttributeMatrixVectorIndexing = */ 1,
|
||||
/* .generalVaryingIndexing = */ 1,
|
||||
/* .generalSamplerIndexing = */ 1,
|
||||
/* .generalVariableIndexing = */ 1,
|
||||
/* .generalConstantMatrixVectorIndexing = */ 1,
|
||||
}};
|
||||
|
||||
/**
|
||||
Shader utility class with functions to compile and process glsl files.
|
||||
*/
|
||||
class Shader {
|
||||
public:
|
||||
class Shader
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Compile multiple sources with optional filenames. Currently this function
|
||||
* uses the glslang C++ interface which is not thread safe so this function
|
||||
* should not be called from multiple threads concurrently. If you have a
|
||||
* online shader processing multithreading use-case that can't use offline
|
||||
* online shader processing multithreading use-case that can't use offline
|
||||
* compilation please open an issue.
|
||||
*
|
||||
* @param sources A list of raw glsl shaders in string format
|
||||
* @param files A list of file names respective to each of the sources
|
||||
* @param entryPoint The function name to use as entry point
|
||||
* @param definitions List of pairs containing key value definitions
|
||||
* @param resources A list that contains the resource limits for the GLSL compiler
|
||||
* @param resources A list that contains the resource limits for the
|
||||
* GLSL compiler
|
||||
* @return The compiled SPIR-V binary in unsigned int32 format
|
||||
*/
|
||||
static std::vector<uint32_t> compile_sources(
|
||||
const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& files = {},
|
||||
const std::string& entryPoint = "main",
|
||||
std::vector<std::pair<std::string,std::string>> definitions = {},
|
||||
const TBuiltInResource& resources = defaultResource);
|
||||
const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& files = {},
|
||||
const std::string& entryPoint = "main",
|
||||
std::vector<std::pair<std::string, std::string>> definitions = {},
|
||||
const TBuiltInResource& resources = defaultResource);
|
||||
|
||||
/**
|
||||
* Compile a single glslang source from string value. Currently this function
|
||||
* uses the glslang C++ interface which is not thread safe so this function
|
||||
* should not be called from multiple threads concurrently. If you have an
|
||||
* online shader processing multithreading use-case that can't use offline
|
||||
* compilation please open an issue.
|
||||
* Compile a single glslang source from string value. Currently this
|
||||
* function uses the glslang C++ interface which is not thread safe so this
|
||||
* function should not be called from multiple threads concurrently. If you
|
||||
* have an online shader processing multithreading use-case that can't use
|
||||
* offline compilation please open an issue.
|
||||
*
|
||||
* @param source An individual raw glsl shader in string format
|
||||
* @param entryPoint The function name to use as entry point
|
||||
* @param definitions List of pairs containing key value definitions
|
||||
* @param resources A list that contains the resource limits for the GLSL compiler
|
||||
* @param resources A list that contains the resource limits for the
|
||||
* GLSL compiler
|
||||
* @return The compiled SPIR-V binary in unsigned int32 format
|
||||
*/
|
||||
static std::vector<uint32_t> compile_source(
|
||||
const std::string& source,
|
||||
const std::string& entryPoint = "main",
|
||||
std::vector<std::pair<std::string,std::string>> definitions = {},
|
||||
const TBuiltInResource& resources = defaultResource);
|
||||
|
||||
const std::string& source,
|
||||
const std::string& entryPoint = "main",
|
||||
std::vector<std::pair<std::string, std::string>> definitions = {},
|
||||
const TBuiltInResource& resources = defaultResource);
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif // DKOMPUTE_DISABLE_SHADER_UTILS
|
||||
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@
|
|||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#define KP_MAX_DIM_SIZE 1
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
|
|
@ -30,11 +28,6 @@ class Tensor
|
|||
eStorage = 2, ///< Type is Device memory (only)
|
||||
};
|
||||
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
Tensor();
|
||||
|
||||
/**
|
||||
* Default constructor with data provided which would be used to create the
|
||||
* respective vulkan buffer and memory.
|
||||
|
|
@ -43,8 +36,10 @@ class Tensor
|
|||
* tensor
|
||||
* @param tensorType Type for the tensor which is of type TensorTypes
|
||||
*/
|
||||
Tensor(const std::vector<float>& data,
|
||||
TensorTypes tensorType = TensorTypes::eDevice);
|
||||
Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<float>& data,
|
||||
const TensorTypes& tensorType = TensorTypes::eDevice);
|
||||
|
||||
/**
|
||||
* Destructor which is in charge of freeing vulkan resources unless they
|
||||
|
|
@ -58,13 +53,15 @@ class Tensor
|
|||
* would only be created for the tensors of type TensorType::eDevice as
|
||||
* otherwise there is no need to copy from host memory.
|
||||
*/
|
||||
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device);
|
||||
void rebuild(const std::vector<float>& data,
|
||||
TensorTypes tensorType = TensorTypes::eDevice);
|
||||
|
||||
/**
|
||||
* Destroys and frees the GPU resources which include the buffer and memory.
|
||||
*/
|
||||
void freeMemoryDestroyGPUResources();
|
||||
void destroy();
|
||||
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Returns the vector of data currently contained by the Tensor. It is
|
||||
|
|
@ -91,26 +88,13 @@ class Tensor
|
|||
* @return Unsigned integer representing the total number of elements
|
||||
*/
|
||||
uint32_t size();
|
||||
/**
|
||||
* Returns the shape of the tensor, which includes the number of dimensions
|
||||
* and the size per dimension.
|
||||
*
|
||||
* @return Array containing the sizes for each dimension. Zero means
|
||||
* respective dimension is not active.
|
||||
*/
|
||||
std::array<uint32_t, KP_MAX_DIM_SIZE> shape();
|
||||
|
||||
/**
|
||||
* Retrieve the tensor type of the Tensor
|
||||
*
|
||||
* @return Tensor type of tensor
|
||||
*/
|
||||
TensorTypes tensorType();
|
||||
/**
|
||||
* Returns true if the tensor initialisation function has been carried out
|
||||
* successful, which would mean that the buffer and memory will have been
|
||||
* provisioned.
|
||||
*/
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Sets / resets the vector data of the tensor. This function does not
|
||||
|
|
@ -128,7 +112,7 @@ class Tensor
|
|||
* @param createBarrier Whether to create a barrier that ensures the data is
|
||||
* copied before further operations. Default is true.
|
||||
*/
|
||||
void recordCopyFrom(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
void recordCopyFrom(const vk::CommandBuffer& commandBuffer,
|
||||
std::shared_ptr<Tensor> copyFromTensor,
|
||||
bool createBarrier);
|
||||
|
||||
|
|
@ -141,9 +125,8 @@ class Tensor
|
|||
* @param createBarrier Whether to create a barrier that ensures the data is
|
||||
* copied before further operations. Default is true.
|
||||
*/
|
||||
void recordCopyFromStagingToDevice(
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
bool createBarrier);
|
||||
void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer,
|
||||
bool createBarrier);
|
||||
|
||||
/**
|
||||
* Records a copy from the internal device memory to the staging memory
|
||||
|
|
@ -154,9 +137,8 @@ class Tensor
|
|||
* @param createBarrier Whether to create a barrier that ensures the data is
|
||||
* copied before further operations. Default is true.
|
||||
*/
|
||||
void recordCopyFromDeviceToStaging(
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
bool createBarrier);
|
||||
void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer,
|
||||
bool createBarrier);
|
||||
|
||||
/**
|
||||
* Records the buffer memory barrier into the command buffer which
|
||||
|
|
@ -168,12 +150,11 @@ class Tensor
|
|||
* @param srcStageMask Pipeline stage flags for source stage mask
|
||||
* @param dstStageMask Pipeline stage flags for destination stage mask
|
||||
*/
|
||||
void recordBufferMemoryBarrier(
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
vk::AccessFlagBits srcAccessMask,
|
||||
vk::AccessFlagBits dstAccessMask,
|
||||
vk::PipelineStageFlagBits srcStageMask,
|
||||
vk::PipelineStageFlagBits dstStageMask);
|
||||
void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
|
||||
vk::AccessFlagBits srcAccessMask,
|
||||
vk::AccessFlagBits dstAccessMask,
|
||||
vk::PipelineStageFlagBits srcStageMask,
|
||||
vk::PipelineStageFlagBits dstStageMask);
|
||||
|
||||
/**
|
||||
* Constructs a vulkan descriptor buffer info which can be used to specify
|
||||
|
|
@ -214,21 +195,18 @@ class Tensor
|
|||
|
||||
TensorTypes mTensorType = TensorTypes::eDevice;
|
||||
|
||||
std::array<uint32_t, KP_MAX_DIM_SIZE> mShape;
|
||||
bool mIsInit = false;
|
||||
|
||||
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
|
||||
void createBuffer(std::shared_ptr<vk::Buffer> buffer,
|
||||
vk::BufferUsageFlags bufferUsageFlags);
|
||||
void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
|
||||
std::shared_ptr<vk::DeviceMemory> memory,
|
||||
vk::MemoryPropertyFlags memoryPropertyFlags);
|
||||
void copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::shared_ptr<vk::Buffer> bufferFrom,
|
||||
std::shared_ptr<vk::Buffer> bufferTo,
|
||||
vk::DeviceSize bufferSize,
|
||||
vk::BufferCopy copyRegion,
|
||||
bool createBarrier);
|
||||
void recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
|
||||
std::shared_ptr<vk::Buffer> bufferFrom,
|
||||
std::shared_ptr<vk::Buffer> bufferTo,
|
||||
vk::DeviceSize bufferSize,
|
||||
vk::BufferCopy copyRegion,
|
||||
bool createBarrier);
|
||||
|
||||
// Private util functions
|
||||
vk::BufferUsageFlags getPrimaryBufferUsageFlags();
|
||||
|
|
|
|||
|
|
@ -1,144 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/shaders/shaderopmult.hpp"
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that provides a general abstraction that simplifies the use of
|
||||
* algorithm and parameter components which can be used with shaders.
|
||||
* By default it enables the user to provide a dynamic number of tensors
|
||||
* which are then passed as inputs.
|
||||
*/
|
||||
class OpAlgoBase : public OpBase
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpAlgoBase();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format)
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const Workgroup& komputeWorkgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
|
||||
/**
|
||||
* Constructor that enables a file to be passed to the operation with
|
||||
* the contents of the shader. This can be either in raw format or in
|
||||
* compiled SPIR-V binary format.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format)
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::string shaderFilePath,
|
||||
const Workgroup& komputeWorkgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
|
||||
/**
|
||||
* Constructor that enables raw shader data to be passed to the main operation
|
||||
* which can be either in raw shader glsl code or in compiled SPIR-V binary.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& shaderDataRaw,
|
||||
const Workgroup& komputeWorkgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
virtual ~OpAlgoBase() override;
|
||||
|
||||
/**
|
||||
* The init function is responsible for the initialisation of the algorithm
|
||||
* component based on the parameters specified, and allows for extensibility
|
||||
* on the options provided. Further dependent classes can perform more
|
||||
* specific checks such as ensuring tensors provided are initialised, etc.
|
||||
*/
|
||||
virtual void init() override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
* the barriers that ensure the memory has been copied before going in and
|
||||
* out of the shader, as well as the dispatch operation that sends the
|
||||
* shader processing to the gpu. This function also records the GPU memory
|
||||
* copy of the output data for the staging buffer so it can be read by the
|
||||
* host.
|
||||
*/
|
||||
virtual void record() override;
|
||||
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*/
|
||||
virtual void preEval() override;
|
||||
|
||||
/**
|
||||
* Executes after the recorded commands are submitted, and performs a copy
|
||||
* of the GPU Device memory into the staging buffer so the output data can
|
||||
* be retrieved.
|
||||
*/
|
||||
virtual void postEval() override;
|
||||
|
||||
protected:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
bool mFreeAlgorithm = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
|
||||
Workgroup mKomputeWorkgroup;
|
||||
|
||||
std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing
|
||||
std::vector<uint32_t> mShaderDataRaw; ///< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content
|
||||
|
||||
virtual std::vector<uint32_t> fetchSpirvBinaryData();
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
58
src/include/kompute/operations/OpAlgoDispatch.hpp
Normal file
58
src/include/kompute/operations/OpAlgoDispatch.hpp
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that provides a general abstraction that simplifies the use of
|
||||
* algorithm and parameter components which can be used with shaders.
|
||||
* By default it enables the user to provide a dynamic number of tensors
|
||||
* which are then passed as inputs.
|
||||
*/
|
||||
class OpAlgoDispatch : public OpBase
|
||||
{
|
||||
public:
|
||||
|
||||
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
|
||||
const kp::Constants& pushConstants = {});
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
virtual ~OpAlgoDispatch() override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
* the barriers that ensure the memory has been copied before going in and
|
||||
* out of the shader, as well as the dispatch operation that sends the
|
||||
* shader processing to the gpu. This function also records the GPU memory
|
||||
* copy of the output data for the staging buffer so it can be read by the
|
||||
* host.
|
||||
*/
|
||||
virtual void record(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*/
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Executes after the recorded commands are submitted, and performs a copy
|
||||
* of the GPU Device memory into the staging buffer so the output data can
|
||||
* be retrieved.
|
||||
*/
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
Constants mPushConstants;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
|
|
@ -1,84 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpAlgoBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation base class to simplify the creation of operations that require
|
||||
* right hand and left hand side datapoints together with a single output.
|
||||
* The expected data passed is two input tensors and one output tensor.
|
||||
*/
|
||||
class OpAlgoLhsRhsOut : public OpAlgoBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpAlgoLhsRhsOut();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
const Workgroup& komputeWorkgroup = {});
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
virtual ~OpAlgoLhsRhsOut() override;
|
||||
|
||||
/**
|
||||
* The init function is responsible for ensuring that all of the tensors
|
||||
* provided are aligned with requirements such as LHS, RHS and Output
|
||||
* tensors, and creates the algorithm component which processes the
|
||||
* computation.
|
||||
*/
|
||||
virtual void init() override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
* the barriers that ensure the memory has been copied before going in and
|
||||
* out of the shader, as well as the dispatch operation that sends the
|
||||
* shader processing to the gpu. This function also records the GPU memory
|
||||
* copy of the output data for the staging buffer so it can be read by the
|
||||
* host.
|
||||
*/
|
||||
virtual void record() override;
|
||||
|
||||
/**
|
||||
* Executes after the recorded commands are submitted, and performs a copy
|
||||
* of the GPU Device memory into the staging buffer so the output data can
|
||||
* be retrieved.
|
||||
*/
|
||||
virtual void postEval() override;
|
||||
|
||||
protected:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<Tensor> mTensorLHS; ///< Reference to the parameter used in the left hand side equation of the shader
|
||||
std::shared_ptr<Tensor> mTensorRHS; ///< Reference to the parameter used in the right hand side equation of the shader
|
||||
std::shared_ptr<Tensor> mTensorOutput; ///< Reference to the parameter used in the output of the shader and will be copied with a staging vector
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
|
|
@ -17,33 +17,6 @@ namespace kp {
|
|||
class OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpBase() { KP_LOG_DEBUG("Compute OpBase base constructor"); }
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
*/
|
||||
OpBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Compute OpBase constructor with params");
|
||||
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
this->mCommandBuffer = commandBuffer;
|
||||
this->mTensors = tensors;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default destructor for OpBase class. This OpBase destructor class should
|
||||
|
|
@ -53,37 +26,14 @@ class OpBase
|
|||
virtual ~OpBase()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpBase destructor started");
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_WARN("Kompute OpBase destructor called with empty device");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->mFreeTensors) {
|
||||
KP_LOG_DEBUG("Kompute OpBase freeing tensors");
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if (tensor && tensor->isInit()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
} else {
|
||||
KP_LOG_WARN("Kompute OpBase expected to free "
|
||||
"tensor but has already been freed.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The init function is responsible for setting up all the resources and
|
||||
* should be called after the Operation has been created.
|
||||
*/
|
||||
virtual void init() = 0;
|
||||
|
||||
/**
|
||||
* The record function is intended to only send a record command or run
|
||||
* commands that are expected to record operations that are to be submitted
|
||||
* as a batch into the GPU.
|
||||
*/
|
||||
virtual void record() = 0;
|
||||
virtual void record(const vk::CommandBuffer& commandBuffer) = 0;
|
||||
|
||||
/**
|
||||
* Pre eval is called before the Sequence has called eval and submitted the commands to
|
||||
|
|
@ -93,7 +43,7 @@ class OpBase
|
|||
* resources that are created should be idempotent in case it's called multiple
|
||||
* times in a row.
|
||||
*/
|
||||
virtual void preEval() = 0;
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) = 0;
|
||||
|
||||
/**
|
||||
* Post eval is called after the Sequence has called eval and submitted the commands to
|
||||
|
|
@ -103,22 +53,7 @@ class OpBase
|
|||
* resources that are destroyed should not require a re-init unless explicitly
|
||||
* provided by the user.
|
||||
*/
|
||||
virtual void postEval() = 0;
|
||||
|
||||
protected:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::PhysicalDevice>
|
||||
mPhysicalDevice; ///< Vulkan Physical Device
|
||||
std::shared_ptr<vk::Device> mDevice; ///< Vulkan Logical Device
|
||||
std::shared_ptr<vk::CommandBuffer>
|
||||
mCommandBuffer; ///< Vulkan Command Buffer
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>>
|
||||
mTensors; ///< Tensors referenced by operation that can be managed
|
||||
///< optionally by operation
|
||||
bool mFreeTensors = false; ///< Explicit boolean that specifies whether the
|
||||
///< tensors are freed (if they are managed)
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) = 0;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -4,14 +4,12 @@
|
|||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#if RELEASE
|
||||
#include "kompute/shaders/shaderopmult.hpp"
|
||||
#endif
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpAlgoBase.hpp"
|
||||
#include "kompute/operations/OpAlgoDispatch.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
|
|
@ -19,15 +17,9 @@ namespace kp {
|
|||
* Operation that performs multiplication on two tensors and outputs on third
|
||||
* tensor.
|
||||
*/
|
||||
class OpMult : public OpAlgoBase
|
||||
class OpMult : public OpAlgoDispatch
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpMult() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
|
|
@ -40,46 +32,30 @@ class OpMult : public OpAlgoBase
|
|||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
const Workgroup& komputeWorkgroup = {})
|
||||
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup)
|
||||
OpMult(std::vector<std::shared_ptr<Tensor>> tensors, std::shared_ptr<Algorithm> algorithm)
|
||||
: OpAlgoDispatch(algorithm)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpMult constructor with params");
|
||||
|
||||
#ifndef RELEASE
|
||||
this->mShaderFilePath = "shaders/glsl/opmult.comp.spv";
|
||||
#endif
|
||||
}
|
||||
if (tensors.size() != 3) {
|
||||
throw std::runtime_error("Kompute OpMult expected 3 tensors but got " + tensors.size());
|
||||
}
|
||||
|
||||
#if RELEASE
|
||||
/**
|
||||
* If RELEASE=1 it will be using the static version of the shader which is
|
||||
* loaded using this file directly. Otherwise it should not override the function.
|
||||
*/
|
||||
std::vector<uint32_t> fetchSpirvBinaryData() override
|
||||
{
|
||||
KP_LOG_WARN(
|
||||
"Kompute OpMult Running shaders directly from header");
|
||||
|
||||
return std::vector<uint32_t>(
|
||||
std::vector<uint32_t> spirv(
|
||||
(uint32_t*)shader_data::shaders_glsl_opmult_comp_spv,
|
||||
(uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv +
|
||||
kp::shader_data::shaders_glsl_opmult_comp_spv_len));
|
||||
|
||||
algorithm->rebuild(tensors, spirv);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
~OpMult() override {
|
||||
virtual ~OpMult() override {
|
||||
KP_LOG_DEBUG("Kompute OpMult destructor started");
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -14,8 +14,6 @@ namespace kp {
|
|||
class OpTensorCopy : public OpBase
|
||||
{
|
||||
public:
|
||||
OpTensorCopy();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation.
|
||||
*
|
||||
|
|
@ -24,37 +22,31 @@ class OpTensorCopy : public OpBase
|
|||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used in the operation.
|
||||
*/
|
||||
OpTensorCopy(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorCopy() override;
|
||||
|
||||
/**
|
||||
* Performs basic checks such as ensuring there are at least two tensors provided, that they are initialised and that they are not of type TensorTypes::eStorage.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* Records the copy commands from the first tensor into all the other tensors provided. Also optionally records a barrier.
|
||||
*/
|
||||
void record() override;
|
||||
void record(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*/
|
||||
virtual void preEval() override;
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Copies the local vectors for all the tensors to sync the data with the gpu.
|
||||
*/
|
||||
virtual void postEval() override;
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
|
@ -14,8 +13,6 @@ namespace kp {
|
|||
class OpTensorSyncDevice : public OpBase
|
||||
{
|
||||
public:
|
||||
OpTensorSyncDevice();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
|
||||
*
|
||||
|
|
@ -24,37 +21,31 @@ class OpTensorSyncDevice : public OpBase
|
|||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used in the operation.
|
||||
*/
|
||||
OpTensorSyncDevice(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
OpTensorSyncDevice(const std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorSyncDevice() override;
|
||||
|
||||
/**
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
|
||||
*/
|
||||
void record() override;
|
||||
void record(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*/
|
||||
virtual void preEval() override;
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any postEval commands.
|
||||
*/
|
||||
virtual void postEval() override;
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -14,8 +14,6 @@ namespace kp {
|
|||
class OpTensorSyncLocal : public OpBase
|
||||
{
|
||||
public:
|
||||
OpTensorSyncLocal();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
|
||||
*
|
||||
|
|
@ -24,38 +22,32 @@ class OpTensorSyncLocal : public OpBase
|
|||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used in the operation.
|
||||
*/
|
||||
OpTensorSyncLocal(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
OpTensorSyncLocal(const std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorSyncLocal() override;
|
||||
|
||||
/**
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory.
|
||||
*/
|
||||
void record() override;
|
||||
void record(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*/
|
||||
virtual void preEval() override;
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* For host tensors it performs the map command from the host memory into local memory.
|
||||
*/
|
||||
virtual void postEval() override;
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -37,25 +37,32 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
}
|
||||
)");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
std::vector<float> data(size, 0.0);
|
||||
std::vector<float> resultSync(size, 100000000);
|
||||
std::vector<float> resultAsync(size, 100000000);
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> inputsSyncB;
|
||||
std::vector<std::shared_ptr<kp::Algorithm>> algorithms;
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
inputsSyncB.push_back(std::make_shared<kp::Tensor>(kp::Tensor(data)));
|
||||
inputsSyncB.push_back(mgr.tensor(data));
|
||||
algorithms.push_back(mgr.algorithm({ inputsSyncB[i] }, spirv));
|
||||
}
|
||||
|
||||
mgr.rebuild(inputsSyncB);
|
||||
sq->eval<kp::OpTensorSyncDevice>(inputsSyncB);
|
||||
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(inputsSyncB);
|
||||
|
||||
auto startSync = std::chrono::high_resolution_clock::now();
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
mgr.evalOpDefault<kp::OpAlgoBase>(
|
||||
{ inputsSyncB[i] }, kp::Shader::compile_source(shader));
|
||||
sq->eval<kp::OpAlgoDispatch>(algorithms[i]);
|
||||
}
|
||||
|
||||
auto endSync = std::chrono::high_resolution_clock::now();
|
||||
|
|
@ -63,7 +70,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
std::chrono::duration_cast<std::chrono::microseconds>(endSync - startSync)
|
||||
.count();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>(inputsSyncB);
|
||||
sq->eval<kp::OpTensorSyncLocal>(inputsSyncB);
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
EXPECT_EQ(inputsSyncB[i]->data(), resultSync);
|
||||
|
|
@ -73,27 +80,27 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> inputsAsyncB;
|
||||
|
||||
std::vector<std::shared_ptr<kp::Algorithm>> algosAsync;
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
inputsAsyncB.push_back(std::make_shared<kp::Tensor>(kp::Tensor(data)));
|
||||
inputsAsyncB.push_back(mgr.tensor(data));
|
||||
algosAsync.push_back(mgr.algorithm({ inputsAsyncB[i] }, spirv));
|
||||
}
|
||||
|
||||
mgrAsync.rebuild(inputsAsyncB);
|
||||
std::vector<std::shared_ptr<kp::Sequence>> sqs;
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
mgrAsync.sequence("async" + std::to_string(i), i);
|
||||
sqs.push_back(mgrAsync.sequence(i));
|
||||
}
|
||||
|
||||
auto startAsync = std::chrono::high_resolution_clock::now();
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
mgrAsync.evalOpAsync<kp::OpAlgoBase>(
|
||||
{ inputsAsyncB[i] },
|
||||
"async" + std::to_string(i),
|
||||
kp::Shader::compile_source(shader));
|
||||
sqs[i]->evalAsync<kp::OpAlgoDispatch>(algosAsync[i]);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
mgrAsync.evalOpAwait("async" + std::to_string(i));
|
||||
sqs[i]->evalAwait();
|
||||
}
|
||||
|
||||
auto endAsync = std::chrono::high_resolution_clock::now();
|
||||
|
|
@ -101,7 +108,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
endAsync - startAsync)
|
||||
.count();
|
||||
|
||||
mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ inputsAsyncB });
|
||||
sq->eval<kp::OpTensorSyncLocal>({ inputsAsyncB });
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
EXPECT_EQ(inputsAsyncB[i]->data(), resultAsync);
|
||||
|
|
@ -138,32 +145,32 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
|
|||
}
|
||||
)");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
std::vector<float> data(size, 0.0);
|
||||
std::vector<float> resultAsync(size, 100000000);
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(data);
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(data);
|
||||
|
||||
mgr.sequence("asyncOne");
|
||||
mgr.sequence("asyncTwo");
|
||||
std::shared_ptr<kp::Sequence> sq1 = mgr.sequence();
|
||||
std::shared_ptr<kp::Sequence> sq2 = mgr.sequence();
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
sq1->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
|
||||
std::vector<uint32_t> result = kp::Shader::compile_source(shader);
|
||||
std::shared_ptr<kp::Algorithm> algo1 = mgr.algorithm({ tensorA }, spirv);
|
||||
std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm({ tensorB }, spirv);
|
||||
|
||||
mgr.evalOpAsync<kp::OpAlgoBase>(
|
||||
{ tensorA }, "asyncOne", kp::Shader::compile_source(shader));
|
||||
sq1->evalAsync<kp::OpAlgoDispatch>(algo1);
|
||||
sq2->evalAsync<kp::OpAlgoDispatch>(algo2);
|
||||
|
||||
mgr.evalOpAsync<kp::OpAlgoBase>(
|
||||
{ tensorB }, "asyncTwo", kp::Shader::compile_source(shader));
|
||||
sq1->evalAwait();
|
||||
sq2->evalAwait();
|
||||
|
||||
mgr.evalOpAwait("asyncOne");
|
||||
mgr.evalOpAwait("asyncTwo");
|
||||
|
||||
mgr.evalOpAsyncDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
mgr.evalOpAwaitDefault();
|
||||
sq1->evalAsync<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
sq1->evalAwait();
|
||||
|
||||
EXPECT_EQ(tensorA->data(), resultAsync);
|
||||
EXPECT_EQ(tensorB->data(), resultAsync);
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
TEST(TestDestroy, TestDestroyTensorSingle)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -16,37 +16,36 @@ TEST(TestDestroy, TestDestroyTensorSingle)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
sq = mgr.sequence();
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm({ tensorA }, spirv);
|
||||
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq->end();
|
||||
|
||||
sq->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy(tensorA);
|
||||
mgr.sequence()
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->eval()
|
||||
->eval<kp::OpTensorSyncLocal>(algo->getTensors());
|
||||
|
||||
tensorA->destroy();
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
}
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroyTensorVector)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 1, 1, 1 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 1, 1, 1 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
std::shared_ptr<kp::Tensor> tensorB = nullptr;
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -58,6 +57,7 @@ TEST(TestDestroy, TestDestroyTensorVector)
|
|||
pa[index] = pa[index] + 1;
|
||||
pb[index] = pb[index] + 2;
|
||||
})");
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
|
@ -65,20 +65,20 @@ TEST(TestDestroy, TestDestroyTensorVector)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
tensorA = mgr.tensor({ 1, 1, 1 });
|
||||
tensorB = mgr.tensor({ 1, 1, 1 });
|
||||
|
||||
sq = mgr.sequence();
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm({ tensorA, tensorB }, spirv);
|
||||
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA, tensorB }, kp::Shader::compile_source(shader));
|
||||
sq->end();
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>(algo->getTensors())
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>(algo->getTensors())
|
||||
->eval();
|
||||
|
||||
sq->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
|
||||
mgr.destroy({ tensorA, tensorB });
|
||||
tensorA->destroy();
|
||||
tensorB->destroy();
|
||||
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
EXPECT_FALSE(tensorB->isInit());
|
||||
|
|
@ -88,32 +88,9 @@ TEST(TestDestroy, TestDestroyTensorVector)
|
|||
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroyTensorVectorUninitialised)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 1, 1, 1 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 1, 1, 1 }) };
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
mgr.destroy({ tensorA, tensorB });
|
||||
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
EXPECT_FALSE(tensorB->isInit());
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceSingle)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -124,247 +101,27 @@ TEST(TestDestroy, TestDestroySequenceSingle)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
sq = mgr.sequence();
|
||||
sq =
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ tensorA })
|
||||
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
|
||||
->record<kp::OpTensorSyncLocal>({ tensorA })
|
||||
->eval();
|
||||
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq->end();
|
||||
|
||||
sq->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy(sq);
|
||||
sq->destroy();
|
||||
|
||||
EXPECT_FALSE(sq->isInit());
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceVector)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq1 = nullptr;
|
||||
std::shared_ptr<kp::Sequence> sq2 = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
sq1 = mgr.sequence("One");
|
||||
sq1->begin();
|
||||
sq1->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq1->end();
|
||||
sq1->eval();
|
||||
|
||||
sq2 = mgr.sequence("Two");
|
||||
sq2->begin();
|
||||
sq2->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq2->end();
|
||||
sq2->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy({ sq1, sq2 });
|
||||
|
||||
EXPECT_FALSE(sq1->isInit());
|
||||
EXPECT_FALSE(sq2->isInit());
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceNameSingleInsideManager)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
{
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
mgr.evalOp<kp::OpAlgoBase>(
|
||||
{ tensorA }, "one",
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOp<kp::OpAlgoBase>(
|
||||
{ tensorA }, "two",
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy("one");
|
||||
mgr.destroy("two");
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceNameSingleOutsideManager)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq1 = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
sq1 = mgr.sequence("One");
|
||||
sq1->begin();
|
||||
sq1->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq1->end();
|
||||
sq1->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy("One");
|
||||
|
||||
EXPECT_FALSE(sq1->isInit());
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceNameVectorInsideManager)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
{
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
mgr.evalOp<kp::OpAlgoBase>(
|
||||
{ tensorA }, "one",
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOp<kp::OpAlgoBase>(
|
||||
{ tensorA }, "two",
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy(std::vector<std::string>({"one", "two"}));
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceNameVectorOutsideManager)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
{
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
mgr.evalOp<kp::OpAlgoBase>(
|
||||
{ tensorA }, "one",
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOp<kp::OpAlgoBase>(
|
||||
{ tensorA }, "two",
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy(std::vector<std::string>({"one", "two"}));
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceNameDefaultOutsideManager)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
{
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase>(
|
||||
{ tensorA },
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy(KP_DEFAULT_SESSION);
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,47 +11,45 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
|
|||
uint32_t ITERATIONS = 100;
|
||||
float learningRate = 0.1;
|
||||
|
||||
std::shared_ptr<kp::Tensor> xI{ new kp::Tensor({ 0, 1, 1, 1, 1 }) };
|
||||
std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> y{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor({ 0.001, 0.001 }) };
|
||||
std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor({ 0 }) };
|
||||
std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild(params);
|
||||
std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
|
||||
std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
|
||||
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
|
||||
std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
|
||||
std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
params,
|
||||
std::vector<uint32_t>(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)),
|
||||
kp::Workgroup(), kp::Constants({5.0}));
|
||||
std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
sq->end();
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
|
||||
std::vector<uint32_t> spirv = std::vector<uint32_t>(
|
||||
(uint32_t*)kp::shader_data::
|
||||
test_shaders_glsl_test_logistic_regression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::
|
||||
test_shaders_glsl_test_logistic_regression_comp_spv +
|
||||
kp::shader_data::
|
||||
test_shaders_glsl_test_logistic_regression_comp_spv_len));
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algorithm = mgr.algorithm(
|
||||
params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 }));
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
|
||||
->record<kp::OpAlgoDispatch>(algorithm)
|
||||
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
||||
// Iterate across all expected iterations
|
||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||
|
|
@ -64,21 +62,21 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
|
|||
bIn->data()[0] -= learningRate * bOut->data()[j];
|
||||
}
|
||||
}
|
||||
|
||||
// Based on the inputs the outputs should be at least:
|
||||
// * wi < 0.01
|
||||
// * wj > 1.0
|
||||
// * b < 0
|
||||
// TODO: Add EXPECT_DOUBLE_EQ instead
|
||||
EXPECT_LT(wIn->data()[0], 0.01);
|
||||
EXPECT_GT(wIn->data()[1], 1.0);
|
||||
EXPECT_LT(bIn->data()[0], 0.0);
|
||||
|
||||
KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}",
|
||||
wIn->data()[0],
|
||||
wIn->data()[1],
|
||||
bIn->data()[0]);
|
||||
}
|
||||
|
||||
// Based on the inputs the outputs should be at least:
|
||||
// * wi < 0.01
|
||||
// * wj > 1.0
|
||||
// * b < 0
|
||||
// TODO: Add EXPECT_DOUBLE_EQ instead
|
||||
EXPECT_LT(wIn->data()[0], 0.01);
|
||||
EXPECT_GT(wIn->data()[1], 1.0);
|
||||
EXPECT_LT(bIn->data()[0], 0.0);
|
||||
|
||||
KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}",
|
||||
wIn->data()[0],
|
||||
wIn->data()[1],
|
||||
bIn->data()[0]);
|
||||
}
|
||||
|
||||
TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
|
||||
|
|
@ -87,50 +85,46 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
|
|||
uint32_t ITERATIONS = 100;
|
||||
float learningRate = 0.1;
|
||||
|
||||
kp::Constants wInVec = { 0.001, 0.001 };
|
||||
std::vector<float> bInVec = { 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> xI{ new kp::Tensor({ 0, 1, 1, 1, 1 }) };
|
||||
std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> y{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor(
|
||||
wInVec, kp::Tensor::TensorTypes::eHost) };
|
||||
std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor(
|
||||
bInVec, kp::Tensor::TensorTypes::eHost) };
|
||||
std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild(params);
|
||||
std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
|
||||
std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
std::shared_ptr<kp::Tensor> wIn =
|
||||
mgr.tensor({ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost);
|
||||
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
params,
|
||||
std::vector<uint32_t>(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)),
|
||||
kp::Workgroup(), kp::Constants({5.0}));
|
||||
std::shared_ptr<kp::Tensor> bIn =
|
||||
mgr.tensor({ 0 }, kp::Tensor::TensorTypes::eHost);
|
||||
std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
sq->end();
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
mgr.sequence()->record<kp::OpTensorSyncDevice>(params)->eval();
|
||||
|
||||
std::vector<uint32_t> spirv = std::vector<uint32_t>(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::
|
||||
shaders_glsl_logisticregression_comp_spv +
|
||||
kp::shader_data::
|
||||
shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({ 5.0 }));
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
|
||||
->record<kp::OpAlgoDispatch>(algorithm)
|
||||
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
||||
// Iterate across all expected iterations
|
||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||
|
|
@ -145,19 +139,19 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
|
|||
wIn->mapDataIntoHostMemory();
|
||||
bIn->mapDataIntoHostMemory();
|
||||
}
|
||||
|
||||
// Based on the inputs the outputs should be at least:
|
||||
// * wi < 0.01
|
||||
// * wj > 1.0
|
||||
// * b < 0
|
||||
// TODO: Add EXPECT_DOUBLE_EQ instead
|
||||
EXPECT_LT(wIn->data()[0], 0.01);
|
||||
EXPECT_GT(wIn->data()[1], 1.0);
|
||||
EXPECT_LT(bIn->data()[0], 0.0);
|
||||
|
||||
KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}",
|
||||
wIn->data()[0],
|
||||
wIn->data()[1],
|
||||
bIn->data()[0]);
|
||||
}
|
||||
|
||||
// Based on the inputs the outputs should be at least:
|
||||
// * wi < 0.01
|
||||
// * wj > 1.0
|
||||
// * b < 0
|
||||
// TODO: Add EXPECT_DOUBLE_EQ instead
|
||||
EXPECT_LT(wIn->data()[0], 0.01);
|
||||
EXPECT_GT(wIn->data()[1], 1.0);
|
||||
EXPECT_LT(bIn->data()[0], 0.0);
|
||||
|
||||
KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}",
|
||||
wIn->data()[0],
|
||||
wIn->data()[1],
|
||||
bIn->data()[0]);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,53 +3,43 @@
|
|||
|
||||
#include "kompute/Kompute.hpp"
|
||||
|
||||
TEST(TestManager, EndToEndOpMultFlow)
|
||||
TEST(TestManager, EndToEndOpMultEvalFlow)
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorLHS{ new kp::Tensor({ 0, 1, 2 }) };
|
||||
mgr.rebuild({ tensorLHS });
|
||||
std::shared_ptr<kp::Tensor> tensorLHS = mgr.tensor({ 0, 1, 2 });
|
||||
std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
|
||||
std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorRHS{ new kp::Tensor({ 2, 4, 6 }) };
|
||||
mgr.rebuild({ tensorRHS });
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
|
||||
tensorRHS,
|
||||
tensorOutput };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorOutput{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
mgr.rebuild({ tensorOutput });
|
||||
|
||||
mgr.evalOpDefault<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorOutput });
|
||||
mgr.sequence()
|
||||
->eval<kp::OpTensorSyncDevice>(params)
|
||||
->eval<kp::OpMult>(params, mgr.algorithm())
|
||||
->eval<kp::OpTensorSyncLocal>(params);
|
||||
|
||||
EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
|
||||
}
|
||||
|
||||
TEST(TestManager, OpMultSequenceFlow)
|
||||
TEST(TestManager, EndToEndOpMultSeqFlow)
|
||||
{
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorLHS{ new kp::Tensor({ 0, 1, 2 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorRHS{ new kp::Tensor({ 2, 4, 6 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorOutput{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
{
|
||||
mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput });
|
||||
std::shared_ptr<kp::Tensor> tensorLHS = mgr.tensor({ 0, 1, 2 });
|
||||
std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
|
||||
std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("newSequence");
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
|
||||
tensorRHS,
|
||||
tensorOutput };
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorOutput });
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>(params)
|
||||
->record<kp::OpMult>(params, mgr.algorithm())
|
||||
->record<kp::OpTensorSyncLocal>(params)
|
||||
->eval();
|
||||
|
||||
EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
|
||||
}
|
||||
|
|
@ -58,75 +48,17 @@ TEST(TestManager, TestMultipleSequences)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqOne =
|
||||
mgr.sequence("sqOne");
|
||||
std::shared_ptr<kp::Tensor> tensorLHS = mgr.tensor({ 0, 1, 2 });
|
||||
std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
|
||||
std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqTwo =
|
||||
mgr.sequence("sqTwo");
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
|
||||
tensorRHS,
|
||||
tensorOutput };
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqOneRef =
|
||||
mgr.sequence("sqOne");
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqTwoRef =
|
||||
mgr.sequence("sqTwo");
|
||||
|
||||
EXPECT_EQ(sqOne, sqOneRef);
|
||||
EXPECT_NE(sqTwo, sqOneRef);
|
||||
EXPECT_EQ(sqTwo, sqTwoRef);
|
||||
EXPECT_NE(sqOneRef, sqTwoRef);
|
||||
}
|
||||
|
||||
TEST(TestManager, TestMultipleTensorsAtOnce)
|
||||
{
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorLHS{ new kp::Tensor({ 0, 1, 2 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorRHS{ new kp::Tensor({ 2, 4, 6 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorOutput{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("newSequence");
|
||||
|
||||
{
|
||||
mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput });
|
||||
|
||||
EXPECT_TRUE(tensorLHS->isInit());
|
||||
EXPECT_TRUE(tensorRHS->isInit());
|
||||
EXPECT_TRUE(tensorOutput->isInit());
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorOutput });
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
mgr.sequence()->eval<kp::OpMult>(params, mgr.algorithm());
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>(params);
|
||||
|
||||
EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
|
||||
}
|
||||
|
||||
TEST(TestManager, TestCreateInitTensor)
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 1, 2 });
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorB });
|
||||
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({ 0, 1, 2 }));
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorC =
|
||||
mgr.tensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost);
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorC });
|
||||
|
||||
EXPECT_EQ(tensorC->data(), std::vector<float>({ 0, 1, 2 }));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,12 +3,76 @@
|
|||
|
||||
#include "kompute/Kompute.hpp"
|
||||
|
||||
TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
|
||||
{
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
auto tensorInA = mgr.tensor({ 2., 2., 2. });
|
||||
auto tensorInB = mgr.tensor({ 1., 2., 3. });
|
||||
auto tensorOutA = mgr.tensor({ 0., 0., 0. });
|
||||
auto tensorOutB = mgr.tensor({ 0., 0., 0. });
|
||||
|
||||
std::string shader = (R"(
|
||||
#version 450
|
||||
|
||||
layout (local_size_x = 1) in;
|
||||
|
||||
// Each tensor's binding index matches its position in the parameter list passed
|
||||
layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
|
||||
layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
|
||||
layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
|
||||
layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
|
||||
|
||||
// Kompute supports push constants updated on dispatch
|
||||
layout(push_constant) uniform PushConstants {
|
||||
float val;
|
||||
} push_const;
|
||||
|
||||
// Kompute also supports spec constants on initialization
|
||||
layout(constant_id = 0) const float const_one = 0;
|
||||
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
out_a[index] += in_a[index] * in_b[index];
|
||||
out_b[index] += const_one * push_const.val;
|
||||
}
|
||||
)");
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = {
|
||||
tensorInA, tensorInB, tensorOutA, tensorOutB
|
||||
};
|
||||
|
||||
kp::Workgroup workgroup({ 3, 1, 1 });
|
||||
kp::Constants specConsts({ 2 });
|
||||
kp::Constants pushConstsA({ 2.0 });
|
||||
kp::Constants pushConstsB({ 3.0 });
|
||||
|
||||
auto algorithm = mgr.algorithm(
|
||||
params, kp::Shader::compile_source(shader), workgroup, specConsts);
|
||||
|
||||
// 3. Run operation with string shader synchronously
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>(params)
|
||||
->record<kp::OpAlgoDispatch>(algorithm, pushConstsA)
|
||||
->record<kp::OpAlgoDispatch>(algorithm, pushConstsB)
|
||||
->eval();
|
||||
|
||||
auto sq = mgr.sequence();
|
||||
sq->evalAsync<kp::OpTensorSyncLocal>(params);
|
||||
|
||||
sq->evalAwait();
|
||||
|
||||
EXPECT_EQ(tensorOutA->data(), std::vector<float>({ 4, 8, 12 }));
|
||||
EXPECT_EQ(tensorOutB->data(), std::vector<float>({ 10, 10, 10 }));
|
||||
}
|
||||
|
||||
TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
|
||||
{
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -19,25 +83,16 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("newSequence");
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
{
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ tensorA })
|
||||
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
|
||||
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
|
||||
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
|
||||
->record<kp::OpTensorSyncLocal>({ tensorA })
|
||||
->eval();
|
||||
}
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
|
||||
|
|
@ -47,7 +102,7 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -58,41 +113,22 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
mgr.rebuild({ tensorA }, false);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sqTensor = mgr.sequence();
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm({ tensorA }, spirv);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
// First create the tensor in a separate sequence
|
||||
sqTensor->begin();
|
||||
sqTensor->record<kp::OpTensorSyncDevice>({ tensorA });
|
||||
sqTensor->end();
|
||||
sqTensor->eval();
|
||||
mgr.sequence()->record<kp::OpTensorSyncDevice>({ tensorA })->eval();
|
||||
|
||||
// Then perform the computations
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoBase>({ tensorA },
|
||||
kp::Shader::compile_source(shader));
|
||||
sq->end();
|
||||
sq->eval();
|
||||
mgr.sequence()->record<kp::OpAlgoDispatch>(algorithm)->eval();
|
||||
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoBase>({ tensorA },
|
||||
kp::Shader::compile_source(shader));
|
||||
sq->end();
|
||||
sq->eval();
|
||||
mgr.sequence()->record<kp::OpAlgoDispatch>(algorithm)->eval();
|
||||
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoBase>({ tensorA },
|
||||
kp::Shader::compile_source(shader));
|
||||
sq->end();
|
||||
sq->eval();
|
||||
mgr.sequence()->record<kp::OpAlgoDispatch>(algorithm)->eval();
|
||||
|
||||
sq->begin();
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorA });
|
||||
sq->end();
|
||||
sq->eval();
|
||||
mgr.sequence()->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
|
@ -102,7 +138,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
|
|||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -113,68 +149,31 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("newSequence");
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm({ tensorA }, spirv);
|
||||
|
||||
sq->begin();
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq->record<kp::OpTensorSyncDevice>({ tensorA })->eval();
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
sq->record<kp::OpAlgoDispatch>(algorithm)->eval();
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("newSequence2");
|
||||
sq->record<kp::OpAlgoDispatch>(algorithm)->eval();
|
||||
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoDispatch>(algorithm)->eval();
|
||||
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("newSequence3");
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("newSequence5");
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
||||
TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
|
||||
{
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -185,169 +184,56 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
mgr.rebuild({ tensorA }, false);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("newSequence");
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm({ tensorA }, spirv);
|
||||
|
||||
sq->begin();
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
sq->record<kp::OpTensorSyncDevice>({ tensorA });
|
||||
sq->record<kp::OpTensorSyncDevice>({ tensorA })->eval();
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
sq->record<kp::OpAlgoDispatch>(algorithm)->eval()->eval()->eval();
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("newSequence2");
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
|
||||
sq->end();
|
||||
|
||||
sq->eval();
|
||||
sq->eval();
|
||||
sq->eval();
|
||||
}
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("newSequence3");
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
sq->end();
|
||||
|
||||
sq->eval();
|
||||
sq->eval();
|
||||
sq->eval();
|
||||
}
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
||||
TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate)
|
||||
{
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorInA{ new kp::Tensor({ 2.0, 4.0, 6.0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorInB{ new kp::Tensor({ 0.0, 1.0, 2.0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorOut{ new kp::Tensor({ 0.0, 0.0, 0.0 }) };
|
||||
|
||||
mgr.rebuild({ tensorInA, tensorInB, tensorOut });
|
||||
|
||||
std::string shader(R"(
|
||||
// The version to use
|
||||
#version 450
|
||||
|
||||
// The execution structure
|
||||
layout (local_size_x = 1) in;
|
||||
|
||||
// The buffers are provided via the tensors
|
||||
layout(binding = 0) buffer bufA { float a[]; };
|
||||
layout(binding = 1) buffer bufB { float b[]; };
|
||||
layout(binding = 2) buffer bufOut { float o[]; };
|
||||
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
|
||||
o[index] = a[index] * b[index];
|
||||
}
|
||||
)");
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase>(
|
||||
{ tensorInA, tensorInB, tensorOut },
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorOut });
|
||||
|
||||
EXPECT_EQ(tensorOut->data(), std::vector<float>({ 0.0, 4.0, 12.0 }));
|
||||
}
|
||||
|
||||
TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate)
|
||||
{
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
auto tensorInA = mgr.tensor(
|
||||
{ 2.0, 4.0, 6.0 }, kp::Tensor::TensorTypes::eDevice, false);
|
||||
auto tensorInB = mgr.tensor(
|
||||
{ 0.0, 1.0, 2.0 }, kp::Tensor::TensorTypes::eDevice, false);
|
||||
auto tensorOut = mgr.tensor(
|
||||
{ 0.0, 0.0, 0.0 }, kp::Tensor::TensorTypes::eDevice, false);
|
||||
|
||||
std::string shader(R"(
|
||||
// The version to use
|
||||
#version 450
|
||||
|
||||
// The execution structure
|
||||
layout (local_size_x = 1) in;
|
||||
|
||||
// The buffers are provided via the tensors
|
||||
layout(binding = 0) buffer bufA { float a[]; };
|
||||
layout(binding = 1) buffer bufB { float b[]; };
|
||||
layout(binding = 2) buffer bufOut { float o[]; };
|
||||
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
|
||||
o[index] = a[index] * b[index];
|
||||
}
|
||||
)");
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncDevice>(
|
||||
{ tensorInA, tensorInB, tensorOut });
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase>(
|
||||
{ tensorInA, tensorInB, tensorOut },
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorOut });
|
||||
|
||||
EXPECT_EQ(tensorOut->data(), std::vector<float>({ 0.0, 4.0, 12.0 }));
|
||||
}
|
||||
|
||||
TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm({ tensorA }, spirv);
|
||||
|
||||
sq = mgr.sequence();
|
||||
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq->end();
|
||||
sq->record<kp::OpTensorSyncDevice>({ tensorA })->eval();
|
||||
|
||||
sq->eval();
|
||||
sq->record<kp::OpAlgoDispatch>(algorithm)->eval()->eval()->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,80 +0,0 @@
|
|||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "kompute/Kompute.hpp"
|
||||
|
||||
TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies)
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
float TOTAL_ITER = 10;
|
||||
|
||||
std::vector<float> testExpectedOutVec = { TOTAL_ITER,
|
||||
TOTAL_ITER,
|
||||
TOTAL_ITER };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
||||
layout (local_size_x = 1) in;
|
||||
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
layout(set = 0, binding = 1) buffer b { float pb[]; };
|
||||
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pb[index] = pa[index] + 1;
|
||||
}
|
||||
)");
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB }, false);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("default");
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorSyncDevice>({ tensorA, tensorB });
|
||||
|
||||
sq->end();
|
||||
|
||||
sq->eval();
|
||||
}
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("run");
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA, tensorB },
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
sq->record<kp::OpTensorCopy>({ tensorB, tensorA });
|
||||
sq->end();
|
||||
|
||||
for (size_t i = 0; i < TOTAL_ITER; i++) {
|
||||
sq->eval();
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("export");
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
|
||||
sq->end();
|
||||
|
||||
sq->eval();
|
||||
}
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testExpectedOutVec);
|
||||
}
|
||||
|
|
@ -5,13 +5,12 @@
|
|||
|
||||
#include "kompute_test/shaders/shadertest_op_custom_shader.hpp"
|
||||
|
||||
TEST(TestOpAlgoBase, ShaderRawDataFromConstructor)
|
||||
TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor)
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 3, 4, 5 });
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -28,50 +27,60 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor)
|
|||
}
|
||||
)");
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase>(
|
||||
{ tensorA, tensorB }, kp::Shader::compile_source(shader));
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA, tensorB };
|
||||
|
||||
mgr.sequence()
|
||||
->eval<kp::OpTensorSyncDevice>(params)
|
||||
->eval<kp::OpAlgoDispatch>(mgr.algorithm(params, spirv))
|
||||
->eval<kp::OpTensorSyncLocal>(params);
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
|
||||
}
|
||||
|
||||
TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor)
|
||||
TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 3, 4, 5 });
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase>(
|
||||
{ tensorA, tensorB },
|
||||
std::vector<uint32_t>(
|
||||
(uint32_t*)kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv +
|
||||
kp::shader_data::
|
||||
test_shaders_glsl_test_op_custom_shader_comp_spv_len)));
|
||||
std::vector<uint32_t> spirv = std::vector<uint32_t>(
|
||||
(uint32_t*)
|
||||
kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::
|
||||
test_shaders_glsl_test_op_custom_shader_comp_spv +
|
||||
kp::shader_data::
|
||||
test_shaders_glsl_test_op_custom_shader_comp_spv_len));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA, tensorB };
|
||||
|
||||
mgr.sequence()
|
||||
->eval<kp::OpTensorSyncDevice>(params)
|
||||
->eval<kp::OpAlgoDispatch>(mgr.algorithm(params, spirv))
|
||||
->eval<kp::OpTensorSyncLocal>(params);
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
|
||||
}
|
||||
|
||||
TEST(TestOpAlgoBase, ShaderCompiledDataFromFile)
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase>(
|
||||
{ tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv");
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
|
||||
}
|
||||
// TODO: Add support for reading the shader from a file
|
||||
// TEST(TestOpAlgoCreate, ShaderCompiledDataFromFile)
|
||||
//{
|
||||
// kp::Manager mgr;
|
||||
//
|
||||
// std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
|
||||
// std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
// mgr.rebuild({ tensorA, tensorB });
|
||||
//
|
||||
// mgr.evalOpDefault<kp::OpAlgoCreate>(
|
||||
// { tensorA, tensorB },
|
||||
// "test/shaders/glsl/test_op_custom_shader.comp.spv");
|
||||
//
|
||||
// mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
//
|
||||
// EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
|
||||
// EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
|
||||
//}
|
||||
|
|
|
|||
|
|
@ -11,20 +11,18 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor)
|
|||
std::vector<float> testVecA{ 1, 2, 3 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
mgr.sequence()
|
||||
->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB })
|
||||
->eval<kp::OpTensorCopy>({ tensorA, tensorB })
|
||||
->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
|
||||
// Making sure the GPU holds the same data
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorB });
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
}
|
||||
|
||||
|
|
@ -37,23 +35,24 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
|
|||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
std::vector<float> testVecC{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
std::shared_ptr<kp::Tensor> tensorC{ new kp::Tensor(testVecC) };
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB, tensorC });
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
|
||||
std::shared_ptr<kp::Tensor> tensorC = mgr.tensor(testVecC);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
EXPECT_TRUE(tensorC->isInit());
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorB, tensorC });
|
||||
mgr.sequence()
|
||||
->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB, tensorC })
|
||||
->eval<kp::OpTensorCopy>({ tensorA, tensorB, tensorC });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
EXPECT_EQ(tensorA->data(), tensorC->data());
|
||||
|
||||
// Making sure the GPU holds the same data
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorB, tensorC });
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB, tensorC });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
EXPECT_EQ(tensorA->data(), tensorC->data());
|
||||
}
|
||||
|
|
@ -66,24 +65,22 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
|
|||
std::vector<float> testVecA{ 3, 4, 5 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(
|
||||
testVecB, kp::Tensor::TensorTypes::eHost) };
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB }, false);
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
|
||||
std::shared_ptr<kp::Tensor> tensorB =
|
||||
mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost);
|
||||
|
||||
// Only calling sync on device type tensor
|
||||
mgr.evalOpDefault<kp::OpTensorSyncDevice>({ tensorA });
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorA });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
mgr.sequence()->eval<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
|
||||
// Making sure the GPU holds the same data
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorB });
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
}
|
||||
|
||||
|
|
@ -95,27 +92,22 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
|
|||
std::vector<float> testVecA{ 4, 5, 6 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
|
||||
testVecA, kp::Tensor::TensorTypes::eHost) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB }, false);
|
||||
|
||||
// Manually copy data into host memory of Tensor
|
||||
tensorA->mapDataIntoHostMemory();
|
||||
std::shared_ptr<kp::Tensor> tensorA =
|
||||
mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
|
||||
|
||||
// Only calling sync on device type tensor
|
||||
mgr.evalOpDefault<kp::OpTensorSyncDevice>({ tensorB });
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
mgr.sequence()->eval<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
|
||||
// Making sure the GPU holds the same data
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorB });
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
}
|
||||
|
||||
|
|
@ -127,22 +119,22 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
|
|||
std::vector<float> testVecA{ 5, 6, 7 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
|
||||
testVecA, kp::Tensor::TensorTypes::eHost) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(
|
||||
testVecB, kp::Tensor::TensorTypes::eHost) };
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
std::shared_ptr<kp::Tensor> tensorA =
|
||||
mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
|
||||
std::shared_ptr<kp::Tensor> tensorB =
|
||||
mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
mgr.sequence()
|
||||
->eval<kp::OpTensorSyncDevice>({ tensorA })
|
||||
->eval<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
|
||||
// Making sure the GPU holds the same data
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorB });
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
}
|
||||
|
||||
|
|
@ -153,13 +145,11 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail)
|
|||
|
||||
std::vector<float> testVecA{ 6, 7, 8 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
|
||||
testVecA, kp::Tensor::TensorTypes::eHost) };
|
||||
|
||||
mgr.rebuild({ tensorA }, false);
|
||||
std::shared_ptr<kp::Tensor> tensorA =
|
||||
mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
|
||||
EXPECT_THROW(mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA }),
|
||||
EXPECT_THROW(mgr.sequence()->eval<kp::OpTensorCopy>({ tensorA }),
|
||||
std::runtime_error);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,12 +6,12 @@
|
|||
TEST(TestOpTensorCreate, CreateSingleTensorSingleOp)
|
||||
{
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
tensorA = mgr.tensor(testVecA);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
|
||||
|
|
@ -21,120 +21,23 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp)
|
|||
EXPECT_FALSE(tensorA->isInit());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCreate, CreateMultipleTensorSingleOp)
|
||||
{
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecB{ 6, 5, 4 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
EXPECT_EQ(tensorB->data(), testVecB);
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp)
|
||||
{
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecB{ 6, 5, 4 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
mgr.rebuild({ tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
EXPECT_EQ(tensorB->data(), testVecB);
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerDestroyed)
|
||||
{
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecB{ 6, 5, 4 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
mgr.rebuild({ tensorA });
|
||||
mgr.rebuild({ tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
EXPECT_EQ(tensorB->data(), testVecB);
|
||||
}
|
||||
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
EXPECT_FALSE(tensorB->isInit());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerNOTDestroyed)
|
||||
{
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecB{ 6, 5, 4 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
{
|
||||
mgr.rebuild({ tensorA });
|
||||
mgr.rebuild({ tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
EXPECT_EQ(tensorB->data(), testVecB);
|
||||
}
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore)
|
||||
{
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecB{ 6, 5, 4 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
mgr.rebuild({ tensorB });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
EXPECT_EQ(tensorB->data(), testVecB);
|
||||
|
||||
tensorA->freeMemoryDestroyGPUResources();
|
||||
tensorB->freeMemoryDestroyGPUResources();
|
||||
tensorA->destroy();
|
||||
tensorB->destroy();
|
||||
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
EXPECT_FALSE(tensorB->isInit());
|
||||
}
|
||||
|
|
@ -143,12 +46,10 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor)
|
|||
{
|
||||
std::vector<float> testVecA;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
try {
|
||||
mgr.rebuild({ tensorA });
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
|
||||
} catch (const std::runtime_error& err) {
|
||||
// check exception
|
||||
ASSERT_TRUE(std::string(err.what()).find("zero-sized") !=
|
||||
|
|
|
|||
|
|
@ -11,17 +11,15 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor)
|
|||
std::vector<float> testVecPreA{ 0, 0, 0 };
|
||||
std::vector<float> testVecPostA{ 9, 8, 7 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecPreA) };
|
||||
|
||||
mgr.rebuild({ tensorA }, false);
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecPreA);
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
|
||||
tensorA->setData(testVecPostA);
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncDevice>({ tensorA });
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorA });
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecPostA);
|
||||
}
|
||||
|
|
@ -33,11 +31,9 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
|
|||
|
||||
std::vector<float> testVec{ 9, 8, 7 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorC{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB, tensorC }, false);
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> tensorC = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
|
@ -45,11 +41,11 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
|
|||
|
||||
tensorA->setData(testVec);
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncDevice>({ tensorA });
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorA });
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorB, tensorC });
|
||||
mgr.sequence()->eval<kp::OpTensorCopy>({ tensorA, tensorB, tensorC });
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB, tensorC });
|
||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB, tensorC });
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVec);
|
||||
EXPECT_EQ(tensorB->data(), testVec);
|
||||
|
|
|
|||
49
test/TestPushConstant.cpp
Normal file
49
test/TestPushConstant.cpp
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
#include "gtest/gtest.h"
|
||||
|
||||
#include "kompute/Kompute.hpp"
|
||||
|
||||
#include "fmt/ranges.h"
|
||||
|
||||
TEST(TestPushConstants, TestTwoConstants)
|
||||
{
|
||||
{
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout(push_constant) uniform PushConstants {
|
||||
float x;
|
||||
float y;
|
||||
float z;
|
||||
} pcs;
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
pa[0] += pcs.x;
|
||||
pa[1] += pcs.y;
|
||||
pa[2] += pcs.z;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensor = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }));
|
||||
|
||||
sq = mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ tensor })
|
||||
->record<kp::OpAlgoDispatch>(algo,
|
||||
kp::Constants{ 0.1, 0.2, 0.3 })
|
||||
->record<kp::OpAlgoDispatch>(algo,
|
||||
kp::Constants{ 0.3, 0.2, 0.1 })
|
||||
->record<kp::OpTensorSyncLocal>({ tensor })
|
||||
->eval();
|
||||
|
||||
EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -3,28 +3,6 @@
|
|||
|
||||
#include "kompute/Kompute.hpp"
|
||||
|
||||
TEST(TestSequence, CmdBufSequenceBeginEnd)
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("newSequence");
|
||||
|
||||
EXPECT_TRUE(sq->eval());
|
||||
EXPECT_TRUE(!sq->isRecording());
|
||||
EXPECT_TRUE(sq->begin());
|
||||
EXPECT_TRUE(sq->isRecording());
|
||||
EXPECT_TRUE(!sq->begin());
|
||||
EXPECT_TRUE(sq->isRecording());
|
||||
EXPECT_TRUE(sq->end());
|
||||
EXPECT_TRUE(!sq->isRecording());
|
||||
EXPECT_TRUE(!sq->end());
|
||||
EXPECT_TRUE(!sq->isRecording());
|
||||
EXPECT_TRUE(sq->eval());
|
||||
}
|
||||
}
|
||||
|
||||
TEST(TestSequence, SequenceDestructorViaManager)
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
|
@ -32,11 +10,10 @@ TEST(TestSequence, SequenceDestructorViaManager)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
sq = mgr.sequence("newSequence");
|
||||
sq = mgr.sequence();
|
||||
|
||||
EXPECT_TRUE(sq->isInit());
|
||||
}
|
||||
|
||||
EXPECT_FALSE(sq->isInit());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,46 +4,46 @@
|
|||
|
||||
TEST(TestSpecializationConstants, TestTwoConstants)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (constant_id = 0) const float cOne = 1;
|
||||
layout (constant_id = 1) const float cTwo = 1;
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
layout(set = 0, binding = 1) buffer b { float pb[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = cOne;
|
||||
pb[index] = cTwo;
|
||||
})");
|
||||
|
||||
{
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (constant_id = 0) const float cOne = 1;
|
||||
layout (constant_id = 1) const float cTwo = 1;
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
layout(set = 0, binding = 1) buffer b { float pb[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = cOne;
|
||||
pb[index] = cTwo;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
sq = mgr.sequence();
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
|
||||
tensorB };
|
||||
|
||||
auto spec = kp::Constants({5.0, 0.3});
|
||||
kp::Constants spec = kp::Constants({ 5.0, 0.3 });
|
||||
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA, tensorB },
|
||||
kp::Shader::compile_source(shader),
|
||||
kp::Workgroup(), spec);
|
||||
sq->end();
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm(params, spirv, {}, spec);
|
||||
|
||||
sq->eval();
|
||||
sq = mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>(params)
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>(params)
|
||||
->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 5, 5, 5 }));
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({ 0.3, 0.3, 0.3 }));
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 5, 5, 5 }));
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({ 0.3, 0.3, 0.3 }));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,36 +5,9 @@
|
|||
|
||||
TEST(TestTensor, ConstructorData)
|
||||
{
|
||||
std::vector<float> vec{ 0, 1, 2 };
|
||||
kp::Tensor tensor(vec);
|
||||
EXPECT_EQ(tensor.size(), vec.size());
|
||||
EXPECT_EQ(tensor.data(), vec);
|
||||
}
|
||||
|
||||
TEST(TestTensor, CopyFromHostData)
|
||||
{
|
||||
std::vector<float> vecA{ 0, 1, 2 };
|
||||
std::vector<float> vecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA =
|
||||
std::make_shared<kp::Tensor>(vecA, kp::Tensor::TensorTypes::eHost);
|
||||
std::shared_ptr<kp::Tensor> tensorB =
|
||||
std::make_shared<kp::Tensor>(vecB, kp::Tensor::TensorTypes::eHost);
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
if (std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence("new")) {
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorCopy>({ tensorA, tensorB });
|
||||
|
||||
sq->end();
|
||||
|
||||
sq->eval();
|
||||
}
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
std::vector<float> vec{ 0, 1, 2 };
|
||||
std::shared_ptr<kp::Tensor> tensor = mgr.tensor(vec);
|
||||
EXPECT_EQ(tensor->size(), vec.size());
|
||||
EXPECT_EQ(tensor->data(), vec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,44 +5,63 @@
|
|||
|
||||
#include "kompute_test/shaders/shadertest_workgroup.hpp"
|
||||
|
||||
|
||||
TEST(TestWorkgroup, TestSimpleWorkgroup)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(std::vector<float>(16 * 8)) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(std::vector<float>(16 * 8)) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
std::shared_ptr<kp::Tensor> tensorB = nullptr;
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
tensorA = mgr.tensor(std::vector<float>(16 * 8));
|
||||
tensorB = mgr.tensor(std::vector<float>(16 * 8));
|
||||
|
||||
kp::Workgroup workgroup = {16, 8, 1};
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
|
||||
tensorB };
|
||||
|
||||
std::vector<uint32_t> spirv(
|
||||
(uint32_t*)
|
||||
kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::
|
||||
test_shaders_glsl_test_workgroup_comp_spv +
|
||||
kp::shader_data::
|
||||
test_shaders_glsl_test_workgroup_comp_spv_len));
|
||||
|
||||
kp::Workgroup workgroup = { 16, 8, 1 };
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm(params, spirv, workgroup);
|
||||
|
||||
sq = mgr.sequence();
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoBase>(
|
||||
{ tensorA, tensorB },
|
||||
std::vector<uint32_t>(
|
||||
(uint32_t*)kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv +
|
||||
kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv_len)),
|
||||
workgroup);
|
||||
sq->end();
|
||||
|
||||
sq->record<kp::OpTensorSyncDevice>(params);
|
||||
sq->record<kp::OpAlgoDispatch>(algorithm);
|
||||
sq->record<kp::OpTensorSyncLocal>(params);
|
||||
sq->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<float> expectedA = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15};
|
||||
std::vector<float> expectedA = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15
|
||||
};
|
||||
|
||||
std::vector<float> expectedB = { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 };
|
||||
std::vector<float> expectedB = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
|
||||
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
|
||||
4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1,
|
||||
2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
|
||||
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
|
||||
};
|
||||
|
||||
EXPECT_EQ(tensorA->data(), expectedA);
|
||||
EXPECT_EQ(tensorB->data(), expectedB);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue