From 4fddf74ca7092edfc9fe5e46b45a225eec1109f2 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 28 Feb 2021 17:07:17 +0000 Subject: [PATCH] Updated examples --- .../app/src/main/cpp/KomputeModelML.cpp | 95 ++-- .../app/src/main/cpp/KomputeModelML.hpp | 5 +- examples/array_multiplication/src/Main.cpp | 11 +- .../kompute_summator/KomputeSummatorNode.cpp | 13 +- .../kompute_model_ml/KomputeModelMLNode.cpp | 83 ++-- .../gdnative_shared/src/KomputeModelML.cpp | 101 ++-- .../gdnative_shared/src/KomputeModelML.hpp | 4 +- examples/logistic_regression/src/Main.cpp | 45 +- single_include/kompute/Kompute.hpp | 439 ++++++++++-------- src/include/kompute/Core.hpp | 16 +- src/include/kompute/operations/OpBase.hpp | 1 - 11 files changed, 408 insertions(+), 405 deletions(-) diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp index f1884760a..647cd5236 100755 --- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp +++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp @@ -20,61 +20,62 @@ void KomputeModelML::train(std::vector yData, std::vector xIData, uint32_t ITERATIONS = 100; float learningRate = 0.1; - std::shared_ptr xI{ new kp::Tensor(xIData) }; - std::shared_ptr xJ{ new kp::Tensor(xJData) }; - - std::shared_ptr y{ new kp::Tensor(yData) }; - - std::shared_ptr wIn{ new kp::Tensor({ 0.001, 0.001 }) }; - std::shared_ptr wOutI{ new kp::Tensor(zerosData) }; - std::shared_ptr wOutJ{ new kp::Tensor(zerosData) }; - - std::shared_ptr bIn{ new kp::Tensor({ 0 }) }; - std::shared_ptr bOut{ new kp::Tensor(zerosData) }; - - std::shared_ptr lOut{ new kp::Tensor(zerosData) }; - - std::vector> params = { xI, xJ, y, - wIn, wOutI, wOutJ, - bIn, bOut, lOut }; - { kp::Manager mgr; - { - mgr.rebuild(params); + std::shared_ptr xI = mgr.tensor(xIData); + std::shared_ptr xJ = mgr.tensor(xJData); - std::shared_ptr sq = mgr.sequence(); + std::shared_ptr y = mgr.tensor(yData); - // Record op algo base - sq->begin(); + std::shared_ptr wIn = mgr.tensor({ 0.001, 0.001 }); + std::shared_ptr wOutI = mgr.tensor(zerosData); + std::shared_ptr wOutJ = mgr.tensor(zerosData); - sq->record({ wIn, bIn }); + std::shared_ptr bIn = mgr.tensor({ 0 }); + std::shared_ptr bOut = mgr.tensor(zerosData); - // Newer versions of Android are able to use shaderc to read raw string - sq->record( - params, kp::Shader::compile_source(LR_SHADER)); + std::shared_ptr lOut = mgr.tensor(zerosData); - sq->record({ wOutI, wOutJ, bOut, lOut }); + std::vector> params = { xI, xJ, y, + wIn, wOutI, wOutJ, + bIn, bOut, lOut }; - sq->end(); + std::vector spirv( + (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv, + (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv + + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); - // Iterate across all expected iterations - for (size_t i = 0; i < ITERATIONS; i++) { + std::shared_ptr algo = + mgr.algorithm(params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 })); - sq->eval(); + mgr.sequence()->eval(params); - for (size_t j = 0; j < bOut->size(); j++) { - wIn->data()[0] -= learningRate * wOutI->data()[j]; - wIn->data()[1] -= learningRate * wOutJ->data()[j]; - bIn->data()[0] -= learningRate * bOut->data()[j]; - } + std::shared_ptr sq = mgr.sequence() + ->record({ wIn, bIn }) + ->record(algo) + ->record({ wOutI, wOutJ, bOut, lOut }); + + // Iterate across all expected iterations + for (size_t i = 0; i < ITERATIONS; i++) { + + sq->eval(); + + for (size_t j = 0; j < bOut->size(); j++) { + wIn->data()[0] -= learningRate * wOutI->data()[j]; + wIn->data()[1] -= learningRate * wOutJ->data()[j]; + bIn->data()[0] -= learningRate * bOut->data()[j]; } } - } - this->mWeights = kp::Tensor(wIn->data()); - this->mBias = kp::Tensor(bIn->data()); + KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<"); + KP_LOG_INFO("{}", wIn->data()[0]); + KP_LOG_INFO("{}", wIn->data()[1]); + KP_LOG_INFO("{}", bIn->data()[0]); + + this->mWeights = wIn; + this->mBias = bIn; + } } std::vector KomputeModelML::predict(std::vector xI, std::vector xJ) { @@ -88,9 +89,9 @@ std::vector KomputeModelML::predict(std::vector xI, std::vectormWeights.data()[0] - + xJVal * this->mWeights.data()[1] - + this->mBias.data()[0]); + float result = (xIVal * this->mWeights->data()[0] + + xJVal * this->mWeights->data()[1] + + this->mBias->data()[0]); // Instead of using sigmoid we'll just return full numbers float var = result > 0 ? 1 : 0; @@ -103,13 +104,13 @@ std::vector KomputeModelML::predict(std::vector xI, std::vector KomputeModelML::get_params() { std::vector retVector; - if(this->mWeights.size() + this->mBias.size() == 0) { + if(this->mWeights->size() + this->mBias->size() == 0) { return retVector; } - retVector.push_back(this->mWeights.data()[0]); - retVector.push_back(this->mWeights.data()[1]); - retVector.push_back(this->mBias.data()[0]); + retVector.push_back(this->mWeights->data()[0]); + retVector.push_back(this->mWeights->data()[1]); + retVector.push_back(this->mBias->data()[0]); retVector.push_back(99.0); return retVector; diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.hpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.hpp index 335f05805..093edbafc 100755 --- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.hpp +++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.hpp @@ -4,6 +4,7 @@ #include #include +#include #include "kompute/Kompute.hpp" @@ -20,8 +21,8 @@ public: std::vector get_params(); private: - kp::Tensor mWeights; - kp::Tensor mBias; + std::shared_ptr mWeights; + std::shared_ptr mBias; }; diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp index 8ec611e15..fd823bca8 100755 --- a/examples/array_multiplication/src/Main.cpp +++ b/examples/array_multiplication/src/Main.cpp @@ -37,11 +37,14 @@ int main() } )"); - mgr.evalOpDefault( - { tensorInA, tensorInB, tensorOut }, - kp::Shader::compile_source(shader)); + std::vector> params = { tensorInA, tensorInB, tensorOut }; - mgr.evalOpDefault({tensorOut}); + std::shared_ptr algo = mgr.algorithm(params, kp::Shader::compile_source(shader)); + + mgr.sequence() + ->record(params) + ->record(algo) + ->record(params); // prints "Output { 0 4 12 }" std::cout<< "Output: { "; diff --git a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp index 2e9f1bc00..f50c56d5c 100644 --- a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp +++ b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp @@ -31,7 +31,7 @@ void KomputeSummatorNode::_init() { std::cout << "CALLING INIT" << std::endl; this->mPrimaryTensor = this->mManager.tensor({ 0.0 }); this->mSecondaryTensor = this->mManager.tensor({ 0.0 }); - this->mSequence = this->mManager.sequence("AdditionSeq"); + this->mSequence = this->mManager.sequence(); // We now record the steps in the sequence if (std::shared_ptr sq = this->mSequence) @@ -51,7 +51,11 @@ void KomputeSummatorNode::_init() { } )"); - sq->begin(); + std::shared_ptr algo = + mgr.algorithm( + { this->mPrimaryTensor, this->mSecondaryTensor }, + kp::Shader::compile_source(shader)); + // First we ensure secondary tensor loads to GPU // No need to sync the primary tensor as it should not be changed @@ -59,15 +63,12 @@ void KomputeSummatorNode::_init() { { this->mSecondaryTensor }); // Then we run the operation with both tensors - sq->record( - { this->mPrimaryTensor, this->mSecondaryTensor }, - kp::Shader::compile_source(shader)); + sq->record(algo) // We map the result back to local sq->record( { this->mPrimaryTensor }); - sq->end(); } else { throw std::runtime_error("Sequence pointer no longer available"); diff --git a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp index 57490a8d4..081315a4b 100644 --- a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp +++ b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp @@ -29,54 +29,41 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) { uint32_t ITERATIONS = 100; float learningRate = 0.1; - std::shared_ptr xI{ new kp::Tensor(xIData) }; - std::shared_ptr xJ{ new kp::Tensor(xJData) }; - - std::shared_ptr y{ new kp::Tensor(yData) }; - - std::shared_ptr wIn{ new kp::Tensor({ 0.001, 0.001 }) }; - std::shared_ptr wOutI{ new kp::Tensor(zerosData) }; - std::shared_ptr wOutJ{ new kp::Tensor(zerosData) }; - - std::shared_ptr bIn{ new kp::Tensor({ 0 }) }; - std::shared_ptr bOut{ new kp::Tensor(zerosData) }; - - std::shared_ptr lOut{ new kp::Tensor(zerosData) }; - - std::vector> params = { xI, xJ, y, - wIn, wOutI, wOutJ, - bIn, bOut, lOut }; - { kp::Manager mgr; - mgr.rebuild(params); + std::shared_ptr xI = mgr.tensor(xIData); + std::shared_ptr xJ = mgr.tensor(xJData); + + std::shared_ptr y = mgr.tensor(yData); + + std::shared_ptr wIn = mgr.tensor({ 0.001, 0.001 }); + std::shared_ptr wOutI = mgr.tensor(zerosData); + std::shared_ptr wOutJ = mgr.tensor(zerosData); + + std::shared_ptr bIn = mgr.tensor({ 0 }); + std::shared_ptr bOut = mgr.tensor(zerosData); + + std::shared_ptr lOut = mgr.tensor(zerosData); + + std::vector> params = { xI, xJ, y, + wIn, wOutI, wOutJ, + bIn, bOut, lOut }; { - std::shared_ptr sq = mgr.sequence(); + std::vector spirv( + (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv, + (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv + + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); - // Record op algo base - sq->begin(); + std::shared_ptr algo = mgr.algorithm(params, spirv); - sq->record({ wIn, bIn }); + mgr.sequence()->eval(params); -#ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING - // Newer versions of Android are able to use shaderc to read raw string - sq->record( - params, std::vector(LR_SHADER.begin(), LR_SHADER.end())); -#else - // Older versions of Android require the SPIRV binary directly - sq->record( - params, std::vector( - kp::shader_data::shaders_glsl_logisticregression_comp_spv, - kp::shader_data::shaders_glsl_logisticregression_comp_spv - + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len - )); -#endif - - sq->record({ wOutI, wOutJ, bOut, lOut }); - - sq->end(); + std::shared_ptr sq = mgr.sequence() + ->record({ wIn, bIn }) + ->record(algo) + ->record({ wOutI, wOutJ, bOut, lOut }); // Iterate across all expected iterations for (size_t i = 0; i < ITERATIONS; i++) { @@ -90,15 +77,15 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) { } } } + + KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<"); + KP_LOG_INFO(wIn->data()[0]); + KP_LOG_INFO(wIn->data()[1]); + KP_LOG_INFO(bIn->data()[0]); + + this->mWeights = kp::Tensor(wIn->data()); + this->mBias = kp::Tensor(bIn->data()); } - - KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<"); - KP_LOG_INFO(wIn->data()[0]); - KP_LOG_INFO(wIn->data()[1]); - KP_LOG_INFO(bIn->data()[0]); - - this->mWeights = kp::Tensor(wIn->data()); - this->mBias = kp::Tensor(bIn->data()); } Array KomputeModelMLNode::predict(Array xI, Array xJ) { diff --git a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp index 1a01febd0..1222fe867 100644 --- a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp +++ b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp @@ -33,54 +33,41 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) { uint32_t ITERATIONS = 100; float learningRate = 0.1; - std::shared_ptr xI{ new kp::Tensor(xIData) }; - std::shared_ptr xJ{ new kp::Tensor(xJData) }; - - std::shared_ptr y{ new kp::Tensor(yData) }; - - std::shared_ptr wIn{ new kp::Tensor({ 0.001, 0.001 }) }; - std::shared_ptr wOutI{ new kp::Tensor(zerosData) }; - std::shared_ptr wOutJ{ new kp::Tensor(zerosData) }; - - std::shared_ptr bIn{ new kp::Tensor({ 0 }) }; - std::shared_ptr bOut{ new kp::Tensor(zerosData) }; - - std::shared_ptr lOut{ new kp::Tensor(zerosData) }; - - std::vector> params = { xI, xJ, y, - wIn, wOutI, wOutJ, - bIn, bOut, lOut }; - { kp::Manager mgr; + std::shared_ptr xI = mgr.tensor(xIData); + std::shared_ptr xJ = mgr.tensor(xJData); + + std::shared_ptr y = mgr.tensor(yData); + + std::shared_ptr wIn = mgr.tensor({ 0.001, 0.001 }); + std::shared_ptr wOutI = mgr.tensor(zerosData); + std::shared_ptr wOutJ = mgr.tensor(zerosData); + + std::shared_ptr bIn = mgr.tensor({ 0 }); + std::shared_ptr bOut = mgr.tensor(zerosData); + + std::shared_ptr lOut = mgr.tensor(zerosData); + + std::vector> params = { xI, xJ, y, + wIn, wOutI, wOutJ, + bIn, bOut, lOut }; + { - mgr.rebuild(params); + std::vector spirv( + (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv, + (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv + + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); - std::shared_ptr sq = mgr.sequence(); + std::shared_ptr algo = mgr.algorithm(params, spirv); - // Record op algo base - sq->begin(); + mgr.sequence()->eval(params); - sq->record({ wIn, bIn }); - -#ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING - // Newer versions of Android are able to use shaderc to read raw string - sq->record( - params, std::vector(LR_SHADER.begin(), LR_SHADER.end())); -#else - // Older versions of Android require the SPIRV binary directly - sq->record( - params, std::vector( - kp::shader_data::shaders_glsl_logisticregression_comp_spv, - kp::shader_data::shaders_glsl_logisticregression_comp_spv - + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len - )); -#endif - - sq->record({ wOutI, wOutJ, bOut, lOut }); - - sq->end(); + std::shared_ptr sq = mgr.sequence() + ->record({ wIn, bIn }) + ->record(algo) + ->record({ wOutI, wOutJ, bOut, lOut }); // Iterate across all expected iterations for (size_t i = 0; i < ITERATIONS; i++) { @@ -94,15 +81,15 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) { } } } + + KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<"); + KP_LOG_INFO(wIn->data()[0]); + KP_LOG_INFO(wIn->data()[1]); + KP_LOG_INFO(bIn->data()[0]); + + this->mWeights = wIn; + this->mBias = bIn; } - - KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<"); - KP_LOG_INFO(wIn->data()[0]); - KP_LOG_INFO(wIn->data()[1]); - KP_LOG_INFO(bIn->data()[0]); - - this->mWeights = kp::Tensor(wIn->data()); - this->mBias = kp::Tensor(bIn->data()); } Array KomputeModelML::predict(Array xI, Array xJ) { @@ -116,9 +103,9 @@ Array KomputeModelML::predict(Array xI, Array xJ) { for (size_t i = 0; i < xI.size(); i++) { float xIVal = xI[i]; float xJVal = xJ[i]; - float result = (xIVal * this->mWeights.data()[0] - + xJVal * this->mWeights.data()[1] - + this->mBias.data()[0]); + float result = (xIVal * this->mWeights->data()[0] + + xJVal * this->mWeights->data()[1] + + this->mBias->data()[0]); // Instead of using sigmoid we'll just return full numbers Variant var = result > 0 ? 1 : 0; @@ -131,15 +118,15 @@ Array KomputeModelML::predict(Array xI, Array xJ) { Array KomputeModelML::get_params() { Array retArray; - KP_LOG_INFO(this->mWeights.size() + this->mBias.size()); + KP_LOG_INFO(this->mWeights->size() + this->mBias->size()); - if(this->mWeights.size() + this->mBias.size() == 0) { + if(this->mWeights->size() + this->mBias->size() == 0) { return retArray; } - retArray.push_back(this->mWeights.data()[0]); - retArray.push_back(this->mWeights.data()[1]); - retArray.push_back(this->mBias.data()[0]); + retArray.push_back(this->mWeights->data()[0]); + retArray.push_back(this->mWeights->data()[1]); + retArray.push_back(this->mBias->data()[0]); retArray.push_back(99.0); return retArray; diff --git a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.hpp b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.hpp index 1f87fbb69..69bab4f19 100644 --- a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.hpp +++ b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.hpp @@ -28,8 +28,8 @@ public: static void _register_methods(); private: - kp::Tensor mWeights; - kp::Tensor mBias; + std::shared_ptr mWeights; + std::shared_ptr mBias; }; static std::string LR_SHADER = R"( diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp index 769699ca7..c435575e2 100755 --- a/examples/logistic_regression/src/Main.cpp +++ b/examples/logistic_regression/src/Main.cpp @@ -15,44 +15,39 @@ int main() uint32_t ITERATIONS = 100; float learningRate = 0.1; - std::shared_ptr xI{ new kp::Tensor({ 0, 1, 1, 1, 1 }) }; - std::shared_ptr xJ{ new kp::Tensor({ 0, 0, 0, 1, 1 }) }; + kp::Manager mgr; - std::shared_ptr y{ new kp::Tensor({ 0, 0, 0, 1, 1 }) }; + std::shared_ptr xI = mgr.tensor({ 0, 1, 1, 1, 1 }); + std::shared_ptr xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr wIn{ new kp::Tensor({ 0.001, 0.001 }) }; - std::shared_ptr wOutI{ new kp::Tensor({ 0, 0, 0, 0, 0 }) }; - std::shared_ptr wOutJ{ new kp::Tensor({ 0, 0, 0, 0, 0 }) }; + std::shared_ptr y = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr bIn{ new kp::Tensor({ 0 }) }; - std::shared_ptr bOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) }; + std::shared_ptr wIn = mgr.tensor({ 0.001, 0.001 }); + std::shared_ptr wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr lOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) }; + std::shared_ptr bIn = mgr.tensor({ 0 }); + std::shared_ptr bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + + std::shared_ptr lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); std::vector> params = { xI, xJ, y, wIn, wOutI, wOutJ, bIn, bOut, lOut }; - kp::Manager mgr; - - mgr.rebuild(params); - - std::shared_ptr sq = mgr.sequence(); - - // Record op algo base - sq->begin(); - - sq->record({ wIn, bIn }); - - sq->record( - params, std::vector( + std::vector spirv( (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv, (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv - + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len))); + + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); - sq->record({ wOutI, wOutJ, bOut, lOut }); + std::shared_ptr algo = mgr.algorithm(params, spirv); - sq->end(); + mgr.sequence()->eval(params); + + std::shared_ptr sq = mgr.sequence() + ->record({ wIn, bIn }) + ->record(algo) + ->record({ wOutI, wOutJ, bOut, lOut }); // Iterate across all expected iterations for (size_t i = 0; i < ITERATIONS; i++) { diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 0bf66d593..52d574ad3 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -647,12 +647,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error; #define KP_LOG_DEBUG(...) #else #if defined(VK_USE_PLATFORM_ANDROID_KHR) -#define KP_LOG_DEBUG(...) \ - ((void)__android_log_print(ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) +#define KP_LOG_DEBUG(...) \ + ((void)__android_log_write( \ + ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str())) #elif defined(KOMPUTE_BUILD_PYTHON) #define KP_LOG_DEBUG(...) kp_debug(fmt::format(__VA_ARGS__)) #else -#define KP_LOG_DEBUG(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__)) +#define KP_LOG_DEBUG(...) \ + fmt::print("[{} {}] [debug] [{}:{}] {}\n", \ + __DATE__, \ + __TIME__, \ + __FILE__, \ + __LINE__, \ + fmt::format(__VA_ARGS__)) #endif // VK_USE_PLATFORM_ANDROID_KHR #endif // SPDLOG_ACTIVE_LEVEL > 1 @@ -660,12 +667,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error; #define KP_LOG_INFO(...) #else #if defined(VK_USE_PLATFORM_ANDROID_KHR) -#define KP_LOG_INFO(...) \ - ((void)__android_log_print(ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) +#define KP_LOG_INFO(...) \ + ((void)__android_log_write( \ + ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str())) #elif defined(KOMPUTE_BUILD_PYTHON) #define KP_LOG_INFO(...) kp_info(fmt::format(__VA_ARGS__)) #else -#define KP_LOG_INFO(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__)) +#define KP_LOG_INFO(...) \ + fmt::print("[{} {}] [debug] [{}:{}] {}\n", \ + __DATE__, \ + __TIME__, \ + __FILE__, \ + __LINE__, \ + fmt::format(__VA_ARGS__)) #endif // VK_USE_PLATFORM_ANDROID_KHR #endif // SPDLOG_ACTIVE_LEVEL > 2 @@ -673,12 +687,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error; #define KP_LOG_WARN(...) #else #if defined(VK_USE_PLATFORM_ANDROID_KHR) -#define KP_LOG_WARN(...) \ - ((void)__android_log_print(ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) +#define KP_LOG_WARN(...) \ + ((void)__android_log_write( \ + ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str())) #elif defined(KOMPUTE_BUILD_PYTHON) #define KP_LOG_WARN(...) kp_warning(fmt::format(__VA_ARGS__)) #else -#define KP_LOG_WARN(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__)) +#define KP_LOG_WARN(...) \ + fmt::print("[{} {}] [debug] [{}:{}] {}\n", \ + __DATE__, \ + __TIME__, \ + __FILE__, \ + __LINE__, \ + fmt::format(__VA_ARGS__)) #endif // VK_USE_PLATFORM_ANDROID_KHR #endif // SPDLOG_ACTIVE_LEVEL > 3 @@ -686,12 +707,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error; #define KP_LOG_ERROR(...) #else #if defined(VK_USE_PLATFORM_ANDROID_KHR) -#define KP_LOG_ERROR(...) \ - ((void)__android_log_print(ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) +#define KP_LOG_ERROR(...) \ + ((void)__android_log_write( \ + ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str())) #elif defined(KOMPUTE_BUILD_PYTHON) #define KP_LOG_ERROR(...) kp_error(fmt::format(__VA_ARGS__)) #else -#define KP_LOG_ERROR(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__)) +#define KP_LOG_ERROR(...) \ + fmt::print("[{} {}] [debug] [{}:{}] {}\n", \ + __DATE__, \ + __TIME__, \ + __FILE__, \ + __LINE__, \ + fmt::format(__VA_ARGS__)) #endif // VK_USE_PLATFORM_ANDROID_KHR #endif // SPDLOG_ACTIVE_LEVEL > 4 #endif // KOMPUTE_SPDLOG_ENABLED @@ -701,9 +729,9 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error; #include #include +#include #include #include -#include namespace kp { @@ -711,157 +739,161 @@ namespace kp { // Has been adobted by: // https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp const TBuiltInResource defaultResource = { -/* .MaxLights = */ 0, -/* .MaxClipPlanes = */ 0, -/* .MaxTextureUnits = */ 0, -/* .MaxTextureCoords = */ 0, -/* .MaxVertexAttribs = */ 64, -/* .MaxVertexUniformComponents = */ 4096, -/* .MaxVaryingFloats = */ 64, -/* .MaxVertexTextureImageUnits = */ 0, -/* .MaxCombinedTextureImageUnits = */ 0, -/* .MaxTextureImageUnits = */ 0, -/* .MaxFragmentUniformComponents = */ 0, -/* .MaxDrawBuffers = */ 0, -/* .MaxVertexUniformVectors = */ 128, -/* .MaxVaryingVectors = */ 8, -/* .MaxFragmentUniformVectors = */ 0, -/* .MaxVertexOutputVectors = */ 16, -/* .MaxFragmentInputVectors = */ 0, -/* .MinProgramTexelOffset = */ -8, -/* .MaxProgramTexelOffset = */ 7, -/* .MaxClipDistances = */ 8, -/* .MaxComputeWorkGroupCountX = */ 65535, -/* .MaxComputeWorkGroupCountY = */ 65535, -/* .MaxComputeWorkGroupCountZ = */ 65535, -/* .MaxComputeWorkGroupSizeX = */ 1024, -/* .MaxComputeWorkGroupSizeY = */ 1024, -/* .MaxComputeWorkGroupSizeZ = */ 64, -/* .MaxComputeUniformComponents = */ 1024, -/* .MaxComputeTextureImageUnits = */ 16, -/* .MaxComputeImageUniforms = */ 8, -/* .MaxComputeAtomicCounters = */ 8, -/* .MaxComputeAtomicCounterBuffers = */ 1, -/* .MaxVaryingComponents = */ 60, -/* .MaxVertexOutputComponents = */ 64, -/* .MaxGeometryInputComponents = */ 64, -/* .MaxGeometryOutputComponents = */ 128, -/* .MaxFragmentInputComponents = */ 0, -/* .MaxImageUnits = */ 0, -/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0, -/* .MaxCombinedShaderOutputResources = */ 8, -/* .MaxImageSamples = */ 0, -/* .MaxVertexImageUniforms = */ 0, -/* .MaxTessControlImageUniforms = */ 0, -/* .MaxTessEvaluationImageUniforms = */ 0, -/* .MaxGeometryImageUniforms = */ 0, -/* .MaxFragmentImageUniforms = */ 0, -/* .MaxCombinedImageUniforms = */ 0, -/* .MaxGeometryTextureImageUnits = */ 0, -/* .MaxGeometryOutputVertices = */ 256, -/* .MaxGeometryTotalOutputComponents = */ 1024, -/* .MaxGeometryUniformComponents = */ 1024, -/* .MaxGeometryVaryingComponents = */ 64, -/* .MaxTessControlInputComponents = */ 128, -/* .MaxTessControlOutputComponents = */ 128, -/* .MaxTessControlTextureImageUnits = */ 0, -/* .MaxTessControlUniformComponents = */ 1024, -/* .MaxTessControlTotalOutputComponents = */ 4096, -/* .MaxTessEvaluationInputComponents = */ 128, -/* .MaxTessEvaluationOutputComponents = */ 128, -/* .MaxTessEvaluationTextureImageUnits = */ 16, -/* .MaxTessEvaluationUniformComponents = */ 1024, -/* .MaxTessPatchComponents = */ 120, -/* .MaxPatchVertices = */ 32, -/* .MaxTessGenLevel = */ 64, -/* .MaxViewports = */ 16, -/* .MaxVertexAtomicCounters = */ 0, -/* .MaxTessControlAtomicCounters = */ 0, -/* .MaxTessEvaluationAtomicCounters = */ 0, -/* .MaxGeometryAtomicCounters = */ 0, -/* .MaxFragmentAtomicCounters = */ 0, -/* .MaxCombinedAtomicCounters = */ 8, -/* .MaxAtomicCounterBindings = */ 1, -/* .MaxVertexAtomicCounterBuffers = */ 0, -/* .MaxTessControlAtomicCounterBuffers = */ 0, -/* .MaxTessEvaluationAtomicCounterBuffers = */ 0, -/* .MaxGeometryAtomicCounterBuffers = */ 0, -/* .MaxFragmentAtomicCounterBuffers = */ 0, -/* .MaxCombinedAtomicCounterBuffers = */ 1, -/* .MaxAtomicCounterBufferSize = */ 16384, -/* .MaxTransformFeedbackBuffers = */ 4, -/* .MaxTransformFeedbackInterleavedComponents = */ 64, -/* .MaxCullDistances = */ 8, -/* .MaxCombinedClipAndCullDistances = */ 8, -/* .MaxSamples = */ 4, -/* .maxMeshOutputVerticesNV = */ 256, -/* .maxMeshOutputPrimitivesNV = */ 512, -/* .maxMeshWorkGroupSizeX_NV = */ 32, -/* .maxMeshWorkGroupSizeY_NV = */ 1, -/* .maxMeshWorkGroupSizeZ_NV = */ 1, -/* .maxTaskWorkGroupSizeX_NV = */ 32, -/* .maxTaskWorkGroupSizeY_NV = */ 1, -/* .maxTaskWorkGroupSizeZ_NV = */ 1, -/* .maxMeshViewCountNV = */ 4, -/* .maxDualSourceDrawBuffersEXT = */ 1, + /* .MaxLights = */ 0, + /* .MaxClipPlanes = */ 0, + /* .MaxTextureUnits = */ 0, + /* .MaxTextureCoords = */ 0, + /* .MaxVertexAttribs = */ 64, + /* .MaxVertexUniformComponents = */ 4096, + /* .MaxVaryingFloats = */ 64, + /* .MaxVertexTextureImageUnits = */ 0, + /* .MaxCombinedTextureImageUnits = */ 0, + /* .MaxTextureImageUnits = */ 0, + /* .MaxFragmentUniformComponents = */ 0, + /* .MaxDrawBuffers = */ 0, + /* .MaxVertexUniformVectors = */ 128, + /* .MaxVaryingVectors = */ 8, + /* .MaxFragmentUniformVectors = */ 0, + /* .MaxVertexOutputVectors = */ 16, + /* .MaxFragmentInputVectors = */ 0, + /* .MinProgramTexelOffset = */ -8, + /* .MaxProgramTexelOffset = */ 7, + /* .MaxClipDistances = */ 8, + /* .MaxComputeWorkGroupCountX = */ 65535, + /* .MaxComputeWorkGroupCountY = */ 65535, + /* .MaxComputeWorkGroupCountZ = */ 65535, + /* .MaxComputeWorkGroupSizeX = */ 1024, + /* .MaxComputeWorkGroupSizeY = */ 1024, + /* .MaxComputeWorkGroupSizeZ = */ 64, + /* .MaxComputeUniformComponents = */ 1024, + /* .MaxComputeTextureImageUnits = */ 16, + /* .MaxComputeImageUniforms = */ 8, + /* .MaxComputeAtomicCounters = */ 8, + /* .MaxComputeAtomicCounterBuffers = */ 1, + /* .MaxVaryingComponents = */ 60, + /* .MaxVertexOutputComponents = */ 64, + /* .MaxGeometryInputComponents = */ 64, + /* .MaxGeometryOutputComponents = */ 128, + /* .MaxFragmentInputComponents = */ 0, + /* .MaxImageUnits = */ 0, + /* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0, + /* .MaxCombinedShaderOutputResources = */ 8, + /* .MaxImageSamples = */ 0, + /* .MaxVertexImageUniforms = */ 0, + /* .MaxTessControlImageUniforms = */ 0, + /* .MaxTessEvaluationImageUniforms = */ 0, + /* .MaxGeometryImageUniforms = */ 0, + /* .MaxFragmentImageUniforms = */ 0, + /* .MaxCombinedImageUniforms = */ 0, + /* .MaxGeometryTextureImageUnits = */ 0, + /* .MaxGeometryOutputVertices = */ 256, + /* .MaxGeometryTotalOutputComponents = */ 1024, + /* .MaxGeometryUniformComponents = */ 1024, + /* .MaxGeometryVaryingComponents = */ 64, + /* .MaxTessControlInputComponents = */ 128, + /* .MaxTessControlOutputComponents = */ 128, + /* .MaxTessControlTextureImageUnits = */ 0, + /* .MaxTessControlUniformComponents = */ 1024, + /* .MaxTessControlTotalOutputComponents = */ 4096, + /* .MaxTessEvaluationInputComponents = */ 128, + /* .MaxTessEvaluationOutputComponents = */ 128, + /* .MaxTessEvaluationTextureImageUnits = */ 16, + /* .MaxTessEvaluationUniformComponents = */ 1024, + /* .MaxTessPatchComponents = */ 120, + /* .MaxPatchVertices = */ 32, + /* .MaxTessGenLevel = */ 64, + /* .MaxViewports = */ 16, + /* .MaxVertexAtomicCounters = */ 0, + /* .MaxTessControlAtomicCounters = */ 0, + /* .MaxTessEvaluationAtomicCounters = */ 0, + /* .MaxGeometryAtomicCounters = */ 0, + /* .MaxFragmentAtomicCounters = */ 0, + /* .MaxCombinedAtomicCounters = */ 8, + /* .MaxAtomicCounterBindings = */ 1, + /* .MaxVertexAtomicCounterBuffers = */ 0, + /* .MaxTessControlAtomicCounterBuffers = */ 0, + /* .MaxTessEvaluationAtomicCounterBuffers = */ 0, + /* .MaxGeometryAtomicCounterBuffers = */ 0, + /* .MaxFragmentAtomicCounterBuffers = */ 0, + /* .MaxCombinedAtomicCounterBuffers = */ 1, + /* .MaxAtomicCounterBufferSize = */ 16384, + /* .MaxTransformFeedbackBuffers = */ 4, + /* .MaxTransformFeedbackInterleavedComponents = */ 64, + /* .MaxCullDistances = */ 8, + /* .MaxCombinedClipAndCullDistances = */ 8, + /* .MaxSamples = */ 4, + /* .maxMeshOutputVerticesNV = */ 256, + /* .maxMeshOutputPrimitivesNV = */ 512, + /* .maxMeshWorkGroupSizeX_NV = */ 32, + /* .maxMeshWorkGroupSizeY_NV = */ 1, + /* .maxMeshWorkGroupSizeZ_NV = */ 1, + /* .maxTaskWorkGroupSizeX_NV = */ 32, + /* .maxTaskWorkGroupSizeY_NV = */ 1, + /* .maxTaskWorkGroupSizeZ_NV = */ 1, + /* .maxMeshViewCountNV = */ 4, + /* .maxDualSourceDrawBuffersEXT = */ 1, + + /* .limits = */ + { + /* .nonInductiveForLoops = */ 1, + /* .whileLoops = */ 1, + /* .doWhileLoops = */ 1, + /* .generalUniformIndexing = */ 1, + /* .generalAttributeMatrixVectorIndexing = */ 1, + /* .generalVaryingIndexing = */ 1, + /* .generalSamplerIndexing = */ 1, + /* .generalVariableIndexing = */ 1, + /* .generalConstantMatrixVectorIndexing = */ 1, + } +}; -/* .limits = */ { - /* .nonInductiveForLoops = */ 1, - /* .whileLoops = */ 1, - /* .doWhileLoops = */ 1, - /* .generalUniformIndexing = */ 1, - /* .generalAttributeMatrixVectorIndexing = */ 1, - /* .generalVaryingIndexing = */ 1, - /* .generalSamplerIndexing = */ 1, - /* .generalVariableIndexing = */ 1, - /* .generalConstantMatrixVectorIndexing = */ 1, -}}; - /** Shader utily class with functions to compile and process glsl files. */ -class Shader { -public: +class Shader +{ + public: /** * Compile multiple sources with optional filenames. Currently this function * uses the glslang C++ interface which is not thread safe so this funciton * should not be called from multiple threads concurrently. If you have a - * online shader processing multithreading use-case that can't use offline + * online shader processing multithreading use-case that can't use offline * compilation please open an issue. * * @param sources A list of raw glsl shaders in string format * @param files A list of file names respective to each of the sources * @param entryPoint The function name to use as entry point * @param definitions List of pairs containing key value definitions - * @param resourcesLimit A list that contains the resource limits for the GLSL compiler + * @param resourcesLimit A list that contains the resource limits for the + * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ static std::vector compile_sources( - const std::vector& sources, - const std::vector& files = {}, - const std::string& entryPoint = "main", - std::vector> definitions = {}, - const TBuiltInResource& resources = defaultResource); + const std::vector& sources, + const std::vector& files = {}, + const std::string& entryPoint = "main", + std::vector> definitions = {}, + const TBuiltInResource& resources = defaultResource); /** - * Compile a single glslang source from string value. Currently this function - * uses the glslang C++ interface which is not thread safe so this funciton - * should not be called from multiple threads concurrently. If you have a - * online shader processing multithreading use-case that can't use offline - * compilation please open an issue. + * Compile a single glslang source from string value. Currently this + * function uses the glslang C++ interface which is not thread safe so this + * funciton should not be called from multiple threads concurrently. If you + * have a online shader processing multithreading use-case that can't use + * offline compilation please open an issue. * * @param source An individual raw glsl shader in string format * @param entryPoint The function name to use as entry point * @param definitions List of pairs containing key value definitions - * @param resourcesLimit A list that contains the resource limits for the GLSL compiler + * @param resourcesLimit A list that contains the resource limits for the + * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ static std::vector compile_source( - const std::string& source, - const std::string& entryPoint = "main", - std::vector> definitions = {}, - const TBuiltInResource& resources = defaultResource); - + const std::string& source, + const std::string& entryPoint = "main", + std::vector> definitions = {}, + const TBuiltInResource& resources = defaultResource); }; } @@ -919,7 +951,7 @@ class Tensor * otherwise there is no need to copy from host memory. */ void rebuild(const std::vector& data, - TensorTypes tensorType = TensorTypes::eDevice); + TensorTypes tensorType = TensorTypes::eDevice); /** * Destroys and frees the GPU resources which include the buffer and memory. @@ -990,9 +1022,8 @@ class Tensor * @param createBarrier Whether to create a barrier that ensures the data is * copied before further operations. Default is true. */ - void recordCopyFromStagingToDevice( - const vk::CommandBuffer& commandBuffer, - bool createBarrier); + void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer, + bool createBarrier); /** * Records a copy from the internal device memory to the staging memory @@ -1003,9 +1034,8 @@ class Tensor * @param createBarrier Whether to create a barrier that ensures the data is * copied before further operations. Default is true. */ - void recordCopyFromDeviceToStaging( - const vk::CommandBuffer& commandBuffer, - bool createBarrier); + void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer, + bool createBarrier); /** * Records the buffer memory barrier into the command buffer which @@ -1017,12 +1047,11 @@ class Tensor * @param scrStageMask Pipeline stage flags for source stage mask * @param dstStageMask Pipeline stage flags for destination stage mask */ - void recordBufferMemoryBarrier( - const vk::CommandBuffer& commandBuffer, - vk::AccessFlagBits srcAccessMask, - vk::AccessFlagBits dstAccessMask, - vk::PipelineStageFlagBits srcStageMask, - vk::PipelineStageFlagBits dstStageMask); + void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, + vk::AccessFlagBits srcAccessMask, + vk::AccessFlagBits dstAccessMask, + vk::PipelineStageFlagBits srcStageMask, + vk::PipelineStageFlagBits dstStageMask); /** * Constructs a vulkan descriptor buffer info which can be used to specify @@ -1070,11 +1099,11 @@ class Tensor std::shared_ptr memory, vk::MemoryPropertyFlags memoryPropertyFlags); void recordCopyBuffer(const vk::CommandBuffer& commandBuffer, - std::shared_ptr bufferFrom, - std::shared_ptr bufferTo, - vk::DeviceSize bufferSize, - vk::BufferCopy copyRegion, - bool createBarrier); + std::shared_ptr bufferFrom, + std::shared_ptr bufferTo, + vk::DeviceSize bufferSize, + vk::BufferCopy copyRegion, + bool createBarrier); // Private util functions vk::BufferUsageFlags getPrimaryBufferUsageFlags(); @@ -1094,8 +1123,7 @@ namespace kp { */ class Algorithm { -public: - + public: /** * Default constructor for Algorithm * @@ -1103,12 +1131,11 @@ public: * @param commandBuffer The vulkan command buffer to bind the pipeline and * shaders */ - Algorithm( - std::shared_ptr device, - const std::vector>& tensors = {}, - const std::vector& spirv = {}, - const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}); + Algorithm(std::shared_ptr device, + const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const Constants& specializationConstants = {}); /** * Initialiser for the shader data provided to the algorithm as well as @@ -1116,14 +1143,13 @@ public: * * @param shaderFileData The bytes in spir-v format of the shader * @tensorParams The Tensors to be used in the Algorithm / shader for - * @specalizationInstalces The specialization parameters to pass to the function - * processing + * @specalizationInstalces The specialization parameters to pass to the + * function processing */ - void rebuild( - const std::vector>& tensors = {}, - const std::vector& spirv = {}, - const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}); + void rebuild(const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const Constants& specializationConstants = {}); /** * Destructor for Algorithm which is responsible for freeing and desroying @@ -1143,7 +1169,8 @@ public: void bindCore(const vk::CommandBuffer& commandBuffer); - void bindPush(const vk::CommandBuffer& commandBuffer, const Constants& pushConstants); + void bindPush(const vk::CommandBuffer& commandBuffer, + const Constants& pushConstants); bool isInit(); @@ -1155,7 +1182,7 @@ public: void destroy(); -private: + private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mDevice; std::vector> mTensors; @@ -1489,7 +1516,7 @@ namespace kp { /** * Container of operations that can be sent to GPU as batch */ -class Sequence: public std::enable_shared_from_this +class Sequence : public std::enable_shared_from_this { public: /** @@ -1526,8 +1553,9 @@ class Sequence: public std::enable_shared_from_this * which allows for extensible configurations on initialisation. */ template - std::shared_ptr - record(std::vector> tensors, TArgs&&... params) + std::shared_ptr record( + std::vector> tensors, + TArgs&&... params) { KP_LOG_DEBUG("Kompute Sequence record function started"); @@ -1536,14 +1564,13 @@ class Sequence: public std::enable_shared_from_this "OpBase derived classes"); KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - std::shared_ptr op{ - new T(tensors, std::forward(params)...) }; + std::shared_ptr op{ new T(tensors, std::forward(params)...) }; return this->record(op); } template - std::shared_ptr - record(std::shared_ptr algorithm, TArgs&&... params) + std::shared_ptr record(std::shared_ptr algorithm, + TArgs&&... params) { KP_LOG_DEBUG("Kompute Sequence record function started"); @@ -1552,8 +1579,8 @@ class Sequence: public std::enable_shared_from_this "OpBase derived classes"); KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - std::shared_ptr op{ - new T(algorithm, std::forward(params)...) }; + std::shared_ptr op{ new T(algorithm, + std::forward(params)...) }; return this->record(op); } @@ -1576,8 +1603,8 @@ class Sequence: public std::enable_shared_from_this */ // TODO: Aim to have only a single function with tensors/algorithm template - std::shared_ptr - eval(std::vector> tensors, TArgs&&... params) + std::shared_ptr eval(std::vector> tensors, + TArgs&&... params) { KP_LOG_DEBUG("Kompute Sequence record function started"); @@ -1586,16 +1613,16 @@ class Sequence: public std::enable_shared_from_this "OpBase derived classes"); KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - std::shared_ptr op{ - new T(tensors, std::forward(params)...) }; + std::shared_ptr op{ new T(tensors, std::forward(params)...) }; - // TODO: Aim to be able to handle errors when returning without throw except + // TODO: Aim to be able to handle errors when returning without throw + // except return this->eval(op); } // Needded as otherise can't use initialiser list template - std::shared_ptr - eval(std::shared_ptr algorithm, TArgs&&... params) + std::shared_ptr eval(std::shared_ptr algorithm, + TArgs&&... params) { KP_LOG_DEBUG("Kompute Sequence record function started"); @@ -1604,8 +1631,8 @@ class Sequence: public std::enable_shared_from_this "OpBase derived classes"); KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - std::shared_ptr op{ - new T(algorithm, std::forward(params)...) }; + std::shared_ptr op{ new T(algorithm, + std::forward(params)...) }; return this->eval(op); } @@ -1627,8 +1654,9 @@ class Sequence: public std::enable_shared_from_this * @return shared_ptr of the Sequence class itself */ template - std::shared_ptr - evalAsync(std::vector> tensors, TArgs&&... params) + std::shared_ptr evalAsync( + std::vector> tensors, + TArgs&&... params) { KP_LOG_DEBUG("Kompute Sequence record function started"); @@ -1637,15 +1665,14 @@ class Sequence: public std::enable_shared_from_this "OpBase derived classes"); KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - std::shared_ptr op{ - new T(tensors, std::forward(params)...) }; + std::shared_ptr op{ new T(tensors, std::forward(params)...) }; return this->evalAsync(op); } // Needed as otherwise it's not possible to use initializer lists template - std::shared_ptr - evalAsync(std::shared_ptr algorithm, TArgs&&... params) + std::shared_ptr evalAsync(std::shared_ptr algorithm, + TArgs&&... params) { KP_LOG_DEBUG("Kompute Sequence record function started"); @@ -1654,8 +1681,8 @@ class Sequence: public std::enable_shared_from_this "OpBase derived classes"); KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - std::shared_ptr op{ - new T(algorithm, std::forward(params)...) }; + std::shared_ptr op{ new T(algorithm, + std::forward(params)...) }; return this->evalAsync(op); } @@ -1670,7 +1697,8 @@ class Sequence: public std::enable_shared_from_this std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); /** - * Clear function clears all operations currently recorded and starts recording again. + * Clear function clears all operations currently recorded and starts + * recording again. */ void clear(); @@ -1821,10 +1849,10 @@ class Manager Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice); std::shared_ptr algorithm( - const std::vector>& tensors = {}, - const std::vector& spirv = {}, - const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}); + const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const Constants& specializationConstants = {}); void destroy(); void clear(); @@ -1856,7 +1884,8 @@ class Manager // Create functions void createInstance(); - void createDevice(const std::vector& familyQueueIndices = {}, uint32_t hysicalDeviceIndex = 0); + void createDevice(const std::vector& familyQueueIndices = {}, + uint32_t hysicalDeviceIndex = 0); }; } // End namespace kp diff --git a/src/include/kompute/Core.hpp b/src/include/kompute/Core.hpp index b50bf081d..3510a2021 100644 --- a/src/include/kompute/Core.hpp +++ b/src/include/kompute/Core.hpp @@ -61,8 +61,8 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error; #else #if defined(VK_USE_PLATFORM_ANDROID_KHR) #define KP_LOG_DEBUG(...) \ - ((void)__android_log_print( \ - ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) + ((void)__android_log_write( \ + ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str())) #elif defined(KOMPUTE_BUILD_PYTHON) #define KP_LOG_DEBUG(...) kp_debug(fmt::format(__VA_ARGS__)) #else @@ -81,8 +81,8 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error; #else #if defined(VK_USE_PLATFORM_ANDROID_KHR) #define KP_LOG_INFO(...) \ - ((void)__android_log_print( \ - ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) + ((void)__android_log_write( \ + ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str())) #elif defined(KOMPUTE_BUILD_PYTHON) #define KP_LOG_INFO(...) kp_info(fmt::format(__VA_ARGS__)) #else @@ -101,8 +101,8 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error; #else #if defined(VK_USE_PLATFORM_ANDROID_KHR) #define KP_LOG_WARN(...) \ - ((void)__android_log_print( \ - ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) + ((void)__android_log_write( \ + ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str())) #elif defined(KOMPUTE_BUILD_PYTHON) #define KP_LOG_WARN(...) kp_warning(fmt::format(__VA_ARGS__)) #else @@ -121,8 +121,8 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error; #else #if defined(VK_USE_PLATFORM_ANDROID_KHR) #define KP_LOG_ERROR(...) \ - ((void)__android_log_print( \ - ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) + ((void)__android_log_write( \ + ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str())) #elif defined(KOMPUTE_BUILD_PYTHON) #define KP_LOG_ERROR(...) kp_error(fmt::format(__VA_ARGS__)) #else diff --git a/src/include/kompute/operations/OpBase.hpp b/src/include/kompute/operations/OpBase.hpp index ba1e892d5..34818fcf0 100644 --- a/src/include/kompute/operations/OpBase.hpp +++ b/src/include/kompute/operations/OpBase.hpp @@ -1,7 +1,6 @@ #pragma once #include "kompute/Core.hpp" - #include "kompute/Tensor.hpp" #include "kompute/Algorithm.hpp"