diff --git a/Makefile b/Makefile index b6ff3ea58..64ae11155 100644 --- a/Makefile +++ b/Makefile @@ -12,8 +12,8 @@ VERSION := $(shell cat ./VERSION) VCPKG_WIN_PATH ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsystems\\vcpkg.cmake" VCPKG_UNIX_PATH ?= "/c/Users/axsau/Programming/lib/vcpkg/scripts/buildsystems/vcpkg.cmake" -# Regext to pass to catch2 to filter tests -FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps" +# These are the tests that don't work with swiftshader but can be run directly with vulkan +FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps:TestPushConstants.TestConstantsDouble" ifeq ($(OS),Windows_NT) # is Windows_NT on XP, 2000, 7, Vista, 10... CMAKE_BIN ?= "C:\Program Files\CMake\bin\cmake.exe" @@ -105,7 +105,7 @@ mk_run_tests_cpu: mk_build_swiftshader_library mk_build_tests mk_run_tests_cpu_o VS_BUILD_TYPE ?= "Debug" # Run with multiprocessin / parallel build by default VS_CMAKE_EXTRA_FLAGS ?= "" -VS_KOMPUTE_EXTRA_CXX_FLAGS ?= "/MP" # /MP is for faster multiprocessing builds. You should add "/MT" for submodule builds for compatibility with gtest +VS_KOMPUTE_EXTRA_CXX_FLAGS ?= "/MT" # /MP is for faster multiprocessing builds. You should add "/MT" for submodule builds for compatibility with gtest VS_INSTALL_PATH ?= "build/src/CMakeFiles/Export/" # Set to "" if prefer default vs_cmake: @@ -116,7 +116,7 @@ vs_cmake: -DKOMPUTE_EXTRA_CXX_FLAGS=$(VS_KOMPUTE_EXTRA_CXX_FLAGS) \ -DCMAKE_INSTALL_PREFIX=$(VS_INSTALL_PATH) \ -DKOMPUTE_OPT_INSTALL=1 \ - -DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=0 \ + -DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \ -DKOMPUTE_OPT_BUILD_TESTS=1 \ -DKOMPUTE_OPT_BUILD_SHADERS=1 \ -DKOMPUTE_OPT_BUILD_SINGLE_HEADER=1 \ diff --git a/README.md b/README.md index 71711973b..88ddbd36e 100644 --- a/README.md +++ b/README.md @@ -89,9 +89,9 @@ void kompute(const std::string& shader) { // 3. 
Create algorithm based on shader (supports buffers & push/spec constants) kp::Workgroup workgroup({3, 1, 1}); - kp::Constants specConsts({ 2 }); - kp::Constants pushConstsA({ 2.0 }); - kp::Constants pushConstsB({ 3.0 }); + std::vector specConsts({ 2 }); + std::vector pushConstsA({ 2.0 }); + std::vector pushConstsB({ 3.0 }); auto algorithm = mgr.algorithm(params, // See documentation shader section for compileSource diff --git a/docs/index.rst b/docs/index.rst index 9a62e7814..f3358a00f 100755 --- a/docs/index.rst +++ b/docs/index.rst @@ -38,9 +38,10 @@ Documentation Index (as per sidebar) .. toctree:: :titlesonly: - :caption: Concepts & Deep Dives: + :caption: Advanced Concepts & Deep Dives: - CI, Docker Images Docs & Tests + CI, Docker Images Docs & Tests + Variable Types for Tensors, and Push/Spec Constants Asynchronous & Parallel Operations Mobile App Integration (Android) Game Engine Integration (Godot Engine) diff --git a/docs/overview/advanced-examples.rst b/docs/overview/advanced-examples.rst index 68b901f76..ee44c5821 100644 --- a/docs/overview/advanced-examples.rst +++ b/docs/overview/advanced-examples.rst @@ -71,13 +71,13 @@ The example below shows how you can enable the "VK_EXT_shader_atomic_float" exte sq = mgr.sequence() ->record({ tensor }) ->record(algo, - kp::Constants{ 0.1, 0.2, 0.3 }) + std::vector{ 0.1, 0.2, 0.3 }) ->record(algo, - kp::Constants{ 0.3, 0.2, 0.1 }) + std::vector{ 0.3, 0.2, 0.1 }) ->record({ tensor }) ->eval(); - EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 })); + EXPECT_EQ(tensor->data(), std::vector({ 0.4, 0.4, 0.4 })); } } diff --git a/docs/overview/variable-types.rst b/docs/overview/variable-types.rst new file mode 100644 index 000000000..a4c6481a7 --- /dev/null +++ b/docs/overview/variable-types.rst @@ -0,0 +1,92 @@ + +Variable Types for Tensors and Constants +============= + +By default the initial interfaces you may interact with, will be primarily using float values by default, which is enough to get through the 
basic conceptual examples. However, as real world applications are being developed, more specialized types may be required for kp::Tensor, as well as for SpecializationConstants and PushConstants. + +Before diving into the practical classes and interfaces that can be used to take advantage of the variable type support of Kompute, we want to provide some high level intuition on what each of these components is. + +Variable Tensor Types +--------------------- + +For the kp::Tensor class, Kompute provides under the hood an interface to have more seamless interaction with multiple different underlying data types. This is done through the introduction of the class kp::TensorT and parent class kp::Tensor; however, as a developer you will be primarily interacting with the top level kp::Tensor class, as this is what is provided through the high level kp::Manager class. + +The kp::Tensor class does provide an "integrated" experience, which allows users to "seamlessly" retrieve the underlying data through the `data()` and `vector()` functions. This is done by leveraging C++ templates, as well as limiting the types that can be used, which are namely: + +* float +* uint32 +* int32 +* double +* bool + +Any other data type provided would result in an error, and for the time being Kompute will focus primarily on providing support for these types. + +The tests under `TestTensor.cpp` and `test_tensor_types.py` provide an overview of how users can take advantage of these features using std::vector for C++ and numpy array for Python. + +C++ Tensor Types Usage +^^^^^^^^^^^^^^^^^^^^^^ + +Below you can see how it is possible to define different types in C++. + +.. literalinclude:: ../../test/TestTensor.cpp + :language: cpp + :lines: 21- + +Python Tensor Types Usage +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
literalinclude:: ../../python/test/test_tensor_types.py + :language: python + :lines: 26-46 + +Variable Push Constants +----------------------- + +Push constants are a relatively inexpensive way to provide dynamic data to a GPU Algorithm (shader) as further CPU compute is performed. Although push constants are a more efficient way to provide data, they are also limited, as there is a memory limit for push constants. + +Push constants with Kompute are flexible as it is possible to pass user-defined structs in C++. In Python it is limited to providing numpy arrays with multiple elements of the same type. + +C++ Push Consts Types Usage +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As mentioned above, the test under `TestPushConstant.cpp` shows how it is possible to use user-defined structs for multiple elements of different types, which is not possible for specialization constants or tensors. + +These are defined in the `algorithm` function of the `kp::Manager`, and once a push constant is set, all other push constants provided have to consist of the same types and element size. + +More specifically, when passing a custom struct it is possible to pass a single element, or alternatively to pass multiple scalar values as part of the vector, and access them as outlined in the rest of the tests. + +.. literalinclude:: ../../test/TestPushConstant.cpp + :language: cpp + :lines: 182-231 + + +Python Push Consts Types Usage +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In Python the push constants are limited to a single list of elements of the same type. These are provided by passing a numpy array to the `algorithm` function or the `kp::OpAlgoDispatch` operation. + +.. literalinclude:: ../../python/test/test_kompute.py + :language: python + :lines: 207-242 + +Variable Specialization Constants +--------------------------------- + +Specialization constants are analogous to push constants, but these are not dynamic: they can only be set on initialization or rebuild of `kp::Algorithm` and cannot be changed unless a `rebuild` is carried out. 
+ +The usage of specialization constants is very similar to the push constants, but with the following limitations: + +* These are defined using the constant_id in the glsl shader +* Spec constants do not support complex types (i.e. user defined struct) +* Kompute supports an array of elements of same type for specialization constants + +C++ Spec Consts Types Usage +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The specialization constant example shows how it is possible to define them as a std::vector. + +.. literalinclude:: ../../test/TestSpecializationConstant.cpp + :language: cpp + :lines: 57- + + diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp index 647cd5236..ef604909b 100755 --- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp +++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp @@ -47,7 +47,7 @@ void KomputeModelML::train(std::vector yData, std::vector xIData, + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); std::shared_ptr algo = - mgr.algorithm(params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 })); + mgr.algorithm(params, spirv, kp::Workgroup({ 5 }), std::vector({ 5.0 })); mgr.sequence()->eval(params); diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp index c7cc827ba..ec52439ed 100755 --- a/examples/logistic_regression/src/Main.cpp +++ b/examples/logistic_regression/src/Main.cpp @@ -41,7 +41,7 @@ int main() + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); std::shared_ptr algo = mgr.algorithm( - params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 })); + params, spirv, kp::Workgroup({ 5 }), std::vector({ 5.0 })); mgr.sequence()->eval(params); diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp index 8f2a7fe6b..fb1def637 100644 --- a/python/src/docstrings.hpp +++ b/python/src/docstrings.hpp @@ -36,9 +36,9 @@ tensors (optional) The tensors to use to create 
the descriptor resources @param spirv (optional) The spirv code to use to create the algorithm @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if -not set. @param specializationConstants (optional) The kp::Constants +not set. @param specializationConstants (optional) The std::vector to use to initialize the specialization constants which cannot be -changed once set. @param pushConstants (optional) The kp::Constants to +changed once set. @param pushConstants (optional) The std::vector to use when initializing the pipeline, which set the size of the push constants - these can be modified but all new values must have the same vector size as this initial value.)doc"; @@ -54,12 +54,12 @@ static const char *__doc_kp_Algorithm_destroy = R"doc()doc"; static const char *__doc_kp_Algorithm_getPush = R"doc(Gets the specialization constants of the current algorithm. -@returns The kp::Constants currently set for push constants)doc"; +@returns The std::vector currently set for push constants)doc"; static const char *__doc_kp_Algorithm_getSpecializationConstants = R"doc(Gets the specialization constants of the current algorithm. -@returns The kp::Constants currently set for specialization constants)doc"; +@returns The std::vector currently set for specialization constants)doc"; static const char *__doc_kp_Algorithm_getTensors = R"doc(Gets the current tensors that are used in the algorithm. @@ -127,9 +127,9 @@ parameters to create the underlying resources. @param spirv The spirv code to use to create the algorithm @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. @param -specializationConstants (optional) The kp::Constants to use to +specializationConstants (optional) The std::vector to use to initialize the specialization constants which cannot be changed once -set. 
@param pushConstants (optional) The kp::Constants to use when +set. @param pushConstants (optional) The std::vector to use when initializing the pipeline, which set the size of the push constants - these can be modified but all new values must have the same vector size as this initial value.)doc"; diff --git a/python/src/main.cpp b/python/src/main.cpp index 846576adb..d0447fe8e 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -14,6 +14,31 @@ namespace py = pybind11; //used in Core.hpp py::object kp_debug, kp_info, kp_warning, kp_error; +std::unique_ptr opAlgoDispatchPyInit( + std::shared_ptr& algorithm, + const py::array& push_consts) { + const py::buffer_info info = push_consts.request(); + KP_LOG_DEBUG("Kompute Python Manager creating tensor_T with push_consts size {} dtype {}", + push_consts.size(), std::string(py::str(push_consts.dtype()))); + + + if (push_consts.dtype() == py::dtype::of()) { + std::vector dataVec((float*)info.ptr, ((float*)info.ptr) + info.size); + return std::unique_ptr{new kp::OpAlgoDispatch(algorithm, dataVec)}; + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector dataVec((uint32_t*)info.ptr, ((uint32_t*)info.ptr) + info.size); + return std::unique_ptr{new kp::OpAlgoDispatch(algorithm, dataVec)}; + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector dataVec((int32_t*)info.ptr, ((int32_t*)info.ptr) + info.size); + return std::unique_ptr{new kp::OpAlgoDispatch(algorithm, dataVec)}; + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector dataVec((double*)info.ptr, ((double*)info.ptr) + info.size); + return std::unique_ptr{new kp::OpAlgoDispatch(algorithm, dataVec)}; + } else { + throw std::runtime_error("Kompute Python no valid dtype supported"); + } +} + PYBIND11_MODULE(kp, m) { // The logging modules are used in the Kompute.hpp file @@ -49,9 +74,12 @@ PYBIND11_MODULE(kp, m) { py::class_>( m, "OpAlgoDispatch", py::base(), DOC(kp, OpAlgoDispatch)) - .def(py::init&,const 
kp::Constants&>(), + .def(py::init&,const std::vector&>(), DOC(kp, OpAlgoDispatch, OpAlgoDispatch), - py::arg("algorithm"), py::arg("push_consts") = kp::Constants()); + py::arg("algorithm"), py::arg("push_consts") = std::vector()) + .def(py::init(&opAlgoDispatchPyInit), + DOC(kp, OpAlgoDispatch, OpAlgoDispatch), + py::arg("algorithm"), py::arg("push_consts")); py::class_>( m, "OpMult", py::base(), DOC(kp, OpMult)) @@ -61,7 +89,6 @@ PYBIND11_MODULE(kp, m) { py::class_>(m, "Algorithm", DOC(kp, Algorithm, Algorithm)) .def("get_tensors", &kp::Algorithm::getTensors, DOC(kp, Algorithm, getTensors)) .def("destroy", &kp::Algorithm::destroy, DOC(kp, Algorithm, destroy)) - .def("get_spec_consts", &kp::Algorithm::getSpecializationConstants, DOC(kp, Algorithm, getSpecializationConstants)) .def("is_init", &kp::Algorithm::isInit, DOC(kp, Algorithm, isInit)); py::class_>(m, "Tensor", DOC(kp, Tensor)) @@ -179,8 +206,8 @@ PYBIND11_MODULE(kp, m) { const std::vector>& tensors, const py::bytes& spirv, const kp::Workgroup& workgroup, - const kp::Constants& spec_consts, - const kp::Constants& push_consts) { + const std::vector& spec_consts, + const std::vector& push_consts) { py::buffer_info info(py::buffer(spirv).request()); const char *data = reinterpret_cast(info.ptr); size_t length = static_cast(info.size); @@ -191,8 +218,100 @@ PYBIND11_MODULE(kp, m) { py::arg("tensors"), py::arg("spirv"), py::arg("workgroup") = kp::Workgroup(), - py::arg("spec_consts") = kp::Constants(), - py::arg("push_consts") = kp::Constants()) + py::arg("spec_consts") = std::vector(), + py::arg("push_consts") = std::vector()) + .def("algorithm", [np](kp::Manager& self, + const std::vector>& tensors, + const py::bytes& spirv, + const kp::Workgroup& workgroup, + const py::array& spec_consts, + const py::array& push_consts) { + + py::buffer_info info(py::buffer(spirv).request()); + const char *data = reinterpret_cast(info.ptr); + size_t length = static_cast(info.size); + std::vector spirvVec((uint32_t*)data, 
(uint32_t*)(data + length)); + + const py::buffer_info pushInfo = push_consts.request(); + const py::buffer_info specInfo = spec_consts.request(); + + KP_LOG_DEBUG("Kompute Python Manager creating Algorithm_T with " + "push consts data size {} dtype {} and spec const data size {} dtype {}", + push_consts.size(), std::string(py::str(push_consts.dtype())), + spec_consts.size(), std::string(py::str(spec_consts.dtype()))); + + // Templates cannot be selected at runtime, so dispatch manually on the dtype pair: the outer branch picks the spec_consts element type, the inner branch picks the push_consts element type (and throws if it is unsupported) + if (spec_consts.dtype() == py::dtype::of()) { + std::vector specConstsVec((float*)specInfo.ptr, ((float*)specInfo.ptr) + specInfo.size); + if (push_consts.dtype() == py::dtype::of()) { + std::vector pushConstsVec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec); + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector pushConstsVec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec); + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector pushConstsVec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec); + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector pushConstsVec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec); + } else { throw std::runtime_error("Kompute Python no valid dtype supported"); } + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector specconstsvec((int32_t*)specInfo.ptr, ((int32_t*)specInfo.ptr) + specInfo.size); + if (push_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if 
(push_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else { throw std::runtime_error("Kompute Python no valid dtype supported"); } + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector specconstsvec((uint32_t*)specInfo.ptr, ((uint32_t*)specInfo.ptr) + specInfo.size); + if (push_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else { throw std::runtime_error("Kompute Python no valid dtype supported"); } + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector specconstsvec((double*)specInfo.ptr, ((double*)specInfo.ptr) + specInfo.size); + if (push_consts.dtype() == py::dtype::of()) { + std::vector 
pushconstsvec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else { throw std::runtime_error("Kompute Python no valid dtype supported"); } + } else { + // If reach then no valid dtype supported + throw std::runtime_error("Kompute Python no valid dtype supported"); + } + }, + DOC(kp, Manager, algorithm), + py::arg("tensors"), + py::arg("spirv"), + py::arg("workgroup") = kp::Workgroup(), + py::arg("spec_consts") = std::vector(), + py::arg("push_consts") = std::vector()) .def("list_devices", [](kp::Manager& self){ const std::vector devices = self.listDevices(); py::list list; diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index eaf6b28db..8660b0151 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -197,10 +197,49 @@ def test_pushconsts(): .record(kp.OpTensorSyncDevice([tensor])) .record(kp.OpAlgoDispatch(algo)) .record(kp.OpAlgoDispatch(algo, [0.3, 0.2, 0.1])) + .record(kp.OpAlgoDispatch(algo, [0.3, 0.2, 0.1])) .record(kp.OpTensorSyncLocal([tensor])) .eval()) - assert np.all(tensor.data() == np.array([0.4, 0.4, 0.4], dtype=np.float32)) + assert np.allclose(tensor.data(), np.array([0.7, 0.6, 0.5], dtype=np.float32)) + + +def test_pushconsts_int(): + + spirv = compile_source(""" + #version 450 + 
layout(push_constant) uniform PushConstants { + int x; + int y; + int z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { int pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y; + pa[2] += pcs.z; + } + """) + + mgr = kp.Manager() + + tensor = mgr.tensor_t(np.array([0, 0, 0], dtype=np.int32)) + + spec_consts = np.array([], dtype=np.int32) + push_consts = np.array([-1, -1, -1], dtype=np.int32) + + algo = mgr.algorithm([tensor], spirv, (1, 1, 1), spec_consts, push_consts) + + (mgr.sequence() + .record(kp.OpTensorSyncDevice([tensor])) + .record(kp.OpAlgoDispatch(algo)) + .record(kp.OpAlgoDispatch(algo, np.array([-1, -1, -1], dtype=np.int32))) + .record(kp.OpAlgoDispatch(algo, np.array([-1, -1, -1], dtype=np.int32))) + .record(kp.OpTensorSyncLocal([tensor])) + .eval()) + + assert np.all(tensor.data() == np.array([-3, -3, -3], dtype=np.int32)) def test_workgroup(): diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 12fe9cda9..012eae394 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1087,19 +1087,37 @@ class Algorithm * @param spirv (optional) The spirv code to use to create the algorithm * @param workgroup (optional) The kp::Workgroup to use for the dispatch * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. - * @param specializationConstants (optional) The kp::Constants to use to + * @param specializationConstants (optional) The templatable param is to be used to * initialize the specialization constants which cannot be changed once set. - * @param pushConstants (optional) The kp::Constants to use when + * @param pushConstants (optional) This templatable param is to be used when * initializing the pipeline, which set the size of the push constants - - * these can be modified but all new values must have the same vector size - * as this initial value. 
+ * these can be modified but all new values must have the same data type and length + * as otherwise it will result in errors. */ + template Algorithm(std::shared_ptr device, const std::vector>& tensors = {}, const std::vector& spirv = {}, const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}, - const Constants& pushConstants = {}); + const std::vector& specializationConstants = {}, + const std::vector

& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute Algorithm Constructor with device"); + + this->mDevice = device; + + if (tensors.size() && spirv.size()) { + KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and " + "spirv size: {}", + tensors.size(), + spirv.size()); + this->rebuild( + tensors, spirv, workgroup, specializationConstants, pushConstants); + } else { + KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or " + "spirv so not rebuilding vulkan components"); + } + } /** * Rebuild function to reconstruct algorithm with configuration parameters @@ -1109,18 +1127,64 @@ class Algorithm * @param spirv The spirv code to use to create the algorithm * @param workgroup (optional) The kp::Workgroup to use for the dispatch * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. - * @param specializationConstants (optional) The kp::Constants to use to + * @param specializationConstants (optional) The std::vector to use to * initialize the specialization constants which cannot be changed once set. - * @param pushConstants (optional) The kp::Constants to use when + * @param pushConstants (optional) The std::vector to use when * initializing the pipeline, which set the size of the push constants - * these can be modified but all new values must have the same vector size * as this initial value. */ + template void rebuild(const std::vector>& tensors, const std::vector& spirv, const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}, - const Constants& pushConstants = {}); + const std::vector& specializationConstants = {}, + const std::vector

& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute Algorithm rebuild started"); + + this->mTensors = tensors; + this->mSpirv = spirv; + + if (specializationConstants.size()) { + if (this->mSpecializationConstantsData) { + free(this->mSpecializationConstantsData); + } + uint32_t memorySize = sizeof(decltype(specializationConstants.back())); + uint32_t size = specializationConstants.size(); + uint32_t totalSize = size * memorySize; + this->mSpecializationConstantsData = malloc(totalSize); + memcpy(this->mSpecializationConstantsData, specializationConstants.data(), totalSize); + this->mSpecializationConstantsDataTypeMemorySize = memorySize; + this->mSpecializationConstantsSize = size; + } + + if (pushConstants.size()) { + if (this->mPushConstantsData) { + free(this->mPushConstantsData); + } + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = size * memorySize; + this->mPushConstantsData = malloc(totalSize); + memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } + + this->setWorkgroup(workgroup, + this->mTensors.size() ? this->mTensors[0]->size() : 1); + + // Descriptor pool is created first so if available then destroy all before + // rebuild + if (this->isInit()) { + this->destroy(); + } + + this->createParameters(); + this->createShaderModule(); + this->createPipeline(); + } /** * Destructor for Algorithm which is responsible for freeing and desroying @@ -1175,11 +1239,48 @@ class Algorithm * Sets the push constants to the new value provided to use in the next * bindPush() * - * @param The kp::Constant to use to set the push constants to use in the + * @param pushConstants The templatable vector is to be used to set the push constants to use in the * next bindPush(...) calls. The constants provided must be of the same size * as the ones created during initialization. 
*/ - void setPush(const Constants& pushConstants); + template + void setPushConstants(const std::vector& pushConstants) + { + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + + this->setPushConstants(pushConstants.data(), size, memorySize); + } + + /** + * Sets the push constants to the new value provided to use in the next + * bindPush() with the raw memory block location and memory size to be used. + * + * @param data The raw data point to copy the data from, without modifying the pointer. + * @param size The number of data elements provided in the data + * @param memorySize The memory size of each of the data elements in bytes. + */ + void setPushConstants(void* data, uint32_t size, uint32_t memorySize) { + + uint32_t totalSize = memorySize * size; + uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize; + + if (totalSize != previousTotalSize) { + throw std::runtime_error( + fmt::format("Kompute Algorithm push " + "constant total memory size provided is {} but expected {} bytes", + totalSize, + previousTotalSize)); + } + if (this->mPushConstantsData) { + free(this->mPushConstantsData); + } + + this->mPushConstantsData = malloc(totalSize); + memcpy(this->mPushConstantsData, data, totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } /** * Gets the current workgroup from the algorithm. @@ -1192,15 +1293,25 @@ class Algorithm /** * Gets the specialization constants of the current algorithm. 
* - * @returns The kp::Constants currently set for specialization constants + * @returns The std::vector currently set for specialization constants */ - const Constants& getSpecializationConstants(); + template + const std::vector getSpecializationConstants() + { + return { (T*)this->mSpecializationConstantsData, + ((T*)this->mSpecializationConstantsData) + this->mSpecializationConstantsSize }; + } /** * Gets the specialization constants of the current algorithm. * - * @returns The kp::Constants currently set for push constants + * @returns The std::vector currently set for push constants */ - const Constants& getPush(); + template + const std::vector getPushConstants() + { + return { (T*)this->mPushConstantsData, + ((T*)this->mPushConstantsData) + this->mPushConstantsSize }; + } /** * Gets the current tensors that are used in the algorithm. * @@ -1233,8 +1344,12 @@ class Algorithm // -------------- ALWAYS OWNED RESOURCES std::vector mSpirv; - Constants mSpecializationConstants; - Constants mPushConstants; + void* mSpecializationConstantsData = nullptr; + uint32_t mSpecializationConstantsDataTypeMemorySize = 0; + uint32_t mSpecializationConstantsSize = 0; + void* mPushConstantsData = nullptr; + uint32_t mPushConstantsDataTypeMemorySize = 0; + uint32_t mPushConstantsSize = 0; Workgroup mWorkgroup; // Create util functions @@ -1575,8 +1690,24 @@ class OpAlgoDispatch : public OpBase * @param algorithm The algorithm object to use for dispatch * @param pushConstants The push constants to use for override */ + template OpAlgoDispatch(const std::shared_ptr& algorithm, - const kp::Constants& pushConstants = {}); + const std::vector& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); + + this->mAlgorithm = algorithm; + + if (pushConstants.size()) { + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = size * memorySize; + this->mPushConstantsData = malloc(totalSize); + 
memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } + } /** * Default destructor, which is in charge of destroying the algorithm @@ -1613,7 +1744,9 @@ class OpAlgoDispatch : public OpBase private: // -------------- ALWAYS OWNED RESOURCES std::shared_ptr mAlgorithm; - Constants mPushConstants; + void* mPushConstantsData = nullptr; + uint32_t mPushConstantsDataTypeMemorySize = 0; + uint32_t mPushConstantsSize = 0; }; } // End namespace kp @@ -1655,7 +1788,7 @@ class OpMult : public OpAlgoDispatch (uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv + kp::shader_data::shaders_glsl_opmult_comp_spv_len)); - algorithm->rebuild(tensors, spirv); + algorithm->rebuild<>(tensors, spirv); } /** @@ -2082,16 +2215,16 @@ class Manager } /** - * Create a managed algorithm that will be destroyed by this manager - * if it hasn't been destroyed by its reference count going to zero. + * Default non-template function that can be used to create algorithm objects + * which provides default types to the push and spec constants as floats. 
* * @param tensors (optional) The tensors to initialise the algorithm with * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch * @param workgroup (optional) kp::Workgroup for algorithm to use, and * defaults to (tensor[0].size(), 1, 1) - * @param specializationConstants (optional) kp::Constant to use for + * @param specializationConstants (optional) float vector to use for * specialization constants, and defaults to an empty constant - * @param pushConstants (optional) kp::Constant to use for push constants, + * @param pushConstants (optional) float vector to use for push constants, * and defaults to an empty constant * @returns Shared pointer with initialised algorithm */ @@ -2099,8 +2232,51 @@ class Manager const std::vector>& tensors = {}, const std::vector& spirv = {}, const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}, - const Constants& pushConstants = {}); + const std::vector& specializationConstants = {}, + const std::vector& pushConstants = {}) + { + return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants); + } + + /** + * Create a managed algorithm that will be destroyed by this manager + * if it hasn't been destroyed by its reference count going to zero. 
+ * + * @param tensors (optional) The tensors to initialise the algorithm with + * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch + * @param workgroup (optional) kp::Workgroup for algorithm to use, and + * defaults to (tensor[0].size(), 1, 1) + * @param specializationConstants (optional) templatable vector parameter to use for + * specialization constants, and defaults to an empty constant + * @param pushConstants (optional) templatable vector parameter to use for push constants, + * and defaults to an empty constant + * @returns Shared pointer with initialised algorithm + */ + template + std::shared_ptr algorithm( + const std::vector>& tensors, + const std::vector& spirv, + const Workgroup& workgroup, + const std::vector& specializationConstants, + const std::vector

& pushConstants) + { + + KP_LOG_DEBUG("Kompute Manager algorithm creation triggered"); + + std::shared_ptr algorithm{ new kp::Algorithm( + this->mDevice, + tensors, + spirv, + workgroup, + specializationConstants, + pushConstants) }; + + if (this->mManageResources) { + this->mManagedAlgorithms.push_back(algorithm); + } + + return algorithm; + } /** * Destroy the GPU resources and all managed resources by manager. diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 8d510bb9c..6caccf9bb 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -5,30 +5,6 @@ namespace kp { -Algorithm::Algorithm(std::shared_ptr device, - const std::vector>& tensors, - const std::vector& spirv, - const Workgroup& workgroup, - const Constants& specializationConstants, - const Constants& pushConstants) -{ - KP_LOG_DEBUG("Kompute Algorithm Constructor with device"); - - this->mDevice = device; - - if (tensors.size() && spirv.size()) { - KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and " - "spirv size: {}", - tensors.size(), - spirv.size()); - this->rebuild( - tensors, spirv, workgroup, specializationConstants, pushConstants); - } else { - KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or " - "spirv so not rebuilding vulkan components"); - } -} - Algorithm::~Algorithm() { KP_LOG_DEBUG("Kompute Algorithm Destructor started"); @@ -36,33 +12,6 @@ Algorithm::~Algorithm() this->destroy(); } -void -Algorithm::rebuild(const std::vector>& tensors, - const std::vector& spirv, - const Workgroup& workgroup, - const Constants& specializationConstants, - const Constants& pushConstants) -{ - KP_LOG_DEBUG("Kompute Algorithm rebuild started"); - - this->mTensors = tensors; - this->mSpirv = spirv; - this->mSpecializationConstants = specializationConstants; - this->mPushConstants = pushConstants; - this->setWorkgroup(workgroup, - this->mTensors.size() ? 
this->mTensors[0]->size() : 1); - - // Descriptor pool is created first so if available then destroy all before - // rebuild - if (this->isInit()) { - this->destroy(); - } - - this->createParameters(); - this->createShaderModule(); - this->createPipeline(); -} - bool Algorithm::isInit() { @@ -74,6 +23,13 @@ Algorithm::isInit() void Algorithm::destroy() { + // We don't have to free memory on destroy as it's freed by the commandBuffer destructor + // if (this->mPushConstantsData) { + // free(this->mPushConstantsData); + // } + // if (this->mSpecializationConstantsData) { + // free(this->mSpecializationConstantsData); + // } if (!this->mDevice) { KP_LOG_WARN("Kompute Algorithm destroy function reached with null " @@ -279,10 +235,10 @@ Algorithm::createPipeline() this->mDescriptorSetLayout.get()); vk::PushConstantRange pushConstantRange; - if (this->mPushConstants.size()) { + if (this->mPushConstantsSize) { pushConstantRange.setStageFlags(vk::ShaderStageFlagBits::eCompute); pushConstantRange.setOffset(0); - pushConstantRange.setSize(sizeof(float) * this->mPushConstants.size()); + pushConstantRange.setSize(this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize); pipelineLayoutInfo.setPushConstantRangeCount(1); pipelineLayoutInfo.setPPushConstantRanges(&pushConstantRange); @@ -295,11 +251,11 @@ Algorithm::createPipeline() std::vector specializationEntries; - for (uint32_t i = 0; i < this->mSpecializationConstants.size(); i++) { + for (uint32_t i = 0; i < this->mSpecializationConstantsSize; i++) { vk::SpecializationMapEntry specializationEntry( static_cast(i), - static_cast(sizeof(float) * i), - sizeof(float)); + static_cast(this->mSpecializationConstantsDataTypeMemorySize * i), + this->mSpecializationConstantsDataTypeMemorySize); specializationEntries.push_back(specializationEntry); } @@ -309,8 +265,8 @@ Algorithm::createPipeline() vk::SpecializationInfo specializationInfo( static_cast(specializationEntries.size()), specializationEntries.data(), - 
sizeof(float) * this->mSpecializationConstants.size(), - this->mSpecializationConstants.data()); + this->mSpecializationConstantsDataTypeMemorySize * this->mSpecializationConstantsSize, + this->mSpecializationConstantsData); vk::PipelineShaderStageCreateInfo shaderStage( vk::PipelineShaderStageCreateFlags(), @@ -381,15 +337,15 @@ Algorithm::recordBindCore(const vk::CommandBuffer& commandBuffer) void Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer) { - if (this->mPushConstants.size()) { - KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}", - this->mPushConstants.size()); + if (this->mPushConstantsSize) { + KP_LOG_DEBUG("Kompute Algorithm binding push constants memory size: {}", + this->mPushConstantsSize * this->mPushConstantsDataTypeMemorySize); commandBuffer.pushConstants(*this->mPipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, - this->mPushConstants.size() * sizeof(float), - this->mPushConstants.data()); + this->mPushConstantsSize * this->mPushConstantsDataTypeMemorySize, + this->mPushConstantsData); } } @@ -426,39 +382,12 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) this->mWorkgroup[2]); } -void -Algorithm::setPush(const Constants& pushConstants) -{ - - if (pushConstants.size() != this->mPushConstants.size()) { - throw std::runtime_error( - fmt::format("Kompute Algorithm push " - "constant provided is size {} but expected size {}", - pushConstants.size(), - this->mPushConstants.size())); - } - - this->mPushConstants = pushConstants; -} - const Workgroup& Algorithm::getWorkgroup() { return this->mWorkgroup; } -const Constants& -Algorithm::getSpecializationConstants() -{ - return this->mSpecializationConstants; -} - -const Constants& -Algorithm::getPush() -{ - return this->mPushConstants; -} - const std::vector>& Algorithm::getTensors() { diff --git a/src/Manager.cpp b/src/Manager.cpp index 80f308983..a553d667f 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -422,31 +422,6 @@ 
Manager::createDevice(const std::vector& familyQueueIndices, KP_LOG_DEBUG("Kompute Manager compute queue obtained"); } -std::shared_ptr -Manager::algorithm(const std::vector>& tensors, - const std::vector& spirv, - const Workgroup& workgroup, - const Constants& specializationConstants, - const Constants& pushConstants) -{ - - KP_LOG_DEBUG("Kompute Manager algorithm creation triggered"); - - std::shared_ptr algorithm{ new kp::Algorithm( - this->mDevice, - tensors, - spirv, - workgroup, - specializationConstants, - pushConstants) }; - - if (this->mManageResources) { - this->mManagedAlgorithms.push_back(algorithm); - } - - return algorithm; -} - std::shared_ptr Manager::sequence(uint32_t queueIndex, uint32_t totalTimestamps) { diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp index 0fd323b7d..88d6e55fb 100644 --- a/src/OpAlgoDispatch.cpp +++ b/src/OpAlgoDispatch.cpp @@ -5,18 +5,14 @@ namespace kp { -OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr& algorithm, - const kp::Constants& pushConstants) -{ - KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); - - this->mAlgorithm = algorithm; - this->mPushConstants = pushConstants; -} - OpAlgoDispatch::~OpAlgoDispatch() { KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started"); + + if (this->mPushConstantsData) { + KP_LOG_DEBUG("Kompute freeing push constants data"); + free(this->mPushConstantsData); + } } void @@ -35,8 +31,11 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer) vk::PipelineStageFlagBits::eComputeShader); } - if (this->mPushConstants.size()) { - this->mAlgorithm->setPush(this->mPushConstants); + if (this->mPushConstantsSize) { + this->mAlgorithm->setPushConstants( + this->mPushConstantsData, + this->mPushConstantsSize, + this->mPushConstantsDataTypeMemorySize); } this->mAlgorithm->recordBindCore(commandBuffer); diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index 2ec2797a8..be17a2d09 100644 --- a/src/include/kompute/Algorithm.hpp +++ 
b/src/include/kompute/Algorithm.hpp @@ -24,19 +24,37 @@ class Algorithm * @param spirv (optional) The spirv code to use to create the algorithm * @param workgroup (optional) The kp::Workgroup to use for the dispatch * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. - * @param specializationConstants (optional) The kp::Constants to use to + * @param specializationConstants (optional) The templatable param is to be used to * initialize the specialization constants which cannot be changed once set. - * @param pushConstants (optional) The kp::Constants to use when + * @param pushConstants (optional) This templatable param is to be used when * initializing the pipeline, which set the size of the push constants - - * these can be modified but all new values must have the same vector size - * as this initial value. + * these can be modified but all new values must have the same data type and length + * as otherwise it will result in errors. */ + template Algorithm(std::shared_ptr device, const std::vector>& tensors = {}, const std::vector& spirv = {}, const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}, - const Constants& pushConstants = {}); + const std::vector& specializationConstants = {}, + const std::vector

& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute Algorithm Constructor with device"); + + this->mDevice = device; + + if (tensors.size() && spirv.size()) { + KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and " + "spirv size: {}", + tensors.size(), + spirv.size()); + this->rebuild( + tensors, spirv, workgroup, specializationConstants, pushConstants); + } else { + KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or " + "spirv so not rebuilding vulkan components"); + } + } /** * Rebuild function to reconstruct algorithm with configuration parameters @@ -46,18 +64,64 @@ class Algorithm * @param spirv The spirv code to use to create the algorithm * @param workgroup (optional) The kp::Workgroup to use for the dispatch * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. - * @param specializationConstants (optional) The kp::Constants to use to + * @param specializationConstants (optional) The std::vector to use to * initialize the specialization constants which cannot be changed once set. - * @param pushConstants (optional) The kp::Constants to use when + * @param pushConstants (optional) The std::vector to use when * initializing the pipeline, which set the size of the push constants - * these can be modified but all new values must have the same vector size * as this initial value. */ + template void rebuild(const std::vector>& tensors, const std::vector& spirv, const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}, - const Constants& pushConstants = {}); + const std::vector& specializationConstants = {}, + const std::vector

& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute Algorithm rebuild started"); + + this->mTensors = tensors; + this->mSpirv = spirv; + + if (specializationConstants.size()) { + if (this->mSpecializationConstantsData) { + free(this->mSpecializationConstantsData); + } + uint32_t memorySize = sizeof(decltype(specializationConstants.back())); + uint32_t size = specializationConstants.size(); + uint32_t totalSize = size * memorySize; + this->mSpecializationConstantsData = malloc(totalSize); + memcpy(this->mSpecializationConstantsData, specializationConstants.data(), totalSize); + this->mSpecializationConstantsDataTypeMemorySize = memorySize; + this->mSpecializationConstantsSize = size; + } + + if (pushConstants.size()) { + if (this->mPushConstantsData) { + free(this->mPushConstantsData); + } + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = size * memorySize; + this->mPushConstantsData = malloc(totalSize); + memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } + + this->setWorkgroup(workgroup, + this->mTensors.size() ? this->mTensors[0]->size() : 1); + + // Descriptor pool is created first so if available then destroy all before + // rebuild + if (this->isInit()) { + this->destroy(); + } + + this->createParameters(); + this->createShaderModule(); + this->createPipeline(); + } /** * Destructor for Algorithm which is responsible for freeing and desroying @@ -112,11 +176,48 @@ class Algorithm * Sets the push constants to the new value provided to use in the next * bindPush() * - * @param The kp::Constant to use to set the push constants to use in the + * @param pushConstants The templatable vector is to be used to set the push constants to use in the * next bindPush(...) calls. The constants provided must be of the same size * as the ones created during initialization. 
*/ - void setPush(const Constants& pushConstants); + template + void setPushConstants(const std::vector& pushConstants) + { + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + + this->setPushConstants(pushConstants.data(), size, memorySize); + } + + /** + * Sets the push constants to the new value provided to use in the next + * bindPush() with the raw memory block location and memory size to be used. + * + * @param data The raw data point to copy the data from, without modifying the pointer. + * @param size The number of data elements provided in the data + * @param memorySize The memory size of each of the data elements in bytes. + */ + void setPushConstants(void* data, uint32_t size, uint32_t memorySize) { + + uint32_t totalSize = memorySize * size; + uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize; + + if (totalSize != previousTotalSize) { + throw std::runtime_error( + fmt::format("Kompute Algorithm push " + "constant total memory size provided is {} but expected {} bytes", + totalSize, + previousTotalSize)); + } + if (this->mPushConstantsData) { + free(this->mPushConstantsData); + } + + this->mPushConstantsData = malloc(totalSize); + memcpy(this->mPushConstantsData, data, totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } /** * Gets the current workgroup from the algorithm. @@ -129,15 +230,25 @@ class Algorithm /** * Gets the specialization constants of the current algorithm. 
* - * @returns The kp::Constants currently set for specialization constants + * @returns The std::vector currently set for specialization constants */ - const Constants& getSpecializationConstants(); + template + const std::vector getSpecializationConstants() + { + return { (T*)this->mSpecializationConstantsData, + ((T*)this->mSpecializationConstantsData) + this->mSpecializationConstantsSize }; + } /** * Gets the specialization constants of the current algorithm. * - * @returns The kp::Constants currently set for push constants + * @returns The std::vector currently set for push constants */ - const Constants& getPush(); + template + const std::vector getPushConstants() + { + return { (T*)this->mPushConstantsData, + ((T*)this->mPushConstantsData) + this->mPushConstantsSize }; + } /** * Gets the current tensors that are used in the algorithm. * @@ -170,8 +281,12 @@ class Algorithm // -------------- ALWAYS OWNED RESOURCES std::vector mSpirv; - Constants mSpecializationConstants; - Constants mPushConstants; + void* mSpecializationConstantsData = nullptr; + uint32_t mSpecializationConstantsDataTypeMemorySize = 0; + uint32_t mSpecializationConstantsSize = 0; + void* mPushConstantsData = nullptr; + uint32_t mPushConstantsDataTypeMemorySize = 0; + uint32_t mPushConstantsSize = 0; Workgroup mWorkgroup; // Create util functions diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 62f98d6d5..d9b850efd 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -124,16 +124,16 @@ class Manager } /** - * Create a managed algorithm that will be destroyed by this manager - * if it hasn't been destroyed by its reference count going to zero. + * Default non-template function that can be used to create algorithm objects + * which provides default types to the push and spec constants as floats. 
* * @param tensors (optional) The tensors to initialise the algorithm with * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch * @param workgroup (optional) kp::Workgroup for algorithm to use, and * defaults to (tensor[0].size(), 1, 1) - * @param specializationConstants (optional) kp::Constant to use for + * @param specializationConstants (optional) float vector to use for * specialization constants, and defaults to an empty constant - * @param pushConstants (optional) kp::Constant to use for push constants, + * @param pushConstants (optional) float vector to use for push constants, * and defaults to an empty constant * @returns Shared pointer with initialised algorithm */ @@ -141,8 +141,51 @@ class Manager const std::vector>& tensors = {}, const std::vector& spirv = {}, const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}, - const Constants& pushConstants = {}); + const std::vector& specializationConstants = {}, + const std::vector& pushConstants = {}) + { + return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants); + } + + /** + * Create a managed algorithm that will be destroyed by this manager + * if it hasn't been destroyed by its reference count going to zero. 
+ * + * @param tensors (optional) The tensors to initialise the algorithm with + * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch + * @param workgroup (optional) kp::Workgroup for algorithm to use, and + * defaults to (tensor[0].size(), 1, 1) + * @param specializationConstants (optional) templatable vector parameter to use for + * specialization constants, and defaults to an empty constant + * @param pushConstants (optional) templatable vector parameter to use for push constants, + * and defaults to an empty constant + * @returns Shared pointer with initialised algorithm + */ + template + std::shared_ptr algorithm( + const std::vector>& tensors, + const std::vector& spirv, + const Workgroup& workgroup, + const std::vector& specializationConstants, + const std::vector

& pushConstants) + { + + KP_LOG_DEBUG("Kompute Manager algorithm creation triggered"); + + std::shared_ptr algorithm{ new kp::Algorithm( + this->mDevice, + tensors, + spirv, + workgroup, + specializationConstants, + pushConstants) }; + + if (this->mManageResources) { + this->mManagedAlgorithms.push_back(algorithm); + } + + return algorithm; + } /** * Destroy the GPU resources and all managed resources by manager. diff --git a/src/include/kompute/operations/OpAlgoDispatch.hpp b/src/include/kompute/operations/OpAlgoDispatch.hpp index 600b6116c..48acd6014 100644 --- a/src/include/kompute/operations/OpAlgoDispatch.hpp +++ b/src/include/kompute/operations/OpAlgoDispatch.hpp @@ -25,8 +25,24 @@ class OpAlgoDispatch : public OpBase * @param algorithm The algorithm object to use for dispatch * @param pushConstants The push constants to use for override */ + template OpAlgoDispatch(const std::shared_ptr& algorithm, - const kp::Constants& pushConstants = {}); + const std::vector& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); + + this->mAlgorithm = algorithm; + + if (pushConstants.size()) { + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = size * memorySize; + this->mPushConstantsData = malloc(totalSize); + memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } + } /** * Default destructor, which is in charge of destroying the algorithm @@ -63,7 +79,9 @@ class OpAlgoDispatch : public OpBase private: // -------------- ALWAYS OWNED RESOURCES std::shared_ptr mAlgorithm; - Constants mPushConstants; + void* mPushConstantsData = nullptr; + uint32_t mPushConstantsDataTypeMemorySize = 0; + uint32_t mPushConstantsSize = 0; }; } // End namespace kp diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp index 97b29cad9..2d6b88057 
100644 --- a/src/include/kompute/operations/OpMult.hpp +++ b/src/include/kompute/operations/OpMult.hpp @@ -45,7 +45,7 @@ class OpMult : public OpAlgoDispatch (uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv + kp::shader_data::shaders_glsl_opmult_comp_spv_len)); - algorithm->rebuild(tensors, spirv); + algorithm->rebuild<>(tensors, spirv); } /** diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index c1818ec27..9b736213f 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -48,7 +48,7 @@ TEST(TestLogisticRegression, TestMainLogisticRegression) test_shaders_glsl_test_logistic_regression_comp_spv_len)); std::shared_ptr algorithm = mgr.algorithm( - params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 })); + params, spirv, kp::Workgroup({ 5 }), std::vector({ 5.0 })); std::shared_ptr sq = mgr.sequence() @@ -127,7 +127,7 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) shaders_glsl_logisticregression_comp_spv_len)); std::shared_ptr algorithm = - mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({ 5.0 })); + mgr.algorithm(params, spirv, kp::Workgroup(), std::vector({ 5.0 })); std::shared_ptr sq = mgr.sequence() diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 7f63c208f..40d190c62 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -49,9 +49,9 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) }; kp::Workgroup workgroup({ 3, 1, 1 }); - kp::Constants specConsts({ 2 }); - kp::Constants pushConstsA({ 2.0 }); - kp::Constants pushConstsB({ 3.0 }); + std::vector specConsts({ 2 }); + std::vector pushConstsA({ 2.0 }); + std::vector pushConstsB({ 3.0 }); auto algorithm = mgr.algorithm(params, compileSource(shader), @@ -220,7 +220,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); } -TEST(TestAlgoUtils, 
TestAlgorithmUtilFunctions) +TEST(TestMultipleAlgoExecutions, TestAlgorithmUtilFunctions) { kp::Manager mgr; @@ -263,8 +263,8 @@ TEST(TestAlgoUtils, TestAlgorithmUtilFunctions) }; kp::Workgroup workgroup({ 3, 1, 1 }); - kp::Constants specConsts({ 2 }); - kp::Constants pushConsts({ 2.0 }); + std::vector specConsts({ 2 }); + std::vector pushConsts({ 2.0 }); auto algorithm = mgr.algorithm(params, compileSource(shader), @@ -273,6 +273,6 @@ TEST(TestAlgoUtils, TestAlgorithmUtilFunctions) pushConsts); EXPECT_EQ(algorithm->getWorkgroup(), workgroup); - EXPECT_EQ(algorithm->getPush(), pushConsts); - EXPECT_EQ(algorithm->getSpecializationConstants(), specConsts); + EXPECT_EQ(algorithm->getPushConstants(), pushConsts); + EXPECT_EQ(algorithm->getSpecializationConstants(), specConsts); } diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp index 83b3d3d83..4742cd187 100644 --- a/test/TestPushConstant.cpp +++ b/test/TestPushConstant.cpp @@ -44,11 +44,11 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride) // We need to run this in sequence to avoid race condition // We can't use atomicAdd as swiftshader doesn't support it for // float - sq->eval(algo, kp::Constants{ 0.1, 0.2, 0.3 }); - sq->eval(algo, kp::Constants{ 0.3, 0.2, 0.1 }); + sq->eval(algo, std::vector{ 0.1, 0.2, 0.3 }); + sq->eval(algo, std::vector{ 0.3, 0.2, 0.1 }); sq->eval({ tensor }); - EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 })); + EXPECT_EQ(tensor->vector(), std::vector({ 0.4, 0.4, 0.4 })); } } } @@ -90,10 +90,10 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride) // We can't use atomicAdd as swiftshader doesn't support it for // float sq->eval(algo); - sq->eval(algo, kp::Constants{ 0.3, 0.2, 0.1 }); + sq->eval(algo, std::vector{ 0.3, 0.2, 0.1 }); sq->eval({ tensor }); - EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 })); + EXPECT_EQ(tensor->vector(), std::vector({ 0.4, 0.4, 0.4 })); } } } @@ -132,8 +132,235 @@ TEST(TestPushConstants, 
TestConstantsWrongSize) sq = mgr.sequence()->record({ tensor }); EXPECT_THROW(sq->record( - algo, kp::Constants{ 0.1, 0.2, 0.3 }), + algo, std::vector{ 0.1, 0.2, 0.3 }), std::runtime_error); } } } + +// TODO: Ensure different types are considered for push constants +// TEST(TestPushConstants, TestConstantsWrongType) +// { +// { +// std::string shader(R"( +// #version 450 +// layout(push_constant) uniform PushConstants { +// float x; +// float y; +// float z; +// } pcs; +// layout (local_size_x = 1) in; +// layout(set = 0, binding = 0) buffer a { float pa[]; }; +// void main() { +// pa[0] += pcs.x; +// pa[1] += pcs.y; +// pa[2] += pcs.z; +// })"); +// +// std::vector spirv = compileSource(shader); +// +// std::shared_ptr sq = nullptr; +// +// { +// kp::Manager mgr; +// +// std::shared_ptr> tensor = +// mgr.tensor({ 0, 0, 0 }); +// +// std::shared_ptr algo = mgr.algorithm( +// { tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 }); +// +// sq = mgr.sequence()->record({ tensor }); +// +// EXPECT_THROW(sq->record( +// algo, std::vector{ 1, 2, 3 }), +// std::runtime_error); +// } +// } +// } + +TEST(TestPushConstants, TestConstantsMixedTypes) +{ + { + std::string shader(R"( + #version 450 + layout(push_constant) uniform PushConstants { + float x; + uint y; + int z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { float pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y - 2147483000; + pa[2] += pcs.z; + })"); + + struct TestConsts{ + float x; + uint32_t y; + int32_t z; + }; + + std::vector spirv = compileSource(shader); + + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensor = + mgr.tensorT({ 0, 0, 0 }); + + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }}); + + sq = mgr.sequence()->eval({ tensor }); + + // We need to run this in sequence to avoid race condition + // We can't use atomicAdd as swiftshader doesn't support it for + // float +
sq->eval(algo, std::vector{{ 15.32, 2147483650, 10 }}); + sq->eval(algo, std::vector{{ 30.32, 2147483650, -3 }}); + sq->eval({ tensor }); + + EXPECT_EQ(tensor->vector(), std::vector({ 45.64, 1300, 7 })); + } + } +} + +TEST(TestPushConstants, TestConstantsInt) +{ + { + std::string shader(R"( + #version 450 + layout(push_constant) uniform PushConstants { + int x; + int y; + int z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { int pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y; + pa[2] += pcs.z; + })"); + + std::vector spirv = compileSource(shader); + + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensor = + mgr.tensorT({ -1, -1, -1 }); + + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }}); + + sq = mgr.sequence()->eval({ tensor }); + + // We need to run this in sequence to avoid race condition + // Each eval of algo adds its push constants to the int buffer: -1 initially, + // -3 per element after both + sq->eval(algo, std::vector{{ -1, -1, -1 }}); + sq->eval(algo, std::vector{{ -1, -1, -1 }}); + sq->eval({ tensor }); + + EXPECT_EQ(tensor->vector(), std::vector({ -3, -3, -3 })); + } + } +} + +TEST(TestPushConstants, TestConstantsUnsignedInt) +{ + { + std::string shader(R"( + #version 450 + layout(push_constant) uniform PushConstants { + uint x; + uint y; + uint z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { uint pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y; + pa[2] += pcs.z; + })"); + + std::vector spirv = compileSource(shader); + + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensor = + mgr.tensorT({ 0, 0, 0 }); + + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }}); + + sq = mgr.sequence()->eval({ tensor }); + + // We need to run this in sequence to avoid race condition + // Each eval of algo adds its push constants to the uint buffer: 0 initially, +
// 2147483655 per element after both + sq->eval(algo, std::vector{{ 2147483650, 2147483650, 2147483650 }}); + sq->eval(algo, std::vector{{ 5, 5, 5 }}); + sq->eval({ tensor }); + + EXPECT_EQ(tensor->vector(), std::vector({ 2147483655, 2147483655, 2147483655 })); + } + } +} + +TEST(TestPushConstants, TestConstantsDouble) +{ + { + std::string shader(R"( + #version 450 + layout(push_constant) uniform PushConstants { + double x; + double y; + double z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { double pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y; + pa[2] += pcs.z; + })"); + + std::vector spirv = compileSource(shader); + + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensor = + mgr.tensorT({ 0, 0, 0 }); + + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }}); + + sq = mgr.sequence()->eval({ tensor }); + + // We need to run this in sequence to avoid race condition + // Each eval of algo adds its push constants to the double buffer. This test + // is listed in the Makefile FILTER_TESTS as not working with swiftshader + sq->eval(algo, std::vector{{ 1.1111222233334444, 2.1111222233334444, 3.1111222233334444 }}); + sq->eval(algo, std::vector{{ 1.1111222233334444, 2.1111222233334444, 3.1111222233334444 }}); + sq->eval({ tensor }); + + EXPECT_EQ(tensor->vector(), std::vector({ 2.2222444466668888, 4.2222444466668888, 6.2222444466668888 })); + } + } +} diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp index 15da143a0..abc507e7e 100644 --- a/test/TestSpecializationConstant.cpp +++ b/test/TestSpecializationConstant.cpp @@ -37,7 +37,7 @@ TEST(TestSpecializationConstants, TestTwoConstants) std::vector> params = { tensorA, tensorB }; - kp::Constants spec = kp::Constants({ 5.0, 0.3 }); + std::vector spec = std::vector({ 5.0, 0.3 }); std::shared_ptr algo = mgr.algorithm(params, spirv, {}, spec); @@ -53,3 +53,52 @@ TEST(TestSpecializationConstants, TestTwoConstants) } } } + +TEST(TestSpecializationConstants,
TestConstantsInt) +{ + { + std::string shader(R"( + #version 450 + layout (constant_id = 0) const int cOne = 1; + layout (constant_id = 1) const int cTwo = 1; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { int pa[]; }; + layout(set = 0, binding = 1) buffer b { int pb[]; }; + void main() { + uint index = gl_GlobalInvocationID.x; + pa[index] = cOne; + pb[index] = cTwo; + })"); + + std::vector spirv = compileSource(shader); + + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensorA = + mgr.tensorT({ 0, 0, 0 }); + std::shared_ptr> tensorB = + mgr.tensorT({ 0, 0, 0 }); + + std::vector> params = { tensorA, + tensorB }; + + std::vector spec({ -1, -2 }); + + std::shared_ptr algo = + mgr.algorithm(params, spirv, {}, spec, {}); + + sq = mgr.sequence() + ->record(params) + ->record(algo) + ->record(params) + ->eval(); + + EXPECT_EQ(tensorA->vector(), std::vector({ -1, -1, -1 })); + EXPECT_EQ(tensorB->vector(), std::vector({ -2, -2, -2 })); + } + } +} +