Merge pull request #242 from KomputeProject/multi_types_consts
Adding support for different types for spec and push consts
This commit is contained in:
commit
b7643a1b53
22 changed files with 1007 additions and 225 deletions
8
Makefile
8
Makefile
|
|
@ -12,8 +12,8 @@ VERSION := $(shell cat ./VERSION)
|
|||
VCPKG_WIN_PATH ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsystems\\vcpkg.cmake"
|
||||
VCPKG_UNIX_PATH ?= "/c/Users/axsau/Programming/lib/vcpkg/scripts/buildsystems/vcpkg.cmake"
|
||||
|
||||
# Regext to pass to catch2 to filter tests
|
||||
FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps"
|
||||
# These are the tests that don't work with swiftshader but can be run directly with vulkan
|
||||
FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps:TestPushConstants.TestConstantsDouble"
|
||||
|
||||
ifeq ($(OS),Windows_NT) # is Windows_NT on XP, 2000, 7, Vista, 10...
|
||||
CMAKE_BIN ?= "C:\Program Files\CMake\bin\cmake.exe"
|
||||
|
|
@ -105,7 +105,7 @@ mk_run_tests_cpu: mk_build_swiftshader_library mk_build_tests mk_run_tests_cpu_o
|
|||
VS_BUILD_TYPE ?= "Debug"
|
||||
# Run with multiprocessing / parallel build by default
|
||||
VS_CMAKE_EXTRA_FLAGS ?= ""
|
||||
VS_KOMPUTE_EXTRA_CXX_FLAGS ?= "/MP" # /MP is for faster multiprocessing builds. You should add "/MT" for submodule builds for compatibility with gtest
|
||||
VS_KOMPUTE_EXTRA_CXX_FLAGS ?= "/MT" # /MP is for faster multiprocessing builds. You should add "/MT" for submodule builds for compatibility with gtest
|
||||
VS_INSTALL_PATH ?= "build/src/CMakeFiles/Export/" # Set to "" if prefer default
|
||||
|
||||
vs_cmake:
|
||||
|
|
@ -116,7 +116,7 @@ vs_cmake:
|
|||
-DKOMPUTE_EXTRA_CXX_FLAGS=$(VS_KOMPUTE_EXTRA_CXX_FLAGS) \
|
||||
-DCMAKE_INSTALL_PREFIX=$(VS_INSTALL_PATH) \
|
||||
-DKOMPUTE_OPT_INSTALL=1 \
|
||||
-DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=0 \
|
||||
-DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \
|
||||
-DKOMPUTE_OPT_BUILD_TESTS=1 \
|
||||
-DKOMPUTE_OPT_BUILD_SHADERS=1 \
|
||||
-DKOMPUTE_OPT_BUILD_SINGLE_HEADER=1 \
|
||||
|
|
|
|||
|
|
@ -89,9 +89,9 @@ void kompute(const std::string& shader) {
|
|||
|
||||
// 3. Create algorithm based on shader (supports buffers & push/spec constants)
|
||||
kp::Workgroup workgroup({3, 1, 1});
|
||||
kp::Constants specConsts({ 2 });
|
||||
kp::Constants pushConstsA({ 2.0 });
|
||||
kp::Constants pushConstsB({ 3.0 });
|
||||
std::vector<float> specConsts({ 2 });
|
||||
std::vector<float> pushConstsA({ 2.0 });
|
||||
std::vector<float> pushConstsB({ 3.0 });
|
||||
|
||||
auto algorithm = mgr.algorithm(params,
|
||||
// See documentation shader section for compileSource
|
||||
|
|
|
|||
|
|
@ -38,9 +38,10 @@ Documentation Index (as per sidebar)
|
|||
|
||||
.. toctree::
|
||||
:titlesonly:
|
||||
:caption: Concepts & Deep Dives:
|
||||
:caption: Advanced Concepts & Deep Dives:
|
||||
|
||||
CI, Docker Images Docs & Tests <overview/ci-tests.rst>
|
||||
CI, Docker Images Docs & Tests <overview/ci-tests>
|
||||
Variable Types for Tensors, and Push/Spec Constants <overview/variable-types>
|
||||
Asynchronous & Parallel Operations <overview/async-parallel>
|
||||
Mobile App Integration (Android) <overview/mobile-android>
|
||||
Game Engine Integration (Godot Engine) <overview/game-engine-godot>
|
||||
|
|
|
|||
|
|
@ -71,13 +71,13 @@ The example below shows how you can enable the "VK_EXT_shader_atomic_float" exte
|
|||
sq = mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ tensor })
|
||||
->record<kp::OpAlgoDispatch>(algo,
|
||||
kp::Constants{ 0.1, 0.2, 0.3 })
|
||||
std::vector<float>{ 0.1, 0.2, 0.3 })
|
||||
->record<kp::OpAlgoDispatch>(algo,
|
||||
kp::Constants{ 0.3, 0.2, 0.1 })
|
||||
std::vector<float>{ 0.3, 0.2, 0.1 })
|
||||
->record<kp::OpTensorSyncLocal>({ tensor })
|
||||
->eval();
|
||||
|
||||
EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 }));
|
||||
EXPECT_EQ(tensor->data(), std::vector<float>({ 0.4, 0.4, 0.4 }));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
92
docs/overview/variable-types.rst
Normal file
92
docs/overview/variable-types.rst
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
|
||||
Variable Types for Tensors and Constants
|
||||
========================================
|
||||
|
||||
The initial interfaces you interact with primarily use float values by default, which is enough to get through the basic conceptual examples. However, as real-world applications are developed, more specialized types may be required for kp::Tensor, as well as for SpecializationConstants and PushConstants.
|
||||
|
||||
Before diving into the practical classes and interfaces that can be used to take advantage of the variable type support of Kompute, we want to provide some high-level intuition on what each of these components is.
|
||||
|
||||
Variable Tensor Types
|
||||
---------------------
|
||||
|
||||
For the kp::Tensor class, Kompute provides under the hood an interface for more seamless interaction with multiple different underlying data types. This is done through the introduction of the class kp::TensorT<type> and its parent class kp::Tensor; however, as a developer you will primarily be interacting with the top-level kp::Tensor class, as this is what is provided through the high-level kp::Manager class.
|
||||
|
||||
The kp::Tensor class does provide an "integrated" experience, which allows users to "seamlessly" retrieve the underlying data through the `data()` and `vector()` functions. This is done by leveraging C++ templates, as well as limiting the types that can be used, which are namely:
|
||||
|
||||
* float
|
||||
* uint32
|
||||
* int32
|
||||
* double
|
||||
* bool
|
||||
|
||||
Any other data type provided would result in an error, and for the time being Kompute will focus primarily on providing support for these types.
|
||||
|
||||
The tests under `TestTensor.cpp` and `test_tensor_types.py` provide an overview of how users can take advantage of these features using std::vector for C++ and numpy array for Python.
|
||||
|
||||
C++ Tensor Types Usage
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Below you can see how it is possible to define different types in C++.
|
||||
|
||||
.. literalinclude:: ../../test/TestTensor.cpp
|
||||
:language: cpp
|
||||
:lines: 21-
|
||||
|
||||
Python Tensor Types Usage
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. literalinclude:: ../../python/test/test_tensor_types.py
|
||||
:language: python
|
||||
:lines: 26-46
|
||||
|
||||
Variable Push Constants
|
||||
-----------------------
|
||||
|
||||
Push constants are a relatively non-expensive way to provide dynamic data to a GPU Algorithm (shader) as further CPU compute is performed. Although Push Constants are a more efficient way to provide data, it is also a limited manner as there is a memory limit for push constants.
|
||||
|
||||
Push constants with Kompute are flexible as it is possible to pass user-defined structs in C++. In Python it is limited to providing numpy arrays with multiple elements of the same type.
|
||||
|
||||
C++ Push Consts Types Usage
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
As mentioned above, the test under `TestPushConstant.cpp` shows how it is possible to use user-defined structs for multiple elements of different types, which is not possible for specialization constants or tensors.
|
||||
|
||||
These are defined in the `algorithm` function of the `kp::Manager`, and once a push constant is set, all other push constants provided have to consist of the same types and element size.
|
||||
|
||||
More specifically, when passing a custom struct it is possible to pass a single element, or alternatively passing multiple scalar values as part of the vector, and access them as outlined in the rest of the tests.
|
||||
|
||||
.. literalinclude:: ../../test/TestPushConstant.cpp
|
||||
:language: cpp
|
||||
:lines: 182-231
|
||||
|
||||
|
||||
Python Push Consts Types Usage
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
In python the push constants are limited to a single list of elements of the same type. These are provided by passing a numpy array to the `algorithm` function or the `kp::OpAlgoDispatch` operation.
|
||||
|
||||
.. literalinclude:: ../../python/test/test_tensor_types.py
|
||||
:language: python
|
||||
:lines: 207-242
|
||||
|
||||
Variable Specialization Constants
|
||||
---------------------------------
|
||||
|
||||
Specialization constants are analogous to push constants, but these are not dynamic, can only be set on initialization or rebuild of `kp::Algorithm` and cannot be changed unless a `rebuild` is carried out.
|
||||
|
||||
The usage of specialization constants is very similar to that of push constants, with the following limitations:
|
||||
|
||||
* These are defined using the constant_id in the glsl shader
|
||||
* Spec constants do not support complex types (i.e. user defined struct)
|
||||
* Kompute supports an array of elements of same type for specialization constants
|
||||
|
||||
C++ Spec Consts Types Usage
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The specialization constant example shows how it is possible to define them as a std::vector.
|
||||
|
||||
.. literalinclude:: ../../test/TestSpecializationConstant.cpp
|
||||
:language: cpp
|
||||
:lines: 57-
|
||||
|
||||
|
||||
|
|
@ -47,7 +47,7 @@ void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData,
|
|||
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm(params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 }));
|
||||
mgr.algorithm(params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
|
||||
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ int main()
|
|||
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
|
||||
params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 }));
|
||||
params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
|
||||
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
|
||||
|
|
|
|||
|
|
@ -36,9 +36,9 @@ tensors (optional) The tensors to use to create the descriptor
|
|||
resources @param spirv (optional) The spirv code to use to create the
|
||||
algorithm @param workgroup (optional) The kp::Workgroup to use for the
|
||||
dispatch which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if
|
||||
not set. @param specializationConstants (optional) The kp::Constants
|
||||
not set. @param specializationConstants (optional) The std::vector<float>
|
||||
to use to initialize the specialization constants which cannot be
|
||||
changed once set. @param pushConstants (optional) The kp::Constants to
|
||||
changed once set. @param pushConstants (optional) The std::vector<float> to
|
||||
use when initializing the pipeline, which set the size of the push
|
||||
constants - these can be modified but all new values must have the
|
||||
same vector size as this initial value.)doc";
|
||||
|
|
@ -54,12 +54,12 @@ static const char *__doc_kp_Algorithm_destroy = R"doc()doc";
|
|||
static const char *__doc_kp_Algorithm_getPush =
|
||||
R"doc(Gets the specialization constants of the current algorithm.
|
||||
|
||||
@returns The kp::Constants currently set for push constants)doc";
|
||||
@returns The std::vector<float> currently set for push constants)doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_getSpecializationConstants =
|
||||
R"doc(Gets the specialization constants of the current algorithm.
|
||||
|
||||
@returns The kp::Constants currently set for specialization constants)doc";
|
||||
@returns The std::vector<float> currently set for specialization constants)doc";
|
||||
|
||||
static const char *__doc_kp_Algorithm_getTensors =
|
||||
R"doc(Gets the current tensors that are used in the algorithm.
|
||||
|
|
@ -127,9 +127,9 @@ parameters to create the underlying resources.
|
|||
@param spirv The spirv code to use to create the algorithm @param
|
||||
workgroup (optional) The kp::Workgroup to use for the dispatch which
|
||||
defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. @param
|
||||
specializationConstants (optional) The kp::Constants to use to
|
||||
specializationConstants (optional) The std::vector<float> to use to
|
||||
initialize the specialization constants which cannot be changed once
|
||||
set. @param pushConstants (optional) The kp::Constants to use when
|
||||
set. @param pushConstants (optional) The std::vector<float> to use when
|
||||
initializing the pipeline, which set the size of the push constants -
|
||||
these can be modified but all new values must have the same vector
|
||||
size as this initial value.)doc";
|
||||
|
|
|
|||
|
|
@ -14,6 +14,31 @@ namespace py = pybind11;
|
|||
//used in Core.hpp
|
||||
py::object kp_debug, kp_info, kp_warning, kp_error;
|
||||
|
||||
std::unique_ptr<kp::OpAlgoDispatch> opAlgoDispatchPyInit(
|
||||
std::shared_ptr<kp::Algorithm>& algorithm,
|
||||
const py::array& push_consts) {
|
||||
const py::buffer_info info = push_consts.request();
|
||||
KP_LOG_DEBUG("Kompute Python Manager creating tensor_T with push_consts size {} dtype {}",
|
||||
push_consts.size(), std::string(py::str(push_consts.dtype())));
|
||||
|
||||
|
||||
if (push_consts.dtype() == py::dtype::of<std::float_t>()) {
|
||||
std::vector<float> dataVec((float*)info.ptr, ((float*)info.ptr) + info.size);
|
||||
return std::unique_ptr<kp::OpAlgoDispatch>{new kp::OpAlgoDispatch(algorithm, dataVec)};
|
||||
} else if (push_consts.dtype() == py::dtype::of<std::uint32_t>()) {
|
||||
std::vector<uint32_t> dataVec((uint32_t*)info.ptr, ((uint32_t*)info.ptr) + info.size);
|
||||
return std::unique_ptr<kp::OpAlgoDispatch>{new kp::OpAlgoDispatch(algorithm, dataVec)};
|
||||
} else if (push_consts.dtype() == py::dtype::of<std::int32_t>()) {
|
||||
std::vector<int32_t> dataVec((int32_t*)info.ptr, ((int32_t*)info.ptr) + info.size);
|
||||
return std::unique_ptr<kp::OpAlgoDispatch>{new kp::OpAlgoDispatch(algorithm, dataVec)};
|
||||
} else if (push_consts.dtype() == py::dtype::of<std::double_t>()) {
|
||||
std::vector<double> dataVec((double*)info.ptr, ((double*)info.ptr) + info.size);
|
||||
return std::unique_ptr<kp::OpAlgoDispatch>{new kp::OpAlgoDispatch(algorithm, dataVec)};
|
||||
} else {
|
||||
throw std::runtime_error("Kompute Python no valid dtype supported");
|
||||
}
|
||||
}
|
||||
|
||||
PYBIND11_MODULE(kp, m) {
|
||||
|
||||
// The logging modules are used in the Kompute.hpp file
|
||||
|
|
@ -49,9 +74,12 @@ PYBIND11_MODULE(kp, m) {
|
|||
|
||||
py::class_<kp::OpAlgoDispatch, std::shared_ptr<kp::OpAlgoDispatch>>(
|
||||
m, "OpAlgoDispatch", py::base<kp::OpBase>(), DOC(kp, OpAlgoDispatch))
|
||||
.def(py::init<const std::shared_ptr<kp::Algorithm>&,const kp::Constants&>(),
|
||||
.def(py::init<const std::shared_ptr<kp::Algorithm>&,const std::vector<float>&>(),
|
||||
DOC(kp, OpAlgoDispatch, OpAlgoDispatch),
|
||||
py::arg("algorithm"), py::arg("push_consts") = kp::Constants());
|
||||
py::arg("algorithm"), py::arg("push_consts") = std::vector<float>())
|
||||
.def(py::init(&opAlgoDispatchPyInit),
|
||||
DOC(kp, OpAlgoDispatch, OpAlgoDispatch),
|
||||
py::arg("algorithm"), py::arg("push_consts"));
|
||||
|
||||
py::class_<kp::OpMult, std::shared_ptr<kp::OpMult>>(
|
||||
m, "OpMult", py::base<kp::OpBase>(), DOC(kp, OpMult))
|
||||
|
|
@ -61,7 +89,6 @@ PYBIND11_MODULE(kp, m) {
|
|||
py::class_<kp::Algorithm, std::shared_ptr<kp::Algorithm>>(m, "Algorithm", DOC(kp, Algorithm, Algorithm))
|
||||
.def("get_tensors", &kp::Algorithm::getTensors, DOC(kp, Algorithm, getTensors))
|
||||
.def("destroy", &kp::Algorithm::destroy, DOC(kp, Algorithm, destroy))
|
||||
.def("get_spec_consts", &kp::Algorithm::getSpecializationConstants, DOC(kp, Algorithm, getSpecializationConstants))
|
||||
.def("is_init", &kp::Algorithm::isInit, DOC(kp, Algorithm, isInit));
|
||||
|
||||
py::class_<kp::Tensor, std::shared_ptr<kp::Tensor>>(m, "Tensor", DOC(kp, Tensor))
|
||||
|
|
@ -179,8 +206,8 @@ PYBIND11_MODULE(kp, m) {
|
|||
const std::vector<std::shared_ptr<kp::Tensor>>& tensors,
|
||||
const py::bytes& spirv,
|
||||
const kp::Workgroup& workgroup,
|
||||
const kp::Constants& spec_consts,
|
||||
const kp::Constants& push_consts) {
|
||||
const std::vector<float>& spec_consts,
|
||||
const std::vector<float>& push_consts) {
|
||||
py::buffer_info info(py::buffer(spirv).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
|
|
@ -191,8 +218,100 @@ PYBIND11_MODULE(kp, m) {
|
|||
py::arg("tensors"),
|
||||
py::arg("spirv"),
|
||||
py::arg("workgroup") = kp::Workgroup(),
|
||||
py::arg("spec_consts") = kp::Constants(),
|
||||
py::arg("push_consts") = kp::Constants())
|
||||
py::arg("spec_consts") = std::vector<float>(),
|
||||
py::arg("push_consts") = std::vector<float>())
|
||||
.def("algorithm", [np](kp::Manager& self,
|
||||
const std::vector<std::shared_ptr<kp::Tensor>>& tensors,
|
||||
const py::bytes& spirv,
|
||||
const kp::Workgroup& workgroup,
|
||||
const py::array& spec_consts,
|
||||
const py::array& push_consts) {
|
||||
|
||||
py::buffer_info info(py::buffer(spirv).request());
|
||||
const char *data = reinterpret_cast<const char *>(info.ptr);
|
||||
size_t length = static_cast<size_t>(info.size);
|
||||
std::vector<uint32_t> spirvVec((uint32_t*)data, (uint32_t*)(data + length));
|
||||
|
||||
const py::buffer_info pushInfo = push_consts.request();
|
||||
const py::buffer_info specInfo = spec_consts.request();
|
||||
|
||||
KP_LOG_DEBUG("Kompute Python Manager creating Algorithm_T with "
|
||||
"push consts data size {} dtype {} and spec const data size {} dtype {}",
|
||||
push_consts.size(), std::string(py::str(push_consts.dtype())),
|
||||
spec_consts.size(), std::string(py::str(spec_consts.dtype())));
|
||||
|
||||
// We have to iterate across a combination of parameters due to the lack of support for templating
|
||||
if (spec_consts.dtype() == py::dtype::of<std::float_t>()) {
|
||||
std::vector<float> specConstsVec((float*)specInfo.ptr, ((float*)specInfo.ptr) + specInfo.size);
|
||||
if (spec_consts.dtype() == py::dtype::of<std::float_t>()) {
|
||||
std::vector<float> pushConstsVec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::int32_t>()) {
|
||||
std::vector<int32_t> pushConstsVec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::uint32_t>()) {
|
||||
std::vector<uint32_t> pushConstsVec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::double_t>()) {
|
||||
std::vector<double> pushConstsVec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
|
||||
}
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::int32_t>()) {
|
||||
std::vector<int32_t> specconstsvec((int32_t*)specInfo.ptr, ((int32_t*)specInfo.ptr) + specInfo.size);
|
||||
if (spec_consts.dtype() == py::dtype::of<std::float_t>()) {
|
||||
std::vector<float> pushconstsvec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec);
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::int32_t>()) {
|
||||
std::vector<int32_t> pushconstsvec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec);
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::uint32_t>()) {
|
||||
std::vector<uint32_t> pushconstsvec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec);
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::double_t>()) {
|
||||
std::vector<double> pushconstsvec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec);
|
||||
}
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::uint32_t>()) {
|
||||
std::vector<uint32_t> specconstsvec((uint32_t*)specInfo.ptr, ((uint32_t*)specInfo.ptr) + specInfo.size);
|
||||
if (spec_consts.dtype() == py::dtype::of<std::float_t>()) {
|
||||
std::vector<float> pushconstsvec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec);
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::int32_t>()) {
|
||||
std::vector<int32_t> pushconstsvec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec);
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::uint32_t>()) {
|
||||
std::vector<uint32_t> pushconstsvec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec);
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::double_t>()) {
|
||||
std::vector<double> pushconstsvec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec);
|
||||
}
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::double_t>()) {
|
||||
std::vector<double> specconstsvec((double*)specInfo.ptr, ((double*)specInfo.ptr) + specInfo.size);
|
||||
if (spec_consts.dtype() == py::dtype::of<std::float_t>()) {
|
||||
std::vector<float> pushconstsvec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec);
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::int32_t>()) {
|
||||
std::vector<float> pushconstsvec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec);
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::uint32_t>()) {
|
||||
std::vector<float> pushconstsvec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec);
|
||||
} else if (spec_consts.dtype() == py::dtype::of<std::double_t>()) {
|
||||
std::vector<float> pushconstsvec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size);
|
||||
return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec);
|
||||
}
|
||||
} else {
|
||||
// If reach then no valid dtype supported
|
||||
throw std::runtime_error("Kompute Python no valid dtype supported");
|
||||
}
|
||||
},
|
||||
DOC(kp, Manager, algorithm),
|
||||
py::arg("tensors"),
|
||||
py::arg("spirv"),
|
||||
py::arg("workgroup") = kp::Workgroup(),
|
||||
py::arg("spec_consts") = std::vector<float>(),
|
||||
py::arg("push_consts") = std::vector<float>())
|
||||
.def("list_devices", [](kp::Manager& self){
|
||||
const std::vector<vk::PhysicalDevice> devices = self.listDevices();
|
||||
py::list list;
|
||||
|
|
|
|||
|
|
@ -197,10 +197,49 @@ def test_pushconsts():
|
|||
.record(kp.OpTensorSyncDevice([tensor]))
|
||||
.record(kp.OpAlgoDispatch(algo))
|
||||
.record(kp.OpAlgoDispatch(algo, [0.3, 0.2, 0.1]))
|
||||
.record(kp.OpAlgoDispatch(algo, [0.3, 0.2, 0.1]))
|
||||
.record(kp.OpTensorSyncLocal([tensor]))
|
||||
.eval())
|
||||
|
||||
assert np.all(tensor.data() == np.array([0.4, 0.4, 0.4], dtype=np.float32))
|
||||
assert np.allclose(tensor.data(), np.array([0.7, 0.6, 0.5], dtype=np.float32))
|
||||
|
||||
|
||||
def test_pushconsts_int():
    """Check that int32 push constants passed as numpy arrays reach the shader.

    The shader adds the three push constant values to the three tensor
    elements on every dispatch; after three dispatches with -1 values the
    tensor is expected to hold [-3, -3, -3].
    """
    shader_source = """
        #version 450
        layout(push_constant) uniform PushConstants {
            int x;
            int y;
            int z;
        } pcs;
        layout (local_size_x = 1) in;
        layout(set = 0, binding = 0) buffer a { int pa[]; };
        void main() {
            pa[0] += pcs.x;
            pa[1] += pcs.y;
            pa[2] += pcs.z;
        }
    """
    spirv = compile_source(shader_source)

    mgr = kp.Manager()
    tensor = mgr.tensor_t(np.array([0, 0, 0], dtype=np.int32))

    # No specialization constants are used; the empty array only carries the dtype.
    empty_spec = np.array([], dtype=np.int32)
    initial_push = np.array([-1, -1, -1], dtype=np.int32)
    algo = mgr.algorithm([tensor], spirv, (1, 1, 1), empty_spec, initial_push)

    seq = mgr.sequence()
    seq = seq.record(kp.OpTensorSyncDevice([tensor]))
    # First dispatch is recorded without explicit push constants.
    seq = seq.record(kp.OpAlgoDispatch(algo))
    for _ in range(2):
        seq = seq.record(kp.OpAlgoDispatch(algo, np.array([-1, -1, -1], dtype=np.int32)))
    seq = seq.record(kp.OpTensorSyncLocal([tensor]))
    seq.eval()

    expected = np.array([-3, -3, -3], dtype=np.int32)
    assert np.all(tensor.data() == expected)
|
||||
|
||||
|
||||
def test_workgroup():
|
||||
|
|
|
|||
|
|
@ -1087,19 +1087,37 @@ class Algorithm
|
|||
* @param spirv (optional) The spirv code to use to create the algorithm
|
||||
* @param workgroup (optional) The kp::Workgroup to use for the dispatch
|
||||
* which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
|
||||
* @param specializationConstants (optional) The kp::Constants to use to
|
||||
* @param specializationConstants (optional) The templatable param is to be used to
|
||||
* initialize the specialization constants which cannot be changed once set.
|
||||
* @param pushConstants (optional) The kp::Constants to use when
|
||||
* @param pushConstants (optional) This templatable param is to be used when
|
||||
* initializing the pipeline, which set the size of the push constants -
|
||||
* these can be modified but all new values must have the same vector size
|
||||
* as this initial value.
|
||||
* these can be modified but all new values must have the same data type and length
|
||||
* as otherwise it will result in errors.
|
||||
*/
|
||||
template<typename S = float, typename P = float>
|
||||
Algorithm(std::shared_ptr<vk::Device> device,
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
const std::vector<S>& specializationConstants = {},
|
||||
const std::vector<P>& pushConstants = {})
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
|
||||
|
||||
this->mDevice = device;
|
||||
|
||||
if (tensors.size() && spirv.size()) {
|
||||
KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and "
|
||||
"spirv size: {}",
|
||||
tensors.size(),
|
||||
spirv.size());
|
||||
this->rebuild(
|
||||
tensors, spirv, workgroup, specializationConstants, pushConstants);
|
||||
} else {
|
||||
KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or "
|
||||
"spirv so not rebuilding vulkan components");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Rebuild function to reconstruct algorithm with configuration parameters
|
||||
|
|
@ -1109,18 +1127,64 @@ class Algorithm
|
|||
* @param spirv The spirv code to use to create the algorithm
|
||||
* @param workgroup (optional) The kp::Workgroup to use for the dispatch
|
||||
* which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
|
||||
* @param specializationConstants (optional) The kp::Constants to use to
|
||||
* @param specializationConstants (optional) The std::vector<float> to use to
|
||||
* initialize the specialization constants which cannot be changed once set.
|
||||
* @param pushConstants (optional) The kp::Constants to use when
|
||||
* @param pushConstants (optional) The std::vector<float> to use when
|
||||
* initializing the pipeline, which set the size of the push constants -
|
||||
* these can be modified but all new values must have the same vector size
|
||||
* as this initial value.
|
||||
*/
|
||||
template<typename S = float, typename P = float>
|
||||
void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
const std::vector<S>& specializationConstants = {},
|
||||
const std::vector<P>& pushConstants = {})
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm rebuild started");
|
||||
|
||||
this->mTensors = tensors;
|
||||
this->mSpirv = spirv;
|
||||
|
||||
if (specializationConstants.size()) {
|
||||
if (this->mSpecializationConstantsData) {
|
||||
free(this->mSpecializationConstantsData);
|
||||
}
|
||||
uint32_t memorySize = sizeof(decltype(specializationConstants.back()));
|
||||
uint32_t size = specializationConstants.size();
|
||||
uint32_t totalSize = size * memorySize;
|
||||
this->mSpecializationConstantsData = malloc(totalSize);
|
||||
memcpy(this->mSpecializationConstantsData, specializationConstants.data(), totalSize);
|
||||
this->mSpecializationConstantsDataTypeMemorySize = memorySize;
|
||||
this->mSpecializationConstantsSize = size;
|
||||
}
|
||||
|
||||
if (pushConstants.size()) {
|
||||
if (this->mPushConstantsData) {
|
||||
free(this->mPushConstantsData);
|
||||
}
|
||||
uint32_t memorySize = sizeof(decltype(pushConstants.back()));
|
||||
uint32_t size = pushConstants.size();
|
||||
uint32_t totalSize = size * memorySize;
|
||||
this->mPushConstantsData = malloc(totalSize);
|
||||
memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
|
||||
this->mPushConstantsDataTypeMemorySize = memorySize;
|
||||
this->mPushConstantsSize = size;
|
||||
}
|
||||
|
||||
this->setWorkgroup(workgroup,
|
||||
this->mTensors.size() ? this->mTensors[0]->size() : 1);
|
||||
|
||||
// Descriptor pool is created first so if available then destroy all before
|
||||
// rebuild
|
||||
if (this->isInit()) {
|
||||
this->destroy();
|
||||
}
|
||||
|
||||
this->createParameters();
|
||||
this->createShaderModule();
|
||||
this->createPipeline();
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructor for Algorithm which is responsible for freeing and destroying
|
||||
|
|
@ -1175,11 +1239,48 @@ class Algorithm
|
|||
* Sets the push constants to the new value provided to use in the next
|
||||
* bindPush()
|
||||
*
|
||||
* @param The kp::Constant to use to set the push constants to use in the
|
||||
* @param pushConstants The templatable vector is to be used to set the push constants to use in the
|
||||
* next bindPush(...) calls. The constants provided must be of the same size
|
||||
* as the ones created during initialization.
|
||||
*/
|
||||
void setPush(const Constants& pushConstants);
|
||||
template<typename T>
|
||||
void setPushConstants(const std::vector<T>& pushConstants)
|
||||
{
|
||||
uint32_t memorySize = sizeof(decltype(pushConstants.back()));
|
||||
uint32_t size = pushConstants.size();
|
||||
|
||||
this->setPushConstants(pushConstants.data(), size, memorySize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the push constants to the new value provided to use in the next
|
||||
* bindPush() with the raw memory block location and memory size to be used.
|
||||
*
|
||||
* @param data The raw data point to copy the data from, without modifying the pointer.
|
||||
* @param size The number of data elements provided in the data
|
||||
* @param memorySize The memory size of each of the data elements in bytes.
|
||||
*/
|
||||
void setPushConstants(void* data, uint32_t size, uint32_t memorySize) {
|
||||
|
||||
uint32_t totalSize = memorySize * size;
|
||||
uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize;
|
||||
|
||||
if (totalSize != previousTotalSize) {
|
||||
throw std::runtime_error(
|
||||
fmt::format("Kompute Algorithm push "
|
||||
"constant total memory size provided is {} but expected {} bytes",
|
||||
totalSize,
|
||||
previousTotalSize));
|
||||
}
|
||||
if (this->mPushConstantsData) {
|
||||
free(this->mPushConstantsData);
|
||||
}
|
||||
|
||||
this->mPushConstantsData = malloc(totalSize);
|
||||
memcpy(this->mPushConstantsData, data, totalSize);
|
||||
this->mPushConstantsDataTypeMemorySize = memorySize;
|
||||
this->mPushConstantsSize = size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the current workgroup from the algorithm.
|
||||
|
|
@ -1192,15 +1293,25 @@ class Algorithm
|
|||
/**
|
||||
* Gets the specialization constants of the current algorithm.
|
||||
*
|
||||
* @returns The kp::Constants currently set for specialization constants
|
||||
* @returns The std::vector<float> currently set for specialization constants
|
||||
*/
|
||||
const Constants& getSpecializationConstants();
|
||||
template<typename T>
|
||||
const std::vector<T> getSpecializationConstants()
|
||||
{
|
||||
return { (T*)this->mSpecializationConstantsData,
|
||||
((T*)this->mSpecializationConstantsData) + this->mSpecializationConstantsSize };
|
||||
}
|
||||
/**
|
||||
* Gets the push constants of the current algorithm.
|
||||
*
|
||||
* @returns The kp::Constants currently set for push constants
|
||||
* @returns The std::vector<float> currently set for push constants
|
||||
*/
|
||||
const Constants& getPush();
|
||||
template<typename T>
|
||||
const std::vector<T> getPushConstants()
|
||||
{
|
||||
return { (T*)this->mPushConstantsData,
|
||||
((T*)this->mPushConstantsData) + this->mPushConstantsSize };
|
||||
}
|
||||
/**
|
||||
* Gets the current tensors that are used in the algorithm.
|
||||
*
|
||||
|
|
@ -1233,8 +1344,12 @@ class Algorithm
|
|||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<uint32_t> mSpirv;
|
||||
Constants mSpecializationConstants;
|
||||
Constants mPushConstants;
|
||||
void* mSpecializationConstantsData = nullptr;
|
||||
uint32_t mSpecializationConstantsDataTypeMemorySize = 0;
|
||||
uint32_t mSpecializationConstantsSize = 0;
|
||||
void* mPushConstantsData = nullptr;
|
||||
uint32_t mPushConstantsDataTypeMemorySize = 0;
|
||||
uint32_t mPushConstantsSize = 0;
|
||||
Workgroup mWorkgroup;
|
||||
|
||||
// Create util functions
|
||||
|
|
@ -1575,8 +1690,24 @@ class OpAlgoDispatch : public OpBase
|
|||
* @param algorithm The algorithm object to use for dispatch
|
||||
* @param pushConstants The push constants to use for override
|
||||
*/
|
||||
template<typename T = float>
|
||||
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
|
||||
const kp::Constants& pushConstants = {});
|
||||
const std::vector<T>& pushConstants = {})
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
|
||||
|
||||
this->mAlgorithm = algorithm;
|
||||
|
||||
if (pushConstants.size()) {
|
||||
uint32_t memorySize = sizeof(decltype(pushConstants.back()));
|
||||
uint32_t size = pushConstants.size();
|
||||
uint32_t totalSize = size * memorySize;
|
||||
this->mPushConstantsData = malloc(totalSize);
|
||||
memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
|
||||
this->mPushConstantsDataTypeMemorySize = memorySize;
|
||||
this->mPushConstantsSize = size;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
|
|
@ -1613,7 +1744,9 @@ class OpAlgoDispatch : public OpBase
|
|||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
Constants mPushConstants;
|
||||
void* mPushConstantsData = nullptr;
|
||||
uint32_t mPushConstantsDataTypeMemorySize = 0;
|
||||
uint32_t mPushConstantsSize = 0;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
@ -1655,7 +1788,7 @@ class OpMult : public OpAlgoDispatch
|
|||
(uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv +
|
||||
kp::shader_data::shaders_glsl_opmult_comp_spv_len));
|
||||
|
||||
algorithm->rebuild(tensors, spirv);
|
||||
algorithm->rebuild<>(tensors, spirv);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -2082,16 +2215,16 @@ class Manager
|
|||
}
|
||||
|
||||
/**
|
||||
* Create a managed algorithm that will be destroyed by this manager
|
||||
* if it hasn't been destroyed by its reference count going to zero.
|
||||
* Default non-template function that can be used to create algorithm objects
|
||||
* which provides default types to the push and spec constants as floats.
|
||||
*
|
||||
* @param tensors (optional) The tensors to initialise the algorithm with
|
||||
* @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
|
||||
* @param workgroup (optional) kp::Workgroup for algorithm to use, and
|
||||
* defaults to (tensor[0].size(), 1, 1)
|
||||
* @param specializationConstants (optional) kp::Constant to use for
|
||||
* @param specializationConstants (optional) float vector to use for
|
||||
* specialization constants, and defaults to an empty constant
|
||||
* @param pushConstants (optional) kp::Constant to use for push constants,
|
||||
* @param pushConstants (optional) float vector to use for push constants,
|
||||
* and defaults to an empty constant
|
||||
* @returns Shared pointer with initialised algorithm
|
||||
*/
|
||||
|
|
@ -2099,8 +2232,51 @@ class Manager
|
|||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
const std::vector<float>& specializationConstants = {},
|
||||
const std::vector<float>& pushConstants = {})
|
||||
{
|
||||
return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a managed algorithm that will be destroyed by this manager
|
||||
* if it hasn't been destroyed by its reference count going to zero.
|
||||
*
|
||||
* @param tensors (optional) The tensors to initialise the algorithm with
|
||||
* @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
|
||||
* @param workgroup (optional) kp::Workgroup for algorithm to use, and
|
||||
* defaults to (tensor[0].size(), 1, 1)
|
||||
* @param specializationConstants (optional) templatable vector parameter to use for
|
||||
* specialization constants, and defaults to an empty constant
|
||||
* @param pushConstants (optional) templatable vector parameter to use for push constants,
|
||||
* and defaults to an empty constant
|
||||
* @returns Shared pointer with initialised algorithm
|
||||
*/
|
||||
template<typename S = float, typename P = float>
|
||||
std::shared_ptr<Algorithm> algorithm(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const std::vector<S>& specializationConstants,
|
||||
const std::vector<P>& pushConstants)
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
|
||||
|
||||
std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
|
||||
this->mDevice,
|
||||
tensors,
|
||||
spirv,
|
||||
workgroup,
|
||||
specializationConstants,
|
||||
pushConstants) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
this->mManagedAlgorithms.push_back(algorithm);
|
||||
}
|
||||
|
||||
return algorithm;
|
||||
}
|
||||
|
||||
/**
|
||||
* Destroy the GPU resources and all managed resources by manager.
|
||||
|
|
|
|||
|
|
@ -5,30 +5,6 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
Algorithm::Algorithm(std::shared_ptr<vk::Device> device,
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants,
|
||||
const Constants& pushConstants)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
|
||||
|
||||
this->mDevice = device;
|
||||
|
||||
if (tensors.size() && spirv.size()) {
|
||||
KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and "
|
||||
"spirv size: {}",
|
||||
tensors.size(),
|
||||
spirv.size());
|
||||
this->rebuild(
|
||||
tensors, spirv, workgroup, specializationConstants, pushConstants);
|
||||
} else {
|
||||
KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or "
|
||||
"spirv so not rebuilding vulkan components");
|
||||
}
|
||||
}
|
||||
|
||||
Algorithm::~Algorithm()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destructor started");
|
||||
|
|
@ -36,33 +12,6 @@ Algorithm::~Algorithm()
|
|||
this->destroy();
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants,
|
||||
const Constants& pushConstants)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm rebuild started");
|
||||
|
||||
this->mTensors = tensors;
|
||||
this->mSpirv = spirv;
|
||||
this->mSpecializationConstants = specializationConstants;
|
||||
this->mPushConstants = pushConstants;
|
||||
this->setWorkgroup(workgroup,
|
||||
this->mTensors.size() ? this->mTensors[0]->size() : 1);
|
||||
|
||||
// Descriptor pool is created first so if available then destroy all before
|
||||
// rebuild
|
||||
if (this->isInit()) {
|
||||
this->destroy();
|
||||
}
|
||||
|
||||
this->createParameters();
|
||||
this->createShaderModule();
|
||||
this->createPipeline();
|
||||
}
|
||||
|
||||
bool
|
||||
Algorithm::isInit()
|
||||
{
|
||||
|
|
@ -74,6 +23,13 @@ Algorithm::isInit()
|
|||
void
|
||||
Algorithm::destroy()
|
||||
{
|
||||
// We don't have to free memory on destroy as it's freed by the commandBuffer destructor
|
||||
// if (this->mPushConstantsData) {
|
||||
// free(this->mPushConstantsData);
|
||||
// }
|
||||
// if (this->mSpecializationConstantsData) {
|
||||
// free(this->mSpecializationConstantsData);
|
||||
// }
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_WARN("Kompute Algorithm destroy function reached with null "
|
||||
|
|
@ -279,10 +235,10 @@ Algorithm::createPipeline()
|
|||
this->mDescriptorSetLayout.get());
|
||||
|
||||
vk::PushConstantRange pushConstantRange;
|
||||
if (this->mPushConstants.size()) {
|
||||
if (this->mPushConstantsSize) {
|
||||
pushConstantRange.setStageFlags(vk::ShaderStageFlagBits::eCompute);
|
||||
pushConstantRange.setOffset(0);
|
||||
pushConstantRange.setSize(sizeof(float) * this->mPushConstants.size());
|
||||
pushConstantRange.setSize(this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize);
|
||||
|
||||
pipelineLayoutInfo.setPushConstantRangeCount(1);
|
||||
pipelineLayoutInfo.setPPushConstantRanges(&pushConstantRange);
|
||||
|
|
@ -295,11 +251,11 @@ Algorithm::createPipeline()
|
|||
|
||||
std::vector<vk::SpecializationMapEntry> specializationEntries;
|
||||
|
||||
for (uint32_t i = 0; i < this->mSpecializationConstants.size(); i++) {
|
||||
for (uint32_t i = 0; i < this->mSpecializationConstantsSize; i++) {
|
||||
vk::SpecializationMapEntry specializationEntry(
|
||||
static_cast<uint32_t>(i),
|
||||
static_cast<uint32_t>(sizeof(float) * i),
|
||||
sizeof(float));
|
||||
static_cast<uint32_t>(this->mSpecializationConstantsDataTypeMemorySize * i),
|
||||
this->mSpecializationConstantsDataTypeMemorySize);
|
||||
|
||||
specializationEntries.push_back(specializationEntry);
|
||||
}
|
||||
|
|
@ -309,8 +265,8 @@ Algorithm::createPipeline()
|
|||
vk::SpecializationInfo specializationInfo(
|
||||
static_cast<uint32_t>(specializationEntries.size()),
|
||||
specializationEntries.data(),
|
||||
sizeof(float) * this->mSpecializationConstants.size(),
|
||||
this->mSpecializationConstants.data());
|
||||
this->mSpecializationConstantsDataTypeMemorySize * this->mSpecializationConstantsSize,
|
||||
this->mSpecializationConstantsData);
|
||||
|
||||
vk::PipelineShaderStageCreateInfo shaderStage(
|
||||
vk::PipelineShaderStageCreateFlags(),
|
||||
|
|
@ -381,15 +337,15 @@ Algorithm::recordBindCore(const vk::CommandBuffer& commandBuffer)
|
|||
void
|
||||
Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
if (this->mPushConstants.size()) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}",
|
||||
this->mPushConstants.size());
|
||||
if (this->mPushConstantsSize) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding push constants memory size: {}",
|
||||
this->mPushConstantsSize * this->mPushConstantsDataTypeMemorySize);
|
||||
|
||||
commandBuffer.pushConstants(*this->mPipelineLayout,
|
||||
vk::ShaderStageFlagBits::eCompute,
|
||||
0,
|
||||
this->mPushConstants.size() * sizeof(float),
|
||||
this->mPushConstants.data());
|
||||
this->mPushConstantsSize * this->mPushConstantsDataTypeMemorySize,
|
||||
this->mPushConstantsData);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -426,39 +382,12 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize)
|
|||
this->mWorkgroup[2]);
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::setPush(const Constants& pushConstants)
|
||||
{
|
||||
|
||||
if (pushConstants.size() != this->mPushConstants.size()) {
|
||||
throw std::runtime_error(
|
||||
fmt::format("Kompute Algorithm push "
|
||||
"constant provided is size {} but expected size {}",
|
||||
pushConstants.size(),
|
||||
this->mPushConstants.size()));
|
||||
}
|
||||
|
||||
this->mPushConstants = pushConstants;
|
||||
}
|
||||
|
||||
// Returns a read-only reference to the workgroup stored on this algorithm.
const Workgroup&
Algorithm::getWorkgroup()
{
    const Workgroup& currentWorkgroup = this->mWorkgroup;
    return currentWorkgroup;
}
|
||||
|
||||
const Constants&
|
||||
Algorithm::getSpecializationConstants()
|
||||
{
|
||||
return this->mSpecializationConstants;
|
||||
}
|
||||
|
||||
const Constants&
|
||||
Algorithm::getPush()
|
||||
{
|
||||
return this->mPushConstants;
|
||||
}
|
||||
|
||||
const std::vector<std::shared_ptr<Tensor>>&
|
||||
Algorithm::getTensors()
|
||||
{
|
||||
|
|
|
|||
|
|
@ -422,31 +422,6 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
|
|||
KP_LOG_DEBUG("Kompute Manager compute queue obtained");
|
||||
}
|
||||
|
||||
std::shared_ptr<Algorithm>
|
||||
Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants,
|
||||
const Constants& pushConstants)
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
|
||||
|
||||
std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
|
||||
this->mDevice,
|
||||
tensors,
|
||||
spirv,
|
||||
workgroup,
|
||||
specializationConstants,
|
||||
pushConstants) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
this->mManagedAlgorithms.push_back(algorithm);
|
||||
}
|
||||
|
||||
return algorithm;
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Manager::sequence(uint32_t queueIndex, uint32_t totalTimestamps)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -5,18 +5,14 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
|
||||
const kp::Constants& pushConstants)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
|
||||
|
||||
this->mAlgorithm = algorithm;
|
||||
this->mPushConstants = pushConstants;
|
||||
}
|
||||
|
||||
// Releases the push constants buffer held by this operation, if there is one.
OpAlgoDispatch::~OpAlgoDispatch()
{
    KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started");

    // Nothing was stored, so there is nothing to release.
    if (!this->mPushConstantsData) {
        return;
    }

    KP_LOG_DEBUG("Kompute freeing push constants data");
    free(this->mPushConstantsData);
}
|
||||
|
||||
void
|
||||
|
|
@ -35,8 +31,11 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer)
|
|||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
}
|
||||
|
||||
if (this->mPushConstants.size()) {
|
||||
this->mAlgorithm->setPush(this->mPushConstants);
|
||||
if (this->mPushConstantsSize) {
|
||||
this->mAlgorithm->setPushConstants(
|
||||
this->mPushConstantsData,
|
||||
this->mPushConstantsSize,
|
||||
this->mPushConstantsDataTypeMemorySize);
|
||||
}
|
||||
|
||||
this->mAlgorithm->recordBindCore(commandBuffer);
|
||||
|
|
|
|||
|
|
@ -24,19 +24,37 @@ class Algorithm
|
|||
* @param spirv (optional) The spirv code to use to create the algorithm
|
||||
* @param workgroup (optional) The kp::Workgroup to use for the dispatch
|
||||
* which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
|
||||
* @param specializationConstants (optional) The kp::Constants to use to
|
||||
* @param specializationConstants (optional) The templatable param is to be used to
|
||||
* initialize the specialization constants which cannot be changed once set.
|
||||
* @param pushConstants (optional) The kp::Constants to use when
|
||||
* @param pushConstants (optional) This templatable param is to be used when
|
||||
* initializing the pipeline, which set the size of the push constants -
|
||||
* these can be modified but all new values must have the same vector size
|
||||
* as this initial value.
|
||||
* these can be modified but all new values must have the same data type and length
|
||||
* as otherwise it will result in errors.
|
||||
*/
|
||||
template<typename S = float, typename P = float>
|
||||
Algorithm(std::shared_ptr<vk::Device> device,
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
const std::vector<S>& specializationConstants = {},
|
||||
const std::vector<P>& pushConstants = {})
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
|
||||
|
||||
this->mDevice = device;
|
||||
|
||||
if (tensors.size() && spirv.size()) {
|
||||
KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and "
|
||||
"spirv size: {}",
|
||||
tensors.size(),
|
||||
spirv.size());
|
||||
this->rebuild(
|
||||
tensors, spirv, workgroup, specializationConstants, pushConstants);
|
||||
} else {
|
||||
KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or "
|
||||
"spirv so not rebuilding vulkan components");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Rebuild function to reconstruct algorithm with configuration parameters
|
||||
|
|
@ -46,18 +64,64 @@ class Algorithm
|
|||
* @param spirv The spirv code to use to create the algorithm
|
||||
* @param workgroup (optional) The kp::Workgroup to use for the dispatch
|
||||
* which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
|
||||
* @param specializationConstants (optional) The kp::Constants to use to
|
||||
* @param specializationConstants (optional) The std::vector<float> to use to
|
||||
* initialize the specialization constants which cannot be changed once set.
|
||||
* @param pushConstants (optional) The kp::Constants to use when
|
||||
* @param pushConstants (optional) The std::vector<float> to use when
|
||||
* initializing the pipeline, which set the size of the push constants -
|
||||
* these can be modified but all new values must have the same vector size
|
||||
* as this initial value.
|
||||
*/
|
||||
template<typename S = float, typename P = float>
|
||||
void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
const std::vector<S>& specializationConstants = {},
|
||||
const std::vector<P>& pushConstants = {})
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm rebuild started");
|
||||
|
||||
this->mTensors = tensors;
|
||||
this->mSpirv = spirv;
|
||||
|
||||
if (specializationConstants.size()) {
|
||||
if (this->mSpecializationConstantsData) {
|
||||
free(this->mSpecializationConstantsData);
|
||||
}
|
||||
uint32_t memorySize = sizeof(decltype(specializationConstants.back()));
|
||||
uint32_t size = specializationConstants.size();
|
||||
uint32_t totalSize = size * memorySize;
|
||||
this->mSpecializationConstantsData = malloc(totalSize);
|
||||
memcpy(this->mSpecializationConstantsData, specializationConstants.data(), totalSize);
|
||||
this->mSpecializationConstantsDataTypeMemorySize = memorySize;
|
||||
this->mSpecializationConstantsSize = size;
|
||||
}
|
||||
|
||||
if (pushConstants.size()) {
|
||||
if (this->mPushConstantsData) {
|
||||
free(this->mPushConstantsData);
|
||||
}
|
||||
uint32_t memorySize = sizeof(decltype(pushConstants.back()));
|
||||
uint32_t size = pushConstants.size();
|
||||
uint32_t totalSize = size * memorySize;
|
||||
this->mPushConstantsData = malloc(totalSize);
|
||||
memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
|
||||
this->mPushConstantsDataTypeMemorySize = memorySize;
|
||||
this->mPushConstantsSize = size;
|
||||
}
|
||||
|
||||
this->setWorkgroup(workgroup,
|
||||
this->mTensors.size() ? this->mTensors[0]->size() : 1);
|
||||
|
||||
// Descriptor pool is created first so if available then destroy all before
|
||||
// rebuild
|
||||
if (this->isInit()) {
|
||||
this->destroy();
|
||||
}
|
||||
|
||||
this->createParameters();
|
||||
this->createShaderModule();
|
||||
this->createPipeline();
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructor for Algorithm which is responsible for freeing and destroying
|
||||
|
|
@ -112,11 +176,48 @@ class Algorithm
|
|||
* Sets the push constants to the new value provided to use in the next
|
||||
* bindPush()
|
||||
*
|
||||
* @param The kp::Constant to use to set the push constants to use in the
|
||||
* @param pushConstants The templatable vector is to be used to set the push constants to use in the
|
||||
* next bindPush(...) calls. The constants provided must be of the same size
|
||||
* as the ones created during initialization.
|
||||
*/
|
||||
void setPush(const Constants& pushConstants);
|
||||
template<typename T>
|
||||
void setPushConstants(const std::vector<T>& pushConstants)
|
||||
{
|
||||
uint32_t memorySize = sizeof(decltype(pushConstants.back()));
|
||||
uint32_t size = pushConstants.size();
|
||||
|
||||
this->setPushConstants(pushConstants.data(), size, memorySize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the push constants to the new value provided to use in the next
|
||||
* bindPush() with the raw memory block location and memory size to be used.
|
||||
*
|
||||
* @param data The raw data point to copy the data from, without modifying the pointer.
|
||||
* @param size The number of data elements provided in the data
|
||||
* @param memorySize The memory size of each of the data elements in bytes.
|
||||
*/
|
||||
void setPushConstants(void* data, uint32_t size, uint32_t memorySize) {
|
||||
|
||||
uint32_t totalSize = memorySize * size;
|
||||
uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize;
|
||||
|
||||
if (totalSize != previousTotalSize) {
|
||||
throw std::runtime_error(
|
||||
fmt::format("Kompute Algorithm push "
|
||||
"constant total memory size provided is {} but expected {} bytes",
|
||||
totalSize,
|
||||
previousTotalSize));
|
||||
}
|
||||
if (this->mPushConstantsData) {
|
||||
free(this->mPushConstantsData);
|
||||
}
|
||||
|
||||
this->mPushConstantsData = malloc(totalSize);
|
||||
memcpy(this->mPushConstantsData, data, totalSize);
|
||||
this->mPushConstantsDataTypeMemorySize = memorySize;
|
||||
this->mPushConstantsSize = size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the current workgroup from the algorithm.
|
||||
|
|
@ -129,15 +230,25 @@ class Algorithm
|
|||
/**
|
||||
* Gets the specialization constants of the current algorithm.
|
||||
*
|
||||
* @returns The kp::Constants currently set for specialization constants
|
||||
* @returns The std::vector<float> currently set for specialization constants
|
||||
*/
|
||||
const Constants& getSpecializationConstants();
|
||||
template<typename T>
|
||||
const std::vector<T> getSpecializationConstants()
|
||||
{
|
||||
return { (T*)this->mSpecializationConstantsData,
|
||||
((T*)this->mSpecializationConstantsData) + this->mSpecializationConstantsSize };
|
||||
}
|
||||
/**
|
||||
* Gets the push constants of the current algorithm.
|
||||
*
|
||||
* @returns The kp::Constants currently set for push constants
|
||||
* @returns The std::vector<float> currently set for push constants
|
||||
*/
|
||||
const Constants& getPush();
|
||||
template<typename T>
|
||||
const std::vector<T> getPushConstants()
|
||||
{
|
||||
return { (T*)this->mPushConstantsData,
|
||||
((T*)this->mPushConstantsData) + this->mPushConstantsSize };
|
||||
}
|
||||
/**
|
||||
* Gets the current tensors that are used in the algorithm.
|
||||
*
|
||||
|
|
@ -170,8 +281,12 @@ class Algorithm
|
|||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<uint32_t> mSpirv;
|
||||
Constants mSpecializationConstants;
|
||||
Constants mPushConstants;
|
||||
void* mSpecializationConstantsData = nullptr;
|
||||
uint32_t mSpecializationConstantsDataTypeMemorySize = 0;
|
||||
uint32_t mSpecializationConstantsSize = 0;
|
||||
void* mPushConstantsData = nullptr;
|
||||
uint32_t mPushConstantsDataTypeMemorySize = 0;
|
||||
uint32_t mPushConstantsSize = 0;
|
||||
Workgroup mWorkgroup;
|
||||
|
||||
// Create util functions
|
||||
|
|
|
|||
|
|
@ -124,16 +124,16 @@ class Manager
|
|||
}
|
||||
|
||||
/**
|
||||
* Create a managed algorithm that will be destroyed by this manager
|
||||
* if it hasn't been destroyed by its reference count going to zero.
|
||||
* Default non-template function that can be used to create algorithm objects
|
||||
* which provides default types to the push and spec constants as floats.
|
||||
*
|
||||
* @param tensors (optional) The tensors to initialise the algorithm with
|
||||
* @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
|
||||
* @param workgroup (optional) kp::Workgroup for algorithm to use, and
|
||||
* defaults to (tensor[0].size(), 1, 1)
|
||||
* @param specializationConstants (optional) kp::Constant to use for
|
||||
* @param specializationConstants (optional) float vector to use for
|
||||
* specialization constants, and defaults to an empty constant
|
||||
* @param pushConstants (optional) kp::Constant to use for push constants,
|
||||
* @param pushConstants (optional) float vector to use for push constants,
|
||||
* and defaults to an empty constant
|
||||
* @returns Shared pointer with initialised algorithm
|
||||
*/
|
||||
|
|
@ -141,8 +141,51 @@ class Manager
|
|||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
const std::vector<float>& specializationConstants = {},
|
||||
const std::vector<float>& pushConstants = {})
|
||||
{
|
||||
return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a managed algorithm that will be destroyed by this manager
|
||||
* if it hasn't been destroyed by its reference count going to zero.
|
||||
*
|
||||
* @param tensors (optional) The tensors to initialise the algorithm with
|
||||
* @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
|
||||
* @param workgroup (optional) kp::Workgroup for algorithm to use, and
|
||||
* defaults to (tensor[0].size(), 1, 1)
|
||||
* @param specializationConstants (optional) templatable vector parameter to use for
|
||||
* specialization constants, and defaults to an empty constant
|
||||
* @param pushConstants (optional) templatable vector parameter to use for push constants,
|
||||
* and defaults to an empty constant
|
||||
* @returns Shared pointer with initialised algorithm
|
||||
*/
|
||||
template<typename S = float, typename P = float>
|
||||
std::shared_ptr<Algorithm> algorithm(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const std::vector<S>& specializationConstants,
|
||||
const std::vector<P>& pushConstants)
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
|
||||
|
||||
std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
|
||||
this->mDevice,
|
||||
tensors,
|
||||
spirv,
|
||||
workgroup,
|
||||
specializationConstants,
|
||||
pushConstants) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
this->mManagedAlgorithms.push_back(algorithm);
|
||||
}
|
||||
|
||||
return algorithm;
|
||||
}
|
||||
|
||||
/**
|
||||
* Destroy the GPU resources and all managed resources by manager.
|
||||
|
|
|
|||
|
|
@ -25,8 +25,24 @@ class OpAlgoDispatch : public OpBase
|
|||
* @param algorithm The algorithm object to use for dispatch
|
||||
* @param pushConstants The push constants to use for override
|
||||
*/
|
||||
template<typename T = float>
|
||||
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
|
||||
const kp::Constants& pushConstants = {});
|
||||
const std::vector<T>& pushConstants = {})
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
|
||||
|
||||
this->mAlgorithm = algorithm;
|
||||
|
||||
if (pushConstants.size()) {
|
||||
uint32_t memorySize = sizeof(decltype(pushConstants.back()));
|
||||
uint32_t size = pushConstants.size();
|
||||
uint32_t totalSize = size * memorySize;
|
||||
this->mPushConstantsData = malloc(totalSize);
|
||||
memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
|
||||
this->mPushConstantsDataTypeMemorySize = memorySize;
|
||||
this->mPushConstantsSize = size;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
|
|
@ -63,7 +79,9 @@ class OpAlgoDispatch : public OpBase
|
|||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
Constants mPushConstants;
|
||||
void* mPushConstantsData = nullptr;
|
||||
uint32_t mPushConstantsDataTypeMemorySize = 0;
|
||||
uint32_t mPushConstantsSize = 0;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ class OpMult : public OpAlgoDispatch
|
|||
(uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv +
|
||||
kp::shader_data::shaders_glsl_opmult_comp_spv_len));
|
||||
|
||||
algorithm->rebuild(tensors, spirv);
|
||||
algorithm->rebuild<>(tensors, spirv);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
|
|||
test_shaders_glsl_test_logistic_regression_comp_spv_len));
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algorithm = mgr.algorithm(
|
||||
params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 }));
|
||||
params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence()
|
||||
|
|
@ -127,7 +127,7 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
|
|||
shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({ 5.0 }));
|
||||
mgr.algorithm(params, spirv, kp::Workgroup(), std::vector<float>({ 5.0 }));
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence()
|
||||
|
|
|
|||
|
|
@ -49,9 +49,9 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
|
|||
};
|
||||
|
||||
kp::Workgroup workgroup({ 3, 1, 1 });
|
||||
kp::Constants specConsts({ 2 });
|
||||
kp::Constants pushConstsA({ 2.0 });
|
||||
kp::Constants pushConstsB({ 3.0 });
|
||||
std::vector<float> specConsts({ 2 });
|
||||
std::vector<float> pushConstsA({ 2.0 });
|
||||
std::vector<float> pushConstsB({ 3.0 });
|
||||
|
||||
auto algorithm = mgr.algorithm(params,
|
||||
compileSource(shader),
|
||||
|
|
@ -220,7 +220,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
|
|||
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
||||
TEST(TestAlgoUtils, TestAlgorithmUtilFunctions)
|
||||
TEST(TestMultipleAlgoExecutions, TestAlgorithmUtilFunctions)
|
||||
{
|
||||
|
||||
kp::Manager mgr;
|
||||
|
|
@ -263,8 +263,8 @@ TEST(TestAlgoUtils, TestAlgorithmUtilFunctions)
|
|||
};
|
||||
|
||||
kp::Workgroup workgroup({ 3, 1, 1 });
|
||||
kp::Constants specConsts({ 2 });
|
||||
kp::Constants pushConsts({ 2.0 });
|
||||
std::vector<float> specConsts({ 2 });
|
||||
std::vector<float> pushConsts({ 2.0 });
|
||||
|
||||
auto algorithm = mgr.algorithm(params,
|
||||
compileSource(shader),
|
||||
|
|
@ -273,6 +273,6 @@ TEST(TestAlgoUtils, TestAlgorithmUtilFunctions)
|
|||
pushConsts);
|
||||
|
||||
EXPECT_EQ(algorithm->getWorkgroup(), workgroup);
|
||||
EXPECT_EQ(algorithm->getPush(), pushConsts);
|
||||
EXPECT_EQ(algorithm->getSpecializationConstants(), specConsts);
|
||||
EXPECT_EQ(algorithm->getPushConstants<float>(), pushConsts);
|
||||
EXPECT_EQ(algorithm->getSpecializationConstants<float>(), specConsts);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,11 +44,11 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride)
|
|||
// We need to run this in sequence to avoid race condition
|
||||
// We can't use atomicAdd as swiftshader doesn't support it for
|
||||
// float
|
||||
sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.1, 0.2, 0.3 });
|
||||
sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
|
||||
sq->eval<kp::OpAlgoDispatch>(algo, std::vector<float>{ 0.1, 0.2, 0.3 });
|
||||
sq->eval<kp::OpAlgoDispatch>(algo, std::vector<float>{ 0.3, 0.2, 0.1 });
|
||||
sq->eval<kp::OpTensorSyncLocal>({ tensor });
|
||||
|
||||
EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 }));
|
||||
EXPECT_EQ(tensor->vector(), std::vector<float>({ 0.4, 0.4, 0.4 }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -90,10 +90,10 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride)
|
|||
// We can't use atomicAdd as swiftshader doesn't support it for
|
||||
// float
|
||||
sq->eval<kp::OpAlgoDispatch>(algo);
|
||||
sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
|
||||
sq->eval<kp::OpAlgoDispatch>(algo, std::vector<float>{ 0.3, 0.2, 0.1 });
|
||||
sq->eval<kp::OpTensorSyncLocal>({ tensor });
|
||||
|
||||
EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 }));
|
||||
EXPECT_EQ(tensor->vector(), std::vector<float>({ 0.4, 0.4, 0.4 }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -132,8 +132,235 @@ TEST(TestPushConstants, TestConstantsWrongSize)
|
|||
sq = mgr.sequence()->record<kp::OpTensorSyncDevice>({ tensor });
|
||||
|
||||
EXPECT_THROW(sq->record<kp::OpAlgoDispatch>(
|
||||
algo, kp::Constants{ 0.1, 0.2, 0.3 }),
|
||||
algo, std::vector<float>{ 0.1, 0.2, 0.3 }),
|
||||
std::runtime_error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Ensure different types are considered for push constants
|
||||
// TEST(TestPushConstants, TestConstantsWrongType)
|
||||
// {
|
||||
// {
|
||||
// std::string shader(R"(
|
||||
// #version 450
|
||||
// layout(push_constant) uniform PushConstants {
|
||||
// float x;
|
||||
// float y;
|
||||
// float z;
|
||||
// } pcs;
|
||||
// layout (local_size_x = 1) in;
|
||||
// layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
// void main() {
|
||||
// pa[0] += pcs.x;
|
||||
// pa[1] += pcs.y;
|
||||
// pa[2] += pcs.z;
|
||||
// })");
|
||||
//
|
||||
// std::vector<uint32_t> spirv = compileSource(shader);
|
||||
//
|
||||
// std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
//
|
||||
// {
|
||||
// kp::Manager mgr;
|
||||
//
|
||||
// std::shared_ptr<kp::TensorT<float>> tensor =
|
||||
// mgr.tensor({ 0, 0, 0 });
|
||||
//
|
||||
// std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
|
||||
// { tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 });
|
||||
//
|
||||
// sq = mgr.sequence()->record<kp::OpTensorSyncDevice>({ tensor });
|
||||
//
|
||||
// EXPECT_THROW(sq->record<kp::OpAlgoDispatch>(
|
||||
// algo, std::vector<uint32_t>{ 1, 2, 3 }),
|
||||
// std::runtime_error);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// Checks that one push-constant block can carry members of different types
// (float, uint, int) by passing them as a host-side struct.
// Each dispatch adds the pushed values to a three-element float buffer; the
// uint member is offset by 2147483000 in the shader before being added.
TEST(TestPushConstants, TestConstantsMixedTypes)
|
||||
{
|
||||
{
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout(push_constant) uniform PushConstants {
|
||||
float x;
|
||||
uint y;
|
||||
int z;
|
||||
} pcs;
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
pa[0] += pcs.x;
|
||||
pa[1] += pcs.y - 2147483000;
|
||||
pa[2] += pcs.z;
|
||||
})");
|
||||
|
||||
// Host-side counterpart of the shader's PushConstants block: same member
// names, same order, float / uint32_t / int32_t.
struct TestConsts{
|
||||
float x;
|
||||
uint32_t y;
|
||||
int32_t z;
|
||||
};
|
||||
|
||||
std::vector<uint32_t> spirv = compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::TensorT<float>> tensor =
|
||||
mgr.tensorT<float>({ 0, 0, 0 });
|
||||
|
||||
// NOTE(review): the template arguments presumably select the element
// types of the specialization constants (float) and push constants
// (TestConsts); `{}` and `{{ 0, 0, 0 }}` would then be empty spec
// constants and a zeroed default push constant - confirm against
// kp::Manager::algorithm.
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<float, TestConsts>(
|
||||
{ tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }});
|
||||
|
||||
sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
|
||||
|
||||
// We need to run this in sequence to avoid race condition
|
||||
// We can't use atomicAdd as swiftshader doesn't support it for
|
||||
// float
|
||||
// 2147483650 is above INT32_MAX (2147483647), so the uint member only
// round-trips correctly if it is really handled as unsigned.
sq->eval<kp::OpAlgoDispatch>(algo, std::vector<TestConsts>{{ 15.32, 2147483650, 10 }});
|
||||
sq->eval<kp::OpAlgoDispatch>(algo, std::vector<TestConsts>{{ 30.32, 2147483650, -3 }});
|
||||
sq->eval<kp::OpTensorSyncLocal>({ tensor });
|
||||
|
||||
// Expected sums over both dispatches:
//   pa[0] = 15.32 + 30.32                  = 45.64
//   pa[1] = 2 * (2147483650 - 2147483000)  = 1300
//   pa[2] = 10 + (-3)                      = 7
EXPECT_EQ(tensor->vector(), std::vector<float>({ 45.64, 1300, 7 }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks push constants declared as signed 32-bit integers.
// The shader adds three int push constants to a three-element int buffer;
// the test dispatches twice with negative values and checks the running sum.
TEST(TestPushConstants, TestConstantsInt)
|
||||
{
|
||||
{
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout(push_constant) uniform PushConstants {
|
||||
int x;
|
||||
int y;
|
||||
int z;
|
||||
} pcs;
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { int pa[]; };
|
||||
void main() {
|
||||
pa[0] += pcs.x;
|
||||
pa[1] += pcs.y;
|
||||
pa[2] += pcs.z;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
// The buffer starts at -1 in every slot, so the expected result below
// includes this initial value.
std::shared_ptr<kp::TensorT<int32_t>> tensor =
|
||||
mgr.tensorT<int32_t>({ -1, -1, -1 });
|
||||
|
||||
// NOTE(review): the two template arguments presumably are the
// specialization-constant and push-constant element types - confirm
// against kp::Manager::algorithm.
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<int32_t , int32_t>(
|
||||
{ tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }});
|
||||
|
||||
sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
|
||||
|
||||
// We need to run this in sequence to avoid race condition
|
||||
// We can't use atomicAdd as swiftshader doesn't support it for
|
||||
// float
|
||||
// NOTE(review): the wording above matches the float push-constant tests;
// this buffer is int, so confirm whether the atomicAdd remark still applies.
sq->eval<kp::OpAlgoDispatch>(algo, std::vector<int32_t>{{ -1, -1, -1 }});
|
||||
sq->eval<kp::OpAlgoDispatch>(algo, std::vector<int32_t>{{ -1, -1, -1 }});
|
||||
sq->eval<kp::OpTensorSyncLocal>({ tensor });
|
||||
|
||||
// Initial -1 plus two dispatches of -1 each gives -3 per slot.
EXPECT_EQ(tensor->vector(), std::vector<int32_t>({ -3, -3, -3 }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks push constants declared as unsigned 32-bit integers.
// The shader adds three uint push constants to a three-element uint buffer;
// the pushed values are chosen above INT32_MAX.
TEST(TestPushConstants, TestConstantsUnsignedInt)
|
||||
{
|
||||
{
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout(push_constant) uniform PushConstants {
|
||||
uint x;
|
||||
uint y;
|
||||
uint z;
|
||||
} pcs;
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { uint pa[]; };
|
||||
void main() {
|
||||
pa[0] += pcs.x;
|
||||
pa[1] += pcs.y;
|
||||
pa[2] += pcs.z;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::TensorT<uint32_t>> tensor =
|
||||
mgr.tensorT<uint32_t>({ 0, 0, 0 });
|
||||
|
||||
// NOTE(review): the two template arguments presumably are the
// specialization-constant and push-constant element types - confirm
// against kp::Manager::algorithm.
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<uint32_t , uint32_t>(
|
||||
{ tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }});
|
||||
|
||||
sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
|
||||
|
||||
// We need to run this in sequence to avoid race condition
|
||||
// We can't use atomicAdd as swiftshader doesn't support it for
|
||||
// float
|
||||
// NOTE(review): the wording above matches the float push-constant tests;
// this buffer is uint, so confirm whether the atomicAdd remark still applies.
// 2147483650 is above INT32_MAX (2147483647), so a signed interpretation
// of the push constants would not produce the expected sum.
sq->eval<kp::OpAlgoDispatch>(algo, std::vector<uint32_t>{{ 2147483650, 2147483650, 2147483650 }});
|
||||
sq->eval<kp::OpAlgoDispatch>(algo, std::vector<uint32_t>{{ 5, 5, 5 }});
|
||||
sq->eval<kp::OpTensorSyncLocal>({ tensor });
|
||||
|
||||
// 0 + 2147483650 + 5 = 2147483655 per slot.
EXPECT_EQ(tensor->vector(), std::vector<uint32_t>({ 2147483655, 2147483655, 2147483655 }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks push constants declared as 64-bit doubles.
// The shader adds three double push constants to a three-element double
// buffer; the same values are pushed twice.
// The repository Makefile lists TestPushConstants.TestConstantsDouble in
// FILTER_TESTS, among the tests that do not work with swiftshader.
TEST(TestPushConstants, TestConstantsDouble)
|
||||
{
|
||||
{
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout(push_constant) uniform PushConstants {
|
||||
double x;
|
||||
double y;
|
||||
double z;
|
||||
} pcs;
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { double pa[]; };
|
||||
void main() {
|
||||
pa[0] += pcs.x;
|
||||
pa[1] += pcs.y;
|
||||
pa[2] += pcs.z;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::TensorT<double>> tensor =
|
||||
mgr.tensorT<double>({ 0, 0, 0 });
|
||||
|
||||
// NOTE(review): the two template arguments presumably are the
// specialization-constant and push-constant element types - confirm
// against kp::Manager::algorithm.
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<double, double>(
|
||||
{ tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }});
|
||||
|
||||
sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
|
||||
|
||||
// We need to run this in sequence to avoid race condition
|
||||
// We can't use atomicAdd as swiftshader doesn't support it for
|
||||
// float
|
||||
// NOTE(review): the wording above matches the float push-constant tests;
// this buffer is double, so confirm the remark is still accurate here.
sq->eval<kp::OpAlgoDispatch>(algo, std::vector<double>{{ 1.1111222233334444, 2.1111222233334444, 3.1111222233334444 }});
|
||||
sq->eval<kp::OpAlgoDispatch>(algo, std::vector<double>{{ 1.1111222233334444, 2.1111222233334444, 3.1111222233334444 }});
|
||||
sq->eval<kp::OpTensorSyncLocal>({ tensor });
|
||||
|
||||
// Each slot starts at 0 and receives the same value twice, so the
// expected result is double each pushed value; compared with exact equality.
EXPECT_EQ(tensor->vector(), std::vector<double>({ 2.2222444466668888, 4.2222444466668888, 6.2222444466668888 }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ TEST(TestSpecializationConstants, TestTwoConstants)
|
|||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
|
||||
tensorB };
|
||||
|
||||
kp::Constants spec = kp::Constants({ 5.0, 0.3 });
|
||||
std::vector<float> spec = std::vector<float>({ 5.0, 0.3 });
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm(params, spirv, {}, spec);
|
||||
|
|
@ -53,3 +53,52 @@ TEST(TestSpecializationConstants, TestTwoConstants)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks specialization constants supplied as signed 32-bit integers.
// The shader declares two int specialization constants (ids 0 and 1, both
// defaulting to 1) and writes the first into buffer `a` and the second into
// buffer `b` at the invocation's global x index.
TEST(TestSpecializationConstants, TestConstantsInt)
|
||||
{
|
||||
{
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (constant_id = 0) const int cOne = 1;
|
||||
layout (constant_id = 1) const int cTwo = 1;
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { int pa[]; };
|
||||
layout(set = 0, binding = 1) buffer b { int pb[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = cOne;
|
||||
pb[index] = cTwo;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::TensorT<int32_t>> tensorA =
|
||||
mgr.tensorT<int32_t>({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<int32_t>> tensorB =
|
||||
mgr.tensorT<int32_t>({ 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
|
||||
tensorB };
|
||||
|
||||
// Values that differ from the shader defaults (1, 1), so the
// assertions below fail if the overrides are not applied.
std::vector<int32_t> spec({ -1, -2 });
|
||||
|
||||
// NOTE(review): `{}` in the workgroup position presumably lets the
// library choose a default dispatch size (the assertions expect all
// three elements to be written), and the trailing `{}` is presumably
// an empty push-constant list - confirm against kp::Manager::algorithm.
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm(params, spirv, {}, spec, {});
|
||||
|
||||
sq = mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>(params)
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>(params)
|
||||
->eval();
|
||||
|
||||
// Every element of A should hold spec[0] and every element of B spec[1].
EXPECT_EQ(tensorA->vector(), std::vector<int32_t>({ -1, -1, -1 }));
|
||||
EXPECT_EQ(tensorB->vector(), std::vector<int32_t>({ -2, -2, -2 }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue