From 5d3795b539a7f23d23727abedfe5057ed8543b29 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 31 Oct 2020 17:36:05 +0000 Subject: [PATCH 01/39] Initial exploration of pybind11 for python bindings --- pybind/README.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 pybind/README.md diff --git a/pybind/README.md b/pybind/README.md new file mode 100644 index 000000000..7b0d89f0e --- /dev/null +++ b/pybind/README.md @@ -0,0 +1,2 @@ +# Python Bindings for Vulkan Kompute + From 68c119df4c02b4deff0b1596f1a4a21abeaef8e7 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 31 Oct 2020 18:54:10 +0000 Subject: [PATCH 02/39] Added pybind as a submodule --- .gitmodules | 3 +++ pybind/pybind11 | 1 + 2 files changed, 4 insertions(+) create mode 160000 pybind/pybind11 diff --git a/.gitmodules b/.gitmodules index 1c5db0adc..7365ba0fd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,6 @@ path = external/spdlog url = https://github.com/gabime/spdlog branch = v1.8.1 +[submodule "pybind/pybind11"] + path = pybind/pybind11 + url = https://github.com/pybind/pybind11 diff --git a/pybind/pybind11 b/pybind/pybind11 new file mode 160000 index 000000000..06a54018c --- /dev/null +++ b/pybind/pybind11 @@ -0,0 +1 @@ +Subproject commit 06a54018c8a9fd9a7be5f5b56414b5da9259f637 From ac06761f1bc69f66cd25d7aebc7912f5e3394c01 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 31 Oct 2020 19:00:18 +0000 Subject: [PATCH 03/39] Added basic version of example of python bindings --- pybind/CMakeLists.txt | 5 +++ pybind/setup.py | 73 +++++++++++++++++++++++++++++++++++++++++++ pybind/src/main.cpp | 40 ++++++++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 pybind/CMakeLists.txt create mode 100644 pybind/setup.py create mode 100644 pybind/src/main.cpp diff --git a/pybind/CMakeLists.txt b/pybind/CMakeLists.txt new file mode 100644 index 000000000..31449ec1c --- /dev/null +++ b/pybind/CMakeLists.txt @@ -0,0 +1,5 @@ 
+cmake_minimum_required(VERSION 2.8.12) +project(cmake_example) + +add_subdirectory(pybind11) +pybind11_add_module(cmake_example src/main.cpp) diff --git a/pybind/setup.py b/pybind/setup.py new file mode 100644 index 000000000..bd30b12b7 --- /dev/null +++ b/pybind/setup.py @@ -0,0 +1,73 @@ +import os +import re +import sys +import platform +import subprocess + +from setuptools import setup, Extension +from setuptools.command.build_ext import build_ext +from distutils.version import LooseVersion + + +class CMakeExtension(Extension): + def __init__(self, name, sourcedir=''): + Extension.__init__(self, name, sources=[]) + self.sourcedir = os.path.abspath(sourcedir) + + +class CMakeBuild(build_ext): + def run(self): + try: + out = subprocess.check_output(['cmake', '--version']) + except OSError: + raise RuntimeError("CMake must be installed to build the following extensions: " + + ", ".join(e.name for e in self.extensions)) + + if platform.system() == "Windows": + cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1)) + if cmake_version < '3.1.0': + raise RuntimeError("CMake >= 3.1.0 is required on Windows") + + for ext in self.extensions: + self.build_extension(ext) + + def build_extension(self, ext): + extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) + # required for auto-detection of auxiliary "native" libs + if not extdir.endswith(os.path.sep): + extdir += os.path.sep + + cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, + '-DPYTHON_EXECUTABLE=' + sys.executable] + + cfg = 'Debug' if self.debug else 'Release' + build_args = ['--config', cfg] + + if platform.system() == "Windows": + cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)] + if sys.maxsize > 2**32: + cmake_args += ['-A', 'x64'] + build_args += ['--', '/m'] + else: + cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] + build_args += ['--', '-j2'] + + env = os.environ.copy() + env['CXXFLAGS'] = '{} 
-DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), + self.distribution.get_version()) + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) + subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) + +setup( + name='cmake_example', + version='0.0.1', + author='Dean Moldovan', + author_email='dean0x7d@gmail.com', + description='A test project using pybind11 and CMake', + long_description='', + ext_modules=[CMakeExtension('cmake_example')], + cmdclass=dict(build_ext=CMakeBuild), + zip_safe=False, +) diff --git a/pybind/src/main.cpp b/pybind/src/main.cpp new file mode 100644 index 000000000..86ab58210 --- /dev/null +++ b/pybind/src/main.cpp @@ -0,0 +1,40 @@ +#include + +int add(int i, int j) { + return i + j; +} + +namespace py = pybind11; + +PYBIND11_MODULE(cmake_example, m) { + m.doc() = R"pbdoc( + Pybind11 example plugin + ----------------------- + + .. currentmodule:: cmake_example + + .. autosummary:: + :toctree: _generate + + add + subtract + )pbdoc"; + + m.def("add", &add, R"pbdoc( + Add two numbers + + Some other explanation about the add function. + )pbdoc"); + + m.def("subtract", [](int i, int j) { return i - j; }, R"pbdoc( + Subtract two numbers + + Some other explanation about the subtract function. 
+ )pbdoc"); + +#ifdef VERSION_INFO + m.attr("__version__") = VERSION_INFO; +#else + m.attr("__version__") = "dev"; +#endif +} From e3e111e07ffd0bf485fda1949a503c4b32c888ff Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 31 Oct 2020 19:09:01 +0000 Subject: [PATCH 04/39] UPdated modules --- .gitmodules | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitmodules b/.gitmodules index 7365ba0fd..c16e05825 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,3 +13,4 @@ [submodule "pybind/pybind11"] path = pybind/pybind11 url = https://github.com/pybind/pybind11 + From 281aabf05b28a1c27622914167264152a88d6b68 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 31 Oct 2020 19:10:09 +0000 Subject: [PATCH 05/39] Updated folder --- pybind/pybind11 | 1 - {pybind => python}/CMakeLists.txt | 0 {pybind => python}/README.md | 0 {pybind => python}/setup.py | 0 {pybind => python}/src/main.cpp | 0 5 files changed, 1 deletion(-) delete mode 160000 pybind/pybind11 rename {pybind => python}/CMakeLists.txt (100%) rename {pybind => python}/README.md (100%) rename {pybind => python}/setup.py (100%) rename {pybind => python}/src/main.cpp (100%) diff --git a/pybind/pybind11 b/pybind/pybind11 deleted file mode 160000 index 06a54018c..000000000 --- a/pybind/pybind11 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 06a54018c8a9fd9a7be5f5b56414b5da9259f637 diff --git a/pybind/CMakeLists.txt b/python/CMakeLists.txt similarity index 100% rename from pybind/CMakeLists.txt rename to python/CMakeLists.txt diff --git a/pybind/README.md b/python/README.md similarity index 100% rename from pybind/README.md rename to python/README.md diff --git a/pybind/setup.py b/python/setup.py similarity index 100% rename from pybind/setup.py rename to python/setup.py diff --git a/pybind/src/main.cpp b/python/src/main.cpp similarity index 100% rename from pybind/src/main.cpp rename to python/src/main.cpp From 816c5c7f5d10c4c453b0bbb9950f057311a0d68b Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo 
Date: Sat, 31 Oct 2020 19:12:23 +0000 Subject: [PATCH 06/39] Readded pybind module --- .gitmodules | 5 ++--- python/pybind11 | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) create mode 160000 python/pybind11 diff --git a/.gitmodules b/.gitmodules index c16e05825..33549db54 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,7 +10,6 @@ path = external/spdlog url = https://github.com/gabime/spdlog branch = v1.8.1 -[submodule "pybind/pybind11"] - path = pybind/pybind11 +[submodule "python/pybind11"] + path = python/pybind11 url = https://github.com/pybind/pybind11 - diff --git a/python/pybind11 b/python/pybind11 new file mode 160000 index 000000000..06a54018c --- /dev/null +++ b/python/pybind11 @@ -0,0 +1 @@ +Subproject commit 06a54018c8a9fd9a7be5f5b56414b5da9259f637 From f86e5b1341850ea0606fb5ab74fa8d960765350e Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 06:53:51 +0000 Subject: [PATCH 07/39] Updated setup.py to build base python setup --- CMakeLists.txt | 11 ++++++++--- python/CMakeLists.txt | 3 +-- python/src/main.cpp | 29 +---------------------------- python/setup.py => setup.py | 3 +++ 4 files changed, 13 insertions(+), 33 deletions(-) rename python/setup.py => setup.py (95%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 52e45fcf9..6f1338b87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.4.1) -project(kompute VERSION 0.3.0) +project(kompute VERSION 0.4.2) set(CMAKE_CXX_STANDARD 14) @@ -13,6 +13,7 @@ option(KOMPUTE_OPT_BUILD_SHADERS "Enable if you want to re-build all shader file option(KOMPUTE_OPT_BUILD_SINGLE_HEADER "Enable if you want to build the single header file" 0) option(KOMPUTE_OPT_INSTALL "Enable if you want to enable installation" 0) # Build options +option(KOMPUTE_OPT_BUILD_PYTHON "Enable if you want to build python bindings" 0) option(KOMPUTE_OPT_ENABLE_SPDLOG "Extra compile flags for Kompute, see docs for full list" 0) option(KOMPUTE_OPT_REPO_SUBMODULE_BUILD, "Use 
the submodule repos instead of external package manager" 0) option(KOMPUTE_OPT_ANDOID_BUILD "Enable android compilation flags required" 0) @@ -43,12 +44,16 @@ endfunction() add_subdirectory(src) +if(KOMPUTE_OPT_BUILD_TESTS) + add_subdirectory(test) +endif() + if(KOMPUTE_OPT_BUILD_DOCS) set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/config" ${CMAKE_MODULE_PATH}) add_subdirectory(docs) endif() -if(KOMPUTE_OPT_BUILD_TESTS) - add_subdirectory(test) +if(KOMPUTE_OPT_BUILD_PYTHON) + add_subdirectory(python) endif() diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 31449ec1c..f0b4949ac 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,5 +1,4 @@ -cmake_minimum_required(VERSION 2.8.12) -project(cmake_example) add_subdirectory(pybind11) pybind11_add_module(cmake_example src/main.cpp) + diff --git a/python/src/main.cpp b/python/src/main.cpp index 86ab58210..1330bab19 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -1,36 +1,9 @@ #include - -int add(int i, int j) { - return i + j; -} +#include "kompute/Kompute.hpp" namespace py = pybind11; PYBIND11_MODULE(cmake_example, m) { - m.doc() = R"pbdoc( - Pybind11 example plugin - ----------------------- - - .. currentmodule:: cmake_example - - .. autosummary:: - :toctree: _generate - - add - subtract - )pbdoc"; - - m.def("add", &add, R"pbdoc( - Add two numbers - - Some other explanation about the add function. - )pbdoc"); - - m.def("subtract", [](int i, int j) { return i - j; }, R"pbdoc( - Subtract two numbers - - Some other explanation about the subtract function. 
- )pbdoc"); #ifdef VERSION_INFO m.attr("__version__") = VERSION_INFO; diff --git a/python/setup.py b/setup.py similarity index 95% rename from python/setup.py rename to setup.py index bd30b12b7..07d769b5b 100644 --- a/python/setup.py +++ b/setup.py @@ -38,6 +38,8 @@ class CMakeBuild(build_ext): extdir += os.path.sep cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, + '-DKOMPUTE_OPT_BUILD_PYTHON=1', + '-DKOMPUTE_OPT_BUILD_SINGLE_HEADER=1', '-DPYTHON_EXECUTABLE=' + sys.executable] cfg = 'Debug' if self.debug else 'Release' @@ -57,6 +59,7 @@ class CMakeBuild(build_ext): self.distribution.get_version()) if not os.path.exists(self.build_temp): os.makedirs(self.build_temp) + subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) From 9559c79eee1b6a00decb48e5f1d27f59a637e617 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 15:13:33 +0000 Subject: [PATCH 08/39] Updated ccls to include pybind --- .ccls | 1 + 1 file changed, 1 insertion(+) diff --git a/.ccls b/.ccls index 2ce15d72f..f215ea9d1 100644 --- a/.ccls +++ b/.ccls @@ -13,6 +13,7 @@ -DDEBUG=1 -DKOMPUTE_INCLUDE_FOR_SYNTAX +-I./python/pybind11/include/ -I./external/Vulkan-Headers/include/ -I./external/googletest/googletest/include/ -I./external/spdlog/include/ From 0e9ba00b710e060b46ec1c23a4ee4f9542b3b031 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 15:14:22 +0000 Subject: [PATCH 09/39] Added base capabilities for tensor in python --- python/CMakeLists.txt | 9 ++++++++- python/src/main.cpp | 22 ++++++++++++++++++++-- setup.py | 10 +++++----- src/CMakeLists.txt | 3 ++- 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index f0b4949ac..6ef7fde4b 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,4 +1,11 @@ add_subdirectory(pybind11) -pybind11_add_module(cmake_example 
src/main.cpp) +pybind11_add_module(komputepy src/main.cpp) + +include_directories( + ${PROJECT_SOURCE_DIR}/single_include/) + +target_link_libraries( + komputepy PRIVATE + kompute::kompute) diff --git a/python/src/main.cpp b/python/src/main.cpp index 1330bab19..5fe74f021 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -1,9 +1,27 @@ #include -#include "kompute/Kompute.hpp" +#include + +#include namespace py = pybind11; -PYBIND11_MODULE(cmake_example, m) { +PYBIND11_MODULE(komputepy, m) { + + py::enum_(m, "TensorTypes") + .value("eDevice", kp::Tensor::TensorTypes::eDevice) + .value("eStaging", kp::Tensor::TensorTypes::eStaging) + .value("eStorage", kp::Tensor::TensorTypes::eStorage) + .export_values(); + + py::class_(m, "Tensor") + .def(py::init( + [](const std::vector& data) { + return std::unique_ptr(new kp::Tensor(data)); + })) + .def(py::init( + [](const std::vector& data, kp::Tensor::TensorTypes tensorTypes) { + return std::unique_ptr(new kp::Tensor(data, tensorTypes)); + })); #ifdef VERSION_INFO m.attr("__version__") = VERSION_INFO; diff --git a/setup.py b/setup.py index 07d769b5b..e09673a97 100644 --- a/setup.py +++ b/setup.py @@ -51,6 +51,7 @@ class CMakeBuild(build_ext): cmake_args += ['-A', 'x64'] build_args += ['--', '/m'] else: + cmake_args += ['-DKOMPUTE_EXTRA_CXX_FLAGS="-fPIC"'] cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] build_args += ['--', '-j2'] @@ -64,13 +65,12 @@ class CMakeBuild(build_ext): subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) setup( - name='cmake_example', + name='komputepy', version='0.0.1', - author='Dean Moldovan', - author_email='dean0x7d@gmail.com', - description='A test project using pybind11 and CMake', + author='Alejandro Saucedo', + description='Blazing fast, mobile-enabled, asynchronous, and optimized for advanced GPU processing usecases.', long_description='', - ext_modules=[CMakeExtension('cmake_example')], + ext_modules=[CMakeExtension('komputepy')], 
cmdclass=dict(build_ext=CMakeBuild), zip_safe=False, ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6161b782b..348c0536a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -48,7 +48,8 @@ if(KOMPUTE_OPT_ANDOID_BUILD) ${PROJECT_SOURCE_DIR}/vk_ndk_wrapper_include/kompute_vk_ndk_wrapper.cpp) endif() -add_library(kompute +add_library( + kompute STATIC ${kompute_CPP}) target_include_directories( From 6afe6463c2f737fad2aab39c01e4f5a3732e29b3 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 16:27:59 +0000 Subject: [PATCH 10/39] Updated to add opbase --- python/src/main.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index 5fe74f021..e3b7fb371 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -21,7 +21,10 @@ PYBIND11_MODULE(komputepy, m) { .def(py::init( [](const std::vector& data, kp::Tensor::TensorTypes tensorTypes) { return std::unique_ptr(new kp::Tensor(data, tensorTypes)); - })); + })) + .def("data", &kp::Tensor::data); + + py::class_(m, "OpBase"); #ifdef VERSION_INFO m.attr("__version__") = VERSION_INFO; From 3ad5e4d3e780e1bbd78cf698bf081134822d4a06 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 16:28:48 +0000 Subject: [PATCH 11/39] Removed workgroup templates on opalgobase classes --- src/OpAlgoBase.cpp | 162 ++++++++++++++ src/OpAlgoLhsRhsOut.cpp | 129 +++++++++++ src/include/kompute/operations/OpAlgoBase.hpp | 206 ++---------------- .../kompute/operations/OpAlgoLhsRhsOut.hpp | 145 +----------- src/include/kompute/operations/OpMult.hpp | 24 +- 5 files changed, 322 insertions(+), 344 deletions(-) create mode 100644 src/OpAlgoBase.cpp create mode 100644 src/OpAlgoLhsRhsOut.cpp diff --git a/src/OpAlgoBase.cpp b/src/OpAlgoBase.cpp new file mode 100644 index 000000000..99e3a9ac1 --- /dev/null +++ b/src/OpAlgoBase.cpp @@ -0,0 +1,162 @@ +#pragma once + +#include "kompute/operations/OpAlgoBase.hpp" + +namespace kp { + 
+OpAlgoBase::OpAlgoBase() +{ + SPDLOG_DEBUG("Kompute OpAlgoBase constructor base"); +} + +OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector>& tensors, + KomputeWorkgroup komputeWorkgroup) + : OpBase(physicalDevice, device, commandBuffer, tensors, false) +{ + SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size()); + + // The dispatch size is set up based on either explicitly provided template + // parameters or by default it would take the shape and size of the tensors + if (komputeWorkgroup.x > 0) { + // If at least the x value is provided we use mainly the parameters + // provided + this->mKomputeWorkgroup = { + 0, + komputeWorkgroup.y > 0 ? komputeWorkgroup.y : 1, + komputeWorkgroup.z > 0 ? komputeWorkgroup.z : 1 + }; + } else { + this->mKomputeWorkgroup = {tensors[0]->size(), 1, 1}; + } + SPDLOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}", + this->mKomputeWorkgroup.x, + this->mKomputeWorkgroup.y, + this->mKomputeWorkgroup.z); + + this->mAlgorithm = std::make_shared(device, commandBuffer); +} + +OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector>& tensors, + std::string shaderFilePath, + KomputeWorkgroup komputeWorkgroup) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup) +{ + SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", shaderFilePath); + + this->mShaderFilePath = shaderFilePath; +} + +OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector>& tensors, + const std::vector& shaderDataRaw, + KomputeWorkgroup komputeWorkgroup) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup) +{ + SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw data length: {}", 
shaderDataRaw.size()); + + this->mShaderDataRaw = shaderDataRaw; +} + +OpAlgoBase::~OpAlgoBase() +{ + SPDLOG_DEBUG("Kompute OpAlgoBase destructor started"); +} + +void +OpAlgoBase::init() +{ + SPDLOG_DEBUG("Kompute OpAlgoBase init called"); + + if (this->mTensors.size() < 1) { + throw std::runtime_error( + "Kompute OpAlgoBase called with less than 1 tensor"); + } + + for (std::shared_ptr tensor : this->mTensors) { + if(!tensor->isInit()) { + throw std::runtime_error("Kompute OpAlgoBase validation failed; all tensor parameters must be initialised."); + } + } + + SPDLOG_DEBUG("Kompute OpAlgoBase fetching spirv data"); + + std::vector shaderFileData = this->fetchSpirvBinaryData(); + + SPDLOG_DEBUG("Kompute OpAlgoBase Initialising algorithm component"); + + this->mAlgorithm->init(shaderFileData, this->mTensors); +} + +void +OpAlgoBase::record() +{ + SPDLOG_DEBUG("Kompute OpAlgoBase record called"); + + // Barrier to ensure the data is finished writing to buffer memory + for (std::shared_ptr tensor : this->mTensors) { + tensor->recordBufferMemoryBarrier( + this->mCommandBuffer, + vk::AccessFlagBits::eHostWrite, + vk::AccessFlagBits::eShaderRead, + vk::PipelineStageFlagBits::eHost, + vk::PipelineStageFlagBits::eComputeShader); + } + + this->mAlgorithm->recordDispatch(this->mKomputeWorkgroup.x, this->mKomputeWorkgroup.y, this->mKomputeWorkgroup.z); +} + +void +OpAlgoBase::preEval() +{ + SPDLOG_DEBUG("Kompute OpAlgoBase preEval called"); +} + +void +OpAlgoBase::postEval() +{ + SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called"); +} + +std::vector OpAlgoBase::fetchSpirvBinaryData() +{ + SPDLOG_WARN( + "Kompute OpAlgoBase Running shaders directly from spirv file"); + + if (this->mShaderFilePath.size()) { + std::ifstream fileStream(this->mShaderFilePath, + std::ios::binary | std::ios::in | std::ios::ate); + + if (!fileStream.good()) { + throw std::runtime_error("Error reading file: " + this->mShaderFilePath); + } + + size_t shaderFileSize = fileStream.tellg(); + 
fileStream.seekg(0, std::ios::beg); + char* shaderDataRaw = new char[shaderFileSize]; + fileStream.read(shaderDataRaw, shaderFileSize); + fileStream.close(); + + SPDLOG_WARN( + "Kompute OpAlgoBase fetched {} bytes", shaderFileSize); + + return std::vector(shaderDataRaw, + shaderDataRaw + shaderFileSize); + } + else if (this->mShaderDataRaw.size()) { + return this->mShaderDataRaw; + } + else { + throw std::runtime_error("Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither filepath nor data provided"); + } +} + +} + diff --git a/src/OpAlgoLhsRhsOut.cpp b/src/OpAlgoLhsRhsOut.cpp new file mode 100644 index 000000000..444ec63a3 --- /dev/null +++ b/src/OpAlgoLhsRhsOut.cpp @@ -0,0 +1,129 @@ +#pragma once + +#include "kompute/operations/OpAlgoLhsRhsOut.hpp" + +namespace kp { + +OpAlgoLhsRhsOut::OpAlgoLhsRhsOut() +{ + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base"); +} + +OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector> tensors, + KomputeWorkgroup komputeWorkgroup) + // The inheritance is initialised with the copyOutputData to false given that + // this depencendant class handles the transfer of data via staging buffers in + // a granular way. 
+ : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup) +{ + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params"); +} + +OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut() +{ + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started"); +} + +void +OpAlgoLhsRhsOut::init() +{ + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut init called"); + + if (this->mTensors.size() < 3) { + throw std::runtime_error( + "Kompute OpAlgoLhsRhsOut called with less than 1 tensor"); + } else if (this->mTensors.size() > 3) { + SPDLOG_WARN("Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors"); + } + + this->mTensorLHS = this->mTensors[0]; + this->mTensorRHS = this->mTensors[1]; + this->mTensorOutput = this->mTensors[2]; + + + if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() && + this->mTensorOutput->isInit())) { + throw std::runtime_error( + "Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. LHS: " + + std::to_string(this->mTensorLHS->isInit()) + + " RHS: " + std::to_string(this->mTensorRHS->isInit()) + + " Output: " + std::to_string(this->mTensorOutput->isInit())); + } + + if (!(this->mTensorLHS->size() == this->mTensorRHS->size() && + this->mTensorRHS->size() == this->mTensorOutput->size())) { + throw std::runtime_error( + "Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " + + std::to_string(this->mTensorLHS->size()) + + " RHS: " + std::to_string(this->mTensorRHS->size()) + + " Output: " + std::to_string(this->mTensorOutput->size())); + } + + this->mTensorOutputStaging = std::make_shared( + this->mTensorOutput->data(), Tensor::TensorTypes::eStaging); + + this->mTensorOutputStaging->init( + this->mPhysicalDevice, this->mDevice); + + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data"); + + std::vector shaderFileData = this->fetchSpirvBinaryData(); + + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component"); + + this->mAlgorithm->init(shaderFileData, this->mTensors); +} + +void 
+OpAlgoLhsRhsOut::record() +{ + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut record called"); + + // Barrier to ensure the data is finished writing to buffer memory + this->mTensorLHS->recordBufferMemoryBarrier( + this->mCommandBuffer, + vk::AccessFlagBits::eHostWrite, + vk::AccessFlagBits::eShaderRead, + vk::PipelineStageFlagBits::eHost, + vk::PipelineStageFlagBits::eComputeShader); + this->mTensorRHS->recordBufferMemoryBarrier( + this->mCommandBuffer, + vk::AccessFlagBits::eHostWrite, + vk::AccessFlagBits::eShaderRead, + vk::PipelineStageFlagBits::eHost, + vk::PipelineStageFlagBits::eComputeShader); + + this->mAlgorithm->recordDispatch( + this->mKomputeWorkgroup.x, + this->mKomputeWorkgroup.y, + this->mKomputeWorkgroup.z); + + // Barrier to ensure the shader code is executed before buffer read + this->mTensorOutput->recordBufferMemoryBarrier( + this->mCommandBuffer, + vk::AccessFlagBits::eShaderWrite, + vk::AccessFlagBits::eTransferRead, + vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eTransfer); + + this->mTensorOutputStaging->recordCopyFrom( + this->mCommandBuffer, + this->mTensorOutput, + true); +} + +void +OpAlgoLhsRhsOut::postEval() +{ + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called"); + + this->mTensorOutputStaging->mapDataFromHostMemory(); + + this->mTensorOutput->setData(this->mTensorOutputStaging->data()); +} + +} + diff --git a/src/include/kompute/operations/OpAlgoBase.hpp b/src/include/kompute/operations/OpAlgoBase.hpp index 653006952..74108d285 100644 --- a/src/include/kompute/operations/OpAlgoBase.hpp +++ b/src/include/kompute/operations/OpAlgoBase.hpp @@ -17,20 +17,17 @@ namespace kp { * Operation that provides a general abstraction that simplifies the use of * algorithm and parameter components which can be used with shaders. * By default it enables the user to provide a dynamic number of tensors - * which are then passed as inputs. 
- * - * All of these tensors are expected to be initlaised and this is checked with throw std exception in the init function. - * - * See OpLhsRhsOut for an example implementation on a more specific granularity on tensor parameters. - * - * The template parameters specify the processing GPU layout number of - * iterations for each x, y, z parameter. More specifically, this will be the - * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" + * which are then passed as inputs. */ -template class OpAlgoBase : public OpBase { public: + struct KomputeWorkgroup { + uint32_t x; + uint32_t y; + uint32_t z; + }; + /** * Base constructor, should not be used unless explicitly intended. */ @@ -46,11 +43,13 @@ class OpAlgoBase : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors); + std::vector>& tensors, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Constructor that enables a file to be passed to the operation with @@ -61,13 +60,15 @@ class OpAlgoBase : public OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) + * @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format) + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, 
std::vector>& tensors, - std::string shaderFilePath); + std::string shaderFilePath, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Constructor that enables raw shader data to be passed to the main operation @@ -78,12 +79,14 @@ class OpAlgoBase : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation * @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - const std::vector& shaderDataRaw); + const std::vector& shaderDataRaw, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Default destructor, which is in charge of destroying the algorithm @@ -131,9 +134,7 @@ class OpAlgoBase : public OpBase // -------------- ALWAYS OWNED RESOURCES - uint32_t mX; - uint32_t mY; - uint32_t mZ; + KomputeWorkgroup mKomputeWorkgroup; std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing std::vector mShaderDataRaw; ///< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content @@ -143,174 +144,3 @@ class OpAlgoBase : public OpBase } // End namespace kp -// Including implementation for template class -#ifndef OPALGOBASE_IMPL -#define OPALGOBASE_IMPL - -namespace kp { - -template -OpAlgoBase::OpAlgoBase() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase constructor base"); -} - -template -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) -{ - SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", 
tensors.size()); - - // The dispatch size is set up based on either explicitly provided template - // parameters or by default it would take the shape and size of the tensors - if (tX > 0) { - // If at least the x value is provided we use mainly the parameters - // provided - this->mX = tX; - this->mY = tY > 0 ? tY : 1; - this->mZ = tZ > 0 ? tZ : 1; - } else { - this->mX = tensors[0]->size(); - this->mY = 1; - this->mZ = 1; - } - SPDLOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}", - this->mX, - this->mY, - this->mZ); - - this->mAlgorithm = std::make_shared(device, commandBuffer); -} - -template -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - std::string shaderFilePath) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) -{ - SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", shaderFilePath); - - this->mShaderFilePath = shaderFilePath; -} - -template -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - const std::vector& shaderDataRaw) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) -{ - SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw data length: {}", shaderDataRaw.size()); - - this->mShaderDataRaw = shaderDataRaw; -} - -template -OpAlgoBase::~OpAlgoBase() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase destructor started"); -} - -template -void -OpAlgoBase::init() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase init called"); - - if (this->mTensors.size() < 1) { - throw std::runtime_error( - "Kompute OpAlgoBase called with less than 1 tensor"); - } - - for (std::shared_ptr tensor : this->mTensors) { - if(!tensor->isInit()) { - throw std::runtime_error("Kompute OpAlgoBase validation failed; all tensor parameters must be initialised."); - } - } - - SPDLOG_DEBUG("Kompute OpAlgoBase fetching spirv data"); 
- - std::vector shaderFileData = this->fetchSpirvBinaryData(); - - SPDLOG_DEBUG("Kompute OpAlgoBase Initialising algorithm component"); - - this->mAlgorithm->init(shaderFileData, this->mTensors); -} - -template -void -OpAlgoBase::record() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase record called"); - - // Barrier to ensure the data is finished writing to buffer memory - for (std::shared_ptr tensor : this->mTensors) { - tensor->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - } - - this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); -} - -template -void -OpAlgoBase::preEval() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase preEval called"); -} - -template -void -OpAlgoBase::postEval() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called"); -} - -template -std::vector OpAlgoBase::fetchSpirvBinaryData() -{ - SPDLOG_WARN( - "Kompute OpAlgoBase Running shaders directly from spirv file"); - - if (this->mShaderFilePath.size()) { - std::ifstream fileStream(this->mShaderFilePath, - std::ios::binary | std::ios::in | std::ios::ate); - - if (!fileStream.good()) { - throw std::runtime_error("Error reading file: " + this->mShaderFilePath); - } - - size_t shaderFileSize = fileStream.tellg(); - fileStream.seekg(0, std::ios::beg); - char* shaderDataRaw = new char[shaderFileSize]; - fileStream.read(shaderDataRaw, shaderFileSize); - fileStream.close(); - - SPDLOG_WARN( - "Kompute OpAlgoBase fetched {} bytes", shaderFileSize); - - return std::vector(shaderDataRaw, - shaderDataRaw + shaderFileSize); - } - else if (this->mShaderDataRaw.size()) { - return this->mShaderDataRaw; - } - else { - throw std::runtime_error("Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither filepath nor data provided"); - } -} - -} - -#endif // #ifndef OPALGOBASE_IMPL - diff --git a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp 
b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp index 5c22bdcc6..c826bd324 100644 --- a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp +++ b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp @@ -15,12 +15,8 @@ namespace kp { * Operation base class to simplify the creation of operations that require * right hand and left hand side datapoints together with a single output. * The expected data passed is two input tensors and one output tensor. - * The template parameters specify the processing GPU layout number of - * iterations for each x, y, z parameter. More specifically, this will be the - * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" */ -template -class OpAlgoLhsRhsOut : public OpAlgoBase +class OpAlgoLhsRhsOut : public OpAlgoBase { public: /** @@ -38,11 +34,13 @@ class OpAlgoLhsRhsOut : public OpAlgoBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation * @param freeTensors Whether operation manages the memory of the Tensors + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector> tensors); + std::vector> tensors, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Default destructor, which is in charge of destroying the algorithm @@ -73,7 +71,7 @@ class OpAlgoLhsRhsOut : public OpAlgoBase * of the GPU Device memory into the staging buffer so the output data can * be retrieved. 
*/ - virtual void postSubmit() override; + virtual void postEval() override; protected: // -------------- NEVER OWNED RESOURCES @@ -87,136 +85,3 @@ class OpAlgoLhsRhsOut : public OpAlgoBase } // End namespace kp -// Including implementation for template class -#ifndef OPALGOLHSRHSOUT_CPP -#define OPALGOLHSRHSOUT_CPP - -namespace kp { - -template -OpAlgoLhsRhsOut::OpAlgoLhsRhsOut() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base"); -} - -template -OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors) - // The inheritance is initialised with the copyOutputData to false given that - // this depencendant class handles the transfer of data via staging buffers in - // a granular way. - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params"); -} - -template -OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started"); -} - -template -void -OpAlgoLhsRhsOut::init() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut init called"); - - if (this->mTensors.size() < 3) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut called with less than 1 tensor"); - } else if (this->mTensors.size() > 3) { - SPDLOG_WARN("Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors"); - } - - this->mTensorLHS = this->mTensors[0]; - this->mTensorRHS = this->mTensors[1]; - this->mTensorOutput = this->mTensors[2]; - - - if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() && - this->mTensorOutput->isInit())) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. 
LHS: " + - std::to_string(this->mTensorLHS->isInit()) + - " RHS: " + std::to_string(this->mTensorRHS->isInit()) + - " Output: " + std::to_string(this->mTensorOutput->isInit())); - } - - if (!(this->mTensorLHS->size() == this->mTensorRHS->size() && - this->mTensorRHS->size() == this->mTensorOutput->size())) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " + - std::to_string(this->mTensorLHS->size()) + - " RHS: " + std::to_string(this->mTensorRHS->size()) + - " Output: " + std::to_string(this->mTensorOutput->size())); - } - - this->mTensorOutputStaging = std::make_shared( - this->mTensorOutput->data(), Tensor::TensorTypes::eStaging); - - this->mTensorOutputStaging->init( - this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); - - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data"); - - std::vector shaderFileData = this->fetchSpirvBinaryData(); - - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component"); - - this->mAlgorithm->init(shaderFileData, this->mTensors); -} - -template -void -OpAlgoLhsRhsOut::record() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut record called"); - - // Barrier to ensure the data is finished writing to buffer memory - this->mTensorLHS->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - this->mTensorRHS->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - - this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); - - // Barrier to ensure the shader code is executed before buffer read - this->mTensorOutput->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eShaderWrite, - vk::AccessFlagBits::eTransferRead, - 
vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer); - - this->mTensorOutputStaging->recordCopyFrom( - this->mCommandBuffer, - this->mTensorOutput, - true); -} - -template -void -OpAlgoLhsRhsOut::postSubmit() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called"); - - this->mTensorOutputStaging->mapDataFromHostMemory(); - - this->mTensorOutput->setData(this->mTensorOutputStaging->data()); -} - -} - -#endif // #ifndef OPALGOLHSRHSOUT_CPP - diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp index ba3cb21a0..f555f8ac1 100644 --- a/src/include/kompute/operations/OpMult.hpp +++ b/src/include/kompute/operations/OpMult.hpp @@ -17,12 +17,9 @@ namespace kp { /** * Operation that performs multiplication on two tensors and outpus on third - * tensor. The template parameters specify the processing GPU layout number of - * iterations for each x, y, z parameter. More specifically, this will be the - * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" + * tensor. 
*/ -template -class OpMult : public OpAlgoBase +class OpMult : public OpAlgoBase { public: /** @@ -41,13 +38,14 @@ class OpMult : public OpAlgoBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpMult(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector> tensors) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "") + std::vector> tensors, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup) { SPDLOG_DEBUG("Kompute OpMult constructor with params"); @@ -58,14 +56,8 @@ class OpMult : public OpAlgoBase #if RELEASE /** - * If release it will be using the static version of the shader which is - * loaded using this file directly. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors + * If RELEASE=1 it will be using the static version of the shader which is + * loaded using this file directly. Otherwise it should not override the function. 
*/ std::vector fetchSpirvBinaryData() override { From 552a6c051fee47c81e883cef0b295bae78499741 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 16:29:07 +0000 Subject: [PATCH 12/39] Updated tests without tempaltes on opalgobase classes --- test/TestAsyncOperations.cpp | 8 ++++---- test/TestLogisticRegression.cpp | 4 ++-- test/TestManager.cpp | 6 +++--- test/TestMultipleAlgoExecutions.cpp | 24 ++++++++++++------------ test/TestOpAlgoLoopsPassingData.cpp | 2 +- test/TestOpShadersFromStringAndFile.cpp | 8 ++++---- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index 11bdee98f..43bccf99b 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -54,7 +54,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) auto startSync = std::chrono::high_resolution_clock::now(); for (uint32_t i = 0; i < numParallel; i++) { - mgr.evalOpDefault>( + mgr.evalOpDefault( { inputsSyncB[i] }, std::vector(shader.begin(), shader.end())); } @@ -86,7 +86,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) auto startAsync = std::chrono::high_resolution_clock::now(); for (uint32_t i = 0; i < numParallel; i++) { - mgrAsync.evalOpAsync>( + mgrAsync.evalOpAsync( { inputsAsyncB[i] }, "async" + std::to_string(i), std::vector(shader.begin(), shader.end())); @@ -151,10 +151,10 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpAsync>( + mgr.evalOpAsync( { tensorA }, "asyncOne", std::vector(shader.begin(), shader.end())); - mgr.evalOpAsync>( + mgr.evalOpAsync( { tensorB }, "asyncTwo", std::vector(shader.begin(), shader.end())); mgr.evalOpAwait("asyncOne"); diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index fa8dc7b59..9822c08d1 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -46,7 +46,7 @@ TEST(TestLogisticRegressionAlgorithm, 
TestMainLogisticRegression) sq->record({ wIn, bIn }); - sq->record>( + sq->record( params, "test/shaders/glsl/test_logistic_regression.comp"); sq->record({ wOutI, wOutJ, bOut, lOut }); @@ -127,7 +127,7 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) // Record op algo base sq->begin(); - sq->record>( + sq->record( params, "test/shaders/glsl/test_logistic_regression.comp"); sq->record({ wOutI, wOutJ, bOut, lOut }); diff --git a/test/TestManager.cpp b/test/TestManager.cpp index 1550d8efb..0cb2a78fd 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -17,7 +17,7 @@ TEST(TestManager, EndToEndOpMultFlow) mgr.evalOpDefault({ tensorOutput }); - mgr.evalOpDefault>({ tensorLHS, tensorRHS, tensorOutput }); + mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); mgr.evalOpDefault({ tensorOutput }); @@ -44,7 +44,7 @@ TEST(TestManager, OpMultSequenceFlow) sq->record({ tensorRHS }); sq->record({ tensorOutput }); - sq->record>({ tensorLHS, tensorRHS, tensorOutput }); + sq->record({ tensorLHS, tensorRHS, tensorOutput }); sq->record({ tensorOutput }); @@ -100,7 +100,7 @@ TEST(TestManager, TestMultipleTensorsAtOnce) EXPECT_TRUE(tensorRHS->isInit()); EXPECT_TRUE(tensorOutput->isInit()); - sq->record>({ tensorLHS, tensorRHS, tensorOutput }); + sq->record({ tensorLHS, tensorRHS, tensorOutput }); sq->record({ tensorOutput }); diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index cdccd62fc..a0355416c 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -26,11 +26,11 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) sq->record({ tensorA }); - sq->record>( + sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); - sq->record>( + sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); - sq->record>( + sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); sq->record({ tensorA }); @@ -70,19 +70,19 @@ 
TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) // Then perform the computations sq->begin(); - sq->record>( + sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); sq->end(); sq->eval(); sq->begin(); - sq->record>( + sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); sq->end(); sq->eval(); sq->begin(); - sq->record>( + sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); sq->end(); sq->eval(); @@ -118,7 +118,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) sq->record({ tensorA }); - sq->record>( + sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); sq->end(); @@ -130,7 +130,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) if (std::shared_ptr sq = sqWeakPtr2.lock()) { sq->begin(); - sq->record>( + sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); sq->end(); @@ -142,7 +142,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) if (std::shared_ptr sq = sqWeakPtr3.lock()) { sq->begin(); - sq->record>( + sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); sq->end(); @@ -195,7 +195,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) if (std::shared_ptr sq = sqWeakPtr2.lock()) { sq->begin(); - sq->record>( + sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); sq->end(); @@ -252,7 +252,7 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate) } )"); - mgr.evalOpDefault>( + mgr.evalOpDefault( { tensorInA, tensorInB, tensorOut }, std::vector(shader.begin(), shader.end())); @@ -289,7 +289,7 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) } )"); - mgr.evalOpDefault>( + mgr.evalOpDefault( { tensorInA, tensorInB, tensorOut }, std::vector(shader.begin(), shader.end())); diff --git a/test/TestOpAlgoLoopsPassingData.cpp b/test/TestOpAlgoLoopsPassingData.cpp index 2c47b0de3..9c592e356 100644 --- a/test/TestOpAlgoLoopsPassingData.cpp +++ b/test/TestOpAlgoLoopsPassingData.cpp @@ -49,7 +49,7 @@ 
TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) if (std::shared_ptr sq = sqWeakPtr2.lock()) { sq->begin(); - sq->record>( + sq->record( { tensorA, tensorB }, std::vector(shader.begin(), shader.end())); diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index 58a361558..273421b26 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -28,7 +28,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) } )"); - mgr.evalOpDefault>( + mgr.evalOpDefault( { tensorA, tensorB }, std::vector(shader.begin(), shader.end())); mgr.evalOpDefault({ tensorA, tensorB }); @@ -45,7 +45,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpDefault>( + mgr.evalOpDefault( { tensorA, tensorB }, std::vector( kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv, @@ -67,7 +67,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromFile) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpDefault>( + mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp"); mgr.evalOpDefault({ tensorA, tensorB }); @@ -84,7 +84,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpDefault>( + mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv"); mgr.evalOpDefault({ tensorA, tensorB }); From b0d394a50b6f7f633f41073d75d4774b0bb4fe99 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 16:29:27 +0000 Subject: [PATCH 13/39] Updated single include with non-templated opalgobase classes --- single_include/kompute/Kompute.hpp | 374 +++-------------------------- 1 file changed, 31 insertions(+), 343 deletions(-) diff --git 
a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 8def06e4a..382b7131d 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1620,20 +1620,17 @@ namespace kp { * Operation that provides a general abstraction that simplifies the use of * algorithm and parameter components which can be used with shaders. * By default it enables the user to provide a dynamic number of tensors - * which are then passed as inputs. - * - * All of these tensors are expected to be initlaised and this is checked with throw std exception in the init function. - * - * See OpLhsRhsOut for an example implementation on a more specific granularity on tensor parameters. - * - * The template parameters specify the processing GPU layout number of - * iterations for each x, y, z parameter. More specifically, this will be the - * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" + * which are then passed as inputs. */ -template class OpAlgoBase : public OpBase { public: + struct KomputeWorkgroup { + uint32_t x; + uint32_t y; + uint32_t z; + }; + /** * Base constructor, should not be used unless explicitly intended. 
*/ @@ -1649,11 +1646,13 @@ class OpAlgoBase : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors); + std::vector>& tensors, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Constructor that enables a file to be passed to the operation with @@ -1664,13 +1663,15 @@ class OpAlgoBase : public OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) + * @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format) + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - std::string shaderFilePath); + std::string shaderFilePath, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Constructor that enables raw shader data to be passed to the main operation @@ -1681,12 +1682,14 @@ class OpAlgoBase : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation * @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr 
commandBuffer, std::vector>& tensors, - const std::vector& shaderDataRaw); + const std::vector& shaderDataRaw, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Default destructor, which is in charge of destroying the algorithm @@ -1733,9 +1736,7 @@ class OpAlgoBase : public OpBase // -------------- ALWAYS OWNED RESOURCES - uint32_t mX; - uint32_t mY; - uint32_t mZ; + KomputeWorkgroup mKomputeWorkgroup; std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing std::vector mShaderDataRaw; ///< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content @@ -1745,177 +1746,6 @@ class OpAlgoBase : public OpBase } // End namespace kp -// Including implementation for template class -#ifndef OPALGOBASE_IMPL -#define OPALGOBASE_IMPL - -namespace kp { - -template -OpAlgoBase::OpAlgoBase() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase constructor base"); -} - -template -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) -{ - SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size()); - - // The dispatch size is set up based on either explicitly provided template - // parameters or by default it would take the shape and size of the tensors - if (tX > 0) { - // If at least the x value is provided we use mainly the parameters - // provided - this->mX = tX; - this->mY = tY > 0 ? tY : 1; - this->mZ = tZ > 0 ? 
tZ : 1; - } else { - this->mX = tensors[0]->size(); - this->mY = 1; - this->mZ = 1; - } - SPDLOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}", - this->mX, - this->mY, - this->mZ); - - this->mAlgorithm = std::make_shared(device, commandBuffer); -} - -template -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - std::string shaderFilePath) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) -{ - SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", shaderFilePath); - - this->mShaderFilePath = shaderFilePath; -} - -template -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - const std::vector& shaderDataRaw) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) -{ - SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw data length: {}", shaderDataRaw.size()); - - this->mShaderDataRaw = shaderDataRaw; -} - -template -OpAlgoBase::~OpAlgoBase() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase destructor started"); -} - -template -void -OpAlgoBase::init() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase init called"); - - if (this->mTensors.size() < 1) { - throw std::runtime_error( - "Kompute OpAlgoBase called with less than 1 tensor"); - } - - for (std::shared_ptr tensor : this->mTensors) { - if(!tensor->isInit()) { - throw std::runtime_error("Kompute OpAlgoBase validation failed; all tensor parameters must be initialised."); - } - } - - SPDLOG_DEBUG("Kompute OpAlgoBase fetching spirv data"); - - std::vector shaderFileData = this->fetchSpirvBinaryData(); - - SPDLOG_DEBUG("Kompute OpAlgoBase Initialising algorithm component"); - - this->mAlgorithm->init(shaderFileData, this->mTensors); -} - -template -void -OpAlgoBase::record() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase record called"); - - // Barrier to ensure the data is finished 
writing to buffer memory - for (std::shared_ptr tensor : this->mTensors) { - tensor->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - } - - this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); -} - -template -void -OpAlgoBase::preEval() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase preEval called"); -} - -template -void -OpAlgoBase::postEval() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called"); -} - -template -std::vector OpAlgoBase::fetchSpirvBinaryData() -{ - SPDLOG_WARN( - "Kompute OpAlgoBase Running shaders directly from spirv file"); - - if (this->mShaderFilePath.size()) { - std::ifstream fileStream(this->mShaderFilePath, - std::ios::binary | std::ios::in | std::ios::ate); - - if (!fileStream.good()) { - throw std::runtime_error("Error reading file: " + this->mShaderFilePath); - } - - size_t shaderFileSize = fileStream.tellg(); - fileStream.seekg(0, std::ios::beg); - char* shaderDataRaw = new char[shaderFileSize]; - fileStream.read(shaderDataRaw, shaderFileSize); - fileStream.close(); - - SPDLOG_WARN( - "Kompute OpAlgoBase fetched {} bytes", shaderFileSize); - - return std::vector(shaderDataRaw, - shaderDataRaw + shaderFileSize); - } - else if (this->mShaderDataRaw.size()) { - return this->mShaderDataRaw; - } - else { - throw std::runtime_error("Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither filepath nor data provided"); - } -} - -} - -#endif // #ifndef OPALGOBASE_IMPL - #include namespace kp { @@ -1924,12 +1754,8 @@ namespace kp { * Operation base class to simplify the creation of operations that require * right hand and left hand side datapoints together with a single output. * The expected data passed is two input tensors and one output tensor. - * The template parameters specify the processing GPU layout number of - * iterations for each x, y, z parameter. 
More specifically, this will be the - * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" */ -template -class OpAlgoLhsRhsOut : public OpAlgoBase +class OpAlgoLhsRhsOut : public OpAlgoBase { public: /** @@ -1947,11 +1773,13 @@ class OpAlgoLhsRhsOut : public OpAlgoBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation * @param freeTensors Whether operation manages the memory of the Tensors + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector> tensors); + std::vector> tensors, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Default destructor, which is in charge of destroying the algorithm @@ -1982,7 +1810,7 @@ class OpAlgoLhsRhsOut : public OpAlgoBase * of the GPU Device memory into the staging buffer so the output data can * be retrieved. */ - virtual void postSubmit() override; + virtual void postEval() override; protected: // -------------- NEVER OWNED RESOURCES @@ -1996,138 +1824,6 @@ class OpAlgoLhsRhsOut : public OpAlgoBase } // End namespace kp -// Including implementation for template class -#ifndef OPALGOLHSRHSOUT_CPP -#define OPALGOLHSRHSOUT_CPP - -namespace kp { - -template -OpAlgoLhsRhsOut::OpAlgoLhsRhsOut() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base"); -} - -template -OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors) - // The inheritance is initialised with the copyOutputData to false given that - // this depencendant class handles the transfer of data via staging buffers in - // a granular way. 
- : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params"); -} - -template -OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started"); -} - -template -void -OpAlgoLhsRhsOut::init() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut init called"); - - if (this->mTensors.size() < 3) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut called with less than 1 tensor"); - } else if (this->mTensors.size() > 3) { - SPDLOG_WARN("Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors"); - } - - this->mTensorLHS = this->mTensors[0]; - this->mTensorRHS = this->mTensors[1]; - this->mTensorOutput = this->mTensors[2]; - - if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() && - this->mTensorOutput->isInit())) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. LHS: " + - std::to_string(this->mTensorLHS->isInit()) + - " RHS: " + std::to_string(this->mTensorRHS->isInit()) + - " Output: " + std::to_string(this->mTensorOutput->isInit())); - } - - if (!(this->mTensorLHS->size() == this->mTensorRHS->size() && - this->mTensorRHS->size() == this->mTensorOutput->size())) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " + - std::to_string(this->mTensorLHS->size()) + - " RHS: " + std::to_string(this->mTensorRHS->size()) + - " Output: " + std::to_string(this->mTensorOutput->size())); - } - - this->mTensorOutputStaging = std::make_shared( - this->mTensorOutput->data(), Tensor::TensorTypes::eStaging); - - this->mTensorOutputStaging->init( - this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); - - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data"); - - std::vector shaderFileData = this->fetchSpirvBinaryData(); - - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component"); - - this->mAlgorithm->init(shaderFileData, 
this->mTensors); -} - -template -void -OpAlgoLhsRhsOut::record() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut record called"); - - // Barrier to ensure the data is finished writing to buffer memory - this->mTensorLHS->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - this->mTensorRHS->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - - this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); - - // Barrier to ensure the shader code is executed before buffer read - this->mTensorOutput->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eShaderWrite, - vk::AccessFlagBits::eTransferRead, - vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer); - - this->mTensorOutputStaging->recordCopyFrom( - this->mCommandBuffer, - this->mTensorOutput, - true); -} - -template -void -OpAlgoLhsRhsOut::postSubmit() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called"); - - this->mTensorOutputStaging->mapDataFromHostMemory(); - - this->mTensorOutput->setData(this->mTensorOutputStaging->data()); -} - -} - -#endif // #ifndef OPALGOLHSRHSOUT_CPP - #include #if RELEASE @@ -2138,12 +1834,9 @@ namespace kp { /** * Operation that performs multiplication on two tensors and outpus on third - * tensor. The template parameters specify the processing GPU layout number of - * iterations for each x, y, z parameter. More specifically, this will be the - * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" + * tensor. 
*/ -template -class OpMult : public OpAlgoBase +class OpMult : public OpAlgoBase { public: /** @@ -2162,13 +1855,14 @@ class OpMult : public OpAlgoBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpMult(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector> tensors) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "") + std::vector> tensors, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup) { SPDLOG_DEBUG("Kompute OpMult constructor with params"); @@ -2179,14 +1873,8 @@ class OpMult : public OpAlgoBase #if RELEASE /** - * If release it will be using the static version of the shader which is - * loaded using this file directly. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors + * If RELEASE=1 it will be using the static version of the shader which is + * loaded using this file directly. Otherwise it should not override the function. 
*/ std::vector fetchSpirvBinaryData() override { From 473031d1f3b424c8d8229a3ed14c40e70a1ddeb4 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 20:25:15 +0000 Subject: [PATCH 14/39] Sequence now exposed via shared_ptr instead of weak_ptr and memory release is done through destructor based on the isInit member variable --- src/Manager.cpp | 9 ++++-- src/Sequence.cpp | 10 +++++++ src/include/kompute/Manager.hpp | 50 +++++++++++++++------------------ 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/src/Manager.cpp b/src/Manager.cpp index ec86b18ed..5c7a2d3be 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -59,7 +59,10 @@ Manager::~Manager() } if (this->mManagedSequences.size()) { - SPDLOG_DEBUG("Releasing managed sequence"); + SPDLOG_DEBUG("Kompute Manager explicitly running destructor for managed sequences"); + for (const std::pair> &sqPair : this->mManagedSequences) { + sqPair.second->~Sequence(); + } this->mManagedSequences.clear(); } @@ -91,7 +94,7 @@ Manager::~Manager() } } -std::weak_ptr +std::shared_ptr Manager::getOrCreateManagedSequence(std::string sequenceName) { SPDLOG_DEBUG("Kompute Manager creating Sequence object"); @@ -106,7 +109,7 @@ Manager::getOrCreateManagedSequence(std::string sequenceName) } } -std::weak_ptr +std::shared_ptr Manager::createManagedSequence(std::string sequenceName, uint32_t queueIndex) { diff --git a/src/Sequence.cpp b/src/Sequence.cpp index c4446ff37..0f6eccfd2 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -27,9 +27,15 @@ Sequence::~Sequence() { SPDLOG_DEBUG("Kompute Sequence Destructor started"); + if (!this->mIsInit) { + SPDLOG_WARN("Kompute Sequence destructor called but sequence is not initialized."); + return; + } + if (!this->mDevice) { SPDLOG_ERROR( "Kompute Sequence destructor reached with null Device pointer"); + this->mIsInit = false; return; } @@ -38,6 +44,7 @@ Sequence::~Sequence() if (!this->mCommandBuffer) { SPDLOG_ERROR("Kompute Sequence destructor reached with 
null " "CommandPool pointer"); + this->mIsInit = false; return; } this->mDevice->freeCommandBuffers( @@ -50,11 +57,14 @@ Sequence::~Sequence() if (this->mCommandPool == nullptr) { SPDLOG_ERROR("Kompute Sequence destructor reached with null " "CommandPool pointer"); + this->mIsInit = false; return; } this->mDevice->destroy(*this->mCommandPool, (vk::Optional)nullptr); SPDLOG_DEBUG("Kompute Sequence Destroyed CommandPool"); } + + this->mIsInit = false; } void diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 32c04535b..98e8e82c5 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -63,9 +63,9 @@ class Manager * * @param sequenceName The name for the named sequence to be retrieved or * created - * @return Weak pointer to the manager owned sequence resource + * @return Shared pointer to the manager owned sequence resource */ - std::weak_ptr getOrCreateManagedSequence( + std::shared_ptr getOrCreateManagedSequence( std::string sequenceName); /** @@ -77,7 +77,7 @@ class Manager * @param queueIndex The queue to use from the available queues * @return Weak pointer to the manager owned sequence resource */ - std::weak_ptr createManagedSequence(std::string sequenceName = "", + std::shared_ptr createManagedSequence(std::string sequenceName = "", uint32_t queueIndex = 0); /** @@ -94,22 +94,21 @@ class Manager TArgs&&... 
params) { SPDLOG_DEBUG("Kompute Manager evalOp triggered"); - std::weak_ptr sqWeakPtr = + std::shared_ptr sq = this->getOrCreateManagedSequence(sequenceName); - if (std::shared_ptr sq = sqWeakPtr.lock()) { - SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); - sq->begin(); + SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); + sq->begin(); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence RECORD"); - sq->record(tensors, std::forward(params)...); + SPDLOG_DEBUG("Kompute Manager evalOp running sequence RECORD"); + sq->record(tensors, std::forward(params)...); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence END"); - sq->end(); + SPDLOG_DEBUG("Kompute Manager evalOp running sequence END"); + sq->end(); + + SPDLOG_DEBUG("Kompute Manager evalOp running sequence EVAL"); + sq->eval(); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence EVAL"); - sq->eval(); - } SPDLOG_DEBUG("Kompute Manager evalOp running sequence SUCCESS"); } @@ -147,26 +146,21 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered"); - std::weak_ptr sqWeakPtr = + std::shared_ptr sq = this->getOrCreateManagedSequence(sequenceName); - if (std::shared_ptr sq = sqWeakPtr.lock()) { + SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); + sq->begin(); - SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); - sq->begin(); + SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence RECORD"); + sq->record(tensors, std::forward(params)...); - SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence RECORD"); - sq->record(tensors, std::forward(params)...); + SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence END"); + sq->end(); - SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence END"); - sq->end(); + SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence EVAL"); + sq->evalAsync(); - SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence EVAL"); - sq->evalAsync(); - } else { - SPDLOG_ERROR("Kompute 
Manager evalOpAsync sequence [{}] not found", - sequenceName); - } SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence SUCCESS"); } From e2f6e876bc376d029fec0a5cd8993fdf6a00f8ae Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 20:25:53 +0000 Subject: [PATCH 15/39] Updated tests to align with new sequence memory management workflow --- test/TestLogisticRegression.cpp | 8 ++-- test/TestManager.cpp | 30 +++++++-------- test/TestMultipleAlgoExecutions.cpp | 60 ++++++++++++++++------------- test/TestTensor.cpp | 2 +- 4 files changed, 54 insertions(+), 46 deletions(-) diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index 9822c08d1..91dd1f430 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -32,14 +32,14 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) kp::Manager mgr; std::shared_ptr sqTensor = - mgr.createManagedSequence().lock(); + mgr.createManagedSequence(); sqTensor->begin(); sqTensor->record(params); sqTensor->end(); sqTensor->eval(); - std::shared_ptr sq = mgr.createManagedSequence().lock(); + std::shared_ptr sq = mgr.createManagedSequence(); // Record op algo base sq->begin(); @@ -115,14 +115,14 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) kp::Manager mgr; std::shared_ptr sqTensor = - mgr.createManagedSequence().lock(); + mgr.createManagedSequence(); sqTensor->begin(); sqTensor->record(params); sqTensor->end(); sqTensor->eval(); - std::shared_ptr sq = mgr.createManagedSequence().lock(); + std::shared_ptr sq = mgr.createManagedSequence(); // Record op algo base sq->begin(); diff --git a/test/TestManager.cpp b/test/TestManager.cpp index 0cb2a78fd..3076b2a62 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -35,9 +35,10 @@ TEST(TestManager, OpMultSequenceFlow) kp::Manager mgr; - std::weak_ptr sqWeakPtr = - mgr.getOrCreateManagedSequence("newSequence"); - if (std::shared_ptr sq = sqWeakPtr.lock()) { + { + 
std::shared_ptr sq = + mgr.getOrCreateManagedSequence("newSequence"); + sq->begin(); sq->record({ tensorLHS }); @@ -51,7 +52,6 @@ TEST(TestManager, OpMultSequenceFlow) sq->end(); sq->eval(); } - sqWeakPtr.reset(); EXPECT_EQ(tensorOutput->data(), std::vector({ 0, 4, 12 })); } @@ -60,22 +60,22 @@ TEST(TestManager, TestMultipleSequences) { kp::Manager mgr; - std::weak_ptr sqWeakPtrOne = + std::shared_ptr sqOne = mgr.getOrCreateManagedSequence("sqOne"); - std::weak_ptr sqWeakPtrTwo = + std::shared_ptr sqTwo = mgr.getOrCreateManagedSequence("sqTwo"); - std::weak_ptr sqWeakPtrOneRef = + std::shared_ptr sqOneRef = mgr.getOrCreateManagedSequence("sqOne"); - std::weak_ptr sqWeakPtrTwoRef = + std::shared_ptr sqTwoRef = mgr.getOrCreateManagedSequence("sqTwo"); - EXPECT_EQ(sqWeakPtrOne.lock(), sqWeakPtrOneRef.lock()); - EXPECT_NE(sqWeakPtrTwo.lock(), sqWeakPtrOneRef.lock()); - EXPECT_EQ(sqWeakPtrTwo.lock(), sqWeakPtrTwoRef.lock()); - EXPECT_NE(sqWeakPtrOneRef.lock(), sqWeakPtrTwoRef.lock()); + EXPECT_EQ(sqOne, sqOneRef); + EXPECT_NE(sqTwo, sqOneRef); + EXPECT_EQ(sqTwo, sqTwoRef); + EXPECT_NE(sqOneRef, sqTwoRef); } TEST(TestManager, TestMultipleTensorsAtOnce) @@ -89,9 +89,10 @@ TEST(TestManager, TestMultipleTensorsAtOnce) kp::Manager mgr; - std::weak_ptr sqWeakPtr = + std::shared_ptr sq = mgr.getOrCreateManagedSequence("newSequence"); - if (std::shared_ptr sq = sqWeakPtr.lock()) { + + { sq->begin(); sq->record({ tensorLHS, tensorRHS, tensorOutput }); @@ -107,7 +108,6 @@ TEST(TestManager, TestMultipleTensorsAtOnce) sq->end(); sq->eval(); } - sqWeakPtr.reset(); EXPECT_EQ(tensorOutput->data(), std::vector({ 0, 4, 12 })); } diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index a0355416c..f45367313 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -19,9 +19,10 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) pa[index] = pa[index] + 1; })"); - std::weak_ptr sqWeakPtr = + std::shared_ptr sq = 
mgr.getOrCreateManagedSequence("newSequence"); - if (std::shared_ptr sq = sqWeakPtr.lock()) { + + { sq->begin(); sq->record({ tensorA }); @@ -38,7 +39,6 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) sq->end(); sq->eval(); } - sqWeakPtr.reset(); EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); } @@ -58,9 +58,9 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) pa[index] = pa[index] + 1; })"); - std::shared_ptr sqTensor = mgr.createManagedSequence().lock(); + std::shared_ptr sqTensor = mgr.createManagedSequence(); - std::shared_ptr sq = mgr.createManagedSequence().lock(); + std::shared_ptr sq = mgr.createManagedSequence(); // First create the tensor in a separate sequence sqTensor->begin(); @@ -111,9 +111,10 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) pa[index] = pa[index] + 1; })"); - std::weak_ptr sqWeakPtr = - mgr.getOrCreateManagedSequence("newSequence"); - if (std::shared_ptr sq = sqWeakPtr.lock()) { + { + std::shared_ptr sq = + mgr.getOrCreateManagedSequence("newSequence"); + sq->begin(); sq->record({ tensorA }); @@ -125,9 +126,10 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) sq->eval(); } - std::weak_ptr sqWeakPtr2 = - mgr.getOrCreateManagedSequence("newSequence2"); - if (std::shared_ptr sq = sqWeakPtr2.lock()) { + { + std::shared_ptr sq = + mgr.getOrCreateManagedSequence("newSequence2"); + sq->begin(); sq->record( @@ -137,9 +139,10 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) sq->eval(); } - std::weak_ptr sqWeakPtr3 = - mgr.getOrCreateManagedSequence("newSequence3"); - if (std::shared_ptr sq = sqWeakPtr3.lock()) { + { + std::shared_ptr sq = + mgr.getOrCreateManagedSequence("newSequence3"); + sq->begin(); sq->record( @@ -149,9 +152,10 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) sq->eval(); } - std::weak_ptr sqWeakPtr4 = - mgr.getOrCreateManagedSequence("newSequence5"); - if (std::shared_ptr sq = sqWeakPtr4.lock()) { + { + std::shared_ptr sq = + mgr.getOrCreateManagedSequence("newSequence5"); + 
sq->begin(); sq->record({ tensorA }); @@ -179,9 +183,10 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) pa[index] = pa[index] + 1; })"); - std::weak_ptr sqWeakPtr = - mgr.getOrCreateManagedSequence("newSequence"); - if (std::shared_ptr sq = sqWeakPtr.lock()) { + { + std::shared_ptr sq = + mgr.getOrCreateManagedSequence("newSequence"); + sq->begin(); sq->record({ tensorA }); @@ -190,9 +195,10 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) sq->eval(); } - std::weak_ptr sqWeakPtr2 = - mgr.getOrCreateManagedSequence("newSequence2"); - if (std::shared_ptr sq = sqWeakPtr2.lock()) { + { + std::shared_ptr sq = + mgr.getOrCreateManagedSequence("newSequence2"); + sq->begin(); sq->record( @@ -205,9 +211,11 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) sq->eval(); } - std::weak_ptr sqWeakPtr3 = - mgr.getOrCreateManagedSequence("newSequence3"); - if (std::shared_ptr sq = sqWeakPtr2.lock()) { + + { + std::shared_ptr sq = + mgr.getOrCreateManagedSequence("newSequence3"); + sq->begin(); sq->record({ tensorA }); diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp index 676b9f423..42731bcfe 100644 --- a/test/TestTensor.cpp +++ b/test/TestTensor.cpp @@ -24,7 +24,7 @@ TEST(TestTensor, CopyFromHostData) kp::Manager mgr; if (std::shared_ptr sq = - mgr.getOrCreateManagedSequence("new").lock()) { + mgr.getOrCreateManagedSequence("new")) { sq->begin(); sq->record({ tensorA, tensorB }); From ac33cb450a91577624eb73f13829754199ad4d1e Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 20:26:04 +0000 Subject: [PATCH 16/39] Updated tests to align with new sequence memory management workflow --- test/TestOpAlgoLoopsPassingData.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/test/TestOpAlgoLoopsPassingData.cpp b/test/TestOpAlgoLoopsPassingData.cpp index 9c592e356..35a08e02a 100644 --- a/test/TestOpAlgoLoopsPassingData.cpp +++ b/test/TestOpAlgoLoopsPassingData.cpp @@ -30,10 +30,11 @@ 
TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) } )"); - std::weak_ptr sqWeakPtr = - mgr.getOrCreateManagedSequence("default"); - if (std::shared_ptr sq = sqWeakPtr.lock()) { + { + std::shared_ptr sq = + mgr.getOrCreateManagedSequence("default"); + sq->begin(); sq->record({ tensorA, tensorB }); @@ -43,10 +44,10 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) sq->eval(); } - std::weak_ptr sqWeakPtr2 = - mgr.getOrCreateManagedSequence("run"); + { + std::shared_ptr sq = + mgr.getOrCreateManagedSequence("run"); - if (std::shared_ptr sq = sqWeakPtr2.lock()) { sq->begin(); sq->record( @@ -61,10 +62,10 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) } } - std::weak_ptr sqWeakPtr3 = - mgr.getOrCreateManagedSequence("export"); + { + std::shared_ptr sq = + mgr.getOrCreateManagedSequence("export"); - if (std::shared_ptr sq = sqWeakPtr3.lock()) { sq->begin(); sq->record({ tensorA, tensorB }); From 81277aa60ef4c4db9408cdd7b355dd3021264e92 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 20:26:29 +0000 Subject: [PATCH 17/39] Added test to verify memory management via isInit member variable --- test/TestSequence.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp index c66dcf43a..882729dcf 100644 --- a/test/TestSequence.cpp +++ b/test/TestSequence.cpp @@ -7,10 +7,10 @@ TEST(TestSequence, CmdBufSequenceBeginEnd) { kp::Manager mgr; - std::weak_ptr sqWeakPtr = - mgr.getOrCreateManagedSequence("newSequence"); + { + std::shared_ptr sq = + mgr.getOrCreateManagedSequence("newSequence"); - if (std::shared_ptr sq = sqWeakPtr.lock()) { EXPECT_TRUE(sq->eval()); EXPECT_TRUE(!sq->isRecording()); EXPECT_TRUE(sq->begin()); @@ -24,3 +24,18 @@ TEST(TestSequence, CmdBufSequenceBeginEnd) EXPECT_TRUE(sq->eval()); } } + +TEST(TestSequence, SequenceDestructorViaManager) +{ + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + 
sq = mgr.getOrCreateManagedSequence("newSequence"); + + EXPECT_TRUE(sq->isInit()); + } + + EXPECT_FALSE(sq->isInit()); +} From a33f65a90b65d83d4d9f558ca967c0535e4017c3 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 20:26:41 +0000 Subject: [PATCH 18/39] Updated single_include header --- single_include/kompute/Kompute.hpp | 50 +++++++++++++----------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 382b7131d..c417182c2 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1301,9 +1301,9 @@ class Manager * * @param sequenceName The name for the named sequence to be retrieved or * created - * @return Weak pointer to the manager owned sequence resource + * @return Shared pointer to the manager owned sequence resource */ - std::weak_ptr getOrCreateManagedSequence( + std::shared_ptr getOrCreateManagedSequence( std::string sequenceName); /** @@ -1315,7 +1315,7 @@ class Manager * @param queueIndex The queue to use from the available queues * @return Weak pointer to the manager owned sequence resource */ - std::weak_ptr createManagedSequence(std::string sequenceName = "", + std::shared_ptr createManagedSequence(std::string sequenceName = "", uint32_t queueIndex = 0); /** @@ -1332,22 +1332,21 @@ class Manager TArgs&&... 
params) { SPDLOG_DEBUG("Kompute Manager evalOp triggered"); - std::weak_ptr sqWeakPtr = + std::shared_ptr sq = this->getOrCreateManagedSequence(sequenceName); - if (std::shared_ptr sq = sqWeakPtr.lock()) { - SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); - sq->begin(); + SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); + sq->begin(); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence RECORD"); - sq->record(tensors, std::forward(params)...); + SPDLOG_DEBUG("Kompute Manager evalOp running sequence RECORD"); + sq->record(tensors, std::forward(params)...); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence END"); - sq->end(); + SPDLOG_DEBUG("Kompute Manager evalOp running sequence END"); + sq->end(); + + SPDLOG_DEBUG("Kompute Manager evalOp running sequence EVAL"); + sq->eval(); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence EVAL"); - sq->eval(); - } SPDLOG_DEBUG("Kompute Manager evalOp running sequence SUCCESS"); } @@ -1385,26 +1384,21 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered"); - std::weak_ptr sqWeakPtr = + std::shared_ptr sq = this->getOrCreateManagedSequence(sequenceName); - if (std::shared_ptr sq = sqWeakPtr.lock()) { + SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); + sq->begin(); - SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); - sq->begin(); + SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence RECORD"); + sq->record(tensors, std::forward(params)...); - SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence RECORD"); - sq->record(tensors, std::forward(params)...); + SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence END"); + sq->end(); - SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence END"); - sq->end(); + SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence EVAL"); + sq->evalAsync(); - SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence EVAL"); - sq->evalAsync(); - } else { - SPDLOG_ERROR("Kompute 
Manager evalOpAsync sequence [{}] not found", - sequenceName); - } SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence SUCCESS"); } From 8ce3b669de2cc7c620ea96ad7cb5795012233ba6 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 20:27:02 +0000 Subject: [PATCH 19/39] Added functioning python bindings for Kompute --- python/src/main.cpp | 72 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 2 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index e3b7fb371..9573b8074 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -13,7 +13,7 @@ PYBIND11_MODULE(komputepy, m) { .value("eStorage", kp::Tensor::TensorTypes::eStorage) .export_values(); - py::class_(m, "Tensor") + py::class_>(m, "Tensor") .def(py::init( [](const std::vector& data) { return std::unique_ptr(new kp::Tensor(data)); @@ -24,7 +24,75 @@ PYBIND11_MODULE(komputepy, m) { })) .def("data", &kp::Tensor::data); - py::class_(m, "OpBase"); + py::class_>(m, "Sequence") + .def("init", &kp::Sequence::init) + .def("begin", &kp::Sequence::begin) + .def("end", &kp::Sequence::end) + .def("eval", &kp::Sequence::eval) + .def("evalAsync", &kp::Sequence::evalAsync) + .def("evalAwait", &kp::Sequence::evalAwait) + .def("isRunning", &kp::Sequence::isRunning) + .def("isRecording", &kp::Sequence::isRecording) + .def("isInit", &kp::Sequence::isInit) + .def("recordOpTensorCreate", &kp::Sequence::record) + .def("recordOpTensorCopy", &kp::Sequence::record) + .def("recordOpTensorSyncDevice", &kp::Sequence::record) + .def("recordOpTensorSyncLocal", &kp::Sequence::record) + .def("recordOpAlgoMult", &kp::Sequence::record) + .def("recordOpAlgoBaseFile", &kp::Sequence::record) + .def("recordOpAlgoBaseData", &kp::Sequence::record>) + .def("recordOpAlgoLhsRhsOut", &kp::Sequence::record); + + py::class_(m, "Manager") + .def(py::init()) + .def(py::init( + [](uint32_t physicalDeviceIndex) { + return std::unique_ptr(new kp::Manager(physicalDeviceIndex)); + })) + 
.def(py::init( + [](uint32_t physicalDeviceIndex, const std::vector& familyQueueIndices) { + return std::unique_ptr(new kp::Manager(physicalDeviceIndex, familyQueueIndices)); + })) + .def("getOrCreateManagedSequence", &kp::Manager::getOrCreateManagedSequence) + .def("createManagedSequence", &kp::Manager::createManagedSequence, + py::arg("name"), py::arg("queueIndex") = 0) + .def("buildTensor", &kp::Manager::buildTensor, + py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice) + .def("evalOpAsync", &kp::Manager::evalOpAsync) + .def("evalOpAsyncDefault", &kp::Manager::evalOpAsyncDefault) + .def("evalOpDefaultTensorCreate", &kp::Manager::evalOpDefault) + .def("evalOpDefaultTensorCopy", &kp::Manager::evalOpDefault) + .def("evalOpDefaultTensorSyncDevice", &kp::Manager::evalOpDefault) + .def("evalOpDefaultTensorSyncLocal", &kp::Manager::evalOpDefault) + .def("evalOpDefaultAlgoMult", &kp::Manager::evalOpDefault) + .def("evalOpDefaultAlgoBaseFile", &kp::Manager::evalOpDefault) + .def("evalOpDefaultAlgoBaseData", &kp::Manager::evalOpDefault>) + .def("evalOpDefaultAlgoLhsRhsOut", &kp::Manager::evalOpDefault) + .def("evalOpTensorCreate", &kp::Manager::evalOp) + .def("evalOpTensorCopy", &kp::Manager::evalOp) + .def("evalOpTensorSyncDevice", &kp::Manager::evalOp) + .def("evalOpTensorSyncLocal", &kp::Manager::evalOp) + .def("evalOpAlgoMult", &kp::Manager::evalOp) + .def("evalOpAlgoBaseFile", &kp::Manager::evalOp) + .def("evalOpAlgoBaseData", &kp::Manager::evalOp>) + .def("evalOpAlgoLhsRhsOut", &kp::Manager::evalOp) + .def("evalOpAsyncDefaultTensorCreate", &kp::Manager::evalOpAsyncDefault) + .def("evalOpAsyncDefaultTensorCopy", &kp::Manager::evalOpAsyncDefault) + .def("evalOpAsyncDefaultTensorSyncDevice", &kp::Manager::evalOpAsyncDefault) + .def("evalOpAsyncDefaultTensorSyncLocal", &kp::Manager::evalOpAsyncDefault) + .def("evalOpAsyncDefaultAlgoMult", &kp::Manager::evalOpAsyncDefault) + .def("evalOpAsyncDefaultAlgoBaseFile", &kp::Manager::evalOpAsyncDefault) + 
.def("evalOpAsyncDefaultAlgoBaseData", &kp::Manager::evalOpAsyncDefault>) + .def("evalOpAsyncDefaultAlgoLhsRhsOut", &kp::Manager::evalOpAsyncDefault) + .def("evalOpAsyncTensorCreate", &kp::Manager::evalOpAsync) + .def("evalOpAsyncTensorCopy", &kp::Manager::evalOpAsync) + .def("evalOpAsyncTensorSyncDevice", &kp::Manager::evalOpAsync) + .def("evalOpAsyncTensorSyncLocal", &kp::Manager::evalOpAsync) + .def("evalOpAsync", &kp::Manager::evalOpAsync) + .def("evalOpAsyncAlgoBaseFile", &kp::Manager::evalOpAsync) + .def("evalOpAsyncAlgoBase", &kp::Manager::evalOpAsync>) + .def("evalOpAsyncAlgoLhsRhsOut", &kp::Manager::evalOpAsync); + #ifdef VERSION_INFO m.attr("__version__") = VERSION_INFO; From 3036cbd95f448d1139f409327f73ef1b9364721f Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 20:27:17 +0000 Subject: [PATCH 20/39] Added tests for python bindings in python --- python/test/test_kompute.py | 108 ++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 python/test/test_kompute.py diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py new file mode 100644 index 000000000..058e906f2 --- /dev/null +++ b/python/test/test_kompute.py @@ -0,0 +1,108 @@ + +from komputepy import Tensor, Manager, Sequence + +def test_opmult(): + """ + Test basic OpMult operation + """ + + tensor_in_a = Tensor([2, 2, 2]) + tensor_in_b = Tensor([1, 2, 3]) + tensor_out = Tensor([0, 0, 0]) + + mgr = Manager() + + mgr.evalOpDefaultTensorCreate([tensor_in_a, tensor_in_b, tensor_out]) + + mgr.evalOpDefaultAlgoMult([tensor_in_a, tensor_in_b, tensor_out]) + + mgr.evalOpDefaultTensorSyncLocal([tensor_out]) + + assert tensor_out.data() == [2.0, 4.0, 6.0] + +def test_opalgobase_data(): + """ + Test basic OpAlgoBase operation + """ + + tensor_in_a = Tensor([2, 2, 2]) + tensor_in_b = Tensor([1, 2, 3]) + tensor_out = Tensor([0, 0, 0]) + + mgr = Manager() + + shaderData = """ + #version 450 + + layout (local_size_x = 1) in; + + // The input 
tensors bind index is relative to index in parameter passed + layout(set = 0, binding = 0) buffer bina { float tina[]; }; + layout(set = 0, binding = 1) buffer binb { float tinb[]; }; + layout(set = 0, binding = 2) buffer bout { float tout[]; }; + + void main() { + uint index = gl_GlobalInvocationID.x; + tout[index] = tina[index] * tinb[index]; + } + """ + + mgr.evalOpDefaultTensorCreate([tensor_in_a, tensor_in_b, tensor_out]) + + mgr.evalOpDefaultAlgoBaseData([tensor_in_a, tensor_in_b, tensor_out], list(shaderData)) + + mgr.evalOpDefaultTensorSyncLocal([tensor_out]) + + assert tensor_out.data() == [2.0, 4.0, 6.0] + + +def test_opalgobase_file(): + """ + Test basic OpAlgoBase operation + """ + + tensor_in_a = Tensor([2, 2, 2]) + tensor_in_b = Tensor([1, 2, 3]) + tensor_out = Tensor([0, 0, 0]) + + mgr = Manager() + + shaderFilePath = "../../shaders/glsl/opmult.comp" + + mgr.evalOpDefaultTensorCreate([tensor_in_a, tensor_in_b, tensor_out]) + + mgr.evalOpDefaultAlgoBaseFile([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) + + mgr.evalOpDefaultTensorSyncLocal([tensor_out]) + + assert tensor_out.data() == [2.0, 4.0, 6.0] + +def test_sequence(): + """ + Test basic OpAlgoBase operation + """ + + tensor_in_a = Tensor([2, 2, 2]) + tensor_in_b = Tensor([1, 2, 3]) + tensor_out = Tensor([0, 0, 0]) + + mgr = Manager() + + shaderFilePath = "../../shaders/glsl/opmult.comp" + + mgr.evalOpDefaultTensorCreate([tensor_in_a, tensor_in_b, tensor_out]) + + seq = mgr.createManagedSequence("op") + + seq.begin() + seq.recordOpAlgoBaseFile([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) + seq.end() + + seq.eval() + + mgr.evalOpDefaultTensorSyncLocal([tensor_out]) + + assert tensor_out.data() == [2.0, 4.0, 6.0] + +if __name__ == "__main__": + test_sequence() From 1f614a87e44feaff866d05639734b0be9739b847 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 20:56:03 +0000 Subject: [PATCH 21/39] Reformatted --- single_include/kompute/Kompute.hpp | 5 +- 
src/Algorithm.cpp | 24 ++++++-- src/Manager.cpp | 12 ++-- src/OpAlgoBase.cpp | 88 ++++++++++++++++------------- src/OpAlgoLhsRhsOut.cpp | 38 ++++++------- src/Sequence.cpp | 10 +++- src/Tensor.cpp | 13 +++-- src/include/kompute/Manager.hpp | 5 +- test/TestLogisticRegression.cpp | 10 ++-- test/TestMultipleAlgoExecutions.cpp | 15 +++-- test/TestOpAlgoLoopsPassingData.cpp | 1 - 11 files changed, 125 insertions(+), 96 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index c417182c2..932375cd4 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1315,8 +1315,9 @@ class Manager * @param queueIndex The queue to use from the available queues * @return Weak pointer to the manager owned sequence resource */ - std::shared_ptr createManagedSequence(std::string sequenceName = "", - uint32_t queueIndex = 0); + std::shared_ptr createManagedSequence( + std::string sequenceName = "", + uint32_t queueIndex = 0); /** * Function that evaluates operation against named sequence. 
diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 70092a3d6..eb0be22a8 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -34,7 +34,9 @@ Algorithm::~Algorithm() SPDLOG_ERROR("Kompute Algorithm Error requested to destroy " "pipeline but it is null"); } - this->mDevice->destroy(*this->mPipeline, (vk::Optional)nullptr); + this->mDevice->destroy( + *this->mPipeline, + (vk::Optional)nullptr); } if (this->mFreePipelineCache) { @@ -43,7 +45,9 @@ Algorithm::~Algorithm() SPDLOG_ERROR("Kompute Algorithm Error requested to destroy " "pipeline cache but it is null"); } - this->mDevice->destroy(*this->mPipelineCache, (vk::Optional)nullptr); + this->mDevice->destroy( + *this->mPipelineCache, + (vk::Optional)nullptr); } if (this->mFreePipelineLayout) { @@ -52,7 +56,9 @@ Algorithm::~Algorithm() SPDLOG_ERROR("Kompute Algorithm Error requested to destroy " "pipeline layout but it is null"); } - this->mDevice->destroy(*this->mPipelineLayout, (vk::Optional)nullptr); + this->mDevice->destroy( + *this->mPipelineLayout, + (vk::Optional)nullptr); } if (this->mFreeShaderModule) { @@ -61,7 +67,9 @@ Algorithm::~Algorithm() SPDLOG_ERROR("Kompute Algorithm Error requested to destroy shader " "module but it is null"); } - this->mDevice->destroy(*this->mShaderModule, (vk::Optional)nullptr); + this->mDevice->destroy( + *this->mShaderModule, + (vk::Optional)nullptr); } if (this->mFreeDescriptorSet) { @@ -80,7 +88,9 @@ Algorithm::~Algorithm() SPDLOG_ERROR("Kompute Algorithm Error requested to destroy " "descriptor set layout but it is null"); } - this->mDevice->destroy(*this->mDescriptorSetLayout, (vk::Optional)nullptr); + this->mDevice->destroy( + *this->mDescriptorSetLayout, + (vk::Optional)nullptr); } if (this->mFreeDescriptorPool) { @@ -89,7 +99,9 @@ Algorithm::~Algorithm() SPDLOG_ERROR("Kompute Algorithm Error requested to destroy " "descriptor pool but it is null"); } - this->mDevice->destroy(*this->mDescriptorPool, (vk::Optional)nullptr); + this->mDevice->destroy( + 
*this->mDescriptorPool, + (vk::Optional)nullptr); } } diff --git a/src/Manager.cpp b/src/Manager.cpp index 5c7a2d3be..b763f2eb0 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -59,8 +59,10 @@ Manager::~Manager() } if (this->mManagedSequences.size()) { - SPDLOG_DEBUG("Kompute Manager explicitly running destructor for managed sequences"); - for (const std::pair> &sqPair : this->mManagedSequences) { + SPDLOG_DEBUG("Kompute Manager explicitly running destructor for " + "managed sequences"); + for (const std::pair>& sqPair : + this->mManagedSequences) { sqPair.second->~Sequence(); } this->mManagedSequences.clear(); @@ -68,7 +70,8 @@ Manager::~Manager() if (this->mFreeDevice) { SPDLOG_INFO("Destroying device"); - this->mDevice->destroy((vk::Optional)nullptr); + this->mDevice->destroy( + (vk::Optional)nullptr); SPDLOG_DEBUG("Kompute Manager Destroyed Device"); } @@ -89,7 +92,8 @@ Manager::~Manager() #endif if (this->mFreeInstance) { - this->mInstance->destroy((vk::Optional)nullptr); + this->mInstance->destroy( + (vk::Optional)nullptr); SPDLOG_DEBUG("Kompute Manager Destroyed Instance"); } } diff --git a/src/OpAlgoBase.cpp b/src/OpAlgoBase.cpp index 99e3a9ac1..68e22de3b 100644 --- a/src/OpAlgoBase.cpp +++ b/src/OpAlgoBase.cpp @@ -10,13 +10,14 @@ OpAlgoBase::OpAlgoBase() } OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - KomputeWorkgroup komputeWorkgroup) + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector>& tensors, + KomputeWorkgroup komputeWorkgroup) : OpBase(physicalDevice, device, commandBuffer, tensors, false) { - SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size()); + SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", + tensors.size()); // The dispatch size is set up based on either explicitly provided template // parameters or by default it would take the shape and size of the tensors @@ 
-29,38 +30,42 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, komputeWorkgroup.z > 0 ? komputeWorkgroup.z : 1 }; } else { - this->mKomputeWorkgroup = {tensors[0]->size(), 1, 1}; + this->mKomputeWorkgroup = { tensors[0]->size(), 1, 1 }; } SPDLOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}", - this->mKomputeWorkgroup.x, - this->mKomputeWorkgroup.y, - this->mKomputeWorkgroup.z); + this->mKomputeWorkgroup.x, + this->mKomputeWorkgroup.y, + this->mKomputeWorkgroup.z); this->mAlgorithm = std::make_shared(device, commandBuffer); } OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - std::string shaderFilePath, - KomputeWorkgroup komputeWorkgroup) + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector>& tensors, + std::string shaderFilePath, + KomputeWorkgroup komputeWorkgroup) : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup) { - SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", shaderFilePath); + SPDLOG_DEBUG( + "Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", + shaderFilePath); this->mShaderFilePath = shaderFilePath; } OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - const std::vector& shaderDataRaw, - KomputeWorkgroup komputeWorkgroup) + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector>& tensors, + const std::vector& shaderDataRaw, + KomputeWorkgroup komputeWorkgroup) : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup) { - SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw data length: {}", shaderDataRaw.size()); + SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw " + "data length: {}", + shaderDataRaw.size()); this->mShaderDataRaw = shaderDataRaw; } @@ -78,11 +83,13 
@@ OpAlgoBase::init() if (this->mTensors.size() < 1) { throw std::runtime_error( "Kompute OpAlgoBase called with less than 1 tensor"); - } + } for (std::shared_ptr tensor : this->mTensors) { - if(!tensor->isInit()) { - throw std::runtime_error("Kompute OpAlgoBase validation failed; all tensor parameters must be initialised."); + if (!tensor->isInit()) { + throw std::runtime_error( + "Kompute OpAlgoBase validation failed; all tensor parameters " + "must be initialised."); } } @@ -110,7 +117,9 @@ OpAlgoBase::record() vk::PipelineStageFlagBits::eComputeShader); } - this->mAlgorithm->recordDispatch(this->mKomputeWorkgroup.x, this->mKomputeWorkgroup.y, this->mKomputeWorkgroup.z); + this->mAlgorithm->recordDispatch(this->mKomputeWorkgroup.x, + this->mKomputeWorkgroup.y, + this->mKomputeWorkgroup.z); } void @@ -125,17 +134,19 @@ OpAlgoBase::postEval() SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called"); } -std::vector OpAlgoBase::fetchSpirvBinaryData() +std::vector +OpAlgoBase::fetchSpirvBinaryData() { - SPDLOG_WARN( - "Kompute OpAlgoBase Running shaders directly from spirv file"); + SPDLOG_WARN("Kompute OpAlgoBase Running shaders directly from spirv file"); if (this->mShaderFilePath.size()) { std::ifstream fileStream(this->mShaderFilePath, - std::ios::binary | std::ios::in | std::ios::ate); + std::ios::binary | std::ios::in | + std::ios::ate); if (!fileStream.good()) { - throw std::runtime_error("Error reading file: " + this->mShaderFilePath); + throw std::runtime_error("Error reading file: " + + this->mShaderFilePath); } size_t shaderFileSize = fileStream.tellg(); @@ -144,19 +155,16 @@ std::vector OpAlgoBase::fetchSpirvBinaryData() fileStream.read(shaderDataRaw, shaderFileSize); fileStream.close(); - SPDLOG_WARN( - "Kompute OpAlgoBase fetched {} bytes", shaderFileSize); + SPDLOG_WARN("Kompute OpAlgoBase fetched {} bytes", shaderFileSize); - return std::vector(shaderDataRaw, - shaderDataRaw + shaderFileSize); - } - else if (this->mShaderDataRaw.size()) { + return 
std::vector(shaderDataRaw, shaderDataRaw + shaderFileSize); + } else if (this->mShaderDataRaw.size()) { return this->mShaderDataRaw; - } - else { - throw std::runtime_error("Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither filepath nor data provided"); + } else { + throw std::runtime_error( + "Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither " + "filepath nor data provided"); } } } - diff --git a/src/OpAlgoLhsRhsOut.cpp b/src/OpAlgoLhsRhsOut.cpp index 444ec63a3..ab759fed8 100644 --- a/src/OpAlgoLhsRhsOut.cpp +++ b/src/OpAlgoLhsRhsOut.cpp @@ -9,13 +9,14 @@ OpAlgoLhsRhsOut::OpAlgoLhsRhsOut() SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base"); } -OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors, - KomputeWorkgroup komputeWorkgroup) +OpAlgoLhsRhsOut::OpAlgoLhsRhsOut( + std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector> tensors, + KomputeWorkgroup komputeWorkgroup) // The inheritance is initialised with the copyOutputData to false given that - // this depencendant class handles the transfer of data via staging buffers in + // this depencendant class handles the transfer of data via staging buffers in // a granular way. 
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup) { @@ -36,18 +37,19 @@ OpAlgoLhsRhsOut::init() throw std::runtime_error( "Kompute OpAlgoLhsRhsOut called with less than 1 tensor"); } else if (this->mTensors.size() > 3) { - SPDLOG_WARN("Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors"); + SPDLOG_WARN( + "Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors"); } this->mTensorLHS = this->mTensors[0]; this->mTensorRHS = this->mTensors[1]; this->mTensorOutput = this->mTensors[2]; - if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() && this->mTensorOutput->isInit())) { throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. LHS: " + + "Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. " + "LHS: " + std::to_string(this->mTensorLHS->isInit()) + " RHS: " + std::to_string(this->mTensorRHS->isInit()) + " Output: " + std::to_string(this->mTensorOutput->isInit())); @@ -56,7 +58,8 @@ OpAlgoLhsRhsOut::init() if (!(this->mTensorLHS->size() == this->mTensorRHS->size() && this->mTensorRHS->size() == this->mTensorOutput->size())) { throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " + + "Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size " + "LHS: " + std::to_string(this->mTensorLHS->size()) + " RHS: " + std::to_string(this->mTensorRHS->size()) + " Output: " + std::to_string(this->mTensorOutput->size())); @@ -65,8 +68,7 @@ OpAlgoLhsRhsOut::init() this->mTensorOutputStaging = std::make_shared( this->mTensorOutput->data(), Tensor::TensorTypes::eStaging); - this->mTensorOutputStaging->init( - this->mPhysicalDevice, this->mDevice); + this->mTensorOutputStaging->init(this->mPhysicalDevice, this->mDevice); SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data"); @@ -96,10 +98,9 @@ OpAlgoLhsRhsOut::record() vk::PipelineStageFlagBits::eHost, vk::PipelineStageFlagBits::eComputeShader); - 
this->mAlgorithm->recordDispatch( - this->mKomputeWorkgroup.x, - this->mKomputeWorkgroup.y, - this->mKomputeWorkgroup.z); + this->mAlgorithm->recordDispatch(this->mKomputeWorkgroup.x, + this->mKomputeWorkgroup.y, + this->mKomputeWorkgroup.z); // Barrier to ensure the shader code is executed before buffer read this->mTensorOutput->recordBufferMemoryBarrier( @@ -110,9 +111,7 @@ OpAlgoLhsRhsOut::record() vk::PipelineStageFlagBits::eTransfer); this->mTensorOutputStaging->recordCopyFrom( - this->mCommandBuffer, - this->mTensorOutput, - true); + this->mCommandBuffer, this->mTensorOutput, true); } void @@ -126,4 +125,3 @@ OpAlgoLhsRhsOut::postEval() } } - diff --git a/src/Sequence.cpp b/src/Sequence.cpp index 0f6eccfd2..b27c547be 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -28,7 +28,8 @@ Sequence::~Sequence() SPDLOG_DEBUG("Kompute Sequence Destructor started"); if (!this->mIsInit) { - SPDLOG_WARN("Kompute Sequence destructor called but sequence is not initialized."); + SPDLOG_WARN("Kompute Sequence destructor called but sequence is not " + "initialized."); return; } @@ -60,7 +61,9 @@ Sequence::~Sequence() this->mIsInit = false; return; } - this->mDevice->destroy(*this->mCommandPool, (vk::Optional)nullptr); + this->mDevice->destroy( + *this->mCommandPool, + (vk::Optional)nullptr); SPDLOG_DEBUG("Kompute Sequence Destroyed CommandPool"); } @@ -196,7 +199,8 @@ Sequence::evalAwait(uint64_t waitFor) vk::Result result = this->mDevice->waitForFences(1, &this->mFence, VK_TRUE, waitFor); - this->mDevice->destroy(this->mFence, (vk::Optional)nullptr); + this->mDevice->destroy( + this->mFence, (vk::Optional)nullptr); this->mIsRunning = false; diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 299622ee4..214ac2eb0 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -12,8 +12,9 @@ Tensor::Tensor() Tensor::Tensor(const std::vector& data, TensorTypes tensorType) { #if DEBUG - SPDLOG_DEBUG( - "Kompute Tensor constructor data length: {}, and type: {}", data.size(), tensorType); 
+ SPDLOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}", + data.size(), + tensorType); #endif this->mData = data; @@ -350,7 +351,9 @@ Tensor::freeMemoryDestroyGPUResources() "Kompose Tensor expected to free buffer but got null buffer"); } else { SPDLOG_DEBUG("Kompose Tensor destroying buffer"); - this->mDevice->destroy(*this->mBuffer, (vk::Optional)nullptr); + this->mDevice->destroy( + *this->mBuffer, + (vk::Optional)nullptr); this->mBuffer = nullptr; } } @@ -361,7 +364,9 @@ Tensor::freeMemoryDestroyGPUResources() "Kompose Tensor expected to free buffer but got null memory"); } else { SPDLOG_DEBUG("Kompose Tensor freeing memory"); - this->mDevice->freeMemory(*this->mMemory, (vk::Optional)nullptr); + this->mDevice->freeMemory( + *this->mMemory, + (vk::Optional)nullptr); this->mDevice = nullptr; } } diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 98e8e82c5..8c689ba57 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -77,8 +77,9 @@ class Manager * @param queueIndex The queue to use from the available queues * @return Weak pointer to the manager owned sequence resource */ - std::shared_ptr createManagedSequence(std::string sequenceName = "", - uint32_t queueIndex = 0); + std::shared_ptr createManagedSequence( + std::string sequenceName = "", + uint32_t queueIndex = 0); /** * Function that evaluates operation against named sequence. 
diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index 91dd1f430..eda6ca635 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -31,8 +31,7 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) { kp::Manager mgr; - std::shared_ptr sqTensor = - mgr.createManagedSequence(); + std::shared_ptr sqTensor = mgr.createManagedSequence(); sqTensor->begin(); sqTensor->record(params); @@ -76,7 +75,7 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) EXPECT_LT(bIn->data()[0], 0.0); EXPECT_LT(bIn->data()[0], 0.0); - //SPDLOG_WARN("Result wIn: {}, bIn: {}, loss: {}", + // SPDLOG_WARN("Result wIn: {}, bIn: {}, loss: {}", // wIn->data(), // bIn->data(), // lOut->data()); @@ -114,8 +113,7 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) { kp::Manager mgr; - std::shared_ptr sqTensor = - mgr.createManagedSequence(); + std::shared_ptr sqTensor = mgr.createManagedSequence(); sqTensor->begin(); sqTensor->record(params); @@ -158,7 +156,7 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) EXPECT_GT(wIn->data()[1], 1.0); EXPECT_LT(bIn->data()[0], 0.0); - //SPDLOG_WARN("Result wIn: {}, bIn: {}, loss: {}", + // SPDLOG_WARN("Result wIn: {}, bIn: {}, loss: {}", // wIn->data(), // bIn->data(), // lOut->data()); diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index f45367313..11e94caa4 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -70,20 +70,20 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) // Then perform the computations sq->begin(); - sq->record( - { tensorA }, std::vector(shader.begin(), shader.end())); + sq->record({ tensorA }, + std::vector(shader.begin(), shader.end())); sq->end(); sq->eval(); sq->begin(); - sq->record( - { tensorA }, std::vector(shader.begin(), shader.end())); + sq->record({ tensorA }, + std::vector(shader.begin(), 
shader.end())); sq->end(); sq->eval(); sq->begin(); - sq->record( - { tensorA }, std::vector(shader.begin(), shader.end())); + sq->record({ tensorA }, + std::vector(shader.begin(), shader.end())); sq->end(); sq->eval(); @@ -112,7 +112,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) })"); { - std::shared_ptr sq = + std::shared_ptr sq = mgr.getOrCreateManagedSequence("newSequence"); sq->begin(); @@ -211,7 +211,6 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) sq->eval(); } - { std::shared_ptr sq = mgr.getOrCreateManagedSequence("newSequence3"); diff --git a/test/TestOpAlgoLoopsPassingData.cpp b/test/TestOpAlgoLoopsPassingData.cpp index 35a08e02a..bd7727790 100644 --- a/test/TestOpAlgoLoopsPassingData.cpp +++ b/test/TestOpAlgoLoopsPassingData.cpp @@ -30,7 +30,6 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) } )"); - { std::shared_ptr sq = mgr.getOrCreateManagedSequence("default"); From 96cd1e3c92fefedba7d2b57b51076e052bed1a04 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 21:24:20 +0000 Subject: [PATCH 22/39] Updated function names --- python/src/main.cpp | 125 +++++++++++++++++++++++++------------------- 1 file changed, 71 insertions(+), 54 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index 9573b8074..3cc3e214e 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -8,9 +8,9 @@ namespace py = pybind11; PYBIND11_MODULE(komputepy, m) { py::enum_(m, "TensorTypes") - .value("eDevice", kp::Tensor::TensorTypes::eDevice) - .value("eStaging", kp::Tensor::TensorTypes::eStaging) - .value("eStorage", kp::Tensor::TensorTypes::eStorage) + .value("device", kp::Tensor::TensorTypes::eDevice) + .value("staging", kp::Tensor::TensorTypes::eStaging) + .value("storage", kp::Tensor::TensorTypes::eStorage) .export_values(); py::class_>(m, "Tensor") @@ -22,26 +22,36 @@ PYBIND11_MODULE(komputepy, m) { [](const std::vector& data, kp::Tensor::TensorTypes tensorTypes) { return std::unique_ptr(new 
kp::Tensor(data, tensorTypes)); })) - .def("data", &kp::Tensor::data); + .def("data", &kp::Tensor::data) + .def("size", &kp::Tensor::size) + .def("tensor_type", &kp::Tensor::tensorType) + .def("is_init", &kp::Tensor::isInit) + .def("set_data", &kp::Tensor::setData) + .def("map_data_from_host", &kp::Tensor::mapDataFromHostMemory) + .def("map_data_into_host", &kp::Tensor::mapDataIntoHostMemory); py::class_>(m, "Sequence") .def("init", &kp::Sequence::init) + // record .def("begin", &kp::Sequence::begin) .def("end", &kp::Sequence::end) + // eval .def("eval", &kp::Sequence::eval) - .def("evalAsync", &kp::Sequence::evalAsync) - .def("evalAwait", &kp::Sequence::evalAwait) - .def("isRunning", &kp::Sequence::isRunning) - .def("isRecording", &kp::Sequence::isRecording) - .def("isInit", &kp::Sequence::isInit) - .def("recordOpTensorCreate", &kp::Sequence::record) - .def("recordOpTensorCopy", &kp::Sequence::record) - .def("recordOpTensorSyncDevice", &kp::Sequence::record) - .def("recordOpTensorSyncLocal", &kp::Sequence::record) - .def("recordOpAlgoMult", &kp::Sequence::record) - .def("recordOpAlgoBaseFile", &kp::Sequence::record) - .def("recordOpAlgoBaseData", &kp::Sequence::record>) - .def("recordOpAlgoLhsRhsOut", &kp::Sequence::record); + .def("eval_async", &kp::Sequence::evalAsync) + .def("eval_await", &kp::Sequence::evalAwait) + // status + .def("is_running", &kp::Sequence::isRunning) + .def("is_rec", &kp::Sequence::isRecording) + .def("is_init", &kp::Sequence::isInit) + // record + .def("record_tensor_create", &kp::Sequence::record) + .def("record_tensor_copy", &kp::Sequence::record) + .def("record_tensor_sync_device", &kp::Sequence::record) + .def("record_tensor_sync_local", &kp::Sequence::record) + .def("record_algo_mult", &kp::Sequence::record) + .def("record_algo_file", &kp::Sequence::record) + .def("record_algo_data", &kp::Sequence::record>) + .def("record_algo_lro", &kp::Sequence::record); py::class_(m, "Manager") .def(py::init()) @@ -53,45 +63,52 @@ 
PYBIND11_MODULE(komputepy, m) { [](uint32_t physicalDeviceIndex, const std::vector& familyQueueIndices) { return std::unique_ptr(new kp::Manager(physicalDeviceIndex, familyQueueIndices)); })) - .def("getOrCreateManagedSequence", &kp::Manager::getOrCreateManagedSequence) - .def("createManagedSequence", &kp::Manager::createManagedSequence, + .def("get_create_sequence", &kp::Manager::getOrCreateManagedSequence) + .def("create_sequence", &kp::Manager::createManagedSequence, py::arg("name"), py::arg("queueIndex") = 0) - .def("buildTensor", &kp::Manager::buildTensor, + .def("build_tensor", &kp::Manager::buildTensor, py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice) - .def("evalOpAsync", &kp::Manager::evalOpAsync) - .def("evalOpAsyncDefault", &kp::Manager::evalOpAsyncDefault) - .def("evalOpDefaultTensorCreate", &kp::Manager::evalOpDefault) - .def("evalOpDefaultTensorCopy", &kp::Manager::evalOpDefault) - .def("evalOpDefaultTensorSyncDevice", &kp::Manager::evalOpDefault) - .def("evalOpDefaultTensorSyncLocal", &kp::Manager::evalOpDefault) - .def("evalOpDefaultAlgoMult", &kp::Manager::evalOpDefault) - .def("evalOpDefaultAlgoBaseFile", &kp::Manager::evalOpDefault) - .def("evalOpDefaultAlgoBaseData", &kp::Manager::evalOpDefault>) - .def("evalOpDefaultAlgoLhsRhsOut", &kp::Manager::evalOpDefault) - .def("evalOpTensorCreate", &kp::Manager::evalOp) - .def("evalOpTensorCopy", &kp::Manager::evalOp) - .def("evalOpTensorSyncDevice", &kp::Manager::evalOp) - .def("evalOpTensorSyncLocal", &kp::Manager::evalOp) - .def("evalOpAlgoMult", &kp::Manager::evalOp) - .def("evalOpAlgoBaseFile", &kp::Manager::evalOp) - .def("evalOpAlgoBaseData", &kp::Manager::evalOp>) - .def("evalOpAlgoLhsRhsOut", &kp::Manager::evalOp) - .def("evalOpAsyncDefaultTensorCreate", &kp::Manager::evalOpAsyncDefault) - .def("evalOpAsyncDefaultTensorCopy", &kp::Manager::evalOpAsyncDefault) - .def("evalOpAsyncDefaultTensorSyncDevice", &kp::Manager::evalOpAsyncDefault) - 
.def("evalOpAsyncDefaultTensorSyncLocal", &kp::Manager::evalOpAsyncDefault) - .def("evalOpAsyncDefaultAlgoMult", &kp::Manager::evalOpAsyncDefault) - .def("evalOpAsyncDefaultAlgoBaseFile", &kp::Manager::evalOpAsyncDefault) - .def("evalOpAsyncDefaultAlgoBaseData", &kp::Manager::evalOpAsyncDefault>) - .def("evalOpAsyncDefaultAlgoLhsRhsOut", &kp::Manager::evalOpAsyncDefault) - .def("evalOpAsyncTensorCreate", &kp::Manager::evalOpAsync) - .def("evalOpAsyncTensorCopy", &kp::Manager::evalOpAsync) - .def("evalOpAsyncTensorSyncDevice", &kp::Manager::evalOpAsync) - .def("evalOpAsyncTensorSyncLocal", &kp::Manager::evalOpAsync) - .def("evalOpAsync", &kp::Manager::evalOpAsync) - .def("evalOpAsyncAlgoBaseFile", &kp::Manager::evalOpAsync) - .def("evalOpAsyncAlgoBase", &kp::Manager::evalOpAsync>) - .def("evalOpAsyncAlgoLhsRhsOut", &kp::Manager::evalOpAsync); + // Await functions + .def("eval_await", &kp::Manager::evalOpAwait, + py::arg("sequenceName"), py::arg("waitFor") = UINT64_MAX) + .def("eval_await_def", &kp::Manager::evalOpAwaitDefault, + py::arg("waitFor") = UINT64_MAX) + // eval default + .def("eval_tensor_create_def", &kp::Manager::evalOpDefault) + .def("eval_tensor_copy_def", &kp::Manager::evalOpDefault) + .def("eval_tensor_sync_device_def", &kp::Manager::evalOpDefault) + .def("eval_tensor_sync_local_def", &kp::Manager::evalOpDefault) + .def("eval_algo_mult_def", &kp::Manager::evalOpDefault) + .def("eval_algo_file_def", &kp::Manager::evalOpDefault) + .def("eval_algo_data_def", &kp::Manager::evalOpDefault>) + .def("eval_algo_lro_def", &kp::Manager::evalOpDefault) + // eval + .def("eval_tensor_create", &kp::Manager::evalOp) + .def("eval_tensor_copy", &kp::Manager::evalOp) + .def("eval_tensor_sync_device", &kp::Manager::evalOp) + .def("eval_tensor_sync_local", &kp::Manager::evalOp) + .def("eval_algo_mult", &kp::Manager::evalOp) + .def("eval_algo_file", &kp::Manager::evalOp) + .def("eval_algo_data", &kp::Manager::evalOp>) + .def("eval_algo_lro", &kp::Manager::evalOp) + // 
eval async default + .def("eval_async_tensor_create_def", &kp::Manager::evalOpAsyncDefault) + .def("eval_async_tensor_copy_def", &kp::Manager::evalOpAsyncDefault) + .def("eval_async_tensor_sync_device_def", &kp::Manager::evalOpAsyncDefault) + .def("eval_async_tensor_sync_local_def", &kp::Manager::evalOpAsyncDefault) + .def("eval_async_algo_mult_def", &kp::Manager::evalOpAsyncDefault) + .def("eval_async_algo_file_def", &kp::Manager::evalOpAsyncDefault) + .def("eval_async_algo_data_def", &kp::Manager::evalOpAsyncDefault>) + .def("eval_async_algo_lro_def", &kp::Manager::evalOpAsyncDefault) + // eval async + .def("eval_tensor_create", &kp::Manager::evalOpAsync) + .def("eval_tensor_copy", &kp::Manager::evalOpAsync) + .def("eval_tensor_sync_device", &kp::Manager::evalOpAsync) + .def("eval_tensor_sync_local", &kp::Manager::evalOpAsync) + .def("eval_algo_mult", &kp::Manager::evalOpAsync) + .def("eval_algo_file", &kp::Manager::evalOpAsync) + .def("eval_algo_data", &kp::Manager::evalOpAsync>) + .def("eval_algo_lro", &kp::Manager::evalOpAsync); #ifdef VERSION_INFO From 85b39baf1944599b0d386bed077a4797e63abc7e Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 21:24:28 +0000 Subject: [PATCH 23/39] Updated tests --- python/test/test_kompute.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index 058e906f2..7b85de47b 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -12,11 +12,11 @@ def test_opmult(): mgr = Manager() - mgr.evalOpDefaultTensorCreate([tensor_in_a, tensor_in_b, tensor_out]) + mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) - mgr.evalOpDefaultAlgoMult([tensor_in_a, tensor_in_b, tensor_out]) + mgr.eval_algo_mult_def([tensor_in_a, tensor_in_b, tensor_out]) - mgr.evalOpDefaultTensorSyncLocal([tensor_out]) + mgr.eval_tensor_sync_local_def([tensor_out]) assert tensor_out.data() == [2.0, 4.0, 
6.0] @@ -47,11 +47,11 @@ def test_opalgobase_data(): } """ - mgr.evalOpDefaultTensorCreate([tensor_in_a, tensor_in_b, tensor_out]) + mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) - mgr.evalOpDefaultAlgoBaseData([tensor_in_a, tensor_in_b, tensor_out], list(shaderData)) + mgr.eval_algo_data_def([tensor_in_a, tensor_in_b, tensor_out], list(shaderData)) - mgr.evalOpDefaultTensorSyncLocal([tensor_out]) + mgr.eval_tensor_sync_local_def([tensor_out]) assert tensor_out.data() == [2.0, 4.0, 6.0] @@ -69,11 +69,11 @@ def test_opalgobase_file(): shaderFilePath = "../../shaders/glsl/opmult.comp" - mgr.evalOpDefaultTensorCreate([tensor_in_a, tensor_in_b, tensor_out]) + mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) - mgr.evalOpDefaultAlgoBaseFile([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) + mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) - mgr.evalOpDefaultTensorSyncLocal([tensor_out]) + mgr.eval_tensor_sync_local_def([tensor_out]) assert tensor_out.data() == [2.0, 4.0, 6.0] @@ -82,26 +82,28 @@ def test_sequence(): Test basic OpAlgoBase operation """ + mgr = Manager(0, [2]) + tensor_in_a = Tensor([2, 2, 2]) tensor_in_b = Tensor([1, 2, 3]) tensor_out = Tensor([0, 0, 0]) - mgr = Manager() + mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + + seq = mgr.create_sequence("op") shaderFilePath = "../../shaders/glsl/opmult.comp" - - mgr.evalOpDefaultTensorCreate([tensor_in_a, tensor_in_b, tensor_out]) - - seq = mgr.createManagedSequence("op") + mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) + mgr.eval_await_def() seq.begin() - seq.recordOpAlgoBaseFile([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) + seq.record_tensor_sync_local([tensor_in_a]) + seq.record_tensor_sync_local([tensor_in_b]) + seq.record_tensor_sync_local([tensor_out]) seq.end() seq.eval() - mgr.evalOpDefaultTensorSyncLocal([tensor_out]) - assert tensor_out.data() == [2.0, 
4.0, 6.0] if __name__ == "__main__": From 02406d46ca43d90be8c8a45cb04387533abcb92d Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 21:24:39 +0000 Subject: [PATCH 24/39] Updated readme to reflect python example --- README.md | 85 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 75 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 2ff7e0f9a..f75f466bc 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@

Vulkan Kompute

-

The General Purpose Vulkan Compute Framework.

+

The General Purpose Vulkan Compute Framework for C++ and Python.

@@ -29,10 +29,10 @@ ## Principles & Features -* [Single header](#setup) library for simple import to your project -* [Documentation](https://kompute.cc) leveraging doxygen and sphinx -* [Asynchronous & parallel processing](#asynchronous-and-parallel-operations) capabilities with multi-queue command submission -* [Non-Vulkan naming conventions](#architectural-overview) to disambiguate Vulkan vs Kompute components +* [Single header](#setup) for simple import with flexible build-system configuration +* Multi-language support with C++ as the core SDK as well as [optimized Python bindings](#python-package) +* [Asynchronous & parallel processing](#asynchronous-and-parallel-operations) support through GPU family queues +* [Mobile enabled](#mobile-enabled) with examples in Android Studio across several architectures * BYOV: [Bring-your-own-Vulkan design](#motivations) to play nice with existing Vulkan applications * Explicit relationships for GPU and host [memory ownership and memory management](https://kompute.cc/overview/memory-management.html) * [Short code examples](#simple-examples) showing the core features @@ -118,7 +118,7 @@ int main() { mgr.evalOpAwaitDefault(); // 5. Create managed sequence to submit batch operations to the CPU - std::shared_ptr sq = mgr.getOrCreateManagedSequence("seq").lock(); + std::shared_ptr sq = mgr.getOrCreateManagedSequence("seq"); // 5.1.
Explicitly begin recording batch commands sq->begin(); @@ -255,13 +255,69 @@ You can also access the Date: Mon, 2 Nov 2020 21:47:05 +0000 Subject: [PATCH 25/39] Updated documentaion to include python --- docs/conf.py | 4 +++ docs/index.rst | 5 ++-- docs/overview/python-package.rst | 44 ++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 docs/overview/python-package.rst diff --git a/docs/conf.py b/docs/conf.py index 2c6eb74e0..2daab8833 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,6 +16,9 @@ # -- Project information ----------------------------------------------------- +import sys +import os +import komputepy project = 'Vulkan Kompute' copyright = '2020, The Institute for Ethical AI & Machine Learning' @@ -31,6 +34,7 @@ release = '0.4.0' # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ + "sphinx.ext.autodoc", # Creates .nojekyll config 'sphinx.ext.githubpages', # Integrates with doxygen diff --git a/docs/index.rst b/docs/index.rst index 60d01f21b..57f1a1271 100755 --- a/docs/index.rst +++ b/docs/index.rst @@ -11,13 +11,14 @@ Index :maxdepth: 2 :titlesonly: - Class Documentation and C++ Reference - Advanced Examples + Simple & Advanced Examples Asynchronous & Parallel Operations Memory Management Principles Build System Deep Dive Converting GLSL/HLSL Shaders to C++ Headers Mobile App Integration (Android) Game Engine Integration (Godot Engine) + Python Class Documentation & Reference + C++ Class Documentation & Reference Code Index diff --git a/docs/overview/python-package.rst b/docs/overview/python-package.rst new file mode 100644 index 000000000..92dc8ca38 --- /dev/null +++ b/docs/overview/python-package.rst @@ -0,0 +1,44 @@ + + +Python Class Documentation & Reference +====================================== + +This section provides a breakdown of the Python classes and what each of their functions provides.
+Below is a diagram that provides insights on the relationship between Vulkan Kompute objects and Vulkan resources, which primarily encompass ownership of CPU and/or GPU memory. + +.. image:: ../images/kompute-architecture.jpg + :width: 70% + +Manager +------- + +The Kompute Manager provides a high-level interface to simplify interaction with the underlying :class:`komputepy.Sequence` of Operations. + +.. autoclass:: komputepy.Manager + :members: + + +Sequence +-------- + +The Kompute Sequence consists of batches of Kompute Operations, which are executed on a respective GPU queue. The execution of sequences can be synchronous or asynchronous, and it can be coordinated through its respective Vulkan Fence. + +.. autoclass:: komputepy.Sequence + :members: + + +Tensor +------- + +The Kompute Tensor is the atomic unit in Kompute, and it is used primarily for handling Host and GPU Device data. + +.. autoclass:: komputepy.Tensor + :members: + + +TensorTypes +----------- + +.. automodule:: komputepy + :members: + From b23e04e1a497d3afc3e5506f5c775c613bbd40c6 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 2 Nov 2020 21:47:26 +0000 Subject: [PATCH 26/39] Added documentation to reference python --- python/src/main.cpp | 185 +++++++++++++++++++++++++++----------------- 1 file changed, 113 insertions(+), 72 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index 3cc3e214e..34a0e6bbc 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -7,109 +7,150 @@ namespace py = pybind11; PYBIND11_MODULE(komputepy, m) { - py::enum_(m, "TensorTypes") - .value("device", kp::Tensor::TensorTypes::eDevice) - .value("staging", kp::Tensor::TensorTypes::eStaging) - .value("storage", kp::Tensor::TensorTypes::eStorage) + py::enum_(m, "TensorTypes", "Enum with GPU memory types for Tensor.") + .value("device", kp::Tensor::TensorTypes::eDevice, "Tensor holding data in GPU memory.") + .value("staging", kp::Tensor::TensorTypes::eStaging, "Tensor used for transfer of
data to device.") + .value("storage", kp::Tensor::TensorTypes::eStorage, "Tensor with host visible gpu memory.") .export_values(); - py::class_>(m, "Tensor") + py::class_>(m, "Tensor", "Structured data used in GPU operations.") .def(py::init( [](const std::vector& data) { return std::unique_ptr(new kp::Tensor(data)); - })) + }), "Initialiser with only list of data components.") .def(py::init( [](const std::vector& data, kp::Tensor::TensorTypes tensorTypes) { return std::unique_ptr(new kp::Tensor(data, tensorTypes)); - })) - .def("data", &kp::Tensor::data) - .def("size", &kp::Tensor::size) - .def("tensor_type", &kp::Tensor::tensorType) - .def("is_init", &kp::Tensor::isInit) - .def("set_data", &kp::Tensor::setData) - .def("map_data_from_host", &kp::Tensor::mapDataFromHostMemory) - .def("map_data_into_host", &kp::Tensor::mapDataIntoHostMemory); + }), "Initialiser with list of data components and tensor GPU memory type.") + .def("data", &kp::Tensor::data, "Retrieves the data as a list containing the local Tensor memory data.") + .def("size", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.") + .def("tensor_type", &kp::Tensor::tensorType, "Retreves the memory type of the tensor.") + .def("is_init", &kp::Tensor::isInit, "Checks whether the tensor GPU memory has been initialised.") + .def("set_data", &kp::Tensor::setData, "Overrides the data in the local Tensor memory.") + .def("map_data_from_host", &kp::Tensor::mapDataFromHostMemory, "Maps data into GPU memory from tensor local data.") + .def("map_data_into_host", &kp::Tensor::mapDataIntoHostMemory, "Maps data from GPU memory into tensor local data."); py::class_>(m, "Sequence") - .def("init", &kp::Sequence::init) + .def("init", &kp::Sequence::init, "Initialises Vulkan resources within sequence using provided device.") // record - .def("begin", &kp::Sequence::begin) - .def("end", &kp::Sequence::end) + .def("begin", &kp::Sequence::begin, "Clears previous commands and starts 
recording commands in sequence which can be run in batch.") + .def("end", &kp::Sequence::end, "Stops listening and recording for new commands.") // eval - .def("eval", &kp::Sequence::eval) - .def("eval_async", &kp::Sequence::evalAsync) - .def("eval_await", &kp::Sequence::evalAwait) + .def("eval", &kp::Sequence::eval, "Executes the currently recorded commands synchronously by waiting on Vulkan Fence.") + .def("eval_async", &kp::Sequence::evalAsync, "Executes the currently recorded commands asynchronously.") + .def("eval_await", &kp::Sequence::evalAwait, "Waits until the execution finishes using Vulkan Fence.") // status - .def("is_running", &kp::Sequence::isRunning) - .def("is_rec", &kp::Sequence::isRecording) - .def("is_init", &kp::Sequence::isInit) + .def("is_running", &kp::Sequence::isRunning, "Checks whether the Sequence operations are currently still executing.") + .def("is_rec", &kp::Sequence::isRecording, "Checks whether the Sequence is currently in recording mode.") + .def("is_init", &kp::Sequence::isInit, "Checks if the Sequence has been initialized") // record - .def("record_tensor_create", &kp::Sequence::record) - .def("record_tensor_copy", &kp::Sequence::record) - .def("record_tensor_sync_device", &kp::Sequence::record) - .def("record_tensor_sync_local", &kp::Sequence::record) - .def("record_algo_mult", &kp::Sequence::record) - .def("record_algo_file", &kp::Sequence::record) - .def("record_algo_data", &kp::Sequence::record>) - .def("record_algo_lro", &kp::Sequence::record); + .def("record_tensor_create", &kp::Sequence::record, + "Records operation to create and initialise tensor GPU memory and buffer") + .def("record_tensor_copy", &kp::Sequence::record, + "Records operation to copy one tensor to one or many tensors") + .def("record_tensor_sync_device", &kp::Sequence::record, + "Records operation to sync tensor from local memory to GPU memory") + .def("record_tensor_sync_local", &kp::Sequence::record, + "Records operation to sync tensor(s) from GPU memory 
to local memory using staging tensors") + .def("record_algo_mult", &kp::Sequence::record, + "Records operation to run multiplication compute shader to two input tensors and an output tensor") + .def("record_algo_file", &kp::Sequence::record, + "Records an operation using a custom shader provided from a shader path") + .def("record_algo_data", &kp::Sequence::record>, + "Records an operation using a custom shader provided as raw string or spirv bytes") + .def("record_algo_lro", &kp::Sequence::record, + "Records operation to run left right out operation with custom shader"); py::class_(m, "Manager") - .def(py::init()) + .def(py::init(), "Default initializer uses device 0 and first compute compatible GPU queueFamily") .def(py::init( [](uint32_t physicalDeviceIndex) { return std::unique_ptr(new kp::Manager(physicalDeviceIndex)); - })) + }), "Manager initialiser can provide specified device index but will use first compute compatible GPU queueFamily") .def(py::init( [](uint32_t physicalDeviceIndex, const std::vector& familyQueueIndices) { return std::unique_ptr(new kp::Manager(physicalDeviceIndex, familyQueueIndices)); - })) - .def("get_create_sequence", &kp::Manager::getOrCreateManagedSequence) + }), "Manager initialiser can provide specified device and array of GPU queueFamilies to load.") + .def("get_create_sequence", &kp::Manager::getOrCreateManagedSequence, "Get a Sequence or create a new one with given name") .def("create_sequence", &kp::Manager::createManagedSequence, - py::arg("name"), py::arg("queueIndex") = 0) + py::arg("name"), py::arg("queueIndex") = 0, "Create a sequence with specific name and specified index of available queues") .def("build_tensor", &kp::Manager::buildTensor, - py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice) + py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, + "Build and initialise tensor") // Await functions .def("eval_await", &kp::Manager::evalOpAwait, - py::arg("sequenceName"), 
py::arg("waitFor") = UINT64_MAX) + py::arg("sequenceName"), py::arg("waitFor") = UINT64_MAX, + "Awaits for asynchronous operation on a named Sequence") .def("eval_await_def", &kp::Manager::evalOpAwaitDefault, - py::arg("waitFor") = UINT64_MAX) + py::arg("waitFor") = UINT64_MAX, "Awaits for asynchronous operation on the last anonymous Sequence created") // eval default - .def("eval_tensor_create_def", &kp::Manager::evalOpDefault) - .def("eval_tensor_copy_def", &kp::Manager::evalOpDefault) - .def("eval_tensor_sync_device_def", &kp::Manager::evalOpDefault) - .def("eval_tensor_sync_local_def", &kp::Manager::evalOpDefault) - .def("eval_algo_mult_def", &kp::Manager::evalOpDefault) - .def("eval_algo_file_def", &kp::Manager::evalOpDefault) - .def("eval_algo_data_def", &kp::Manager::evalOpDefault>) - .def("eval_algo_lro_def", &kp::Manager::evalOpDefault) + .def("eval_tensor_create_def", &kp::Manager::evalOpDefault, + "Evaluates operation to create and initialise tensor GPU memory and buffer with new anonymous Sequence") + .def("eval_tensor_copy_def", &kp::Manager::evalOpDefault, + "Evaluates operation to copy one tensor to one or many tensors with new anonymous Sequence") + .def("eval_tensor_sync_device_def", &kp::Manager::evalOpDefault, + "Evaluates operation to sync tensor from local memory to GPU memory with new anonymous Sequence") + .def("eval_tensor_sync_local_def", &kp::Manager::evalOpDefault, + "Evaluates operation to sync tensor(s) from GPU memory to local memory using staging tensors with new anonymous Sequence") + .def("eval_algo_mult_def", &kp::Manager::evalOpDefault, + "Evaluates operation to run multiplication compute shader to two input tensors and an output tensor with new anonymous Sequence") + .def("eval_algo_file_def", &kp::Manager::evalOpDefault, + "Evaluates an operation using a custom shader provided from a shader path with new anonymous Sequence") + .def("eval_algo_data_def", &kp::Manager::evalOpDefault>, + "Evaluates an operation using a custom 
shader provided as raw string or spirv bytes with new anonymous Sequence") + .def("eval_algo_lro_def", &kp::Manager::evalOpDefault, + "Evaluates operation to run left right out operation with custom shader with new anonymous Sequence") // eval - .def("eval_tensor_create", &kp::Manager::evalOp) - .def("eval_tensor_copy", &kp::Manager::evalOp) - .def("eval_tensor_sync_device", &kp::Manager::evalOp) - .def("eval_tensor_sync_local", &kp::Manager::evalOp) - .def("eval_algo_mult", &kp::Manager::evalOp) - .def("eval_algo_file", &kp::Manager::evalOp) - .def("eval_algo_data", &kp::Manager::evalOp>) - .def("eval_algo_lro", &kp::Manager::evalOp) + .def("eval_tensor_create", &kp::Manager::evalOp, + "Evaluates operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence") + .def("eval_tensor_copy", &kp::Manager::evalOp, + "Evaluates operation to copy one tensor to one or many tensors with explicitly named Sequence") + .def("eval_tensor_sync_device", &kp::Manager::evalOp, + "Evaluates operation to sync tensor from local memory to GPU memory with explicitly named Sequence") + .def("eval_tensor_sync_local", &kp::Manager::evalOp, + "Evaluates operation to sync tensor(s) from GPU memory to local memory using staging tensors with explicitly named Sequence") + .def("eval_algo_mult", &kp::Manager::evalOp, + "Evaluates operation to run multiplication compute shader to two input tensors and an output tensor with explicitly named Sequence") + .def("eval_algo_file", &kp::Manager::evalOp, + "Evaluates an operation using a custom shader provided from a shader path with explicitly named Sequence") + .def("eval_algo_data", &kp::Manager::evalOp>, + "Evaluates an operation using a custom shader provided as raw string or spirv bytes with explicitly named Sequence") + .def("eval_algo_lro", &kp::Manager::evalOp, + "Evaluates operation to run left right out operation with custom shader with explicitly named Sequence") // eval async default - 
.def("eval_async_tensor_create_def", &kp::Manager::evalOpAsyncDefault) - .def("eval_async_tensor_copy_def", &kp::Manager::evalOpAsyncDefault) - .def("eval_async_tensor_sync_device_def", &kp::Manager::evalOpAsyncDefault) - .def("eval_async_tensor_sync_local_def", &kp::Manager::evalOpAsyncDefault) - .def("eval_async_algo_mult_def", &kp::Manager::evalOpAsyncDefault) - .def("eval_async_algo_file_def", &kp::Manager::evalOpAsyncDefault) - .def("eval_async_algo_data_def", &kp::Manager::evalOpAsyncDefault>) - .def("eval_async_algo_lro_def", &kp::Manager::evalOpAsyncDefault) + .def("eval_async_tensor_create_def", &kp::Manager::evalOpAsyncDefault, + "Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with anonymous Sequence") + .def("eval_async_tensor_copy_def", &kp::Manager::evalOpAsyncDefault, + "Evaluates asynchronously operation to copy one tensor to one or many tensors with anonymous Sequence") + .def("eval_async_tensor_sync_device_def", &kp::Manager::evalOpAsyncDefault, + "Evaluates asynchronously operation to sync tensor from local memory to GPU memory with anonymous Sequence") + .def("eval_async_tensor_sync_local_def", &kp::Manager::evalOpAsyncDefault, + "Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory using staging tensors with anonymous Sequence") + .def("eval_async_algo_mult_def", &kp::Manager::evalOpAsyncDefault, + "Evaluates asynchronously operation to run multiplication compute shader to two input tensors and an output tensor with anonymous Sequence") + .def("eval_async_algo_file_def", &kp::Manager::evalOpAsyncDefault, + "Evaluates asynchronously an operation using a custom shader provided from a shader path with anonymous Sequence") + .def("eval_async_algo_data_def", &kp::Manager::evalOpAsyncDefault>, + "Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with anonymous Sequence") + .def("eval_async_algo_lro_def", 
&kp::Manager::evalOpAsyncDefault, + "Evaluates asynchronously operation to run left right out operation with custom shader with anonymous Sequence") // eval async - .def("eval_tensor_create", &kp::Manager::evalOpAsync) - .def("eval_tensor_copy", &kp::Manager::evalOpAsync) - .def("eval_tensor_sync_device", &kp::Manager::evalOpAsync) - .def("eval_tensor_sync_local", &kp::Manager::evalOpAsync) - .def("eval_algo_mult", &kp::Manager::evalOpAsync) - .def("eval_algo_file", &kp::Manager::evalOpAsync) - .def("eval_algo_data", &kp::Manager::evalOpAsync>) - .def("eval_algo_lro", &kp::Manager::evalOpAsync); - + .def("eval_async_tensor_create", &kp::Manager::evalOpAsync, + "Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence") + .def("eval_async_tensor_copy", &kp::Manager::evalOpAsync, + "Evaluates asynchronously operation to copy one tensor to one or many tensors with explicitly named Sequence") + .def("eval_async_tensor_sync_device", &kp::Manager::evalOpAsync, + "Evaluates asynchronously operation to sync tensor from local memory to GPU memory with explicitly named Sequence") + .def("eval_async_tensor_sync_local", &kp::Manager::evalOpAsync, + "Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory using staging tensors with explicitly named Sequence") + .def("eval_async_algo_mult", &kp::Manager::evalOpAsync, + "Evaluates asynchronously operation to run multiplication compute shader to two input tensors and an output tensor with explicitly named Sequence") + .def("eval_async_algo_file", &kp::Manager::evalOpAsync, + "Evaluates asynchronously an operation using a custom shader provided from a shader path with explicitly named Sequence") + .def("eval_async_algo_data", &kp::Manager::evalOpAsync>, + "Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with explicitly named Sequence") + .def("eval_async_algo_lro", &kp::Manager::evalOpAsync, 
+ "Evaluates asynchronously operation to run left right out operation with custom shader with explicitly named Sequence"); #ifdef VERSION_INFO m.attr("__version__") = VERSION_INFO; From 991cfdcbcc0f0de2fa51a792d541b3aba2370d90 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 2 Nov 2020 21:57:10 +0000 Subject: [PATCH 27/39] Updated to use kp instead of komputepy for module name --- docs/conf.py | 2 +- docs/overview/python-package.rst | 10 +++++----- python/src/main.cpp | 2 +- python/test/test_kompute.py | 2 +- setup.py | 6 +++--- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 2daab8833..f1255e3d8 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,7 +18,7 @@ # -- Project information ----------------------------------------------------- import sys import os -import komputepy +import kp project = 'Vulkan Kompute' copyright = '2020, The Institute for Ethical AI & Machine Learning' diff --git a/docs/overview/python-package.rst b/docs/overview/python-package.rst index 92dc8ca38..0a8eb7a23 100644 --- a/docs/overview/python-package.rst +++ b/docs/overview/python-package.rst @@ -12,9 +12,9 @@ Below is a diagram that provides insights on the relationship between Vulkan Kom Manager ------- -The Kompute Manager provides a high level interface to simplify interaction with underlying :class:`komputepy.Sequence` of Operations. +The Kompute Manager provides a high level interface to simplify interaction with underlying :class:`kp.Sequence` of Operations. -.. autoclass:: komputepy.Manager +.. autoclass:: kp.Manager :members: @@ -23,7 +23,7 @@ Sequence The Kompute Sequence consists of batches of Kompute Operations, which are executed on a respective GPU queue. The execution of sequences can be synchronous or asynchronous, and it can be coordinated through its respective Vulkan Fence. -.. autoclass:: komputepy.Sequence +.. 
autoclass:: kp.Sequence :members: @@ -32,13 +32,13 @@ Tensor The Kompute Tensor is the atomic unit in Kompute, and it is used primarily for handling Host and GPU Device data. -.. autoclass:: komputepy.Tensor +.. autoclass:: kp.Tensor :members: TensorType ------- -.. automodule:: komputepy +.. automodule:: kp :members: diff --git a/python/src/main.cpp b/python/src/main.cpp index 34a0e6bbc..0f10ea349 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -5,7 +5,7 @@ namespace py = pybind11; -PYBIND11_MODULE(komputepy, m) { +PYBIND11_MODULE(kp, m) { py::enum_(m, "TensorTypes", "Enum with GPU memory types for Tensor.") .value("device", kp::Tensor::TensorTypes::eDevice, "Tensor holding data in GPU memory.") diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index 7b85de47b..43baf77d1 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -1,5 +1,5 @@ -from komputepy import Tensor, Manager, Sequence +from kp import Tensor, Manager, Sequence def test_opmult(): """ diff --git a/setup.py b/setup.py index e09673a97..0b5db2f9c 100644 --- a/setup.py +++ b/setup.py @@ -65,12 +65,12 @@ class CMakeBuild(build_ext): subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) setup( - name='komputepy', + name='kp', version='0.0.1', author='Alejandro Saucedo', - description='Blazing fast, mobile-enabled, asynchronous, and optimized for advanced GPU processing usecases.', + description='Vulkan Kompute: Blazing fast, mobile-enabled, asynchronous, and optimized for advanced GPU processing usecases.', long_description='', - ext_modules=[CMakeExtension('komputepy')], + ext_modules=[CMakeExtension('kp')], cmdclass=dict(build_ext=CMakeBuild), zip_safe=False, ) From 214a43ad59a51c7fc61d2001e1e6ff232624ab68 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 3 Nov 2020 08:00:20 +0000 Subject: [PATCH 28/39] Updated build to rename kp --- python/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 6ef7fde4b..5f3036986 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,11 +1,11 @@ add_subdirectory(pybind11) -pybind11_add_module(komputepy src/main.cpp) +pybind11_add_module(kp src/main.cpp) include_directories( ${PROJECT_SOURCE_DIR}/single_include/) target_link_libraries( - komputepy PRIVATE + kp PRIVATE kompute::kompute) From 958bf3f3c97a57a88cc648e47e93d05fc2a2ff96 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 3 Nov 2020 08:00:38 +0000 Subject: [PATCH 29/39] Added python overview to documentation --- docs/index.rst | 3 +- docs/overview/python-package.rst | 101 +++++++++++++++++++++-------- docs/overview/python-reference.rst | 44 +++++++++++++ 3 files changed, 120 insertions(+), 28 deletions(-) create mode 100644 docs/overview/python-reference.rst diff --git a/docs/index.rst b/docs/index.rst index 57f1a1271..340b3458b 100755 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,13 +12,14 @@ Index :titlesonly: Simple & Advanced Examples + Python Package Overview Asynchronous & Parallel Operations Memory Management Principles Build System Deep Dive Converting GLSL/HLSL Shaders to C++ Headers Mobile App Integration (Android) Game Engine Integration (Godot Engine) - Python Class Documentation & Reference + Python Class Documentation & Reference C++ Class Documentation & Reference Code Index diff --git a/docs/overview/python-package.rst b/docs/overview/python-package.rst index 0a8eb7a23..74e0cba91 100644 --- a/docs/overview/python-package.rst +++ b/docs/overview/python-package.rst @@ -1,44 +1,91 @@ - -Python Class Documentation & Reference +Python Package Overview ======== -This section provides a breakdown of the Python classes and what each of their functions provide. +This section provides an overview of the Python Package from a functionality perspective. 
If you wish to see all the classes and their respective functions you can find that in the `Python Class Reference Section `_. + Below is a diagram that provides insights on the relationship between Vulkan Kompute objects and Vulkan resources, which primarily encompass ownership of either CPU and/or GPU memory. .. image:: ../images/kompute-architecture.jpg :width: 70% -Manager -------- +Python Components +^^^^^^^^^^^^^^^^^ -The Kompute Manager provides a high level interface to simplify interaction with underlying :class:`kp.Sequence` of Operations. +The Python package exposes three main classes: -.. autoclass:: kp.Manager - :members: +* :class:`kp.Manager` - Manages all high level Vulkan and Kompute resources created +* :class:`kp.Sequence` - Contains a set of recorded operations that can be reused +* :class:`kp.Tensor` - Core data component to manage GPU and host data used in operations + +One thing that you will notice is that the class :class:`kp::OpBase` and all its relevant operator subclasses are not exposed in Python. + +This is primarily because the way to interact with the operations is through the respective :class:`kp.Manager` and :class:`kp.Sequence` functions. + +More specifically, it can be done through the following functions: + +* mgr.eval_<opname> - Runs operation under an existing named sequence +* mgr.eval_<opname>_def - Runs operation under a new anonymous sequence +* mgr.eval_async_<opname> - Runs operation asynchronously under an existing named sequence +* mgr.eval_async_<opname>_def - Runs operation asynchronously under a new anonymous sequence +* seq.record_<opname> - Records operation in sequence (requires sequence to be in recording mode) + +You can see these operations being used in the `Simple Python example `_ and in the `Extended Python Example `_. + +Kompute Operation Capabilities +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Handling multiple capabilities of processing can be done by compute shaders being loaded into separate sequences. The example below shows how this can be done: + +.. 
code-block:: python + :linenos: + from kp import Manager + + # We'll assume we have the shader data available + from my_spv_shader_data import mult_shader, sum_shader + + mgr = Manager() + + t1 = mgr.build_tensor([2,2,2]) + t2 = mgr.build_tensor([1,2,3]) + t3 = mgr.build_tensor([1,2,3]) + + # Create multiple separate sequences + sq_mult = mgr.create_sequence("SQ_MULT") + sq_sum = mgr.create_sequence("SQ_SUM") + sq_sync = mgr.create_sequence("SQ_SYNC") + + # Initialize sq_mult + sq_mult.begin() + sq_mult.record_algo_data([t1, t2, t3], mult_shader) + sq_mult.end() + + sq_sum.begin() + sq_sum.record_algo_data([t3, t2, t1], sum_shader) + sq_sum.end() + + sq_sync.begin() + sq_sync.record_tensor_sync_local([t1, t3]) + sq_sync.end() + + # Run multiple iterations + for i in range(10): + sq_mult.eval() + sq_sum.eval() + + sq_sync.eval() + + print(t1.data(), t2.data(), t3.data()) -Sequence -------- +Package Installation +^^^^^^^^^^^^^^^^^^^^ -The Kompute Sequence consists of batches of Kompute Operations, which are executed on a respective GPU queue. The execution of sequences can be synchronous or asynchronous, and it can be coordinated through its respective Vulkan Fence. +The package can be installed through the top level `setup.py` by running: -.. autoclass:: kp.Sequence - :members: +.. code-block:: bash + + pip install . -Tensor -------- - -The Kompute Tensor is the atomic unit in Kompute, and it is used primarily for handling Host and GPU Device data. - -.. autoclass:: kp.Tensor - :members: - - -TensorType -------- - -.. automodule:: kp - :members: diff --git a/docs/overview/python-reference.rst b/docs/overview/python-reference.rst new file mode 100644 index 000000000..0a8eb7a23 --- /dev/null +++ b/docs/overview/python-reference.rst @@ -0,0 +1,44 @@ + + +Python Class Documentation & Reference +======== + +This section provides a breakdown of the Python classes and what each of their functions provide. 
+Below is a diagram that provides insights on the relationship between Vulkan Kompute objects and Vulkan resources, which primarily encompass ownership of either CPU and/or GPU memory. + +.. image:: ../images/kompute-architecture.jpg + :width: 70% + +Manager +------- + +The Kompute Manager provides a high level interface to simplify interaction with underlying :class:`kp.Sequence` of Operations. + +.. autoclass:: kp.Manager + :members: + + +Sequence +------- + +The Kompute Sequence consists of batches of Kompute Operations, which are executed on a respective GPU queue. The execution of sequences can be synchronous or asynchronous, and it can be coordinated through its respective Vulkan Fence. + +.. autoclass:: kp.Sequence + :members: + + +Tensor +------- + +The Kompute Tensor is the atomic unit in Kompute, and it is used primarily for handling Host and GPU Device data. + +.. autoclass:: kp.Tensor + :members: + + +TensorType +------- + +.. automodule:: kp + :members: + From e78b425f6662b50c6e6c62a099742811ef3f7396 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 3 Nov 2020 08:02:19 +0000 Subject: [PATCH 30/39] Added readme for python documentation --- README.md | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f75f466bc..9e63007d4 100644 --- a/README.md +++ b/README.md @@ -257,7 +257,7 @@ You can also access the Date: Tue, 3 Nov 2020 08:04:37 +0000 Subject: [PATCH 31/39] Updated version to 0.4.1 --- CMakeLists.txt | 2 +- README.md | 2 +- docs/conf.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6f1338b87..454876d4e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.4.1) -project(kompute VERSION 0.4.2) +project(kompute VERSION 0.4.1) set(CMAKE_CXX_STANDARD 14) diff --git a/README.md b/README.md index 9e63007d4..b745ccfbf 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ 
-![GitHub](https://img.shields.io/badge/Version-0.4.0-green.svg) +![GitHub](https://img.shields.io/badge/Version-0.4.1-green.svg) ![GitHub](https://img.shields.io/badge/C++-14—20-purple.svg) ![GitHub](https://img.shields.io/badge/Build-cmake-red.svg) ![GitHub](https://img.shields.io/badge/Python-3.5—3.8-blue.svg) diff --git a/docs/conf.py b/docs/conf.py index f1255e3d8..1771846e3 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -25,7 +25,7 @@ copyright = '2020, The Institute for Ethical AI & Machine Learning' author = 'Alejandro Saucedo' # The full version, including alpha/beta/rc tags -release = '0.4.0' +release = '0.4.1' # -- General configuration --------------------------------------------------- From b636a80d069cf334403dbd3b9d7d0e8421284dca Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 3 Nov 2020 08:42:27 +0000 Subject: [PATCH 32/39] Updated array example --- examples/array_multiplication/CMakeLists.txt | 24 +++++++++++-- examples/array_multiplication/README.md | 38 +++----------------- examples/array_multiplication/src/Main.cpp | 11 +++++- src/Manager.cpp | 0 4 files changed, 36 insertions(+), 37 deletions(-) mode change 100644 => 100755 src/Manager.cpp diff --git a/examples/array_multiplication/CMakeLists.txt b/examples/array_multiplication/CMakeLists.txt index 5aeebb450..63c58a842 100644 --- a/examples/array_multiplication/CMakeLists.txt +++ b/examples/array_multiplication/CMakeLists.txt @@ -3,6 +3,7 @@ project(kompute_array_mult VERSION 0.1.0) set(CMAKE_CXX_STANDARD 14) +option(KOMPUTE_ARR_OPT_INSTALLED_KOMPUTE "Enable if you prefer to use your installed Kompute library" 0) option(KOMPUTE_OPT_ENABLE_SPDLOG "Extra compile flags for Kompute, see docs for full list" 0) set(KOMPUTE_EXTRA_CXX_FLAGS "" CACHE STRING "Extra compile flags for Kompute, see docs for full list") @@ -10,13 +11,30 @@ set(KOMPUTE_EXTRA_CXX_FLAGS "" CACHE STRING "Extra compile flags for Kompute, se set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG=1 
${KOMPUTE_EXTRA_CXX_FLAGS}") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DRELEASE=1 ${KOMPUTE_EXTRA_CXX_FLAGS}") -find_package(kompute REQUIRED) +if(KOMPUTE_ARR_OPT_INSTALLED_KOMPUTE) + find_package(kompute REQUIRED) +else() + add_subdirectory(../../ ${CMAKE_CURRENT_BINARY_DIR}/kompute_build) +endif() + find_package(Vulkan REQUIRED) +if(KOMPUTE_OPT_ENABLE_SPDLOG) + find_package(spdlog REQUIRED) +endif() + add_executable(kompute_array_mult src/Main.cpp) target_link_libraries(kompute_array_mult kompute::kompute - Vulkan::Vulkan -) + Vulkan::Vulkan) + +include_directories( + ../../single_include/) + +if(KOMPUTE_OPT_ENABLE_SPDLOG) + target_link_libraries(kompute_array_mult + spdlog::spdlog) +endif() + diff --git a/examples/array_multiplication/README.md b/examples/array_multiplication/README.md index 9838b7217..2a1ab8ae1 100644 --- a/examples/array_multiplication/README.md +++ b/examples/array_multiplication/README.md @@ -25,35 +25,12 @@ For the other libraries, because they are optional you can just make sure you bu Alternatively you can use package managers such as vcpkg to help you install them, although to simplify things you can start without the dependencies first. -## Set Up Vulkan Kompute Dependency - -You have multiple options to set up Vulkan Kompute. The easiest is to perform a local installation. - -For this, you will want to go to the main repo and run the following cmake command, which will configure it without SPDLOG by default. - -``` -cmake \ - -Bbuild -``` - -You can pass the following optional parameters based on your desired configuration: -* If you wish to install with spdlog support you just have to pass `-DKOMPUTE_ENABLE_SPDLOG=1`. -* If you wish to perform the installation on the local folder instead of in your system you can use `-DCMAKE_INSTALL_PREFIX="build/src/CMakeFiles/Export/"` which will basically ensure that the final files are created in the local directory. 
-* If you are using a package manager such as `vcpkg` make sure you pass the `-DCMAKE_TOOLCHAIN_FILE=` parameter - -Then you can proceed to run the installation: - -* For Windows / Visual Studio you just have to build `INSTALL.vcxproj` -* For Linux you can just run the `install` target via `make -C build install` - -You also have the option to build as `Release` or `Debug` - just make sure that you build your example with the same build/debug flags as required. - ## Building the example -Now that you've set up the dependencies / installation of Vulkan Kompute you can build this example. - You will notice that it's a standalone project, so you can re-use it for your application. +This project has the option to either import the Kompute dependency relative to the project or use your existing installation of Kompute. + To build you just need to run the cmake command in this folder as follows: ``` @@ -61,14 +38,9 @@ cmake \ -Bbuild ``` -Make sure to pass the required flags depending on the configuration above: -* If you built with Debug make sure you build your example with Debug as well -* If you installed in the local folder, make sure you pass the CMAKE_PREFIX_PATH pointing to the respective folder (e.g. `-DCMAKE_PREFIX_PATH=../../build/src/CMakeFiles/Export/lib/cmake/kompute/` if parent folder is main repo). -* If you built Vulkan Kompute with spdlog enabled, make sure to pass `-DKOMPUTE_OPT_ENABLE_SPDLOG=1` +You can pass the following optional parameters based on your desired configuration: +* If you wish to install with spdlog support you just have to pass `-DKOMPUTE_OPT_ENABLE_SPDLOG=1`. 
* If you are using a package manager such as `vcpkg` make sure you pass the `-DCMAKE_TOOLCHAIN_FILE=` parameter +* If you wish to load shader from raw glsl string instead of spirv bytes you can use `-DKOMPUTE_ANDROID_SHADER_FROM_STRING` -Now you just have to build your application as above: - -* For Windows / Visual Studio you just have to build and run `kompute_array_mult.vcxproj` -* For Linux you can just run the `kompute_array_mult` target via `make -C build kompute_array_mult` diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp index f3587cae8..14b58cba9 100755 --- a/examples/array_multiplication/src/Main.cpp +++ b/examples/array_multiplication/src/Main.cpp @@ -18,6 +18,7 @@ int main() auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 }); auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 }); +#ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING std::string shader(R"( // The version to use #version 450 @@ -37,9 +38,17 @@ int main() } )"); - mgr.evalOpDefault>( + mgr.evalOpDefault( { tensorInA, tensorInB, tensorOut }, std::vector(shader.begin(), shader.end())); +#else + mgr.evalOpDefault( + { tensorInA, tensorInB, tensorOut }, + std::vector( + kp::shader_data::shaders_glsl_opmult_comp_spv, + kp::shader_data::shaders_glsl_opmult_comp_spv + + kp::shader_data::shaders_glsl_opmult_comp_spv_len)); +#endif mgr.evalOpDefault({tensorOut}); diff --git a/src/Manager.cpp b/src/Manager.cpp old mode 100644 new mode 100755 From 5fbb4ce6f6c00fc72dfd2f91b016a72e2a374516 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 3 Nov 2020 08:59:32 +0000 Subject: [PATCH 33/39] Added function in sequence freeMemoryDestroyGPUResources to de-init --- single_include/kompute/Kompute.hpp | 5 ++ src/Manager.cpp | 2 +- src/Sequence.cpp | 88 +++++++++++++++++------------- src/include/kompute/Sequence.hpp | 5 ++ 4 files changed, 62 insertions(+), 38 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 
932375cd4..c1dfd8762 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1100,6 +1100,11 @@ class Sequence */ bool isInit(); + /** + * Destroys and frees the GPU resources which include the buffer and memory. + */ + void freeMemoryDestroyGPUResources(); + /** * Record function for operation to be added to the GPU queue in batch. This * template requires classes to be derived from the OpBase class. This diff --git a/src/Manager.cpp b/src/Manager.cpp index b763f2eb0..df9d64db6 100755 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -63,7 +63,7 @@ Manager::~Manager() "managed sequences"); for (const std::pair>& sqPair : this->mManagedSequences) { - sqPair.second->~Sequence(); + sqPair.second->freeMemoryDestroyGPUResources(); } this->mManagedSequences.clear(); } diff --git a/src/Sequence.cpp b/src/Sequence.cpp index b27c547be..4f01891c4 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -28,46 +28,13 @@ Sequence::~Sequence() SPDLOG_DEBUG("Kompute Sequence Destructor started"); if (!this->mIsInit) { - SPDLOG_WARN("Kompute Sequence destructor called but sequence is not " - "initialized."); + SPDLOG_INFO("Kompute Sequence destructor called but sequence is not " + "initialized so no need to removing GPU resources."); return; } - - if (!this->mDevice) { - SPDLOG_ERROR( - "Kompute Sequence destructor reached with null Device pointer"); - this->mIsInit = false; - return; + else { + this->freeMemoryDestroyGPUResources(); } - - if (this->mFreeCommandBuffer) { - SPDLOG_INFO("Freeing CommandBuffer"); - if (!this->mCommandBuffer) { - SPDLOG_ERROR("Kompute Sequence destructor reached with null " - "CommandPool pointer"); - this->mIsInit = false; - return; - } - this->mDevice->freeCommandBuffers( - *this->mCommandPool, 1, this->mCommandBuffer.get()); - SPDLOG_DEBUG("Kompute Sequence Freed CommandBuffer"); - } - - if (this->mFreeCommandPool) { - SPDLOG_INFO("Destroying CommandPool"); - if (this->mCommandPool == nullptr) { - 
SPDLOG_ERROR("Kompute Sequence destructor reached with null " - "CommandPool pointer"); - this->mIsInit = false; - return; - } - this->mDevice->destroy( - *this->mCommandPool, - (vk::Optional)nullptr); - SPDLOG_DEBUG("Kompute Sequence Destroyed CommandPool"); - } - - this->mIsInit = false; } void @@ -234,6 +201,53 @@ Sequence::isInit() return this->mIsInit; } +void +Sequence::freeMemoryDestroyGPUResources() +{ + if (!this->mIsInit) { + SPDLOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called " + "but Sequence is not initialized so there's no relevant GPU resources."); + return; + } + + if (!this->mDevice) { + SPDLOG_ERROR( + "Kompute Sequence freeMemoryDestroyGPUResources called with null Device pointer"); + this->mIsInit = false; + return; + } + + if (this->mFreeCommandBuffer) { + SPDLOG_INFO("Freeing CommandBuffer"); + if (!this->mCommandBuffer) { + SPDLOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called with null " + "CommandPool pointer"); + this->mIsInit = false; + return; + } + this->mDevice->freeCommandBuffers( + *this->mCommandPool, 1, this->mCommandBuffer.get()); + SPDLOG_DEBUG("Kompute Sequence Freed CommandBuffer"); + } + + if (this->mFreeCommandPool) { + SPDLOG_INFO("Destroying CommandPool"); + if (this->mCommandPool == nullptr) { + SPDLOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called with null " + "CommandPool pointer"); + this->mIsInit = false; + return; + } + this->mDevice->destroy( + *this->mCommandPool, + (vk::Optional)nullptr); + SPDLOG_DEBUG("Kompute Sequence Destroyed CommandPool"); + } + + this->mIsInit = false; + +} + void Sequence::createCommandPool() { diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index 314de6657..09247fe3f 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -106,6 +106,11 @@ class Sequence */ bool isInit(); + /** + * Destroys and frees the GPU resources which include the buffer and memory. 
+ */ + void freeMemoryDestroyGPUResources(); + /** * Record function for operation to be added to the GPU queue in batch. This * template requires classes to be derived from the OpBase class. This From 157b6592dd82a55d6cd728bf5eb2aef3b5c179bf Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 3 Nov 2020 09:00:03 +0000 Subject: [PATCH 34/39] Updated docstring --- src/include/kompute/Sequence.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index 09247fe3f..5d483c27a 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -107,7 +107,8 @@ class Sequence bool isInit(); /** - * Destroys and frees the GPU resources which include the buffer and memory. + * Destroys and frees the GPU resources which include the buffer and memory + * and sets the sequence as init=False. */ void freeMemoryDestroyGPUResources(); From 5822850ef2085330e89ce1bde8f6126fe9cfa6d1 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 3 Nov 2020 18:04:03 +0000 Subject: [PATCH 35/39] Updated logistic regression model --- examples/logistic_regression/CMakeLists.txt | 19 +++--- examples/logistic_regression/README.md | 65 +++++---------------- examples/logistic_regression/src/Main.cpp | 14 ++++- 3 files changed, 37 insertions(+), 61 deletions(-) diff --git a/examples/logistic_regression/CMakeLists.txt b/examples/logistic_regression/CMakeLists.txt index b12e8227f..f918bbf21 100644 --- a/examples/logistic_regression/CMakeLists.txt +++ b/examples/logistic_regression/CMakeLists.txt @@ -3,6 +3,7 @@ project(kompute_linear_reg VERSION 0.1.0) set(CMAKE_CXX_STANDARD 14) +option(KOMPUTE_ARR_OPT_INSTALLED_KOMPUTE "Enable if you prefer to use your installed Kompute library" 0) option(KOMPUTE_OPT_ENABLE_SPDLOG "Extra compile flags for Kompute, see docs for full list" 0) set(KOMPUTE_EXTRA_CXX_FLAGS "" CACHE STRING "Extra compile flags for Kompute, see docs for full list") @@ -14,12 
+15,16 @@ endif() set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG=1 ${KOMPUTE_EXTRA_CXX_FLAGS}") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DRELEASE=1 ${KOMPUTE_EXTRA_CXX_FLAGS}") -find_package(kompute REQUIRED) +if(KOMPUTE_ARR_OPT_INSTALLED_KOMPUTE) + find_package(kompute REQUIRED) +else() + add_subdirectory(../../ ${CMAKE_CURRENT_BINARY_DIR}/kompute_build) +endif() + find_package(Vulkan REQUIRED) if(KOMPUTE_OPT_ENABLE_SPDLOG) find_package(spdlog REQUIRED) - find_package(fmt REQUIRED) endif() add_executable(kompute_linear_reg @@ -30,11 +35,11 @@ target_link_libraries(kompute_linear_reg Vulkan::Vulkan ) +include_directories( + ../../single_include/) + if(KOMPUTE_OPT_ENABLE_SPDLOG) - target_link_libraries(kompute_linear_reg - kompute::kompute - fmt::fmt - spdlog::spdlog - ) + target_link_libraries(kompute_linear_reg + spdlog::spdlog) endif() diff --git a/examples/logistic_regression/README.md b/examples/logistic_regression/README.md index 29aa89c01..0de7ee30a 100644 --- a/examples/logistic_regression/README.md +++ b/examples/logistic_regression/README.md @@ -6,54 +6,12 @@ This example is structured such that you will be able to extend it for your proj It contains a cmake build configuration that can be used in your production applications. -## Pre-requisites - -In order to run this example, you will need the following dependencies: - -* REQUIRED - + Vulkan Kompute library must be accessible - + The Vulkan SDK must be installed -* OPTIONAL - + SPDLOG - for logging - + FMT - for text formatting - -We will cover how you can install Vulkan Kompute in the next section. - -For the Vulkan SDK, the simplest way to install it is through [their website](https://vulkan.lunarg.com/sdk/home). You just have to follow the instructions for the relevant platform. - -For the other libraries, because they are optional you can just make sure you build and install Kompute with these disabled (this will be covered in more detail below). 
- -Alternatively you can use package managers such as vcpkg to help you install them, although to simplify things you can start without the dependencies first. - -## Set Up Vulkan Kompute Dependency - -You have multiple options to set up Vulkan Kompute. The easiest is to perform a local installation. - -For this, you will want to go to the main repo and run the following cmake command, which will configure it without SPDLOG by default. - -``` -cmake \ - -Bbuild -``` - -You can pass the following optional parameters based on your desired configuration: -* If you wish to install with spdlog support you just have to pass `-DKOMPUTE_ENABLE_SPDLOG=1`. -* If you wish to perform the installation on the local folder instead of in your system you can use `-DCMAKE_INSTALL_PREFIX="build/src/CMakeFiles/Export/"` which will basically ensure that the final files are created in the local directory. -* If you are using a package manager such as `vcpkg` make sure you pass the `-DCMAKE_TOOLCHAIN_FILE=` parameter - -Then you can proceed to run the installation: - -* For Windows / Visual Studio you just have to build `INSTALL.vcxproj` -* For Linux you can just run the `install` target via `make -C build install` - -You also have the option to build as `Release` or `Debug` - just make sure that you build your example with the same build/debug flags as required. - ## Building the example -Now that you've set up the dependencies / installation of Vulkan Kompute you can build this example. - You will notice that it's a standalone project, so you can re-use it for your application. +This project has the option to either import the Kompute dependency relative to the project or use your existing installation of Kompute. 
+ To build you just need to run the cmake command in this folder as follows: ``` @@ -61,14 +19,19 @@ cmake \ -Bbuild ``` -Make sure to pass the required flags depending on the configuration above: -* If you built with Debug make sure you build your example with Debug as well -* If you installed in the local folder, make sure you pass the CMAKE_PREFIX_PATH pointing to the respective folder (e.g. `-DCMAKE_PREFIX_PATH=../../build/src/CMakeFiles/Export/lib/cmake/kompute/` if parent folder is main repo). -* If you built Vulkan Kompute with spdlog enabled, make sure to pass `-DKOMPUTE_OPT_ENABLE_SPDLOG=1` +You can pass the following optional parameters based on your desired configuration: +* If you wish to install with spdlog support you just have to pass `-DKOMPUTE_OPT_ENABLE_SPDLOG=1`. * If you are using a package manager such as `vcpkg` make sure you pass the `-DCMAKE_TOOLCHAIN_FILE=` parameter +* If you wish to load shader from raw glsl string instead of spirv bytes you can use `-DKOMPUTE_ANDROID_SHADER_FROM_STRING` -Now you just have to build your application as above: +## Pre-requisites -* For Windows / Visual Studio you just have to build and run `kompute_linear_reg.vcxproj` -* For Linux you can just run the `kompute_linear_reg` target via `make -C build kompute_linear_reg` +In order to run this example, you will need the following dependencies: + +* REQUIRED + + The Vulkan SDK must be installed +* OPTIONAL + + Vulkan Kompute library must be accessible (by default it uses the source directory) + + SPDLOG - for logging + + FMT - for text formatting diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp index 853fa9d67..d3b8b3557 100755 --- a/examples/logistic_regression/src/Main.cpp +++ b/examples/logistic_regression/src/Main.cpp @@ -36,22 +36,30 @@ int main() kp::Manager mgr; std::shared_ptr sqTensor = - mgr.createManagedSequence().lock(); + mgr.createManagedSequence(); sqTensor->begin(); sqTensor->record(params); 
sqTensor->end(); sqTensor->eval(); - std::shared_ptr sq = mgr.createManagedSequence().lock(); + std::shared_ptr sq = mgr.createManagedSequence(); // Record op algo base sq->begin(); sq->record({ wIn, bIn }); - sq->record>( +#ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING + sq->record( params, "shaders/glsl/logistic_regression.comp"); +#else + sq->record( + params, std::vector( + kp::shader_data::shaders_glsl_logisticregression_comp_spv, + kp::shader_data::shaders_glsl_logisticregression_comp_spv + + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); +#endif sq->record({ wOutI, wOutJ, bOut, lOut }); From 8285f2f878222893bdd4d07ad86d2079685ffd99 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 3 Nov 2020 18:04:29 +0000 Subject: [PATCH 36/39] Updated logistic regression model --- .../kompute_summator/KomputeSummatorNode.h | 2 +- .../gdnative_shared/src/KomputeSummator.cpp | 22 +++++-------------- .../gdnative_shared/src/KomputeSummator.hpp | 2 +- .../kompute_model_ml/KomputeModelMLNode.cpp | 8 +++---- .../gdnative_shared/src/KomputeModelML.cpp | 8 +++---- 5 files changed, 16 insertions(+), 26 deletions(-) diff --git a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.h b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.h index 5bc201a90..1d94da9a5 100644 --- a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.h +++ b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.h @@ -24,7 +24,7 @@ protected: private: kp::Manager mManager; - std::weak_ptr mSequence; + std::shared_ptr mSequence; std::shared_ptr mPrimaryTensor; std::shared_ptr mSecondaryTensor; }; diff --git a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp index f64e0d088..788486e82 100644 --- a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp +++ 
b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp @@ -16,12 +16,7 @@ void KomputeSummator::add(float value) { // Set the new data in the local device this->mSecondaryTensor->setData({value}); // Execute recorded sequence - if (std::shared_ptr sq = this->mSequence.lock()) { - sq->eval(); - } - else { - throw std::runtime_error("Sequence pointer no longer available"); - } + this->mSequence->eval(); } void KomputeSummator::reset() { @@ -38,9 +33,7 @@ void KomputeSummator::_init() { this->mSequence = this->mManager.getOrCreateManagedSequence("AdditionSeq"); // We now record the steps in the sequence - if (std::shared_ptr sq = this->mSequence.lock()) { - std::string shader(R"( #version 450 @@ -55,26 +48,23 @@ void KomputeSummator::_init() { } )"); - sq->begin(); + this->mSequence->begin(); // First we ensure secondary tensor loads to GPU // No need to sync the primary tensor as it should not be changed - sq->record( + this->mSequence->record( { this->mSecondaryTensor }); // Then we run the operation with both tensors - sq->record>( + this->mSequence->record( { this->mPrimaryTensor, this->mSecondaryTensor }, std::vector(shader.begin(), shader.end())); // We map the result back to local - sq->record( + this->mSequence->record( { this->mPrimaryTensor }); - sq->end(); - } - else { - throw std::runtime_error("Sequence pointer no longer available"); + this->mSequence->end(); } } diff --git a/examples/godot_examples/gdnative_shared/src/KomputeSummator.hpp b/examples/godot_examples/gdnative_shared/src/KomputeSummator.hpp index 9131e7f57..7f6b42e82 100644 --- a/examples/godot_examples/gdnative_shared/src/KomputeSummator.hpp +++ b/examples/godot_examples/gdnative_shared/src/KomputeSummator.hpp @@ -26,7 +26,7 @@ public: private: kp::Manager mManager; - std::weak_ptr mSequence; + std::shared_ptr mSequence; std::shared_ptr mPrimaryTensor; std::shared_ptr mSecondaryTensor; }; diff --git 
a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp index fe0a911a5..f583d910f 100644 --- a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp +++ b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp @@ -51,14 +51,14 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) { kp::Manager mgr; std::shared_ptr sqTensor = - mgr.createManagedSequence().lock(); + mgr.createManagedSequence(); sqTensor->begin(); sqTensor->record(params); sqTensor->end(); sqTensor->eval(); - std::shared_ptr sq = mgr.createManagedSequence().lock(); + std::shared_ptr sq = mgr.createManagedSequence(); // Record op algo base sq->begin(); @@ -67,11 +67,11 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) { #ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING // Newer versions of Android are able to use shaderc to read raw string - sq->record>( + sq->record( params, std::vector(LR_SHADER.begin(), LR_SHADER.end())); #else // Older versions of Android require the SPIRV binary directly - sq->record>( + sq->record( params, std::vector( kp::shader_data::shaders_glsl_logisticregression_comp_spv, kp::shader_data::shaders_glsl_logisticregression_comp_spv diff --git a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp index 174398501..4135e83ed 100644 --- a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp +++ b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp @@ -56,14 +56,14 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) { { std::shared_ptr sqTensor = - mgr.createManagedSequence().lock(); + mgr.createManagedSequence(); sqTensor->begin(); sqTensor->record(params); sqTensor->end(); sqTensor->eval(); - 
std::shared_ptr sq = mgr.createManagedSequence().lock(); + std::shared_ptr sq = mgr.createManagedSequence(); // Record op algo base sq->begin(); @@ -72,11 +72,11 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) { #ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING // Newer versions of Android are able to use shaderc to read raw string - sq->record>( + sq->record( params, std::vector(LR_SHADER.begin(), LR_SHADER.end())); #else // Older versions of Android require the SPIRV binary directly - sq->record>( + sq->record( params, std::vector( kp::shader_data::shaders_glsl_logisticregression_comp_spv, kp::shader_data::shaders_glsl_logisticregression_comp_spv From 88df1b312da492330f3a3d752aeb7e1bf5b79c53 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 3 Nov 2020 18:04:39 +0000 Subject: [PATCH 37/39] Updated logistic regression model --- .../android-simple/app/src/main/cpp/KomputeModelML.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp index a7a18c849..e22f2aa00 100755 --- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp +++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp @@ -44,14 +44,14 @@ void KomputeModelML::train(std::vector yData, std::vector xIData, { std::shared_ptr sqTensor = - mgr.createManagedSequence().lock(); + mgr.createManagedSequence(); sqTensor->begin(); sqTensor->record(params); sqTensor->end(); sqTensor->eval(); - std::shared_ptr sq = mgr.createManagedSequence().lock(); + std::shared_ptr sq = mgr.createManagedSequence(); // Record op algo base sq->begin(); @@ -60,11 +60,11 @@ void KomputeModelML::train(std::vector yData, std::vector xIData, #ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING // Newer versions of Android are able to use shaderc to read raw string - sq->record>( + sq->record( params, std::vector(LR_SHADER.begin(), 
LR_SHADER.end())); #else // Older versions of Android require the SPIRV binary directly - sq->record>( + sq->record( params, std::vector( kp::shader_data::shaders_glsl_logisticregression_comp_spv, kp::shader_data::shaders_glsl_logisticregression_comp_spv From 53e1a3aa54c467a4be4b93e125fe5bd54b943dd9 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 3 Nov 2020 18:04:54 +0000 Subject: [PATCH 38/39] Updated array example --- examples/array_multiplication/CMakeLists.txt | 4 ++ examples/array_multiplication/README.md | 39 ++++++++++---------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/examples/array_multiplication/CMakeLists.txt b/examples/array_multiplication/CMakeLists.txt index 63c58a842..0b648382e 100644 --- a/examples/array_multiplication/CMakeLists.txt +++ b/examples/array_multiplication/CMakeLists.txt @@ -7,6 +7,10 @@ option(KOMPUTE_ARR_OPT_INSTALLED_KOMPUTE "Enable if you prefer to use your insta option(KOMPUTE_OPT_ENABLE_SPDLOG "Extra compile flags for Kompute, see docs for full list" 0) set(KOMPUTE_EXTRA_CXX_FLAGS "" CACHE STRING "Extra compile flags for Kompute, see docs for full list") +if(KOMPUTE_OPT_ENABLE_SPDLOG) + set(KOMPUTE_EXTRA_CXX_FLAGS "${KOMPUTE_EXTRA_CXX_FLAGS} -DKOMPUTE_ENABLE_SPDLOG=1") +endif() + # It is necessary to pass the DEBUG or RELEASE flag accordingly to Kompute set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG=1 ${KOMPUTE_EXTRA_CXX_FLAGS}") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DRELEASE=1 ${KOMPUTE_EXTRA_CXX_FLAGS}") diff --git a/examples/array_multiplication/README.md b/examples/array_multiplication/README.md index 2a1ab8ae1..931c7d639 100644 --- a/examples/array_multiplication/README.md +++ b/examples/array_multiplication/README.md @@ -6,25 +6,6 @@ This example is structured such that you will be able to extend it for your proj It contains a cmake build configuration that can be used in your production applications. 
-## Pre-requisites - -In order to run this example, you will need the following dependencies: - -* REQUIRED - + Vulkan Kompute library must be accessible - + The Vulkan SDK must be installed -* OPTIONAL - + SPDLOG - for logging - + FMT - for text formatting - -We will cover how you can install Vulkan Kompute in the next section. - -For the Vulkan SDK, the simplest way to install it is through [their website](https://vulkan.lunarg.com/sdk/home). You just have to follow the instructions for the relevant platform. - -For the other libraries, because they are optional you can just make sure you build and install Kompute with these disabled (this will be covered in more detail below). - -Alternatively you can use package managers such as vcpkg to help you install them, although to simplify things you can start without the dependencies first. - ## Building the example You will notice that it's a standalone project, so you can re-use it for your application. @@ -43,4 +24,24 @@ You can pass the following optional parameters based on your desired configurati * If you are using a package manager such as `vcpkg` make sure you pass the `-DCMAKE_TOOLCHAIN_FILE=` parameter * If you wish to load shader from raw glsl string instead of spirv bytes you can use `-DKOMPUTE_ANDROID_SHADER_FROM_STRING` +## Pre-requisites + +In order to run this example, you will need the following dependencies: + +* REQUIRED + + The Vulkan SDK must be installed +* OPTIONAL + + Vulkan Kompute library must be accessible (by default it uses the source directory) + + SPDLOG - for logging + + FMT - for text formatting + +We will cover how you can install Vulkan Kompute in the next section. + +For the Vulkan SDK, the simplest way to install it is through [their website](https://vulkan.lunarg.com/sdk/home). You just have to follow the instructions for the relevant platform. 
+ +For the other libraries, because they are optional you can just make sure you build and install Kompute with these disabled (this will be covered in more detail below). + +Alternatively you can use package managers such as vcpkg to help you install them, although to simplify things you can start without the dependencies first. + + From 3811ef2dba4c8453b893b8570867f7b94779c8ff Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 3 Nov 2020 18:05:37 +0000 Subject: [PATCH 39/39] Updated docstrings --- single_include/kompute/Kompute.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index c1dfd8762..3ae98b483 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1101,7 +1101,8 @@ class Sequence bool isInit(); /** - * Destroys and frees the GPU resources which include the buffer and memory. + * Destroys and frees the GPU resources which include the buffer and memory + * and sets the sequence as init=False. */ void freeMemoryDestroyGPUResources();