sync changes from nomic-ai/llama.cpp

This commit is contained in:
Jared Van Bortel 2023-12-13 17:51:59 -05:00
parent 7ac0862445
commit 2d0a8abc64
20 changed files with 478 additions and 372 deletions

View file

@ -80,18 +80,10 @@ endmacro()
message(STATUS "General purpose GPU compute framework built on Vulkan")
message(STATUS "=======================================================")
# Enable or disable targets
kompute_option(KOMPUTE_OPT_BUILD_TESTS "Enable if you want to build tests." OFF)
kompute_option(KOMPUTE_OPT_CODE_COVERAGE "Enable if you want code coverage." OFF)
kompute_option(KOMPUTE_OPT_BUILD_DOCS "Enable if you want to build documentation." OFF)
kompute_option(KOMPUTE_OPT_INSTALL "Enable if you want to enable installation." OFF)
# Build options
kompute_option(KOMPUTE_OPT_BUILD_PYTHON "Enable if you want to build python bindings." OFF)
kompute_log_level(KOMPUTE_OPT_LOG_LEVEL "Internally we use Spdlog or fmt for logging, depending on the value of 'KOMPUTE_OPT_USE_SPDLOG'. The log level used can be changed here. Possible values: 'Trace', 'Debug', 'Info', 'Warn', 'Error', 'Critical', 'Off', 'Default'. If set to 'Off' logging will be deactivated completely. If set to 'Default', the log level will be set to 'Info' for release builds and 'Debug' else." "Default")
kompute_log_level(KOMPUTE_OPT_LOG_LEVEL "Internally we use Spdlog or fmt for logging, depending on the value of 'KOMPUTE_OPT_USE_SPDLOG'. The log level used can be changed here. Possible values: 'Trace', 'Debug', 'Info', 'Warn', 'Error', 'Critical', 'Off', 'Default'. If set to 'Off' logging will be deactivated completely. If set to 'Default', the log level will be set to 'Info' for release builds and 'Debug' else." "Off")
kompute_option(KOMPUTE_OPT_USE_SPDLOG "If enabled, logging via KP_LOG_<DEBUG, INFO, etc...> will happen through Spdlog instead of plan fmt." OFF)
kompute_option(KOMPUTE_OPT_ANDROID_BUILD "Enable android compilation flags required." OFF)
kompute_option(KOMPUTE_OPT_DISABLE_VK_DEBUG_LAYERS "Explicitly disable debug layers even on debug." OFF)
kompute_option(KOMPUTE_OPT_DISABLE_VK_DEBUG_LAYERS "Explicitly disable debug layers even on debug." ON)
kompute_option(KOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK "Whether to check if your driver supports the Vulkan Header version you are linking against. This might be useful in case you build shared on a different system than you run later." OFF)
kompute_option(KOMPUTE_OPT_BUILD_SHADERS "Rebuilds all compute shaders during compilation and does not use the already precompiled versions. Requires glslangValidator to be installed on your system." OFF)
@ -99,8 +91,6 @@ kompute_option(KOMPUTE_OPT_BUILD_SHADERS "Rebuilds all compute shaders during co
kompute_option(KOMPUTE_OPT_USE_BUILT_IN_SPDLOG "Use the built-in version of Spdlog. Requires 'KOMPUTE_OPT_USE_SPDLOG' to be set to ON in order to have any effect." ON)
kompute_option(KOMPUTE_OPT_SPDLOG_ASYNC_MODE "If spdlog is enabled this allows for selecting whether the default logger setup creates sync or async logger" OFF)
kompute_option(KOMPUTE_OPT_USE_BUILT_IN_FMT "Use the built-in version of fmt." ON)
kompute_option(KOMPUTE_OPT_USE_BUILT_IN_GOOGLE_TEST "Use the built-in version of GoogleTest." ON)
kompute_option(KOMPUTE_OPT_USE_BUILT_IN_PYBIND11 "Use the built-in version of pybind11." ON)
kompute_option(KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER "Use the built-in version of Vulkan Headers. This could be helpful in case your system Vulkan Headers are too new for your driver. If you set this to OFF, please make sure your system Vulkan Headers are supported by your driver." ON)
kompute_option_string(KOMPUTE_OPT_BUILT_IN_VULKAN_HEADER_TAG "The git tag used for the built-in Vulkan Headers when 'KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER' is enabled. A list of tags can be found here: https://github.com/KhronosGroup/Vulkan-Headers/tags" "v1.3.231")
message(STATUS "=======================================================")
@ -118,50 +108,36 @@ include(cmake/check_vulkan_version.cmake)
include(FetchContent)
# Vulkan Header
# We don't import Vulkan library if Android build as it is built dynamically
# Otherwise it is expected that the Vulkan SDK and dependencies are installed
# Has to happen AFTER using the built-in Vulkan headers to prevent multiple targets with the name Vulkan::Headers
if(KOMPUTE_OPT_ANDROID_BUILD)
add_library(vulkanAndroid INTERFACE)
set(VULKAN_INCLUDE_DIR ${ANDROID_NDK}/sources/third_party/vulkan/src/include)
target_sources(vulkanAndroid INTERFACE ${VULKAN_INCLUDE_DIR}/vulkan/vulkan.hpp)
target_include_directories(vulkanAndroid INTERFACE ${VULKAN_INCLUDE_DIR})
if(KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER)
FetchContent_Declare(vulkan_header GIT_REPOSITORY https://github.com/KhronosGroup/Vulkan-Headers.git
GIT_TAG ${KOMPUTE_OPT_BUILT_IN_VULKAN_HEADER_TAG}) # Source: https://github.com/KhronosGroup/Vulkan-Headers/tags
FetchContent_MakeAvailable(vulkan_header)
target_compile_definitions(vulkanAndroid INTERFACE VK_NO_PROTOTYPES=1)
target_compile_definitions(vulkanAndroid INTERFACE VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
else()
if(KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER)
FetchContent_Declare(vulkan_header GIT_REPOSITORY https://github.com/KhronosGroup/Vulkan-Headers.git
GIT_TAG ${KOMPUTE_OPT_BUILT_IN_VULKAN_HEADER_TAG}) # Source: https://github.com/KhronosGroup/Vulkan-Headers/tags
FetchContent_MakeAvailable(vulkan_header)
if(NOT KOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK)
# Ensure the driver supports this Vulkan version
check_vulkan_version(INCLUDE_DIR "${vulkan_header_SOURCE_DIR}/include")
endif()
endif()
find_package(Vulkan REQUIRED)
if(Vulkan_FOUND AND NOT TARGET Vulkan::Headers)
add_library(Vulkan::Headers INTERFACE IMPORTED)
set_target_properties(Vulkan::Headers PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${Vulkan_INCLUDE_DIRS}")
endif()
if(NOT KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER AND NOT KOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK)
if(NOT KOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK)
# Ensure the driver supports this Vulkan version
check_vulkan_version(INCLUDE_DIR ${Vulkan_INCLUDE_DIR})
check_vulkan_version(INCLUDE_DIR "${vulkan_header_SOURCE_DIR}/include")
endif()
endif()
find_package(Vulkan REQUIRED)
if(Vulkan_FOUND AND NOT TARGET Vulkan::Headers)
add_library(Vulkan::Headers INTERFACE IMPORTED)
set_target_properties(Vulkan::Headers PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${Vulkan_INCLUDE_DIRS}")
endif()
if(NOT KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER AND NOT KOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK)
# Ensure the driver supports this Vulkan version
check_vulkan_version(INCLUDE_DIR ${Vulkan_INCLUDE_DIR})
endif()
# Spdlog
if(KOMPUTE_OPT_USE_SPDLOG)
add_compile_definitions(KOMPUTE_OPT_USE_SPDLOG=1)
if(NOT KOMPUTE_OPT_LOG_LEVEL_DISABLED)
if(KOMPUTE_OPT_USE_BUILT_IN_SPDLOG)
set(SPDLOG_INSTALL ${KOMPUTE_OPT_INSTALL})
set(SPDLOG_BUILD_SHARED ${BUILD_SHARED_LIBS})
FetchContent_Declare(spdlog GIT_REPOSITORY https://github.com/gabime/spdlog.git
@ -175,86 +151,25 @@ endif()
# fmt
if(KOMPUTE_OPT_USE_BUILT_IN_FMT)
set(FMT_INSTALL ${KOMPUTE_OPT_INSTALL})
FetchContent_Declare(fmt GIT_REPOSITORY https://github.com/fmtlib/fmt.git
GIT_TAG 8.1.1) # Source: https://github.com/fmtlib/fmt/releases
GIT_TAG 10.0.0) # Source: https://github.com/fmtlib/fmt/releases
FetchContent_MakeAvailable(fmt)
else()
find_package(fmt REQUIRED)
endif()
# GoogleTest
if(KOMPUTE_OPT_BUILD_TESTS)
if(KOMPUTE_OPT_USE_BUILT_IN_GOOGLE_TEST)
FetchContent_Declare(googletest GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG release-1.11.0) # Source: https://github.com/google/googletest/releases
# Use a shared C runtime in case we build shared
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
add_library(gtest_int INTERFACE)
target_link_libraries(gtest_int INTERFACE gtest)
target_include_directories(gtest_int INTERFACE ${googletest_SOURCE_DIR}/include)
add_library(GTest::GTest ALIAS gtest_int)
# Group under the "tests/gtest" project folder in IDEs such as Visual Studio.
set_property(TARGET gtest PROPERTY FOLDER "tests/gtest")
set_property(TARGET gtest_main PROPERTY FOLDER "tests/gtest")
else()
find_package(GTest CONFIG REQUIRED)
endif()
endif()
# pybind11
if(KOMPUTE_OPT_BUILD_PYTHON)
if(KOMPUTE_OPT_USE_BUILT_IN_PYBIND11)
FetchContent_Declare(pybind GIT_REPOSITORY https://github.com/pybind/pybind11.git
GIT_TAG v2.9.2) # Source: https://github.com/pybind/pybind11/releases
FetchContent_MakeAvailable(pybind)
else()
find_package(pybind11 REQUIRED)
endif()
find_package(PythonLibs REQUIRED)
endif()
add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
# ####################################################
# Preprocessor Macros
# ####################################################
if(KOMPUTE_OPT_ANDROID_BUILD)
add_compile_definitions(VK_USE_PLATFORM_ANDROID_KHR=1)
endif()
if(KOMPUTE_OPT_BUILD_PYTHON)
add_compile_definitions(KOMPUTE_BUILD_PYTHON=1)
endif()
if(KOMPUTE_OPT_DISABLE_VK_DEBUG_LAYERS)
add_compile_definitions(KOMPUTE_DISABLE_VK_DEBUG_LAYERS=1)
endif()
# ####################################################
# Misc Options
# ####################################################
if(KOMPUTE_OPT_INSTALL)
# Enable install parameters for glslang (overrides parameters passed)
# When install is enabled the glslang libraries become shared
set(ENABLE_GLSLANG_INSTALL ON CACHE BOOL "Enables install of glslang" FORCE)
endif()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wpedantic -Werror")
endif()
if(KOMPUTE_OPT_CODE_COVERAGE)
if(NOT UNIX)
message(FATAL_ERROR "KOMPUTE_OPT_CODE_COVERAGE can only be enabled in unix based systems due to limitation on gcov.")
endif()
include(cmake/code_coverage.cmake)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wpedantic -Werror -Wno-error=array-bounds")
endif()
# If glslang is cloned, then SPIRV/GlslangToSpv.h will be used instead of glslang/SPIRV/GlslangToSpv.h
@ -269,18 +184,6 @@ function(kompute_make KOMPUTE_MAKE_TARGET)
COMMAND make -C ${PROJECT_SOURCE_DIR} ${KOMPUTE_MAKE_TARGET})
endfunction()
add_executable(xxd external/bin/xxd.c)
add_subdirectory(src)
if(KOMPUTE_OPT_BUILD_TESTS)
enable_testing()
add_subdirectory(test)
endif()
if(KOMPUTE_OPT_BUILD_DOCS)
set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/config" ${CMAKE_MODULE_PATH})
add_subdirectory(docs)
endif()
if(KOMPUTE_OPT_BUILD_PYTHON)
add_subdirectory(python)
endif()

1
scripts/convert_shaders.py Normal file → Executable file
View file

@ -1,3 +1,4 @@
#!/usr/bin/env python3
"""
Script to handle conversion of compute shaders to spirv and to headers
"""

View file

@ -49,18 +49,6 @@ Algorithm::destroy()
this->mPipeline = nullptr;
}
if (this->mFreePipelineCache && this->mPipelineCache) {
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache");
if (!this->mPipelineCache) {
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"pipeline cache but it is null");
}
this->mDevice->destroy(
*this->mPipelineCache,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mPipelineCache = nullptr;
}
if (this->mFreePipelineLayout && this->mPipelineLayout) {
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout");
if (!this->mPipelineLayout) {
@ -91,17 +79,6 @@ Algorithm::destroy()
void
Algorithm::freeParameters()
{
if (this->mFreeDescriptorSet && this->mDescriptorSet) {
KP_LOG_DEBUG("Kompute Algorithm Freeing Descriptor Set");
if (!this->mDescriptorSet) {
KP_LOG_WARN(
"Kompute Algorithm Error requested to free descriptor set");
}
this->mDevice->freeDescriptorSets(
*this->mDescriptorPool, 1, this->mDescriptorSet.get());
this->mDescriptorSet = nullptr;
}
if (this->mFreeDescriptorSetLayout && this->mDescriptorSetLayout) {
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout");
if (!this->mDescriptorSetLayout) {
@ -113,43 +90,16 @@ Algorithm::freeParameters()
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mDescriptorSetLayout = nullptr;
}
if (this->mFreeDescriptorPool && this->mDescriptorPool) {
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Pool");
if (!this->mDescriptorPool) {
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"descriptor pool but it is null");
}
this->mDevice->destroy(
*this->mDescriptorPool,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mDescriptorPool = nullptr;
}
}
void
Algorithm::createParameters()
{
KP_LOG_DEBUG("Kompute Algorithm createParameters started");
std::vector<vk::DescriptorPoolSize> descriptorPoolSizes = {
vk::DescriptorPoolSize(
vk::DescriptorType::eStorageBuffer,
static_cast<uint32_t>(this->mTensors.size()) // Descriptor count
)
};
vk::DescriptorPoolCreateInfo descriptorPoolInfo(
vk::DescriptorPoolCreateFlags(VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT),
1, // Max sets
static_cast<uint32_t>(descriptorPoolSizes.size()),
descriptorPoolSizes.data());
KP_LOG_DEBUG("Kompute Algorithm creating descriptor pool");
this->mDescriptorPool = std::make_shared<vk::DescriptorPool>();
this->mDevice->createDescriptorPool(
&descriptorPoolInfo, nullptr, this->mDescriptorPool.get());
this->mFreeDescriptorPool = true;
if (!*this->mDescriptorPool) {
KP_LOG_ERROR("Kompute Algorithm can not create descriptor pool");
return;
}
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetBindings;
for (size_t i = 0; i < this->mTensors.size(); i++) {
@ -168,9 +118,15 @@ Algorithm::createParameters()
KP_LOG_DEBUG("Kompute Algorithm creating descriptor set layout");
this->mDescriptorSetLayout = std::make_shared<vk::DescriptorSetLayout>();
this->mDevice->createDescriptorSetLayout(
vk::Result result = this->mDevice->createDescriptorSetLayout(
&descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get());
this->mFreeDescriptorSetLayout = true;
if (result != vk::Result::eSuccess) {
KP_LOG_ERROR("Failed to create descriptor set layout. Error code: {}", vk::to_string(result));
} else {
this->mFreeDescriptorSetLayout = true;
KP_LOG_DEBUG("Successfully allocated descriptor set layout.");
}
vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo(
*this->mDescriptorPool,
@ -179,8 +135,67 @@ Algorithm::createParameters()
KP_LOG_DEBUG("Kompute Algorithm allocating descriptor sets");
this->mDescriptorSet = std::make_shared<vk::DescriptorSet>();
this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo,
result = this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo,
this->mDescriptorSet.get());
if (result != vk::Result::eSuccess) {
KP_LOG_ERROR("Failed to allocate descriptor sets. Error code: {}", vk::to_string(result));
} else {
this->mFreeDescriptorSet = true;
KP_LOG_DEBUG("Successfully allocated descriptor sets.");
}
this->mFreeDescriptorSet = true;
KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets");
for (size_t i = 0; i < this->mTensors.size(); i++) {
std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
vk::DescriptorBufferInfo descriptorBufferInfo =
this->mTensors[i]->constructDescriptorBufferInfo();
computeWriteDescriptorSets.push_back(
vk::WriteDescriptorSet(*this->mDescriptorSet,
i, // Destination binding
0, // Destination array element
1, // Descriptor count
vk::DescriptorType::eStorageBuffer,
nullptr, // Descriptor image info
&descriptorBufferInfo));
this->mDevice->updateDescriptorSets(computeWriteDescriptorSets,
nullptr);
}
KP_LOG_DEBUG("Kompute Algorithm successfully run init");
}
void
Algorithm::updateParameters()
{
KP_LOG_DEBUG("Kompute Algorithm updateParameters started");
if (!*this->mDescriptorPool) {
KP_LOG_ERROR("Kompute Algorithm can not create descriptor pool");
return;
}
vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo(
*this->mDescriptorPool,
1, // Descriptor set layout count
this->mDescriptorSetLayout.get());
KP_LOG_DEBUG("Kompute Algorithm allocating descriptor sets");
this->mDescriptorSet = std::make_shared<vk::DescriptorSet>();
vk::Result result = this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo,
this->mDescriptorSet.get());
if (result != vk::Result::eSuccess) {
KP_LOG_ERROR("Failed to allocate descriptor sets. Error code: {}", vk::to_string(result));
} else {
this->mFreeDescriptorSet = true;
KP_LOG_DEBUG("Successfully allocated descriptor sets.");
}
this->mFreeDescriptorSet = true;
KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets");
@ -281,25 +296,15 @@ Algorithm::createPipeline()
"main",
&specializationInfo);
static std::shared_ptr<vk::PipelineCache> globalPipelineCache = std::make_shared<vk::PipelineCache>();
if(!*globalPipelineCache) {
vk::PipelineCacheCreateInfo pipelineCacheInfo =
vk::PipelineCacheCreateInfo();
this->mDevice->createPipelineCache(
&pipelineCacheInfo, nullptr, globalPipelineCache.get());
}
vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(),
shaderStage,
*this->mPipelineLayout,
vk::Pipeline(),
0);
this->mFreePipelineCache = false;
#ifdef KOMPUTE_CREATE_PIPELINE_RESULT_VALUE
vk::ResultValue<vk::Pipeline> pipelineResult =
this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo);
this->mDevice->createComputePipeline(*mPipelineCache, pipelineInfo);
if (pipelineResult.result != vk::Result::eSuccess) {
throw std::runtime_error("Failed to create pipeline result: " +
@ -311,7 +316,7 @@ Algorithm::createPipeline()
this->mFreePipeline = true;
#else
vk::Pipeline pipeline =
this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo)
this->mDevice->createComputePipeline(*mPipelineCache, pipelineInfo)
.value;
this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
this->mFreePipeline = true;
@ -373,7 +378,6 @@ Algorithm::recordDispatch(const vk::CommandBuffer& commandBuffer)
void
Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize)
{
KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size");
// The dispatch size is set up based on either explicitly provided template

View file

@ -8,14 +8,16 @@ endif()
cmake_minimum_required(VERSION 3.20)
add_library(kompute Algorithm.cpp
add_library(kompute STATIC Algorithm.cpp
Manager.cpp
OpAlgoDispatch.cpp
OpMemoryBarrier.cpp
OpTensorCopy.cpp
OpTensorFill.cpp
OpTensorSyncDevice.cpp
OpTensorSyncLocal.cpp
OpBufferSyncDevice.cpp
OpBufferSyncLocal.cpp
Sequence.cpp
Tensor.cpp
Core.cpp)
@ -26,7 +28,8 @@ add_library(kompute::kompute ALIAS kompute)
set_target_properties(kompute
PROPERTIES
VERSION ${${PROJECT_NAME}_VERSION}
SOVERSION ${${PROJECT_NAME}_VERSION_MAJOR})
SOVERSION ${${PROJECT_NAME}_VERSION_MAJOR}
POSITION_INDEPENDENT_CODE TRUE)
# Import GNU common install directory variables
include(GNUInstallDirs)
@ -44,8 +47,8 @@ configure_package_config_file(${PROJECT_SOURCE_DIR}/cmake/komputeConfig.cmake.in
"${PROJECT_BINARY_DIR}/kompute/komputeConfig.cmake"
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/kompute)
install(FILES ${PROJECT_BINARY_DIR}/kompute/komputeConfig.cmake
${PROJECT_BINARY_DIR}/kompute/komputeConfigVersion.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/kompute)
#install(FILES ${PROJECT_BINARY_DIR}/kompute/komputeConfig.cmake
# ${PROJECT_BINARY_DIR}/kompute/komputeConfigVersion.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/kompute)
# ####################################################
# Linking
@ -55,12 +58,12 @@ if(KOMPUTE_OPT_ANDROID_BUILD)
android
kp_logger
kp_shader
fmt::fmt)
fmt::fmt-header-only)
else()
target_link_libraries(kompute PUBLIC Vulkan::Vulkan
target_link_libraries(kompute PUBLIC
kp_logger
kp_shader
fmt::fmt)
fmt::fmt-header-only)
endif()
if(KOMPUTE_OPT_BUILD_PYTHON)
@ -71,6 +74,8 @@ endif()
if(KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER)
target_link_libraries(kompute PUBLIC Vulkan-Headers)
else()
target_link_libraries(kompute PUBLIC Vulkan::Headers)
endif()
# ####################################################

View file

@ -2,7 +2,6 @@
#include "kompute/Core.hpp"
#if VK_USE_PLATFORM_ANDROID_KHR
#ifndef KOMPUTE_VK_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
#define KOMPUTE_VK_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
/**
@ -13,7 +12,6 @@
**/
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
#endif // !KOMPUTE_VK_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
#endif // VK_USE_PLATFORM_ANDROID_KHR
namespace kp {
} // namespace kp

View file

@ -33,13 +33,6 @@ debugMessageCallback(VkDebugReportFlagsEXT /*flags*/,
#endif
Manager::Manager()
: Manager(0)
{
}
Manager::Manager(uint32_t physicalDeviceIndex,
const std::vector<uint32_t>& familyQueueIndices,
const std::vector<std::string>& desiredExtensions)
{
this->mManageResources = true;
@ -47,26 +40,15 @@ Manager::Manager(uint32_t physicalDeviceIndex,
#if !KOMPUTE_OPT_LOG_LEVEL_DISABLED
logger::setupLogger();
#endif
this->createInstance();
this->createDevice(
familyQueueIndices, physicalDeviceIndex, desiredExtensions);
}
Manager::Manager(std::shared_ptr<vk::Instance> instance,
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device)
void Manager::initializeDevice(uint32_t physicalDeviceIndex,
const std::vector<uint32_t>& familyQueueIndices,
const std::vector<std::string>& desiredExtensions)
{
this->mManageResources = false;
this->mInstance = instance;
this->mPhysicalDevice = physicalDevice;
this->mDevice = device;
// Make sure the logger is setup
#if !KOMPUTE_OPT_LOG_LEVEL_DISABLED
logger::setupLogger();
#endif
this->createDevice(
familyQueueIndices, physicalDeviceIndex, desiredExtensions);
}
Manager::~Manager()
@ -98,15 +80,14 @@ Manager::destroy()
this->mManagedSequences.clear();
}
if (this->mManageResources && this->mManagedAlgorithms.size()) {
if (this->mManageResources && !this->mManagedAlgorithmsMap.empty()) {
KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
for (const std::weak_ptr<Algorithm>& weakAlgorithm :
this->mManagedAlgorithms) {
if (std::shared_ptr<Algorithm> algorithm = weakAlgorithm.lock()) {
for (const auto& kv : this->mManagedAlgorithmsMap) {
if (std::shared_ptr<Algorithm> algorithm = kv.second) {
algorithm->destroy();
}
}
this->mManagedAlgorithms.clear();
this->mManagedAlgorithmsMap.clear();
}
if (this->mManageResources && this->mManagedTensors.size()) {
@ -119,6 +100,18 @@ Manager::destroy()
this->mManagedTensors.clear();
}
if (this->mPipelineCache) {
KP_LOG_DEBUG("Kompute Manager Destroying pipeline cache");
if (!this->mPipelineCache) {
KP_LOG_WARN("Kompute Manager Error requested to destroy "
"pipeline cache but it is null");
}
this->mDevice->destroy(
*this->mPipelineCache,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mPipelineCache = nullptr;
}
if (this->mFreeDevice) {
KP_LOG_INFO("Destroying device");
this->mDevice->destroy(
@ -179,6 +172,16 @@ Manager::createInstance()
applicationExtensions.data();
}
try {
mDynamicLoader = std::make_shared<vk::DynamicLoader>();
} catch (const std::exception & err) {
return;
}
PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr =
mDynamicLoader->getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
KP_LOG_DEBUG("Kompute Manager adding debug validation layers");
// We'll identify the layers that are supported
@ -233,20 +236,18 @@ Manager::createInstance()
}
#endif
#if VK_USE_PLATFORM_ANDROID_KHR
vk::DynamicLoader dl;
PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr =
dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
#endif // VK_USE_PLATFORM_ANDROID_KHR
this->mInstance = std::make_shared<vk::Instance>();
vk::createInstance(
vk::Result r = vk::createInstance(
&computeInstanceCreateInfo, nullptr, this->mInstance.get());
if (r != vk::Result::eSuccess) {
KP_LOG_ERROR(
"Kompute Manager Error allocating vulkan instance", vk::to_string(r));
this->mInstance = nullptr;
this->mFreeInstance = false;
return;
}
#if VK_USE_PLATFORM_ANDROID_KHR
VULKAN_HPP_DEFAULT_DISPATCHER.init(*this->mInstance);
#endif // VK_USE_PLATFORM_ANDROID_KHR
KP_LOG_DEBUG("Kompute Manager Instance Created");
@ -261,7 +262,7 @@ Manager::createInstance()
(PFN_vkDebugReportCallbackEXT)debugMessageCallback;
debugCreateInfo.flags = debugFlags;
this->mDebugDispatcher.init(*this->mInstance, &vkGetInstanceProcAddr);
this->mDebugDispatcher.init(*this->mInstance, vkGetInstanceProcAddr);
this->mDebugReportCallback =
this->mInstance->createDebugReportCallbackEXT(
debugCreateInfo, nullptr, this->mDebugDispatcher);
@ -278,12 +279,14 @@ Manager::clear()
end(this->mManagedTensors),
[](std::weak_ptr<Tensor> t) { return t.expired(); }),
end(this->mManagedTensors));
this->mManagedAlgorithms.erase(
std::remove_if(
begin(this->mManagedAlgorithms),
end(this->mManagedAlgorithms),
[](std::weak_ptr<Algorithm> t) { return t.expired(); }),
end(this->mManagedAlgorithms));
for (auto it = this->mManagedAlgorithmsMap.begin();
it != this->mManagedAlgorithmsMap.end();) {
if (it->second) {
it = this->mManagedAlgorithmsMap.erase(it);
} else {
++it;
}
}
this->mManagedSequences.erase(
std::remove_if(begin(this->mManagedSequences),
end(this->mManagedSequences),
@ -338,7 +341,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
KP_LOG_INFO("Using physical device index {} found {}",
physicalDeviceIndex,
physicalDeviceProperties.deviceName);
physicalDeviceProperties.deviceName.data());
if (familyQueueIndices.empty()) {
// Find compute queue
@ -413,17 +416,39 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
fmt::join(validExtensions, ", "));
}
vk::PhysicalDeviceFeatures features;
features.shaderInt16 = true;
vk::PhysicalDeviceVulkan11Features features11;
features11.uniformAndStorageBuffer16BitAccess = true;
features11.storageBuffer16BitAccess = true;
features11.pNext = nullptr;
vk::PhysicalDeviceVulkan12Features features12;
features12.storageBuffer8BitAccess = true;
features12.uniformAndStorageBuffer8BitAccess = true;
features12.shaderFloat16 = true;
features12.shaderInt8 = true;
features12.pNext = &features11;
vk::DeviceCreateInfo deviceCreateInfo(vk::DeviceCreateFlags(),
deviceQueueCreateInfos.size(),
deviceQueueCreateInfos.data(),
{},
{},
validExtensions.size(),
validExtensions.data());
validExtensions.data(),
&features);
deviceCreateInfo.pNext = &features12;
this->mDevice = std::make_shared<vk::Device>();
physicalDevice.createDevice(
vk::Result r = physicalDevice.createDevice(
&deviceCreateInfo, nullptr, this->mDevice.get());
if (r != vk::Result::eSuccess) {
KP_LOG_ERROR("Kompute Manager could not create device");
}
KP_LOG_DEBUG("Kompute Manager device created");
for (const uint32_t& familyQueueIndex : this->mComputeQueueFamilyIndices) {
@ -439,6 +464,12 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
}
KP_LOG_DEBUG("Kompute Manager compute queue obtained");
mPipelineCache = std::make_shared<vk::PipelineCache>();
vk::PipelineCacheCreateInfo pipelineCacheInfo =
vk::PipelineCacheCreateInfo();
this->mDevice->createPipelineCache(
&pipelineCacheInfo, nullptr, mPipelineCache.get());
}
std::shared_ptr<Sequence>

View file

@ -24,9 +24,9 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer)
this->mAlgorithm->getTensors()) {
tensor->recordPrimaryBufferMemoryBarrier(
commandBuffer,
vk::AccessFlagBits::eTransferWrite,
vk::AccessFlagBits::eShaderWrite,
vk::AccessFlagBits::eShaderRead,
vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eComputeShader);
}

43
src/OpBufferSyncLocal.cpp Normal file
View file

@ -0,0 +1,43 @@
// SPDX-License-Identifier: Apache-2.0
#include "kompute/operations/OpBufferSyncLocal.hpp"
namespace kp {
// Stores the two buffer pointers and the byte count that record() later uses
// for its copy. Only the pointers are stored here; nothing is dereferenced or
// copied at construction time.
//
// primaryBuffer: buffer used as the copy source in record().
// stagingBuffer: buffer used as the copy destination in record().
// size:          number of bytes record() copies.
OpBufferSyncLocal::OpBufferSyncLocal(vk::Buffer* primaryBuffer,
                                     vk::Buffer* stagingBuffer,
                                     vk::DeviceSize size)
  : mPrimaryBuffer(primaryBuffer),
    mStagingBuffer(stagingBuffer),
    mSize(size)
{
    KP_LOG_DEBUG("Kompute OpBufferSyncLocal constructor with params");
}
// The destructor body only emits a debug log; it does not release the
// buffers referenced by the stored pointers.
OpBufferSyncLocal::~OpBufferSyncLocal()
{
    KP_LOG_DEBUG("Kompute OpBufferSyncLocal destructor started");
}
// Records a buffer-to-buffer copy of mSize bytes into the given command
// buffer: source is the primary buffer at offset 0, destination is the
// staging buffer at offset 0.
//
// NOTE(review): both stored pointers are dereferenced unconditionally -
// callers presumably guarantee they are non-null; confirm.
void
OpBufferSyncLocal::record(const vk::CommandBuffer& commandBuffer)
{
    KP_LOG_DEBUG("Kompute OpBufferSyncLocal record called");

    // (srcOffset, dstOffset, size): copy the first mSize bytes of the buffer.
    const vk::BufferCopy wholeRegion(0, 0, this->mSize);

    commandBuffer.copyBuffer(
      *this->mPrimaryBuffer, *this->mStagingBuffer, wholeRegion);
}
// Pre-evaluation hook: this operation has nothing to do here beyond emitting
// a debug log. The command buffer argument is intentionally unused.
void
OpBufferSyncLocal::preEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpBufferSyncLocal preEval called");
}
// Post-evaluation hook: this operation has nothing to do here beyond emitting
// a debug log. The command buffer argument is intentionally unused.
void
OpBufferSyncLocal::postEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpBufferSyncLocal postEval called");
}
}

47
src/OpTensorFill.cpp Normal file
View file

@ -0,0 +1,47 @@
// SPDX-License-Identifier: Apache-2.0
#include "kompute/operations/OpTensorFill.hpp"
#include "kompute/Tensor.hpp"
namespace kp {
// Stores the tensors that record() will later fill.
//
// tensors: the tensors to operate on; must contain at least one element.
//
// Throws std::runtime_error when `tensors` is empty.
OpTensorFill::OpTensorFill(const std::vector<std::shared_ptr<Tensor>>& tensors)
{
    KP_LOG_DEBUG("Kompute OpTensorFill constructor with params");

    // An operation with no tensors is rejected up front.
    if (tensors.empty()) {
        throw std::runtime_error(
          "Kompute OpTensorFill called with less than 1 tensor");
    }

    this->mTensors = tensors;
}
// The destructor body only emits a debug log; no explicit cleanup is
// performed here.
OpTensorFill::~OpTensorFill()
{
    KP_LOG_DEBUG("Kompute OpTensorFill destructor started");
}
// Records, for every stored tensor in order, a fill command with the value 0
// into the given command buffer (via Tensor::recordFill).
void
OpTensorFill::record(const vk::CommandBuffer& commandBuffer)
{
    KP_LOG_DEBUG("Kompute OpTensorFill record called");

    for (const auto& tensor : this->mTensors) {
        tensor->recordFill(commandBuffer, 0);
    }
}
// Pre-evaluation hook: this operation has nothing to do here beyond emitting
// a debug log. The command buffer argument is intentionally unused.
void
OpTensorFill::preEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpTensorFill preEval called");
}
// Post-evaluation hook: this operation has nothing to do here beyond emitting
// a debug log. The command buffer argument is intentionally unused.
void
OpTensorFill::postEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpTensorFill postEval called");
}
}

View file

@ -163,6 +163,9 @@ Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
void
Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer)
{
if (!this->mStagingBuffer)
return;
vk::DeviceSize bufferSize(this->memorySize());
vk::BufferCopy copyRegion(mOffset, mOffset, bufferSize);
@ -178,6 +181,9 @@ Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer)
void
Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer)
{
if (!this->mStagingBuffer)
return;
vk::DeviceSize bufferSize(this->memorySize());
vk::BufferCopy copyRegion(mOffset, mOffset, bufferSize);
@ -201,6 +207,13 @@ Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
commandBuffer.copyBuffer(*bufferFrom, *bufferTo, copyRegion);
}
// Records a fill of this tensor's primary buffer into the given command
// buffer: memorySize() bytes starting at mOffset are set to the 32-bit
// pattern `fill`.
//
// commandBuffer: command buffer the fill command is recorded into.
// fill:          32-bit value written repeatedly across the range.
//
// NOTE(review): the primary buffer pointer is dereferenced unconditionally,
// and vkCmdFillBuffer requires the offset and size to be multiples of 4 -
// confirm callers guarantee both.
void
Tensor::recordFill(const vk::CommandBuffer& commandBuffer, uint32_t fill)
{
    const vk::DeviceSize startOffset = mOffset;
    const vk::DeviceSize byteCount = this->memorySize();

    commandBuffer.fillBuffer(
      *this->mPrimaryBuffer, startOffset, byteCount, fill);
}
void
Tensor::recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
@ -225,6 +238,9 @@ Tensor::recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask)
{
if (!this->mStagingBuffer)
return;
KP_LOG_DEBUG("Kompute Tensor recording STAGING buffer memory barrier");
this->recordBufferMemoryBarrier(commandBuffer,
@ -389,69 +405,6 @@ Tensor::destroy()
return;
}
#if 0 // FIXME: This all moves outside of Kompute
// Unmap the current memory data
if (this->tensorType() != Tensor::TensorTypes::eStorage) {
this->unmapRawData();
}
if (this->mFreePrimaryBuffer) {
if (!this->mPrimaryBuffer) {
KP_LOG_WARN("Kompose Tensor expected to destroy primary buffer "
"but got null buffer");
} else {
KP_LOG_DEBUG("Kompose Tensor destroying primary buffer");
this->mDevice->destroy(
*this->mPrimaryBuffer,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mPrimaryBuffer = nullptr;
this->mFreePrimaryBuffer = false;
}
}
if (this->mFreeStagingBuffer) {
if (!this->mStagingBuffer) {
KP_LOG_WARN("Kompose Tensor expected to destroy staging buffer "
"but got null buffer");
} else {
KP_LOG_DEBUG("Kompose Tensor destroying staging buffer");
this->mDevice->destroy(
*this->mStagingBuffer,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mStagingBuffer = nullptr;
this->mFreeStagingBuffer = false;
}
}
if (this->mFreePrimaryMemory) {
if (!this->mPrimaryMemory) {
KP_LOG_WARN("Kompose Tensor expected to free primary memory but "
"got null memory");
} else {
KP_LOG_DEBUG("Kompose Tensor freeing primary memory");
this->mDevice->freeMemory(
*this->mPrimaryMemory,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mPrimaryMemory = nullptr;
this->mFreePrimaryMemory = false;
}
}
if (this->mFreeStagingMemory) {
if (!this->mStagingMemory) {
KP_LOG_WARN("Kompose Tensor expected to free staging memory but "
"got null memory");
} else {
KP_LOG_DEBUG("Kompose Tensor freeing staging memory");
this->mDevice->freeMemory(
*this->mStagingMemory,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mStagingMemory = nullptr;
this->mFreeStagingMemory = false;
}
}
#endif
if (this->mDevice) {
this->mDevice = nullptr;
}

View file

@ -21,14 +21,16 @@ target_sources(kompute PRIVATE
kompute/operations/OpMemoryBarrier.hpp
kompute/operations/OpMult.hpp
kompute/operations/OpTensorCopy.hpp
kompute/operations/OpTensorFill.hpp
kompute/operations/OpTensorSyncDevice.hpp
kompute/operations/OpTensorSyncLocal.hpp
kompute/operations/OpBufferSyncDevice.hpp
kompute/operations/OpBufferSyncLocal.hpp
kompute/logger/Logger.hpp
)
install(DIRECTORY kompute DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
#install(DIRECTORY kompute DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
# ####################################################
# Logger
@ -42,4 +44,4 @@ target_sources(kp_logger PRIVATE
kompute/logger/Logger.hpp
)
install(DIRECTORY logger DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
#install(DIRECTORY logger DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

View file

@ -36,6 +36,8 @@ class Algorithm
*/
template<typename S = float, typename P = float>
Algorithm(std::shared_ptr<vk::Device> device,
vk::PipelineCache *pipelineCache,
vk::DescriptorPool *pool,
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
@ -45,6 +47,8 @@ class Algorithm
KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
this->mDevice = device;
this->mPipelineCache = pipelineCache;
this->mDescriptorPool = pool;
if (tensors.size() && spirv.size()) {
KP_LOG_INFO(
@ -198,29 +202,15 @@ class Algorithm
uint32_t memorySize = sizeof(decltype(pushConstants.back()));
uint32_t size = pushConstants.size();
this->setPushConstants(pushConstants.data(), size, memorySize);
}
void updateDescriptors(vk::DescriptorPool *pool)
{
this->mDescriptorPool = pool;
this->setWorkgroup(
this->mWorkgroup, this->mTensors.size() ? this->mTensors[0]->size() : 1);
this->createParameters(); // TODO: See if we can reduce this
// for (size_t i = 0; i < this->mTensors.size(); i++) {
// std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
// vk::DescriptorBufferInfo descriptorBufferInfo =
// this->mTensors[i]->constructDescriptorBufferInfo();
// computeWriteDescriptorSets.push_back(
// vk::WriteDescriptorSet(*this->mDescriptorSet,
// i, // Destination binding
// 0, // Destination array element
// 1, // Descriptor count
// vk::DescriptorType::eStorageBuffer,
// nullptr, // Descriptor image info
// &descriptorBufferInfo));
// this->mDevice->updateDescriptorSets(computeWriteDescriptorSets,
// nullptr);
// }
this->updateParameters(); // TODO: See if we can reduce this
}
/**
@ -306,16 +296,14 @@ class Algorithm
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
bool mFreeDescriptorSetLayout = false;
std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
bool mFreeDescriptorPool = false;
vk::DescriptorPool *mDescriptorPool = nullptr;
std::shared_ptr<vk::DescriptorSet> mDescriptorSet;
bool mFreeDescriptorSet = false;
std::shared_ptr<vk::ShaderModule> mShaderModule;
bool mFreeShaderModule = false;
std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
bool mFreePipelineLayout = false;
std::shared_ptr<vk::PipelineCache> mPipelineCache;
bool mFreePipelineCache = false;
vk::PipelineCache *mPipelineCache = nullptr;
std::shared_ptr<vk::Pipeline> mPipeline;
bool mFreePipeline = false;
@ -336,6 +324,7 @@ class Algorithm
// Parameters
void freeParameters();
void createParameters();
void updateParameters();
};
} // End namespace kp

View file

@ -15,7 +15,7 @@ typedef std::vector<float> Constants;
#define KOMPUTE_VK_API_MAJOR_VERSION 1
#endif // KOMPUTE_VK_API_MAJOR_VERSION
#ifndef KOMPUTE_VK_API_MINOR_VERSION
#define KOMPUTE_VK_API_MINOR_VERSION 1
#define KOMPUTE_VK_API_MINOR_VERSION 2
#endif // KOMPUTE_VK_API_MINOR_VERSION
#define KOMPUTE_VK_API_VERSION \
VK_MAKE_VERSION( \

View file

@ -14,6 +14,8 @@
#include "operations/OpTensorSyncDevice.hpp"
#include "operations/OpTensorSyncLocal.hpp"
#include "operations/OpBufferSyncDevice.hpp"
#include "operations/OpBufferSyncLocal.hpp"
#include "operations/OpTensorFill.hpp"
// Will be build by CMake and placed inside the build directory
#include "ShaderLogisticRegression.hpp"

View file

@ -20,14 +20,30 @@ class Manager
{
public:
/**
Base constructor and default used which creates the base resources
including choosing the device 0 by default.
Base constructor.
*/
Manager();
/**
* Similar to base constructor but allows for further configuration to use
* when creating the Vulkan resources.
* Manager destructor which would ensure all owned resources are destroyed
* unless explicitly stated that resources should not be destroyed or freed.
*/
~Manager();
/**
 * Reports whether this manager currently holds a Vulkan instance.
 *
 * @returns true when mInstance is non-null, false otherwise
 */
bool hasInstance() const
{
    return this->mInstance != nullptr;
}
/**
 * Reports whether this manager currently holds a logical device.
 *
 * @returns true when mDevice is non-null, false otherwise
 */
bool hasDevice() const
{
    return this->mDevice != nullptr;
}
/**
 * Reports whether the dynamic loader handle held by this manager is set.
 * NOTE(review): presumably a non-null mDynamicLoader means the Vulkan
 * library was found and loaded - confirm against the code that assigns it.
 *
 * @returns true when mDynamicLoader is non-null, false otherwise
 */
bool hasVulkan() const
{
    return this->mDynamicLoader != nullptr;
}
/**
* Initialize a device.
*
* @param physicalDeviceIndex The index of the physical device to use
* @param familyQueueIndices (Optional) List of queue indices to add for
@ -35,29 +51,10 @@ class Manager
* @param desiredExtensions The desired extensions to load from
* physicalDevice
*/
Manager(uint32_t physicalDeviceIndex,
void initializeDevice(uint32_t physicalDeviceIndex,
const std::vector<uint32_t>& familyQueueIndices = {},
const std::vector<std::string>& desiredExtensions = {});
/**
* Manager constructor which allows your own vulkan application to integrate
* with the kompute use.
*
* @param instance Vulkan compute instance to base this application
* @param physicalDevice Vulkan physical device to use for application
* @param device Vulkan logical device to use for all base resources
* @param physicalDeviceIndex Index for vulkan physical device used
*/
Manager(std::shared_ptr<vk::Instance> instance,
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device);
/**
* Manager destructor which would ensure all owned resources are destroyed
* unless explicitly stated that resources should not be destroyed or freed.
*/
~Manager();
/**
* Create a managed sequence that will be destroyed by this manager
* if it hasn't been destroyed by its reference count going to zero.
@ -147,6 +144,8 @@ class Manager
* @returns Shared pointer with initialised algorithm
*/
std::shared_ptr<Algorithm> algorithm(
const std::string &name,
vk::DescriptorPool *pool,
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
@ -154,7 +153,7 @@ class Manager
const std::vector<float>& pushConstants = {})
{
return this->algorithm<>(
tensors, spirv, workgroup, specializationConstants, pushConstants);
name, pool, tensors, spirv, workgroup, specializationConstants, pushConstants);
}
/**
@ -173,6 +172,8 @@ class Manager
*/
template<typename S = float, typename P = float>
std::shared_ptr<Algorithm> algorithm(
const std::string &name,
vk::DescriptorPool *pool,
const std::vector<std::shared_ptr<Tensor>>& tensors,
const std::vector<uint32_t>& spirv,
const Workgroup& workgroup,
@ -184,6 +185,8 @@ class Manager
std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
this->mDevice,
mPipelineCache.get(),
pool,
tensors,
spirv,
workgroup,
@ -191,12 +194,24 @@ class Manager
pushConstants) };
if (this->mManageResources) {
this->mManagedAlgorithms.push_back(algorithm);
this->mManagedAlgorithmsMap.insert({name, algorithm});
}
return algorithm;
}
/**
 * Checks whether an algorithm is stored under the given name in the
 * managed algorithms map.
 *
 * @param name Key to look up
 * @returns true if the map contains an entry for name, false otherwise
 */
bool hasAlgorithm(const std::string &name) const
{
    return mManagedAlgorithmsMap.count(name) != 0;
}
/**
 * Looks up an algorithm by name in the managed algorithms map.
 *
 * @param name Key to look up
 * @returns The stored algorithm, or nullptr when no entry exists for name
 */
std::shared_ptr<Algorithm> getAlgorithm(const std::string &name) const
{
    const auto entry = mManagedAlgorithmsMap.find(name);
    if (entry == mManagedAlgorithmsMap.end()) {
        return nullptr;
    }
    return entry->second;
}
/**
* Destroy the GPU resources and all managed resources by manager.
**/
@ -232,6 +247,7 @@ class Manager
std::shared_ptr<vk::Device> device() const { return mDevice; }
std::shared_ptr<vk::PhysicalDevice> physicalDevice() const { return mPhysicalDevice; }
std::shared_ptr<vk::PipelineCache> pipelineCache() const { return mPipelineCache; }
private:
// -------------- OPTIONALLY OWNED RESOURCES
@ -239,15 +255,17 @@ class Manager
bool mFreeInstance = false;
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
std::shared_ptr<vk::Device> mDevice = nullptr;
std::shared_ptr<vk::DynamicLoader> mDynamicLoader = nullptr;
bool mFreeDevice = false;
// -------------- ALWAYS OWNED RESOURCES
std::vector<std::weak_ptr<Tensor>> mManagedTensors;
std::vector<std::weak_ptr<Sequence>> mManagedSequences;
std::vector<std::weak_ptr<Algorithm>> mManagedAlgorithms;
std::unordered_map<std::string, std::shared_ptr<Algorithm>> mManagedAlgorithmsMap;
std::vector<uint32_t> mComputeQueueFamilyIndices;
std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
std::shared_ptr<vk::PipelineCache> mPipelineCache;
bool mManageResources = false;
@ -259,7 +277,7 @@ class Manager
// Create functions
void createInstance();
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {},
uint32_t hysicalDeviceIndex = 0,
uint32_t physicalDeviceIndex = 0,
const std::vector<std::string>& desiredExtensions = {});
};

View file

@ -118,6 +118,9 @@ class Tensor
void recordCopyFrom(const vk::CommandBuffer& commandBuffer,
std::shared_ptr<Tensor> copyFromTensor);
/**
 * Records a fill of this tensor into the provided command buffer.
 * NOTE(review): the implementation is not visible from this header;
 * presumably the tensor's device buffer is filled with the repeated 32-bit
 * value - confirm against Tensor.cpp.
 *
 * @param commandBuffer Vulkan command buffer to record the command into
 * @param fill 32-bit value passed through to the fill command
 */
void recordFill(const vk::CommandBuffer &commandBuffer,
uint32_t fill);
/**
* Records a copy from the internal staging memory to the device memory
* using an optional barrier to wait for the operation. This function would
@ -271,6 +274,7 @@ class Tensor
vk::Buffer *bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion);
void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
const vk::Buffer& buffer,
vk::AccessFlagBits srcAccessMask,

View file

@ -1,8 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {

View file

@ -0,0 +1,50 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
 * Operation that works on a pair of raw Vulkan buffers (a primary buffer and
 * a staging buffer) plus a size, instead of on Tensor objects.
 * NOTE(review): going by the name this is the buffer-level counterpart of a
 * "sync local" operation - confirm the copy direction in the implementation.
 */
class OpBufferSyncLocal : public OpBase
{
public:
/**
* Stores the buffers and size this operation will act on.
*
* @param primaryBuffer Pointer to the primary buffer.
* @param stagingBuffer Pointer to the staging buffer.
* @param size Size associated with the operation (presumably the number
* of bytes to copy - TODO confirm).
*/
OpBufferSyncLocal(
vk::Buffer *primaryBuffer,
vk::Buffer *stagingBuffer,
vk::DeviceSize size);
/**
* Default destructor. This class does not manage memory so it won't be
* expecting the parent to perform a release.
*/
~OpBufferSyncLocal() override;
/**
* For device buffers, it records the copy command for the buffer to copy
* the data from its staging to device memory.
* NOTE(review): this sentence reads like the device-direction operation;
* for a "SyncLocal" operation the copy is presumably from the primary
* (device) buffer to the staging buffer - confirm and correct the wording.
*
* @param commandBuffer The command buffer to record the command into.
*/
void record(const vk::CommandBuffer& commandBuffer) override;
/**
* Does not perform any preEval commands.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
/**
* Does not perform any postEval commands.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
private:
// Raw pointers to the caller-provided buffers; per the destructor note this
// class does not manage their memory.
vk::Buffer *mPrimaryBuffer;
vk::Buffer *mStagingBuffer;
// Size supplied at construction time.
vk::DeviceSize mSize;
};
} // End namespace kp

View file

@ -0,0 +1,58 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
* Operation that records a fill command for the tensors it was given.
* NOTE(review): the constructor takes no fill value, so the value written is
* decided by the implementation - confirm in OpTensorFill.cpp.
*/
class OpTensorFill : public OpBase
{
public:
/**
* Default constructor with parameters that provides the core vulkan
* resources and the tensors that will be used in the operation.
*
* @param tensors Tensors that the operation will act on.
*/
OpTensorFill(const std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor. This class does not manage memory so it won't be
* expecting the parent to perform a release.
*/
~OpTensorFill() override;
/**
* Records the fill command for tensor.
*
* @param commandBuffer The command buffer to record the command into.
*/
void record(const vk::CommandBuffer& commandBuffer) override;
/**
* Does not perform any preEval commands.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
/**
* Does not perform any postEval commands.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
private:
// -------------- ALWAYS OWNED RESOURCES
// Shared references to the tensors passed to the constructor.
std::vector<std::shared_ptr<Tensor>> mTensors;
};
} // End namespace kp

View file

@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.20)
set(LOGGER_SOURCES Logger.cpp)
add_library(kp_logger ${LOGGER_SOURCES})
add_library(kp_logger STATIC ${LOGGER_SOURCES})
# Define log levels in code
add_compile_definitions(KOMPUTE_LOG_LEVEL_TRACE=0)