#pragma once #include "kompute/Core.hpp" namespace kp { /** * Structured data used in GPU operations. * * Tensors are the base building block in Kompute to perform operations across * GPUs. Each tensor would have a respective Vulkan memory and buffer, which * would be used to store their respective data. The tensors can be used for GPU * data storage or transfer. */ class Tensor { public: /** * Type for tensors created: Device allows memory to be transferred from * staging buffers. Staging are host memory visible. Storage are device * visible but are not set up to transfer or receive data (only for shader * storage). */ enum class TensorTypes { eDevice = 0, ///< Type is device memory, source and destination eHost = 1, ///< Type is host memory, source and destination eStorage = 2, ///< Type is Device memory (only) }; enum class TensorDataTypes { eBool = 0, eInt = 1, eUnsignedInt = 2, eFloat = 3, eDouble = 4, }; /** * Constructor with data provided which would be used to create the * respective vulkan buffer and memory. * * @param physicalDevice The physical device to use to fetch properties * @param device The device to use to create the buffer and memory from * @param data Non-zero-sized vector of data that will be used by the * tensor * @param tensorTypes Type for the tensor which is of type TensorTypes */ Tensor(std::shared_ptr physicalDevice, std::shared_ptr device, void* data, uint32_t elementTotalCount, uint32_t elementMemorySize, const TensorDataTypes& dataType, const TensorTypes& tensorType = TensorTypes::eDevice); /** * Destructor which is in charge of freeing vulkan resources unless they * have been provided externally. */ virtual ~Tensor(); /** * Function to trigger reinitialisation of the tensor buffer and memory with * new data as well as new potential device type. * * @param data Vector of data to use to initialise vector from * @param tensorType The type to use for the tensor */ void rebuild(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize); /** * Destroys and frees the GPU resources which include the buffer and memory. */ void destroy(); /** * Check whether tensor is initialized based on the created gpu resources. * * @returns Boolean stating whether tensor is initialized */ bool isInit(); /** * Retrieve the tensor type of the Tensor * * @return Tensor type of tensor */ TensorTypes tensorType(); /** * Records a copy from the memory of the tensor provided to the current * thensor. This is intended to pass memory into a processing, to perform * a staging buffer transfer, or to gather output (between others). * * @param commandBuffer Vulkan Command Buffer to record the commands into * @param copyFromTensor Tensor to copy the data from */ void recordCopyFrom(const vk::CommandBuffer& commandBuffer, std::shared_ptr copyFromTensor); /** * Records a copy from the internal staging memory to the device memory * using an optional barrier to wait for the operation. This function would * only be relevant for kp::Tensors of type eDevice. * * @param commandBuffer Vulkan Command Buffer to record the commands into */ void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer); /** * Records a copy from the internal device memory to the staging memory * using an optional barrier to wait for the operation. This function would * only be relevant for kp::Tensors of type eDevice. * * @param commandBuffer Vulkan Command Buffer to record the commands into */ void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer); /** * Records the buffer memory barrier into the primary buffer and command buffer which * ensures that relevant data transfers are carried out correctly. * * @param commandBuffer Vulkan Command Buffer to record the commands into * @param srcAccessMask Access flags for source access mask * @param dstAccessMask Access flags for destination access mask * @param scrStageMask Pipeline stage flags for source stage mask * @param dstStageMask Pipeline stage flags for destination stage mask */ void recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, vk::AccessFlagBits srcAccessMask, vk::AccessFlagBits dstAccessMask, vk::PipelineStageFlagBits srcStageMask, vk::PipelineStageFlagBits dstStageMask); /** * Records the buffer memory barrier into the staging buffer and command buffer which * ensures that relevant data transfers are carried out correctly. * * @param commandBuffer Vulkan Command Buffer to record the commands into * @param srcAccessMask Access flags for source access mask * @param dstAccessMask Access flags for destination access mask * @param scrStageMask Pipeline stage flags for source stage mask * @param dstStageMask Pipeline stage flags for destination stage mask */ void recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, vk::AccessFlagBits srcAccessMask, vk::AccessFlagBits dstAccessMask, vk::PipelineStageFlagBits srcStageMask, vk::PipelineStageFlagBits dstStageMask); /** * Constructs a vulkan descriptor buffer info which can be used to specify * and reference the underlying buffer component of the tensor without * exposing it. * * @return Descriptor buffer info with own buffer */ vk::DescriptorBufferInfo constructDescriptorBufferInfo(); /** * Returns the size/magnitude of the Tensor, which will be the total number * of elements across all dimensions * * @return Unsigned integer representing the total number of elements */ // TODO: move to cpp uint32_t size() { return this->mSize; } // TODO: move to cpp uint32_t dataTypeMemorySize() { return this->mDataTypeMemorySize; } // TODO: move to cpp uint32_t memorySize() { return this->mSize * this->mDataTypeMemorySize; } /** * Retrieve the underlying data type of the Tensor * * @return Data type of tensor of type kp::Tensor::TensorDataTypes */ TensorDataTypes dataType() { return this->mDataType; } void* rawData() { return this->mRawData; } // TODO: move to cpp template T* data() { return (T*)this->mRawData; } template std::vector vector() { return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; } /** * Sets / resets the vector data of the tensor. This function does not * perform any copies into GPU memory and is only performed on the host. */ void setRawData(const void* data) { // Copy data memcpy(this->mRawData, data, this->memorySize()); } protected: // -------------- ALWAYS OWNED RESOURCES TensorTypes mTensorType; TensorDataTypes mDataType; uint32_t mSize; uint32_t mDataTypeMemorySize; void* mRawData; private: void mapRawData() { KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); std::shared_ptr hostVisibleMemory = nullptr; if (this->mTensorType == TensorTypes::eHost) { hostVisibleMemory = this->mPrimaryMemory; } else if (this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; } else { KP_LOG_WARN( "Kompute Tensor mapping data not supported on storage tensor"); return; } vk::DeviceSize bufferSize = this->memorySize(); // Given we request coherent host memory we don't need to invalidate / // flush this->mRawData = this->mDevice->mapMemory( *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); vk::MappedMemoryRange mappedMemoryRange( *hostVisibleMemory, 0, bufferSize); } void unmapRawData() { KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); std::shared_ptr hostVisibleMemory = nullptr; if (this->mTensorType == TensorTypes::eHost) { hostVisibleMemory = this->mPrimaryMemory; } else if (this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; } else { KP_LOG_WARN( "Kompute Tensor mapping data not supported on storage tensor"); return; } vk::DeviceSize bufferSize = this->memorySize(); vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); this->mDevice->flushMappedMemoryRanges(1, &mappedRange); this->mDevice->unmapMemory(*hostVisibleMemory); } // -------------- NEVER OWNED RESOURCES std::shared_ptr mPhysicalDevice; std::shared_ptr mDevice; // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mPrimaryBuffer; bool mFreePrimaryBuffer = false; std::shared_ptr mStagingBuffer; bool mFreeStagingBuffer = false; std::shared_ptr mPrimaryMemory; bool mFreePrimaryMemory = false; std::shared_ptr mStagingMemory; bool mFreeStagingMemory = false; void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer void createBuffer(std::shared_ptr buffer, vk::BufferUsageFlags bufferUsageFlags); void allocateBindMemory(std::shared_ptr buffer, std::shared_ptr memory, vk::MemoryPropertyFlags memoryPropertyFlags); void recordCopyBuffer(const vk::CommandBuffer& commandBuffer, std::shared_ptr bufferFrom, std::shared_ptr bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion); void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, const vk::Buffer& buffer, vk::AccessFlagBits srcAccessMask, vk::AccessFlagBits dstAccessMask, vk::PipelineStageFlagBits srcStageMask, vk::PipelineStageFlagBits dstStageMask); // Private util functions vk::BufferUsageFlags getPrimaryBufferUsageFlags(); vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags(); vk::BufferUsageFlags getStagingBufferUsageFlags(); vk::MemoryPropertyFlags getStagingMemoryPropertyFlags(); }; // TODO: Limit T to be only float, bool, double, etc template class TensorT : public Tensor { public: TensorT(std::shared_ptr physicalDevice, std::shared_ptr device, const std::vector& data, const TensorTypes& tensorType = TensorTypes::eDevice) : Tensor(physicalDevice, device, (void*)data.data(), data.size(), sizeof(T), this->dataType(), tensorType) { KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size()); } ~TensorT() { KP_LOG_DEBUG("Kompute TensorT destructor"); } T* data() { return (T*)this->mRawData; } std::vector vector() { return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; } T& operator[](int index) { return *(((T*)this->mRawData) + index); } void setData(const std::vector& data) { KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", data.size()); if (data.size() != this->mSize) { throw std::runtime_error( "Kompute TensorT Cannot set data of different sizes"); } Tensor::setRawData(data.data()); } TensorDataTypes dataType(); }; } // End namespace kp