#pragma once #include "kompute/Core.hpp" namespace kp { /** * Structured data used in GPU operations. * * Tensors are the base building block in Kompute to perform operations across * GPUs. Each tensor would have a respective Vulkan memory and buffer, which * would be used to store their respective data. The tensors can be used for GPU * data storage or transfer. */ class Tensor { public: /** * Type for tensors created: Device allows memory to be transferred from * staging buffers. Staging are host memory visible. Storage are device * visible but are not set up to transfer or receive data (only for shader * storage). */ enum class TensorTypes { eDevice = 0, ///< Type is device memory, source and destination eHost = 1, ///< Type is host memory, source and destination eStorage = 2, ///< Type is Device memory (only) }; enum class TensorDataTypes { eBool = 0, eInt = 1, eUnsignedInt = 2, eFloat = 3, eDouble = 4, }; /** * Constructor with data provided which would be used to create the * respective vulkan buffer and memory. * * @param physicalDevice The physical device to use to fetch properties * @param device The device to use to create the buffer and memory from * @param data Non-zero-sized vector of data that will be used by the * tensor * @param tensorTypes Type for the tensor which is of type TensorTypes */ Tensor(std::shared_ptr physicalDevice, std::shared_ptr device, void* data, uint32_t elementTotalCount, uint32_t elementMemorySize, const TensorDataTypes& dataType = TensorDataTypes::eFloat, const TensorTypes& tensorType = TensorTypes::eDevice); /** * Destructor which is in charge of freeing vulkan resources unless they * have been provided externally. */ ~Tensor(); /** * Returns the size/magnitude of the Tensor, which will be the total number * of elements across all dimensions * * @return Unsigned integer representing the total number of elements */ // TODO: move to cpp virtual uint32_t size() { return this->mElementMemorySize; } // TODO: move to cpp virtual uint32_t memorySize() { return this->mSize * this->mElementMemorySize; } /** * Retrieve the underlying data type of the Tensor * * @return Data type of tensor of type kp::Tensor::TensorDataTypes */ virtual TensorDataTypes dataType() { return this->mDataType; } /** * Maps data from the Host Visible GPU memory into the data vector. It * requires the Tensor to be of staging type for it to work. */ virtual void mapDataFromHostMemory(); /** * Maps data from the data vector into the Host Visible GPU memory. It * requires the tensor to be of staging type for it to work. */ virtual void mapDataIntoHostMemory(); // TODO: Decide whether this is one we prefer to have also overriden in the underlying tensorView // TODO: move to cpp void getRawData(void* data) { this->rawMapDataFromHostMemory(data); } /** * Function to trigger reinitialisation of the tensor buffer and memory with * new data as well as new potential device type. * * @param data Vector of data to use to initialise vector from * @param tensorType The type to use for the tensor */ void rebuild(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize, const TensorDataTypes& dataType = TensorDataTypes::eFloat, TensorTypes tensorType = TensorTypes::eDevice); /** * Destroys and frees the GPU resources which include the buffer and memory. */ void destroy(); /** * Check whether tensor is initialized based on the created gpu resources. * * @returns Boolean stating whether tensor is initialized */ bool isInit(); /** * Retrieve the tensor type of the Tensor * * @return Tensor type of tensor */ TensorTypes tensorType(); /** * Sets / resets the vector data of the tensor. This function does not * perform any copies into GPU memory and is only performed on the host. */ void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) { if (elementTotalCount * elementMemorySize != this->memorySize()) { throw std::runtime_error( "Kompute Tensor Cannot set data of different sizes"); } this->mSize = elementTotalCount; this->mElementMemorySize = elementMemorySize; this->rawMapDataIntoHostMemory(data); } /** * Records a copy from the memory of the tensor provided to the current * thensor. This is intended to pass memory into a processing, to perform * a staging buffer transfer, or to gather output (between others). * * @param commandBuffer Vulkan Command Buffer to record the commands into * @param copyFromTensor Tensor to copy the data from * @param createBarrier Whether to create a barrier that ensures the data is * copied before further operations. Default is true. */ void recordCopyFrom(const vk::CommandBuffer& commandBuffer, std::shared_ptr copyFromTensor, bool createBarrier); /** * Records a copy from the internal staging memory to the device memory * using an optional barrier to wait for the operation. This function would * only be relevant for kp::Tensors of type eDevice. * * @param commandBuffer Vulkan Command Buffer to record the commands into * @param createBarrier Whether to create a barrier that ensures the data is * copied before further operations. Default is true. */ void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer, bool createBarrier); /** * Records a copy from the internal device memory to the staging memory * using an optional barrier to wait for the operation. This function would * only be relevant for kp::Tensors of type eDevice. * * @param commandBuffer Vulkan Command Buffer to record the commands into * @param createBarrier Whether to create a barrier that ensures the data is * copied before further operations. Default is true. */ void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer, bool createBarrier); /** * Records the buffer memory barrier into the command buffer which * ensures that relevant data transfers are carried out correctly. * * @param commandBuffer Vulkan Command Buffer to record the commands into * @param srcAccessMask Access flags for source access mask * @param dstAccessMask Access flags for destination access mask * @param scrStageMask Pipeline stage flags for source stage mask * @param dstStageMask Pipeline stage flags for destination stage mask */ void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, vk::AccessFlagBits srcAccessMask, vk::AccessFlagBits dstAccessMask, vk::PipelineStageFlagBits srcStageMask, vk::PipelineStageFlagBits dstStageMask); /** * Constructs a vulkan descriptor buffer info which can be used to specify * and reference the underlying buffer component of the tensor without * exposing it. * * @return Descriptor buffer info with own buffer */ vk::DescriptorBufferInfo constructDescriptorBufferInfo(); private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mPhysicalDevice; std::shared_ptr mDevice; // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mPrimaryBuffer; bool mFreePrimaryBuffer = false; std::shared_ptr mStagingBuffer; bool mFreeStagingBuffer = false; std::shared_ptr mPrimaryMemory; bool mFreePrimaryMemory = false; std::shared_ptr mStagingMemory; bool mFreeStagingMemory = false; // -------------- ALWAYS OWNED RESOURCES TensorTypes mTensorType; TensorDataTypes mDataType; uint32_t mSize; uint32_t mElementMemorySize; void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer void createBuffer(std::shared_ptr buffer, vk::BufferUsageFlags bufferUsageFlags); void allocateBindMemory(std::shared_ptr buffer, std::shared_ptr memory, vk::MemoryPropertyFlags memoryPropertyFlags); void recordCopyBuffer(const vk::CommandBuffer& commandBuffer, std::shared_ptr bufferFrom, std::shared_ptr bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion, bool createBarrier); // Private util functions vk::BufferUsageFlags getPrimaryBufferUsageFlags(); vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags(); vk::BufferUsageFlags getStagingBufferUsageFlags(); vk::MemoryPropertyFlags getStagingMemoryPropertyFlags(); void rawMapDataFromHostMemory(void* data) { KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); std::shared_ptr hostVisibleMemory = nullptr; if (this->mTensorType == TensorTypes::eHost) { hostVisibleMemory = this->mPrimaryMemory; } else if (this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; } else { KP_LOG_WARN( "Kompute Tensor mapping data not supported on storage tensor"); return; } vk::DeviceSize bufferSize = this->memorySize(); void* mapped = this->mDevice->mapMemory( *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize); this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange); memcpy(data, mapped, bufferSize); this->mDevice->unmapMemory(*hostVisibleMemory); } void rawMapDataIntoHostMemory(void* data) { KP_LOG_DEBUG("Kompute Tensor mapping data into host buffer"); std::shared_ptr hostVisibleMemory = nullptr; if (this->mTensorType == TensorTypes::eHost) { hostVisibleMemory = this->mPrimaryMemory; } else if (this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; } else { KP_LOG_WARN( "Kompute Tensor mapping data not supported on storage tensor"); return; } vk::DeviceSize bufferSize = this->memorySize(); void* mapped = this->mDevice->mapMemory( *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); memcpy(mapped, data, bufferSize); vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); this->mDevice->flushMappedMemoryRanges(1, &mappedRange); this->mDevice->unmapMemory(*hostVisibleMemory); } }; // TODO: Limit T to be only float, bool, double, etc template class TensorView: public Tensor { public: TensorView(std::shared_ptr physicalDevice, std::shared_ptr device, const std::vector& data, const TensorTypes& tensorType = TensorTypes::eDevice); ~TensorView(); void rebuild(const std::vector& data, TensorTypes tensorType = TensorTypes::eDevice) { this->mData = data; Tensor::rebuild(data.data(), data.size(), sizeof(T), this->dataType(), tensorType); } std::vector& data() { return this->mData; } T& operator[](int index) { return this->mData[index]; } void setData(const std::vector& data) { if (data.size() != this->mData.size()) { throw std::runtime_error( "Kompute TensorView Cannot set data of different sizes"); } this->mData = data; this->setRawData(this->mData.data(), this->mData.size(), sizeof(T), this->dataType()); } TensorDataTypes dataType() override; uint32_t size() override { return this->mData->size(); } uint32_t memorySize() override { return this->mData->size() * sizeof(T); } /** * Maps data from the Host Visible GPU memory into the data vector. It * requires the Tensor to be of staging type for it to work. */ void mapDataFromHostMemory() override { KP_LOG_DEBUG("Kompute TensorView mapDataFromHostMemory copying data"); this->rawMapDataFromHostMemory(this->mData.data()); } /** * Maps data from the data vector into the Host Visible GPU memory. It * requires the tensor to be of staging type for it to work. */ void mapDataIntoHostMemory() override { KP_LOG_DEBUG("Kompute TensorView mapDataIntoHostMemory copying data"); this->rawMapDataIntoHostMemory(this->mData.data()); } private: // -------------- ALWAYS OWNED RESOURCES std::vector mData; }; template<> Tensor::TensorDataTypes TensorView::dataType() { return Tensor::TensorDataTypes::eBool; } template<> Tensor::TensorDataTypes TensorView::dataType() { return Tensor::TensorDataTypes::eInt; } template<> Tensor::TensorDataTypes TensorView::dataType() { return Tensor::TensorDataTypes::eUnsignedInt; } template<> Tensor::TensorDataTypes TensorView::dataType() { return Tensor::TensorDataTypes::eFloat; } template<> Tensor::TensorDataTypes TensorView::dataType() { return Tensor::TensorDataTypes::eDouble; } } // End namespace kp