llama-cpp-turboquant/src/include/kompute/Tensor.hpp
2021-03-09 08:06:27 +00:00

359 lines
13 KiB
C++

#pragma once
#include "kompute/Core.hpp"
namespace kp {
/**
* Structured data used in GPU operations.
*
* Tensors are the base building block in Kompute to perform operations across
* GPUs. Each tensor would have a respective Vulkan memory and buffer, which
* would be used to store their respective data. The tensors can be used for GPU
* data storage or transfer.
*/
class Tensor
{
public:
/**
* Type for tensors created: Device allows memory to be transferred from
* staging buffers. Staging are host memory visible. Storage are device
* visible but are not set up to transfer or receive data (only for shader
* storage).
*/
enum class TensorTypes
{
eDevice = 0, ///< Type is device memory, source and destination
eHost = 1, ///< Type is host memory, source and destination
eStorage = 2, ///< Type is Device memory (only)
};
enum class TensorDataTypes
{
eBool = 0,
eInt = 1,
eUnsignedInt = 2,
eFloat = 3,
eDouble = 4,
};
/**
* Constructor with data provided which would be used to create the
* respective vulkan buffer and memory.
*
* @param physicalDevice The physical device to use to fetch properties
* @param device The device to use to create the buffer and memory from
* @param data Non-zero-sized vector of data that will be used by the
* tensor
* @param tensorTypes Type for the tensor which is of type TensorTypes
*/
Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
void* data,
uint32_t elementTotalCount,
uint32_t elementMemorySize,
const TensorDataTypes& dataType,
const TensorTypes& tensorType = TensorTypes::eDevice);
/**
* Destructor which is in charge of freeing vulkan resources unless they
* have been provided externally.
*/
virtual ~Tensor();
/**
* Function to trigger reinitialisation of the tensor buffer and memory with
* new data as well as new potential device type.
*
* @param data Vector of data to use to initialise vector from
* @param tensorType The type to use for the tensor
*/
void rebuild(void* data,
uint32_t elementTotalCount,
uint32_t elementMemorySize);
/**
* Destroys and frees the GPU resources which include the buffer and memory.
*/
void destroy();
/**
* Check whether tensor is initialized based on the created gpu resources.
*
* @returns Boolean stating whether tensor is initialized
*/
bool isInit();
/**
* Retrieve the tensor type of the Tensor
*
* @return Tensor type of tensor
*/
TensorTypes tensorType();
/**
* Records a copy from the memory of the tensor provided to the current
* thensor. This is intended to pass memory into a processing, to perform
* a staging buffer transfer, or to gather output (between others).
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param copyFromTensor Tensor to copy the data from
*/
void recordCopyFrom(const vk::CommandBuffer& commandBuffer,
std::shared_ptr<Tensor> copyFromTensor);
/**
* Records a copy from the internal staging memory to the device memory
* using an optional barrier to wait for the operation. This function would
* only be relevant for kp::Tensors of type eDevice.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
*/
void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer);
/**
* Records a copy from the internal device memory to the staging memory
* using an optional barrier to wait for the operation. This function would
* only be relevant for kp::Tensors of type eDevice.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
*/
void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer);
/**
* Records the buffer memory barrier into the primary buffer and command buffer which
* ensures that relevant data transfers are carried out correctly.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param srcAccessMask Access flags for source access mask
* @param dstAccessMask Access flags for destination access mask
* @param scrStageMask Pipeline stage flags for source stage mask
* @param dstStageMask Pipeline stage flags for destination stage mask
*/
void recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask);
/**
* Records the buffer memory barrier into the staging buffer and command buffer which
* ensures that relevant data transfers are carried out correctly.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param srcAccessMask Access flags for source access mask
* @param dstAccessMask Access flags for destination access mask
* @param scrStageMask Pipeline stage flags for source stage mask
* @param dstStageMask Pipeline stage flags for destination stage mask
*/
void recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask);
/**
* Constructs a vulkan descriptor buffer info which can be used to specify
* and reference the underlying buffer component of the tensor without
* exposing it.
*
* @return Descriptor buffer info with own buffer
*/
vk::DescriptorBufferInfo constructDescriptorBufferInfo();
/**
* Returns the size/magnitude of the Tensor, which will be the total number
* of elements across all dimensions
*
* @return Unsigned integer representing the total number of elements
*/
// TODO: move to cpp
uint32_t size() { return this->mSize; }
// TODO: move to cpp
uint32_t dataTypeMemorySize() { return this->mDataTypeMemorySize; }
// TODO: move to cpp
uint32_t memorySize() { return this->mSize * this->mDataTypeMemorySize; }
/**
* Retrieve the underlying data type of the Tensor
*
* @return Data type of tensor of type kp::Tensor::TensorDataTypes
*/
TensorDataTypes dataType() { return this->mDataType; }
void* rawData() { return this->mRawData; }
// TODO: move to cpp
template<typename T>
T* data()
{
return (T*)this->mRawData;
}
template<typename T>
std::vector<T> vector()
{
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
}
/**
* Sets / resets the vector data of the tensor. This function does not
* perform any copies into GPU memory and is only performed on the host.
*/
void setRawData(const void* data)
{
// Copy data
memcpy(this->mRawData, data, this->memorySize());
}
protected:
// -------------- ALWAYS OWNED RESOURCES
TensorTypes mTensorType;
TensorDataTypes mDataType;
uint32_t mSize;
uint32_t mDataTypeMemorySize;
void* mRawData;
private:
void mapRawData()
{
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
if (this->mTensorType == TensorTypes::eHost) {
hostVisibleMemory = this->mPrimaryMemory;
} else if (this->mTensorType == TensorTypes::eDevice) {
hostVisibleMemory = this->mStagingMemory;
} else {
KP_LOG_WARN(
"Kompute Tensor mapping data not supported on storage tensor");
return;
}
vk::DeviceSize bufferSize = this->memorySize();
// Given we request coherent host memory we don't need to invalidate /
// flush
this->mRawData = this->mDevice->mapMemory(
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
vk::MappedMemoryRange mappedMemoryRange(
*hostVisibleMemory, 0, bufferSize);
}
void unmapRawData()
{
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
if (this->mTensorType == TensorTypes::eHost) {
hostVisibleMemory = this->mPrimaryMemory;
} else if (this->mTensorType == TensorTypes::eDevice) {
hostVisibleMemory = this->mStagingMemory;
} else {
KP_LOG_WARN(
"Kompute Tensor mapping data not supported on storage tensor");
return;
}
vk::DeviceSize bufferSize = this->memorySize();
vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
this->mDevice->unmapMemory(*hostVisibleMemory);
}
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
std::shared_ptr<vk::Device> mDevice;
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::Buffer> mPrimaryBuffer;
bool mFreePrimaryBuffer = false;
std::shared_ptr<vk::Buffer> mStagingBuffer;
bool mFreeStagingBuffer = false;
std::shared_ptr<vk::DeviceMemory> mPrimaryMemory;
bool mFreePrimaryMemory = false;
std::shared_ptr<vk::DeviceMemory> mStagingMemory;
bool mFreeStagingMemory = false;
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
void createBuffer(std::shared_ptr<vk::Buffer> buffer,
vk::BufferUsageFlags bufferUsageFlags);
void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
std::shared_ptr<vk::DeviceMemory> memory,
vk::MemoryPropertyFlags memoryPropertyFlags);
void recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
std::shared_ptr<vk::Buffer> bufferFrom,
std::shared_ptr<vk::Buffer> bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion);
void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
const vk::Buffer& buffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask);
// Private util functions
vk::BufferUsageFlags getPrimaryBufferUsageFlags();
vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
vk::BufferUsageFlags getStagingBufferUsageFlags();
vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
};
// TODO: Limit T to be only float, bool, double, etc
template<typename T>
class TensorT : public Tensor
{
public:
TensorT(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
const std::vector<T>& data,
const TensorTypes& tensorType = TensorTypes::eDevice)
: Tensor(physicalDevice,
device,
(void*)data.data(),
data.size(),
sizeof(T),
this->dataType(),
tensorType)
{
KP_LOG_DEBUG("Kompute TensorT constructor with data size {}",
data.size());
}
~TensorT() { KP_LOG_DEBUG("Kompute TensorT destructor"); }
T* data() { return (T*)this->mRawData; }
std::vector<T> vector()
{
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
}
T& operator[](int index) { return *(((T*)this->mRawData) + index); }
void setData(const std::vector<T>& data)
{
KP_LOG_DEBUG("Kompute TensorT setting data with data size {}",
data.size());
if (data.size() != this->mSize) {
throw std::runtime_error(
"Kompute TensorT Cannot set data of different sizes");
}
Tensor::setRawData(data.data());
}
TensorDataTypes dataType();
};
} // End namespace kp