Fix second implementation, add benchmark script
* Third implementation is broken (WIP)
This commit is contained in:
parent
6f04eb9db2
commit
7f4ec27235
6 changed files with 347 additions and 40 deletions
97
examples/python_naive_matmul/matmul_plot.py
Normal file
97
examples/python_naive_matmul/matmul_plot.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
from argparse import ArgumentParser
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
||||
def plot_tensor(window_name: str, tensor: np.ndarray, coord_highlight: tuple[int, int] = None):
    """Draw a 2-D tensor as a grid of integer labels in an OpenCV window.

    Every element is truncated to ``int`` and rendered as text inside its own
    ``font_size`` x ``font_size`` pixel cell. The image is handed to
    ``cv2.imshow``; the caller is responsible for calling ``cv2.waitKey`` so
    the window actually gets drawn.

    Args:
        window_name: Name of the OpenCV window to draw into.
        tensor: Array indexed as ``tensor[row, col]``. It does not have to be
            square.
        coord_highlight: Optional ``(row, col)`` of one element to draw in a
            different colour. ``None`` (or any falsy value) disables the
            highlight.
    """
    font_size = 48  # edge length, in pixels, of one grid cell

    # Axis 0 is the row (vertical / y) extent and axis 1 the column
    # (horizontal / x) extent. Sizing the canvas and the loops this way keeps
    # them consistent with the ``tensor[y, x]`` lookup for non-square input.
    rows = tensor.shape[0]
    cols = tensor.shape[1]
    image = np.zeros((rows * font_size, cols * font_size, 3), dtype=np.uint8)

    for y in range(rows):
        for x in range(cols):
            highlighted = bool(coord_highlight) and y == coord_highlight[0] and x == coord_highlight[1]
            color = (127, 127, 255) if highlighted else (255, 255, 255)
            # putText anchors at the bottom-left of the text, hence the 0.8
            # cell offset that pushes the baseline down into the cell.
            cv2.putText(
                image, str(int(tensor[y, x])), (x * font_size, int((y + 0.8) * font_size)),
                cv2.FONT_HERSHEY_TRIPLEX, 1., color)

    cv2.imshow(window_name, image)
|
||||
|
||||
|
||||
def main():
    """Step through a 2-D workgroup/invocation grid and visualise the indices.

    Command-line arguments: ``tensor_size`` (edge length of the square
    tensors), ``tile_size``, and two integers each for ``local_size`` and
    ``workgroup``.

    For every invocation the script stores the row, column, workgroup indices
    and tile-offset ("global") row/column into separate debug tensors at
    ``[row, col]``, redraws them with that cell highlighted, and blocks on
    ``cv2.waitKey`` until a key is pressed.

    NOTE(review): the naming suggests this emulates a GPU compute dispatch to
    debug a tiled matmul kernel -- confirm against the shader it mirrors.

    Raises:
        IndexError: if an invocation's ``(row, col)`` falls outside the
            tensors; the offending indices are printed before re-raising.
    """
    parser = ArgumentParser()
    parser.add_argument('tensor_size', type=int, help='Size of the square tensors')
    parser.add_argument('tile_size', type=int)
    parser.add_argument('local_size', type=int, nargs=2)
    parser.add_argument('workgroup', type=int, nargs=2)
    arguments = parser.parse_args()

    tensor_size: int = arguments.tensor_size
    tile_size: int = arguments.tile_size
    # Both arguments are declared with nargs=2, so these are 2-tuples.
    local_size: tuple[int, int] = tuple(arguments.local_size)
    workgroup: tuple[int, int] = tuple(arguments.workgroup)

    tensor_shape = (tensor_size, tensor_size)
    # Upper-triangular input matrices; only displayed, never modified.
    input_tensors = {
        'tensor_1': np.triu(np.ones(tensor_shape)),
        'tensor_2': np.triu(np.ones(tensor_shape)),
    }
    # One debug tensor per quantity recorded for each invocation. The dict
    # order is also the order in which the windows are drawn.
    debug_names = (
        'tensor_out',
        'tensor_test_1',
        'tensor_test_2',
        'tensor_test_3',
        'tensor_test_4',
        'tensor_test_5',
    )
    debug_tensors = {name: np.zeros(tensor_shape) for name in debug_names}

    def show_everything():
        """Draw inputs and debug tensors without a highlight, then wait for a key."""
        for name, tensor in input_tensors.items():
            plot_tensor(name, tensor)
        for name, tensor in debug_tensors.items():
            plot_tensor(name, tensor)
        cv2.waitKey(-1)

    show_everything()

    print(f'{workgroup=} {local_size=}')
    for workgroup_x in range(workgroup[0]):
        for workgroup_y in range(workgroup[1]):
            for invocation_x in range(workgroup_x * local_size[0], (workgroup_x + 1) * local_size[0]):
                for invocation_y in range(workgroup_y * local_size[1], (workgroup_y + 1) * local_size[1]):
                    row = invocation_x
                    col = invocation_y
                    global_row = (tile_size * workgroup_x) + row
                    global_col = (tile_size * workgroup_y) + col

                    recorded = {
                        'tensor_out': row,
                        'tensor_test_1': col,
                        'tensor_test_2': workgroup_x,
                        'tensor_test_3': workgroup_y,
                        'tensor_test_4': global_row,
                        'tensor_test_5': global_col,
                    }
                    # Only the indexing writes are guarded, so the diagnostic
                    # always refers to the invocation that went out of bounds.
                    try:
                        for name, value in recorded.items():
                            debug_tensors[name][row, col] = value
                    except IndexError:
                        print(f'{workgroup_x=} {workgroup_y=} {row=} {col=}')
                        raise

                    for name, tensor in debug_tensors.items():
                        plot_tensor(name, tensor, (row, col))
                    cv2.waitKey(-1)

    show_everything()
|
||||
|
||||
|
||||
# Run the visualisation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue