Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions PyTorchSimFrontend/extension_codecache.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def __init__(self, message="SPAD overflow occurred."):
super().__init__(message)

class TileSizeError(Exception):
    """Exception signalling that a tile size constraint was violated."""

    def __init__(self, message="Tile size constraint violated."):
        # The default text is specific to this error type rather than the
        # SPAD-overflow wording used by the sibling exception.
        super().__init__(message)

class MLIRCodeCache:
Expand Down Expand Up @@ -243,12 +243,10 @@ def load(cls, source_code,
cycle_list = cyclesim.compile_and_simulate(os.path.join(write_path, cycle_binary_name), vectorlane_size, silent_mode=silent_mode)

# Create TOG
w_offset, x_offset = vectorlane_size, vectorlane_size
x_offset = vectorlane_size
if kwargs['loop_size'] is not None and kwargs['loop_size'][-3] < vectorlane_size:
x_offset = kwargs['loop_size'][-3]
if kwargs['loop_size'] is not None and kwargs['loop_size'][-1] < vectorlane_size:
w_offset = kwargs['loop_size'][-1]
w_offset = 0 # max(w_offset - x_offset, 0)
w_offset = 0
tile_graph_generator = tog_generator(origins)
tile_graph_generator.load_file(raw_tog_path)
tile_graph_generator.generate_tile_graph(
Expand Down
65 changes: 0 additions & 65 deletions PyTorchSimFrontend/extension_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,71 +113,6 @@ def generate_outer_product_matrix(a, b, M, K, N, prefix, dir_path):
address_matrix_c=address_matrix_b+(n_nonzeros*data_width)
return 0, address_matrix_b, address_matrix_c

def _float32_bits(value):
    """Return the big-endian IEEE-754 single-precision bit pattern of *value* as an int."""
    return int.from_bytes(struct.pack(">f", float(value)), "big")


def _is_nonzero(value, atol=1e-1):
    """Return True when the scalar tensor *value* is farther than *atol* from zero."""
    # zeros_like keeps the comparison operand in the same dtype as the input.
    return not bool(torch.isclose(value, torch.zeros_like(value), atol=atol))


def generate_inner_product_matrix(a, b, M, K, N, file_name, in_file_bitmap_a, in_file_bitmap_b):
    """Write bitmap-compressed copies of matrices *a* and *b* to text files.

    Elements whose magnitude is within 0.1 of zero are treated as zeros: they
    get a ``0`` in the bitmap and no entry in the value stream.  Every other
    element gets a ``1`` in the bitmap and its float32 bit pattern (as a
    decimal integer) in the value stream.

    Args:
        a: Tensor indexed as ``a[m, k]`` for ``m < M`` and ``k < K``.
        b: Tensor indexed as ``b[k, n]`` for ``k < K`` and ``n < N``.
        M, K, N: Loop bounds used to walk ``a`` and ``b``.
        file_name: Output path for the comma-separated value stream.  The
            kept values of ``a`` come first (m outer, k inner), then the kept
            values of ``b`` (n outer, k inner), then a literal ``0`` that only
            serves to avoid a trailing comma.
        in_file_bitmap_a: Output path for the comma-separated bitmap of ``a``
            in ``m * K + k`` order.
        in_file_bitmap_b: Output path for the comma-separated bitmap of ``b``
            in ``k * N + n`` order.

    Returns:
        The tuple ``(0, M * K, M * K + N * K)``.

        NOTE(review): the byte offsets of the compressed B and C regions are
        computed and printed but are not what is returned -- confirm which
        set of offsets the caller expects.
    """
    data_width = 4  # bytes per stored float32 value
    a_cpu = a.cpu()
    b_cpu = b.cpu()
    matrixA_size = int(M * K)
    matrixB_size = int(N * K)

    # Retained from the original so the global RNG state seen by callers is
    # unchanged; nothing in this function draws random numbers.
    random.seed(a=0, version=2)

    address_matrix_a = 0
    values = []

    # Matrix A: bitmap and values are both emitted in row-major (m, k) order.
    bitmap_a = []
    for m in range(M):
        for k in range(K):
            element = a_cpu[m, k]
            if _is_nonzero(element):
                bitmap_a.append("1")
                values.append(str(_float32_bits(element)))
            else:
                bitmap_a.append("0")
    address_matrix_b = len(values) * data_width

    # Matrix B: values are emitted column by column (n outer, k inner) while
    # the bitmap is laid out at index k * N + n.
    bitmap_b = [0] * matrixB_size
    n_nonzeros_b = 0
    for n in range(N):
        for k in range(K):
            element = b_cpu[k, n]
            if _is_nonzero(element):
                bitmap_b[k * N + n] = 1
                values.append(str(_float32_bits(element)))
                n_nonzeros_b += 1
    address_matrix_c = address_matrix_b + n_nonzeros_b * data_width

    # Final 0 that is never used; it just avoids ending on a comma.
    values.append("0")

    with open(file_name, "w") as fd, open(in_file_bitmap_a, "w") as fbA, open(in_file_bitmap_b, "w") as fbB:
        fd.write(",".join(values))
        fbA.write(",".join(bitmap_a))
        fbB.write(",".join(str(bit) for bit in bitmap_b))

    print("Offset matrix A: "+str(address_matrix_a))
    print("Offset matrix B: "+str(address_matrix_b))
    print("Offset matrix C: "+str(address_matrix_c))
    return address_matrix_a, matrixA_size, matrixA_size+matrixB_size

def prepare_outer_product_matrix(a, b, out):
M, K, N = a.shape[0], b.shape[0], b.shape[1]

Expand Down
4 changes: 1 addition & 3 deletions PyTorchSimFrontend/mlir/mlir_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@

from typing import Callable

import sympy

from torch.utils._sympy.value_ranges import ValueRanges
from torch._inductor.utils import (
get_sympy_Expr_dtype,
Expand Down Expand Up @@ -553,7 +551,7 @@ def get_name(self) -> str: return self.name
def get_tile_size(self):
    """Return the tile size as a newly built list, leaving the stored one untouched."""
    return [*self._tile_size]
def get_tile_stride(self):
    """Return the tile stride as a newly built list, leaving the stored one untouched."""
    return [*self._tile_stride]
def get_numel(self) -> int:
    """Return the product of every entry in the tile size."""
    dims = self._tile_size
    return math.prod(dims)
def get_nr_dim(self) -> int:
    """Return the number of entries in the tile size."""
    return len(self._tile_size)
def get_reduction_numel(self):
    """Return the product of the last ``self.nr_rdim`` entries of the tile size.

    Returns 1 when ``self.nr_rdim`` is 0.
    """
    if not self.nr_rdim:
        # A slice starting at -0 starts at 0 and would select every entry,
        # so the zero case has to be handled before slicing.
        return 1
    return reduce(mul, self.get_tile_size()[-self.nr_rdim:], 1)

def set_tile_size(self, tile_size, tile_axis_order=None, constraints=None):
Expand Down
4 changes: 2 additions & 2 deletions PyTorchSimFrontend/mlir/mlir_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def can_fuse_with_exceptions(self, node1: BaseSchedulerNode, node2: BaseSchedule
return False

if list(node1.read_writes.writes)[0].name in [dep.name for dep in node2.read_writes.reads]:
node1 = self.revert_group(node1)
self.revert_group(node1)
return True
return self.scheduler.can_fuse_origin(node1, node2)

Expand Down Expand Up @@ -182,7 +182,7 @@ def can_fuse_horizontal(self, node1, node2):
try:
stride = [i.strip()[:-1].split(",")[-1].strip() for i in str(node2.get_nodes()[0].node).split("\n") if "r0" in i][1]
stride = int(sympify(stride).coeff(target_symbol))
except:
except Exception:
return False

# We can't fuse dim=-1 & N == 1
Expand Down
Loading