Newer
Older
block_list = []
for level in range(num_levels):
block_list.append(get_block_size(point, layer, level))
bank_list.append(get_bank_size(point, layer, level))
return [bank_list, block_list]
def fit_in_level(cap, blocks, invalid_underutilized, level, memory_partitions):
"""
Check whether the current level's memory size >= the current level's loop blocking size.
invalid_underutilized is used to exclude mapping points with too low memory utilization (< 50%).
"""
#LMEI can later put the memory utilization threshold as a user defined parameter
# I/O/W example: [0,0,1] I is stored in memory 0, O is stored in memory 0, W is stored in memory 1
# leave last empty
# memory_partitions = [[0,1, 2],[0,0,1],[0,0,None]] #if 3 level do not contain weights [0, 0, None]
indices = [
index
for index, partition in enumerate(memory_partitions[level])
if partition == i
]
size = sum([blocks[j] for j in indices])
if size == 0:
continue
if (size > cap[i]) == True:
check_if_underutilized = 0
# print level, i, invalid_underutilized, memory_partitions[level+1][i], size, cap[i]
last_layer = []
for mem in indices:
if (
(size <= cap[i]) and (2 * size <= cap[i])
) == True: # if double the size fit then there will be a better to block partition that will utilized all memory,
# print "NO level: ", level,"blocks: ", blocks, "size: ", size, "cap: ", cap, "indices: ", indices, "last_layer", last_layer
check_if_underutilized += 1
else:
test = 1
else:
# print "OK level: ", level,"blocks: ", blocks, "size: ", size, "cap: ", cap, "indices: ", indices, "last_layer", last_layer
test = 2
if check_if_underutilized == len(cap):
return False
return True
else:
total_size = sum(blocks)
# for size,contain in zip(blocks, contains):
# if contain:
# total_size += size
# total_capacity = 0
# for size,contain in zip(cap, contains):
# if contain:
# total_capacity += size
# total_size = sum(blocks)
if invalid_underutilized:
def valid_partition_number(resource, partitioning, level):
    """
    Check that the partitioning at ``level`` fits the parallelism of that level.

    The product of every factor in ``partitioning[level]`` (1 when there are
    no factors) is compared with ``resource.parallelism(level).count``.

    Returns True when the product does not exceed that count.
    """
    available_units = resource.parallelism(level).count
    requested_units = reduce(mul, partitioning[level], 1)
    return requested_units <= available_units
def valid_partitioning_current_level(resource, point, layer, level, verbose=False):
    """
    Check whether the bank size of ``point`` at ``level`` fits in that level's buffer.

    Delegates to fit_in_level() with the buffer capacity of ``level`` and the
    size reported by get_bank_size(). ``verbose`` is accepted for signature
    parity with the other validators and is not used here.

    Returns the result of fit_in_level(); valid_partitioning() consumes it as
    a boolean, so the value must be returned rather than discarded.
    """
    return fit_in_level(
        resource.buffer(level).capacity,
        get_bank_size(point, layer, level),
        resource.invalid_underutilized,
        level,
        resource.memory_partitions,
    )
def valid_mapping_point_current_level(resource, point, layer, level, verbose=False):
    """
    Check whether ``point`` is a valid mapping at a single memory ``level``.

    Two conditions must both hold:
      * the data footprint fits in the level's buffer (fit_in_level), and
      * the partitioning does not exceed the level's parallelism
        (valid_partition_number).

    When ``verbose`` is 3 or higher, both intermediate results are printed.

    Returns the conjunction of the two checks.
    """
    # Levels with more than one parallel unit are sized per bank; all other
    # levels are sized by the whole block.
    if resource.paras[level].count > 1:
        footprint = get_bank_size(point, layer, level)
    else:
        footprint = get_block_size(point, layer, level)

    valid_size = fit_in_level(
        resource.buffer(level).capacity,
        footprint,
        resource.invalid_underutilized,
        level,
        resource.memory_partitions,
    )

    # Transpose loop_partitionings so that it can be indexed by level.
    partitioning = list(zip(*(point.loop_partitionings)))
    valid_para = valid_partition_number(resource, partitioning, level)

    # Use the same ">= 3" verbosity threshold as the rest of this module.
    if verbose >= 3:
        print("Level ", level, ": Partitioned block size fit in bank: ", valid_size)
        print("Level ", level, ": Partition number is valid: ", valid_para)
    return valid_size and valid_para
def valid_partitioning(resource, point, layer, verbose=False):
    """
    Check the partitioning of ``point`` at every level listed in ``resource.para_index``.

    Returns False as soon as one level fails
    valid_partitioning_current_level(), True when all of them pass.
    """
    return all(
        valid_partitioning_current_level(resource, point, layer, lvl, verbose)
        for lvl in resource.para_index
    )
def valid_blocking_size_current_level(resource, point, layer, level, verbose=False):
    """
    Check if the blocking size of the current level fits in memory.

    The capacity of the level's buffer is scaled by
    ``resource.paras[level].count`` before the comparison. The capacity may
    be a single number or a list of numbers; a list is scaled element-wise
    into a new list, so the resource's own capacity list is never modified.

    Under-utilization rejection is only requested from fit_in_level() when
    ``resource.invalid_underutilized`` is set and ``level`` is not one of the
    levels in ``resource.para_index``.

    ``verbose`` is accepted for signature parity and is not used here.

    Returns the result of fit_in_level().
    """
    unit_count = resource.paras[level].count
    capacity = resource.buffer(level).capacity
    if isinstance(capacity, list):
        total_capacity = [entry * unit_count for entry in capacity]
    else:
        total_capacity = capacity * unit_count

    reject_underutilized = resource.invalid_underutilized and (
        level not in resource.para_index
    )
    return fit_in_level(
        total_capacity,
        get_block_size(point, layer, level),
        reject_underutilized,
        level,
        resource.memory_partitions,
    )
# get_block_size(point, layer, level), (level > min(resource.para_index)))
def valid_blocking_size(resource, point, layer, verbose=False):
    """
    Check the blocking size of ``point`` at every buffer level of ``resource``.

    Levels 0 .. resource.buffer_levels() - 1 are tested in order; the first
    failing level ends the check with False. Returns True when all pass.
    """
    return all(
        valid_blocking_size_current_level(resource, point, layer, lvl, verbose)
        for lvl in range(resource.buffer_levels())
    )
def valid_mapping_point(resource, point, layer, verbose=False):
    """
    Check that ``point`` is a valid mapping at every buffer level of ``resource``.

    Levels are tested in increasing order with
    valid_mapping_point_current_level(); the first failing level ends the
    check with False. Returns True when all levels pass.
    """
    return all(
        valid_mapping_point_current_level(resource, point, layer, lvl, verbose)
        for lvl in range(resource.buffer_levels())
    )
def get_total_access_cost(resource, array_cost):
    """
    Build the per-level access-cost table, including array-level entries.

    The table starts as a deep copy of ``resource.access_cost`` so the
    resource itself is never mutated. When ``resource.array_access_cost`` is
    falsy, that copy is returned unchanged. Otherwise, for the k-th level
    whose ``access_mode`` is non-zero, ``array_cost[k]`` is inserted directly
    after that level's own entry.

    NOTE(review): assumes ``array_cost`` holds one entry per such level, in
    level order, as produced alongside the access list by get_access() -
    confirm against that function.

    Returns the new list of access costs.
    """
    total_access_cost = copy.deepcopy(resource.access_cost)
    if not resource.array_access_cost:
        return total_access_cost

    para_index = [i for i, e in enumerate(resource.paras) if e.access_mode != 0]
    # Every insertion shifts the following entries by one, so the offset
    # grows by one for each array level that has already been placed.
    for offset, (index, cost) in enumerate(zip(para_index, array_cost), start=1):
        total_access_cost.insert(index + offset, cost)
    return total_access_cost
def get_array_level_cost(
    resource, point, layer_size, level, next_level_access, verbose=False
):
    """
    Compute the inter-PE communication energy of a parallelized level.

    Given next_level_access (the memory access of the level above), calculate
    the inter-PE data access of the current (parallelized) level, and from it
    the energy spent on the interconnect. Specific to the systolic-array
    template.

    Layout of level_access as described by the original author:
        [[close access for I/O/W],
         [far access on one dimension for I/O/W],
         [far access on another dimension]]
    Close access means data is passed from one PE to its neighbouring PE.
    Far access means data has to jump from one PE to PEs far away from it;
    far jumps happen because of dataflow spatial replication
    (e.g. a 2D array used as a kind of 3D array).

    ``layer_size`` is accepted but not used by this function.

    Returns the sum over all entries of level_access[i] * level_cost[i], as
    produced by get_array_access_and_cost().
    """
    # TODO add support for other access_mode
    # LMEI to distinguish O (partial sum) in buffer_access from A and W
    assert resource.paras[level].count and resource.paras[level].access_mode

    level_access, level_cost = get_array_access_and_cost(
        level, resource.paras[level], next_level_access, point
    )

    total_cost = 0
    for idx, access in enumerate(level_access):
        total_cost += access * level_cost[idx]

    if verbose >= 3:
        print("Level ", level, " array level access: ", level_access)
    return total_cost
def get_array_and_curr_level_cost(resource, point, layer, level, verbose=False):
    """
    Get the energy from current level of memory access + inter-PE access.

    The input, output and filter access counts of ``level`` are obtained from
    get_if_access(), get_of_access() and get_fl_access(). Each count is
    multiplied by the access cost of the memory partition that stores that
    operand at this level (``resource.memory_partitions[level]``); operands
    whose partition is None are skipped.

    NOTE(review): only the buffer-access term is summed here; the call that
    added get_array_level_cost() had been commented out in this function.

    When ``verbose`` is 3 or higher the three access counts are printed.

    Returns the accumulated cost for ``level``.
    """
    # LMEI to distinguish O (partial sum) in buffer_access from A and W
    mac_capacity = resource.mac_capacity
    # Order is input, output, filter - the same order memory_partitions uses.
    buffer_level_access = [
        get_if_access(level, point, layer, mac_capacity),
        get_of_access(level, point, layer, mac_capacity),
        get_fl_access(level, point, layer, mac_capacity),
    ]

    level_cost = 0
    for accesses, index in zip(buffer_level_access, resource.memory_partitions[level]):
        if index is not None:
            level_cost += accesses * resource.access_cost[level][index]

    if verbose >= 3:
        print("Level ", level, " access: ", buffer_level_access)
    return level_cost
def get_level_cost(resource, point, layer, level, verbose=False):
    """
    Get the energy from current level of memory access.

    Inputs, outputs and weights may have different costs: each operand's
    access count at ``level`` is multiplied by the access cost of the memory
    partition that stores it (``resource.memory_partitions[level]``).
    Operands whose partition is None are not stored at this level and are
    skipped.

    When ``verbose`` is 3 or higher the access counts of ``level`` are printed.

    Returns the accumulated cost for ``level``.
    """
    # LMEI to distinguish O (partial sum) in buffer_access from A and W
    mac_capacity = resource.mac_capacity

    # NOTE(review): these helpers receive `resource` as first argument here
    # but `level` in get_array_and_curr_level_cost - confirm which call
    # matches their current signature. The results are indexed per level below.
    if_accesses = get_if_access(resource, point, layer, mac_capacity)
    of_accesses = get_of_access(resource, point, layer, mac_capacity)
    fl_accesses = get_fl_access(resource, point, layer, mac_capacity)

    # buffer_access[l] is the (input, output, filter) access triple of level l.
    buffer_access = list(zip(if_accesses, of_accesses, fl_accesses))
    level_access = buffer_access[level]

    level_cost = 0
    for accesses, memory_partition in zip(
        level_access, resource.memory_partitions[level]
    ):
        if memory_partition is None:
            continue
        level_cost += accesses * resource.access_cost[level][memory_partition]

    if verbose >= 3:
        print("Level", level, " access: ", level_access)
    return level_cost
def get_total_access(resource, point, layer, verbose=False):
    """
    Return the total access count of every entry in the access list.

    get_access() yields one entry per level. An entry is either a flat list
    of per-operand access counts, or a list of such lists. Every flat list is
    multiplied element-wise with the layer size from get_layer_size() and
    summed, producing one total per flat list, in order.

    When ``verbose`` is 3 or higher the raw access list is printed.
    """
    layer_size = get_layer_size(layer)
    access_list, _array_cost = get_access(point, layer, resource)
    if verbose >= 3:
        print("access breakdown: ", access_list)

    total_level_access = []
    for level_access in access_list:
        # A nested entry contributes one total per inner list; a flat entry
        # contributes exactly one. The two cases are mutually exclusive.
        if isinstance(level_access[0], list):
            groups = level_access
        else:
            groups = [level_access]
        for group in groups:
            total_level_access.append(sum(map(mul, group, layer_size)))
    return total_level_access
def get_level_costs(resource, point, layer, verbose=False):
    """
    Return the list of per-level energies, with array-level entries inserted.

    First get_level_cost() is evaluated for every buffer level. Then, for
    each level whose ``access_mode`` is non-zero, an extra entry is inserted
    after that level: the value of get_array_and_curr_level_cost() for the
    next level minus the entry currently stored at the insertion position.

    ``verbose`` is accepted but not forwarded to the helpers.
    """
    level_energy = [
        get_level_cost(resource, point, layer, lvl)
        for lvl in range(resource.buffer_levels())
    ]

    array_levels = [
        pos for pos, para in enumerate(resource.paras) if para.access_mode != 0
    ]
    # `shift` counts the entries inserted so far plus one, so `slot` always
    # points just past the (already shifted) entry of the array level.
    for shift, lvl in enumerate(array_levels, start=1):
        slot = lvl + shift
        array_energy = (
            get_array_and_curr_level_cost(resource, point, layer, lvl + 1)
            - level_energy[slot]
        )
        level_energy.insert(slot, array_energy)
    return level_energy
def get_block_cost(resource, point, layer, verbose=False):
    """
    Get the per-operand cost of the given mapping point on the given resource.

    For every level, the access counts from get_access() are multiplied
    element-wise with the layer size and then by that level's entry of
    get_total_access_cost(); the results are accumulated into a list of three
    costs.

    NOTE(review): the earlier description promised ``inf`` for infeasible
    points, but no feasibility check is performed here.

    When ``verbose`` is truthy, the bank/block size lists, the layer size and
    the resulting costs are printed.

    Returns the list of three accumulated costs.
    """
    num_levels = resource.buffer_levels()
    access_list, array_cost = get_access(point, layer, resource)
    layer_size = get_layer_size(layer)
    total_access_cost = get_total_access_cost(resource, array_cost)
    assert len(total_access_cost) == len(access_list)

    block_costs = [0.0, 0.0, 0.0]
    for level_access, level_cost in zip(access_list, total_access_cost):
        buffer_access = [a * b for a, b in zip(level_access, layer_size)]
        block_cost = [x * level_cost for x in buffer_access]
        block_costs = list(map(add, block_cost, block_costs))

    if verbose:
        bank_size_list, block_size_list = get_block_sizes(num_levels, point, layer)
        print("bank_size_list: ", bank_size_list)
        print("block_size_list: ", block_size_list)
        print("layer_size: ", layer_size)
        print("block costs: ", block_costs)
    return block_costs
def get_cost(resource, point, layer, verbose=False):
    """
    Get the cost of the given mapping point on the given resource.

    The access list from get_access() is combined with the cost table from
    get_total_access_cost() into a single total, and a report of access
    energies, access counts, bank/block sizes and layer size is printed.

    NOTE(review): the earlier description promised ``inf`` for infeasible
    points, but no feasibility check is performed here.
    NOTE(review): the report is printed on every call; ``verbose`` is not
    consulted in this function - confirm that is intended.

    Returns a tuple ``(total_cost, total_access_cost, access_list)``.
    """
    # TODO include static energy
    # TODO support other access_mode
    num_levels = resource.buffer_levels()
    assert len(point.loop_blockings[0]) == num_levels, (
        "number of blockings does not match with number of memory "
        "levels: %d" % num_levels
    )

    access_list, array_cost = get_access(point, layer, resource)
    # Needed by the layer-size report at the end of this function.
    layer_size = get_layer_size(layer)
    total_access_cost = get_total_access_cost(resource, array_cost)
    assert len(total_access_cost) == len(access_list)

    total_cost = 0.0
    for i in range(len(total_access_cost)):
        if not isinstance(access_list[i][0], list):
            # Flat entry: every operand is charged the first cost of the level.
            total_cost += sum(
                [access * total_access_cost[i][0] for access in access_list[i]]
            )
        else:
            # NOTE(review): for a nested entry access_list[i][j] is itself a
            # list; confirm whether it should be summed before multiplying.
            for j in range(len(access_list[i])):
                total_cost += access_list[i][j] * total_access_cost[i][j]

    # With more than four entries the array (PE) entry sits one slot later.
    idx_adjust = 0
    if len(total_access_cost) > 4:
        idx_adjust = 1

    # Everything except the PE entry at position 1 + idx_adjust.
    layer_access_cost = (
        total_access_cost[: 1 + idx_adjust] + total_access_cost[2 + idx_adjust :]
    )
    print(
        "16b_Access_Energy_[RegisterFile(s),Buffer,DRAM]_(pJ): \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            [item[0] for item in layer_access_cost],
            [item[1] for item in layer_access_cost],
            [item[2] for item in layer_access_cost],
        )
    )
    print(
        "PE_Access_Cost_(pJ): \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            total_access_cost[1 + idx_adjust][0],
            total_access_cost[1 + idx_adjust][1],
            total_access_cost[1 + idx_adjust][2],
        )
    )

    layer_num_access = access_list[: 1 + idx_adjust] + access_list[2 + idx_adjust :]
    print(
        "Tiles_Accessed_from_[RegisterFile(s),Buffer,DRAM]_in_Layer: \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            [item[0] for item in layer_num_access],
            [item[1] for item in layer_num_access],
            [item[2] for item in layer_num_access],
        )
    )
    print(
        "Tiles_Accessed_from_[RegisterFile(s),Buffer,DRAM]_PEs_in_Layer: \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            access_list[1 + idx_adjust][0],
            access_list[1 + idx_adjust][1],
            access_list[1 + idx_adjust][2],
        )
    )

    bank_size_list, block_size_list = get_block_sizes(num_levels, point, layer)
    print(
        "Memory_Bank_Size_List_When_Parallelized/Unrolled_[RegisterFile(s),Buffer,DRAM]_(bytes): \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            [item[0] for item in bank_size_list],
            [item[1] for item in bank_size_list],
            [item[2] for item in bank_size_list],
        )
    )
    print(
        "Memory_Block_Size_List_When_NOT_Parallelized/Unrolled_[RegisterFile(s),Buffer,DRAM]_(bytes): \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            [item[0] for item in block_size_list],
            [item[1] for item in block_size_list],
            [item[2] for item in block_size_list],
        )
    )
    print(
        "Layer_Size_(number_of_pixels): \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            layer_size[0], layer_size[1], layer_size[2]
        )
    )
    return total_cost, total_access_cost, access_list