# resource.py
# (Extracted from a repository web view; last committed by xuanyoya.)
'''
Hardware resource types.
'''
#import numpy as np
from collections import namedtuple
from operator import mul
import math
from functools import reduce
xuanyoya's avatar
xuanyoya committed

class Buffer(namedtuple('Buffer', 'capacity access_cost unit_static_cost')):
    '''
    Immutable description of one buffer in the memory hierarchy.

    Fields:

    capacity:          capacity of a single buffer. When the level is
                       parallelized, this is the capacity of the buffer
                       bank inside each parallel unit.
    access_cost:       cost of one access to this buffer.
    unit_static_cost:  static cost of this buffer per time unit.
    '''

class Parallelism(namedtuple(
        'Parallelism',
        'count access_mode array_access_cost array_dim array_width')):
    '''
    Immutable description of the parallelism at one hierarchy level.

    Fields:

    count:              number of parallel units at this level.
    access_mode:        how non-private data is accessed, for example
                        whether it is fetched from a neighborhood PE or
                        from the next level buffer.
    array_access_cost:  cost of accessing array level buffers.
    array_dim:          dimension of the PE array (1D or 2D).
    array_width:        width of the PE array. As filled in by Resource,
                        this equals count for a 1D array and
                        int(sqrt(count)) for a 2D array.

    Note: the shared buffer level is described as the level index of the
    lowest shared buffer for this parallelism; it is not stored as a field
    of this tuple.
    '''

class Resource(object):
    '''
    Hardware resource specification.
    Hardware resource includes buffer hierarchy and parallel processing units.

    buf_capacity_list:         [1st level buffer size, 2nd level ...] (UNIT: Byte)
    buf_access_cost_list:      [1st level mem per access cost, 2nd level ...] (UNIT: pJ)
                               Each entry is either a single cost or a list of
                               3 costs for (inputs, weights, psum).
    buf_unit_static_cost_list: [1st level mem static cost per time unit, 2nd level ...] (UNIT: pJ)
    para_count_list:           [1st level number of parallel units, 2nd level ...]
    mac_capacity:              [0, 1], determines whether MAC can buffer 1 output. (UNIT: Element)
    partition_mode:            (aka 'parallel mode' outside the class) determines hardware parallel template
                               ['0' for no parallelism, only hierarchical memory fetch,
                                '1' neighbour for parallel unit fetch,
                                '2' for broadcast.]
                               Defaults to 0 at every level.
    array_access_cost:         (aka 'parallel cost' outside the class)
                               per access cost of fetching data from neighborhood PE;
                               one entry per level whose partition mode is 1 or 2,
                               in level order.
    array_dim:                 array dimension (right now support 1D & square-shape 2D).
                               Defaults to 2 for every level whose parallel count
                               is not 1, and to 1 otherwise.
    utilization_threshold:     # of utilized unit / # of total units @ paralleled level
    replication:               [True, False], whether allows another loop dimension (3rd) to be spatially unrolled
    memory_partitions:         per-level list of 3 entries; defaults to
                               [[0, 0, 0], [0, 0, 0], [0, 0, 0]]. A trailing
                               [None, None, None] row is appended to the stored
                               copy. NOTE(review): the meaning of the entries is
                               not visible in this class - confirm against users.
    invalid_underutilized:     stored as-is; presumably controls whether
                               under-utilized mappings are rejected - TODO confirm.
    '''

    def __init__(self, buf_capacity_list, buf_access_cost_list,
                 buf_unit_static_cost_list, para_count_list,
                 mac_capacity=1, partition_mode=None, array_access_cost=None,
                 array_dim=None, utilization_threshold=0, replication=True,
                 memory_partitions=None, invalid_underutilized=True):

        # Buffers: all per-level lists must describe the same number of levels.
        assert len(buf_capacity_list) == len(buf_access_cost_list)
        assert len(buf_capacity_list) == len(buf_unit_static_cost_list)
        assert len(buf_capacity_list) == len(para_count_list)

        self.bufs = [Buffer(*t) for t in zip(buf_capacity_list,
                                             buf_access_cost_list,
                                             buf_unit_static_cost_list)]

        self.num_levels = len(self.bufs)

        # Parallelism.
        array_access_costs = [None] * len(para_count_list)
        if not partition_mode:
            partition_mode = [0] * len(para_count_list)
        else:
            # array_access_cost only has entries for the levels that use
            # partition mode 1 or 2, so it is walked with its own index.
            array_level = 0
            for i in range(self.num_levels):
                # when using non-default partition mode, the parallelism
                # count needs to be larger than 1
                # NOTE(review): as written this only rejects a negative
                # partition mode on a level with more than one unit; it does
                # not reject a non-zero mode with a count of 1. The logic is
                # kept unchanged so existing configurations keep loading.
                assert partition_mode[i] == 0 or para_count_list[i] <= 1 \
                    or (partition_mode[i] > 0 and para_count_list[i] > 1)

                if partition_mode[i] in (1, 2):
                    array_access_costs[i] = array_access_cost[array_level]
                    array_level += 1

        # "para_index" indicates which levels have parallelism
        self.para_index = [i for i, e in enumerate(para_count_list) if e != 1]

        # 2D array is default setting for paralleled level
        # Define 1D array in arch file manually if needed,
        # e.g. "array_dim": [1, 1, 1] ([@ mem level 1, 2, 3])
        if not array_dim:
            array_dim = [2 if e != 1 else 1 for e in para_count_list]

        # LMEI always assume square-shape array, could change later
        array_width = [
            count if array_dim[i] == 1 else int(math.sqrt(count))
            for i, count in enumerate(para_count_list)
        ]

        self.paras = [Parallelism(*t) for t in zip(para_count_list,
                                                   partition_mode,
                                                   array_access_costs,
                                                   array_dim,
                                                   array_width)]

        # If list does not contain 3 separate access costs for
        # (inputs, weights, psum), assume they all have the same cost.
        if buf_access_cost_list and \
                not isinstance(buf_access_cost_list[0], list):
            self.access_cost = [[x] * 3 for x in buf_access_cost_list]
        else:
            self.access_cost = buf_access_cost_list

        self.mac_capacity = mac_capacity
        self.array_access_cost = array_access_cost
        self.para_count_list = para_count_list
        self.utilization_threshold = utilization_threshold

        # Build a new outer list instead of appending to the argument:
        # appending in place would grow a shared default on every
        # construction and would modify the caller's list.
        if memory_partitions is None:
            memory_partitions = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
        # do not check for invalid_underutilized at last memory level
        self.memory_partitions = list(memory_partitions) + [[None] * 3]

        self.replication = replication
        self.invalid_underutilized = invalid_underutilized

    @classmethod
    def arch(cls, info):
        '''
        Build a Resource from an architecture description mapping.

        info must provide the keys "capacity", "access_cost", "static_cost",
        "parallel_count", "mac_capacity", "parallel_mode", "parallel_cost",
        "array_dim", "utilization_threshold", "replication",
        "memory_partitions" and "invalid_underutilized"; a missing key
        raises KeyError.
        '''
        return cls(info["capacity"], info["access_cost"], info["static_cost"],
                   info["parallel_count"], info["mac_capacity"],
                   info["parallel_mode"], info["parallel_cost"],
                   info["array_dim"], info["utilization_threshold"],
                   info["replication"], info["memory_partitions"],
                   info['invalid_underutilized'])

    def buffer_levels(self):
        '''
        Return total levels of buffers in the hierarchy.
        '''
        return self.num_levels

    def buffer(self, level):
        '''
        Return the specification of the buffer of the given level.
        '''
        return self.bufs[level]

    def parallelism(self, level):
        '''
        Return the specification of the parallelism of the given level.
        '''
        return self.paras[level]

    def total_parallelism(self):
        '''
        Return the total parallelism, i.e. the product of the parallel
        unit counts of all levels (1 when there are no levels).
        '''
        return reduce(mul, self.para_count_list, 1)