    block_list = []
    for level in range(num_levels):
        block_list.append(get_block_size(point, layer, level))
        bank_list.append(get_bank_size(point, layer, level))

    return [bank_list, block_list]


def fit_in_level(cap, blocks, invalid_underutilized, level, memory_partitions):
    """
    Check if the current level mem size >= current level loop blocking size.

    cap: capacity of the level; either a single number (one memory shared by
        all operands) or a list holding one capacity per memory partition.
    blocks: block sizes, one entry per operand (I/O/W order).
    invalid_underutilized: when truthy, also exclude mapping points with too
        low memory utilization (< 50%).
    level: index of the memory level (only read when cap is a list).
    memory_partitions: for every level, the partition each operand is stored
        in, or None when the operand is not stored at that level, e.g.
        [[0, 1, 2], [0, 0, 1], [0, 0, None]] (only read when cap is a list).

    Returns True when the blocks fit, False otherwise.

    #LMEI can later put the memory utilization threshold as a user defined parameter
    """
    if isinstance(cap, list):
        # I/O/W example: [0,0,1] I is stored in memory 0, O is stored in
        # memory 0, W is stored in memory 1.
        for part, part_cap in enumerate(cap):
            members = [
                operand
                for operand, assigned in enumerate(memory_partitions[level])
                if assigned == part
            ]
            size = sum(blocks[operand] for operand in members)
            if size == 0:
                # Nothing is stored in this partition at this level.
                continue
            if size > part_cap:
                return False  # it does not fit

            # NOTE(review): the counter is reset for every partition, so it
            # never exceeds 1 and the rejection below can only trigger when
            # cap has a single entry. Kept as-is to preserve search results;
            # confirm whether it was meant to accumulate across partitions.
            underutilized = 0
            if invalid_underutilized:
                next_level = [memory_partitions[level + 1][op] for op in members]
                # If double the size still fits, a better blocking exists that
                # uses the memory fully. Only judged when every operand of
                # this partition is also stored at the next level.
                if None not in next_level and 2 * size <= part_cap:
                    underutilized += 1

            if underutilized == len(cap):
                return False

        return True

    total_size = sum(blocks)
    if invalid_underutilized:
        return (total_size <= cap) and (2 * total_size >= cap)
    return total_size <= cap


def valid_partition_number(resource, partitioning, level):
    """
    Check that the partitioning of `level` does not exceed its parallelism.

    The product of all factors in partitioning[level] is compared against
    resource.parallelism(level).count; an empty factor list counts as 1.
    """
    limit = resource.parallelism(level).count
    used = 1
    for factor in partitioning[level]:
        used = used * factor
    return used <= limit


def valid_partitioning_current_level(resource, point, layer, level, verbose=False):
    """
    Check the partitioning of `point` at one level.

    Two conditions must hold: the bank size of the level fits in the level's
    buffer (see fit_in_level), and the number of partitions does not exceed
    the parallelism available at that level (see valid_partition_number).

    Returns a truthy value when both conditions hold.
    """
    valid_size = fit_in_level(
        resource.buffer(level).capacity,
        get_bank_size(point, layer, level),
        resource.invalid_underutilized,
        level,
        resource.memory_partitions,
    )

    # loop_partitionings is transposed before lookup so that
    # partitioning[level] lists the factors used at this level, mirroring
    # valid_mapping_point_current_level.
    partitioning = list(zip(*point.loop_partitionings))
    valid_para = valid_partition_number(resource, partitioning, level)

    if verbose >= 3:
        print("Level ", level, ": Partitioned block size fit in bank: ", valid_size)
        print("Level ", level, ": Partition number is valid: ", valid_para)

    return valid_size and valid_para

def valid_mapping_point_current_level(resource, point, layer, level, verbose=False):
    """
    Check whether `point` is a valid mapping at one memory level.

    The size checked against the buffer capacity is the bank size when
    resource.paras[level].count > 1 and the block size otherwise. The number
    of partitions must also stay within the level's parallelism.

    Returns a truthy value when both the size and the partition count are
    valid. Diagnostics are printed when verbose >= 3.
    """
    size_of = get_bank_size if resource.paras[level].count > 1 else get_block_size
    valid_size = fit_in_level(
        resource.buffer(level).capacity,
        size_of(point, layer, level),
        resource.invalid_underutilized,
        level,
        resource.memory_partitions,
    )

    # Transposed so that partitioning[level] lists the factors of this level.
    partitioning = list(zip(*point.loop_partitionings))
    valid_para = valid_partition_number(resource, partitioning, level)

    # ">= 3" matches the verbosity threshold used by the other cost helpers.
    if verbose >= 3:
        print("Level ", level, ": Partitioned block size fit in bank: ", valid_size)
        print("Level ", level, ": Partition number is valid: ", valid_para)

    return valid_size and valid_para


def valid_partitioning(resource, point, layer, verbose=False):
    """
    Return True when the partitioning of `point` is valid at every level
    listed in resource.para_index; stops at the first invalid level.
    """
    return all(
        valid_partitioning_current_level(resource, point, layer, lvl, verbose)
        for lvl in resource.para_index
    )


def valid_blocking_size_current_level(resource, point, layer, level, verbose=False):
    """
    Check if the blocking size of the current level fits in memory.

    The capacity of the level's buffer (a single number or one number per
    memory partition) is multiplied by resource.paras[level].count before it
    is compared against the block size of the level. The under-utilization
    check is only requested for levels that are not in resource.para_index.
    """
    if level == resource.buffer_levels() - 1:
        # NOTE(review): the body of this branch is missing from the reviewed
        # text. Assumed the last level always fits (fit_in_level would also
        # look up memory_partitions[level + 1] for it) - confirm upstream.
        return True

    count = resource.paras[level].count
    capacity = resource.buffer(level).capacity
    if isinstance(capacity, list):
        # Build a new list so the resource's own capacities stay untouched.
        capacity = [part_capacity * count for part_capacity in capacity]
    else:
        capacity = capacity * count

    return fit_in_level(
        capacity,
        get_block_size(point, layer, level),
        (resource.invalid_underutilized and (level not in resource.para_index)),
        level,
        resource.memory_partitions,
    )


def valid_blocking_size(resource, point, layer, verbose=False):
    """
    Return True when the blocking size of `point` fits at every buffer level
    of `resource`; stops at the first level that does not fit.
    """
    return all(
        valid_blocking_size_current_level(resource, point, layer, level, verbose)
        for level in range(resource.buffer_levels())
    )


def valid_mapping_point(resource, point, layer, verbose=False):
    """
    Return True when `point` is a valid mapping at every buffer level of
    `resource`; stops at the first invalid level.
    """
    return all(
        valid_mapping_point_current_level(resource, point, layer, level, verbose)
        for level in range(resource.buffer_levels())
    )


def get_total_access_cost(resource, array_cost):
    """
    Return the per-level access costs with the array-level costs spliced in.

    A deep copy of resource.access_cost is returned unchanged when
    resource.array_access_cost is falsy. Otherwise, for every entry of
    resource.paras whose access_mode is non-zero, the matching entry of
    array_cost is inserted right after that level. resource.access_cost
    itself is never modified.
    """
    total_access_cost = copy.deepcopy(resource.access_cost)

    if not resource.array_access_cost:
        return total_access_cost

    para_index = [i for i, e in enumerate(resource.paras) if e.access_mode != 0]

    # Each earlier insertion shifts the later levels by one position, hence
    # the growing offset (1 for the first inserted level, 2 for the second, ...).
    for inserted, index in enumerate(para_index):
        total_access_cost.insert(index + inserted + 1, array_cost[inserted])
    return total_access_cost


def get_array_level_cost(
    resource, point, layer_size, level, next_level_access, verbose=False
):
    """
    Given next_level_access (above-level memory access)
    calculate the current level (paralleled level) inter-PE data access
    thus calculate the current level (paralleled level) inter-PE communication energy
    i.e. the energy spent on interconnection

    Specific to Systolic Array template.

    level_access: [[close access for I/O/W],[far access on one dimension for I/O/W],[far access on another dimension]]
    close access means data are passing from one PE to its neighbour PE
    Far access means data need to jump from one PE to PEs far away from it.
    Far jump happens because of dataflow spatial replication (e.g. 2D array -> kinds of 3D array)

    Returns the sum of access * cost over the entries produced by
    get_array_access_and_cost. layer_size is accepted but not used here.
    """

    # TODO add support for other access_mode
    # LMEI to distinguish O (partial sum) in buffer_access from A and W

    para = resource.paras[level]
    # Only meaningful for a level with a non-zero count and access mode.
    assert para.count and para.access_mode

    level_access, level_cost = get_array_access_and_cost(
        level, para, next_level_access, point
    )

    total_cost = sum(
        access * level_cost[i] for i, access in enumerate(level_access)
    )

    if verbose >= 3:
        print("Level ", level, " array level access: ", level_access)

    return total_cost


def get_array_and_curr_level_cost(resource, point, layer, level, verbose=False):
    """
    Get the energy from current level of memory access + inter-PE access

    Each operand's access count at `level` is multiplied by the access cost
    of the memory partition it is stored in; operands whose partition is
    None at this level contribute nothing.

    NOTE(review): the inter-PE (array level) term is not added by this
    function at the moment, only the memory access cost is returned.
    """

    # LMEI to distinguish O (partial sum) in buffer_access from A and W

    mac_capacity = resource.mac_capacity

    # Access counts in I/O/W order, matching resource.memory_partitions[level].
    buffer_level_access = [
        get_if_access(level, point, layer, mac_capacity),
        get_of_access(level, point, layer, mac_capacity),
        get_fl_access(level, point, layer, mac_capacity),
    ]

    level_cost = 0
    for operand, accesses in enumerate(buffer_level_access):
        partition = resource.memory_partitions[level][operand]
        if partition is not None:
            level_cost += accesses * resource.access_cost[level][partition]

    if verbose >= 3:
        print("Level ", level, " access: ", buffer_level_access)

    return level_cost


def get_level_cost(resource, point, layer, level, verbose=False):
    """
    Get the energy from current level of memory access

    Inputs, outputs and weights may have different costs: each operand's
    access count at `level` is multiplied by the access cost of the memory
    partition it is stored in. Operands whose partition is None at this
    level are skipped, as in get_array_and_curr_level_cost.

    #LMEI to distinguish O (partial sum) in buffer_access from A and W
    """

    mac_capacity = resource.mac_capacity

    # NOTE(review): the access helpers receive `resource` here but a level
    # index in get_array_and_curr_level_cost; the results are indexed by
    # level below, so they are presumably per-level sequences - confirm.
    if_accesses = get_if_access(resource, point, layer, mac_capacity)
    of_accesses = get_of_access(resource, point, layer, mac_capacity)
    fl_accesses = get_fl_access(resource, point, layer, mac_capacity)

    buffer_access = list(zip(if_accesses, of_accesses, fl_accesses))
    level_access = buffer_access[level]

    level_cost = 0
    for operand, accesses in enumerate(level_access):
        memory_partition = resource.memory_partitions[level][operand]
        if memory_partition is not None:
            level_cost += accesses * resource.access_cost[level][memory_partition]

    if verbose >= 3:
        print("Level", level, " access: ", level_access)
    return level_cost


def get_total_access(resource, point, layer, verbose=False):
    """
    Return a flat list of total accesses, one value per access entry.

    Every entry of the access list returned by get_access is multiplied
    element-wise by the layer size and summed. An entry that is itself a list
    of lists contributes one total per inner list.
    """
    layer_size = get_layer_size(layer)

    access_list, _ = get_access(point, layer, resource)

    if verbose >= 3:
        print("access breakdown: ", access_list)

    total_level_access = []
    for level_access in access_list:
        # List of total access of each buffer at this level.
        if isinstance(level_access[0], list):
            groups = level_access
        else:
            groups = [level_access]
        for group in groups:
            total_level_access.append(sum(map(mul, group, layer_size)))

    return total_level_access


def get_level_costs(resource, point, layer, verbose=False):
    """
    Return the energy of every level, with array-level energies spliced in.

    The list starts with get_level_cost for each buffer level. For every
    entry of resource.paras whose access_mode is non-zero, an extra entry is
    inserted after that level: get_array_and_curr_level_cost of the next
    level minus the energy already recorded for that next level.
    """
    level_energy = [
        get_level_cost(resource, point, layer, level)
        for level in range(resource.buffer_levels())
    ]

    para_index = [i for i, e in enumerate(resource.paras) if e.access_mode != 0]

    # `delta` accounts for the entries inserted so far, so index + delta
    # always addresses the level that follows the parallel level.
    for delta, index in enumerate(para_index, start=1):
        array_energy = (
            get_array_and_curr_level_cost(resource, point, layer, index + 1)
            - level_energy[index + delta]
        )
        level_energy.insert(index + delta, array_energy)

    return level_energy


# FIXME
def get_block_cost(resource, point, layer, verbose=False):
    """
    Get the per-operand cost of the given mapping point on given resource.

    For every level, the access counts are scaled by the layer size and by
    that level's access cost, then accumulated element-wise.

    Returns the accumulated list of three cost entries (presumably in
    ifmap / ofmap / filter order, matching layer_size - confirm).

    NOTE(review): feasibility of the point is not checked here.
    """
    # TODO include static energy
    access_list, array_cost = get_access(point, layer, resource)
    layer_size = get_layer_size(layer)

    total_access_cost = get_total_access_cost(resource, array_cost)
    assert len(total_access_cost) == len(access_list)

    block_costs = [0.0, 0.0, 0.0]
    for i, level_cost in enumerate(total_access_cost):
        buffer_access = [a * b for a, b in zip(access_list[i], layer_size)]
        block_cost = [x * level_cost for x in buffer_access]
        block_costs = list(map(add, block_cost, block_costs))

    if verbose:
        num_levels = resource.buffer_levels()
        print("access_list: ", access_list)
        bank_size_list, block_size_list = get_block_sizes(num_levels, point, layer)
        print("bank_size_list: ", bank_size_list)
        print("block_size_list: ", block_size_list)
        print("layer_size: ", layer_size)
        print("block costs: ", block_costs)

    return block_costs

def _print_cost_report(
    total_access_cost, access_list, bank_size_list, block_size_list, layer_size
):
    """Print the per-operand cost, access and size breakdown used by get_cost."""

    def column(rows, operand):
        # Pick one operand's value out of every row.
        return [row[operand] for row in rows]

    # The array (PE) level sits at index 1, or at index 2 when more than four
    # cost levels are present; it is reported separately from the memories.
    idx_adjust = 1 if len(total_access_cost) > 4 else 0
    pe_level = 1 + idx_adjust

    layer_access_cost = total_access_cost[:pe_level] + total_access_cost[pe_level + 1 :]
    print(
        "16b_Access_Energy_[RegisterFile(s),Buffer,DRAM]_(pJ): \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            column(layer_access_cost, 0),
            column(layer_access_cost, 1),
            column(layer_access_cost, 2),
        )
    )
    print(
        "PE_Access_Cost_(pJ): \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            total_access_cost[pe_level][0],
            total_access_cost[pe_level][1],
            total_access_cost[pe_level][2],
        )
    )

    layer_num_access = access_list[:pe_level] + access_list[pe_level + 1 :]
    print(
        "Tiles_Accessed_from_[RegisterFile(s),Buffer,DRAM]_in_Layer: \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            column(layer_num_access, 0),
            column(layer_num_access, 1),
            column(layer_num_access, 2),
        )
    )
    print(
        "Tiles_Accessed_from_[RegisterFile(s),Buffer,DRAM]_PEs_in_Layer: \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            access_list[pe_level][0],
            access_list[pe_level][1],
            access_list[pe_level][2],
        )
    )

    print(
        "Memory_Bank_Size_List_When_Parallelized/Unrolled_[RegisterFile(s),Buffer,DRAM]_(bytes): \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            column(bank_size_list, 0),
            column(bank_size_list, 1),
            column(bank_size_list, 2),
        )
    )
    print(
        "Memory_Block_Size_List_When_NOT_Parallelized/Unrolled_[RegisterFile(s),Buffer,DRAM]_(bytes): \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            column(block_size_list, 0),
            column(block_size_list, 1),
            column(block_size_list, 2),
        )
    )
    print(
        "Layer_Size_(number_of_pixels): \n\tifmap: {}\n\tofmap: {}\n\tfilter: {}".format(
            layer_size[0], layer_size[1], layer_size[2]
        )
    )


def get_cost(resource, point, layer, verbose=False):
    """
    Get the cost of the given mapping point on given resource.

    Returns a tuple (total_cost, total_access_cost, access_list):
    total_cost is the accumulated access * cost over all levels,
    total_access_cost comes from get_total_access_cost and access_list from
    get_access. When verbose is truthy a per-operand breakdown is printed
    once, after the cost has been accumulated.

    NOTE(review): feasibility of the point is not checked here.
    """
    # TODO include static energy
    # TODO support other access_mode
    num_levels = resource.buffer_levels()
    assert len(point.loop_blockings[0]) == num_levels, (
        "number of blockings does not match with number of memory "
        "levels: %d" % num_levels
    )

    access_list, array_cost = get_access(point, layer, resource)

    total_access_cost = get_total_access_cost(resource, array_cost)
    assert len(total_access_cost) == len(access_list)

    total_cost = 0.0
    for i, level_cost in enumerate(total_access_cost):
        level_access = access_list[i]
        # List of total access of each buffer at level i.
        if not isinstance(level_access[0], list):
            # Flat entry: every access is charged the first cost of the level.
            total_cost += sum(access * level_cost[0] for access in level_access)
        else:
            for j in range(len(level_access)):
                total_cost += level_access[j] * level_cost[j]

    if verbose:
        layer_size = get_layer_size(layer)
        bank_size_list, block_size_list = get_block_sizes(num_levels, point, layer)
        _print_cost_report(
            total_access_cost, access_list, bank_size_list, block_size_list, layer_size
        )

    return total_cost, total_access_cost, access_list