/***************************************************************************** * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer; * redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution; * neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” * ***************************************************************************/ #include #include #include "area.h" #include "bank.h" #include "basic_circuit.h" #include "component.h" #include "const.h" #include "decoder.h" #include "parameter.h" #include "Ucache.h" #include "subarray.h" #include "uca.h" #include #include #include #include using namespace std; const uint32_t nthreads = NTHREADS; void min_values_t::update_min_values(const min_values_t * val) { min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay; min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn; min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage; min_area = (min_area > val->min_area) ? val->min_area : min_area; min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc; } void min_values_t::update_min_values(const uca_org_t & res) { min_delay = (min_delay > res.access_time) ? res.access_time : min_delay; min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn; min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage; min_area = (min_area > res.area) ? res.area : min_area; min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc; } void min_values_t::update_min_values(const nuca_org_t * res) { min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay; min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn; min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage; min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area; min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc; } void min_values_t::update_min_values(const mem_array * res) { min_delay = (min_delay > res->access_time) ? res->access_time : min_delay; min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn; min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage; min_area = (min_area > res->area) ? res->area : min_area; min_cyc = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc; } void * calc_time_mt_wrapper(void * void_obj) { calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj; uint32_t tid = calc_obj->tid; list & data_arr = calc_obj->data_arr; list & tag_arr = calc_obj->tag_arr; bool is_tag = calc_obj->is_tag; bool pure_ram = calc_obj->pure_ram; bool pure_cam = calc_obj->pure_cam; bool is_main_mem = calc_obj->is_main_mem; double Nspd_min = calc_obj->Nspd_min; min_values_t * data_res = calc_obj->data_res; min_values_t * tag_res = calc_obj->tag_res; data_arr.clear(); data_arr.push_back(new mem_array); tag_arr.clear(); tag_arr.push_back(new mem_array); uint32_t Ndwl_niter = _log2(MAXDATAN) + 1; uint32_t Ndbl_niter = _log2(MAXDATAN) + 1; uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1; uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter; bool is_valid_partition; int wt_min, wt_max; if (g_ip->force_wiretype) { if (g_ip->wt == 0) { wt_min = Low_swing; wt_max = Low_swing; } else { wt_min = Global; wt_max = Low_swing-1; } } else { wt_min = Global; wt_max = Low_swing; } for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2) { for (int wr = wt_min; wr <= wt_max; wr++) { for (uint32_t iter = tid; iter < niter; iter += nthreads) { // reconstruct Ndwl, Ndbl, Ndcm unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter)); unsigned int Ndbl = 1 << ((iter / (Ndcm_niter))%Ndbl_niter); unsigned int Ndcm = 1 << (iter % Ndcm_niter); for(unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 *= 2) { for(unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2) { //for debuging if (g_ip->force_cache_config && is_tag == false) { wr = g_ip->wt; Ndwl = g_ip->ndwl; Ndbl = g_ip->ndbl; Ndcm = g_ip->ndcm; if(g_ip->nspd != 0) { Nspd = g_ip->nspd; } if(g_ip->ndsam1 != 0) { Ndsam_lev_1 = g_ip->ndsam1; Ndsam_lev_2 = g_ip->ndsam2; } } if (is_tag == true) { is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, tag_arr.back(), 0, NULL, NULL, is_main_mem); } // If it's a fully-associative cache, the data array partition parameters are identical to that of // the tag array, so compute data array partition properties also here. if (is_tag == false || g_ip->fully_assoc) { is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, data_arr.back(), 0, NULL, NULL, is_main_mem); } if (is_valid_partition) { if (is_tag == true) { tag_arr.back()->wt = (enum Wire_type) wr; tag_res->update_min_values(tag_arr.back()); tag_arr.push_back(new mem_array); } if (is_tag == false || g_ip->fully_assoc) { data_arr.back()->wt = (enum Wire_type) wr; data_res->update_min_values(data_arr.back()); data_arr.push_back(new mem_array); } } if (g_ip->force_cache_config && is_tag == false) { wr = wt_max; iter = niter; if(g_ip->nspd != 0) { Nspd = MAXDATASPD; } if (g_ip->ndsam1 != 0) { Ndsam_lev_1 = MAX_COL_MUX+1; Ndsam_lev_2 = MAX_COL_MUX+1; } } } } } } } delete data_arr.back(); delete tag_arr.back(); data_arr.pop_back(); tag_arr.pop_back(); pthread_exit(NULL); } bool calculate_time( bool is_tag, int pure_ram, bool pure_cam, double Nspd, unsigned int Ndwl, unsigned int Ndbl, unsigned int Ndcm, unsigned int Ndsam_lev_1, unsigned int Ndsam_lev_2, mem_array *ptr_array, int flag_results_populate, results_mem_array *ptr_results, uca_org_t *ptr_fin_res, bool is_main_mem) { DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem); if (dyn_p.is_valid == false) { return false; } UCA * uca = new UCA(dyn_p); if (flag_results_populate) { //For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables } else { collect_uca_results(Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, uca, ptr_array, is_main_mem); } delete uca; return true; } void collect_uca_results( // bool is_tag, // int pure_ram, // bool pure_cam, double Nspd, unsigned int Ndwl, unsigned int Ndbl, unsigned int Ndcm, unsigned int Ndsam_lev_1, unsigned int Ndsam_lev_2, UCA const * const uca, mem_array * const ptr_array, // int flag_results_populate, // results_mem_array *ptr_results, // uca_org_t *ptr_fin_res, bool is_main_mem) { int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir; int num_mats = uca->bank.dp.num_mats; bool is_fa = uca->bank.dp.fully_assoc; bool pure_cam = uca->bank.dp.pure_cam; ptr_array->Ndwl = Ndwl; ptr_array->Ndbl = Ndbl; ptr_array->Nspd = Nspd; ptr_array->deg_bl_muxing = uca->bank.dp.deg_bl_muxing; ptr_array->Ndsam_lev_1 = Ndsam_lev_1; ptr_array->Ndsam_lev_2 = Ndsam_lev_2; ptr_array->access_time = uca->access_time; ptr_array->cycle_time = uca->cycle_time; ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time; ptr_array->area_ram_cells = uca->area_all_dataramcells; ptr_array->area = uca->area.get_area(); ptr_array->height = uca->area.h; ptr_array->width = uca->area.w; ptr_array->mat_height = uca->bank.mat.area.h; ptr_array->mat_length = uca->bank.mat.area.w; ptr_array->subarray_height = uca->bank.mat.subarray.area.h; ptr_array->subarray_length = uca->bank.mat.subarray.area.w; ptr_array->power = uca->power; ptr_array->delay_senseamp_mux_decoder = MAX(uca->delay_array_to_sa_mux_lev_1_decoder, uca->delay_array_to_sa_mux_lev_2_decoder); ptr_array->delay_before_subarray_output_driver = uca->delay_before_subarray_output_driver; ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out; ptr_array->delay_route_to_bank = uca->htree_in_add->delay; ptr_array->delay_input_htree = uca->bank.htree_in_add->delay; ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay; ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay; ptr_array->delay_bitlines = uca->bank.mat.delay_bitline; ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline; ptr_array->delay_sense_amp = uca->bank.mat.delay_sa; ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree; ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay; ptr_array->delay_comparator = uca->bank.mat.delay_comparator; ptr_array->all_banks_height = uca->area.h; ptr_array->all_banks_width = uca->area.w; ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area()); ptr_array->power_routing_to_bank = uca->power_routing_to_bank; ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power; ptr_array->power_data_input_htree = uca->bank.htree_in_data->power; // cout<<"power_data_input_htree"<bank.htree_in_data->power.readOp.leakage<power_data_output_htree = uca->bank.htree_out_data->power; // cout<<"power_data_output_htree"<bank.htree_out_data->power.readOp.leakage<power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power; ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power; ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders; ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power; ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_bit_mux_predecoder_blocks = uca->bank.mat.b_mux_predec->block_power; ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders; ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power; ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power; ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders; ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = uca->bank.mat.sa_mux_lev_2_predec->driver_power; ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power; ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders; ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_bitlines = uca->bank.mat.power_bitline; ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_sense_amps = uca->bank.mat.power_sa; ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv; ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv; ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_comparators = uca->bank.mat.power_comparator; ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir; ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir; // cout << " num of mats: " << dyn_p.num_mats << endl; if (is_fa || pure_cam) { ptr_array->power_htree_in_search = uca->bank.htree_in_search->power; // cout<<"power_htree_in_search"<bank.htree_in_search->power.readOp.leakage<power_htree_out_search = uca->bank.htree_out_search->power; // cout<<"power_htree_out_search"<bank.htree_out_search->power.readOp.leakage<power_searchline = uca->bank.mat.power_searchline; // cout<<"power_searchlineh"<bank.mat.power_searchline.readOp.leakage<power_searchline.searchOp.dynamic *= num_mats; ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge; ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats; ptr_array->power_matchlines = uca->bank.mat.power_matchline; ptr_array->power_matchlines.searchOp.dynamic *= num_mats; ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge; ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats; ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv; // cout<<"power_matchline.searchOp.leakage"<bank.mat.power_matchline.searchOp.leakage<activate_energy = uca->activate_energy; ptr_array->read_energy = uca->read_energy; ptr_array->write_energy = uca->write_energy; ptr_array->precharge_energy = uca->precharge_energy; ptr_array->refresh_power = uca->refresh_power; ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page; ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page; ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks; ptr_array->precharge_delay = uca->precharge_delay; // cout<<"power_matchline.searchOp.leakage"<bank.mat.<bank.mat.subarray.get_total_cell_area()<array_leakage= uca->bank.array_leakage; ptr_array->wl_leakage= uca->bank.wl_leakage; ptr_array->cl_leakage= uca->bank.cl_leakage; if (g_ip->power_gating) { ptr_array->sram_sleep_tx_width= uca->bank.mat.sram_sleep_tx->width; ptr_array->sram_sleep_tx_area= uca->bank.mat.array_sleep_tx_area; ptr_array->sram_sleep_wakeup_latency= uca->bank.mat.array_wakeup_t; ptr_array->sram_sleep_wakeup_energy= uca->bank.mat.array_wakeup_e.readOp.dynamic; ptr_array->wl_sleep_tx_width= uca->bank.mat.row_dec->sleeptx->width; ptr_array->wl_sleep_tx_area= uca->bank.mat.wl_sleep_tx_area; ptr_array->wl_sleep_wakeup_latency= uca->bank.mat.wl_wakeup_t; ptr_array->wl_sleep_wakeup_energy= uca->bank.mat.wl_wakeup_e.readOp.dynamic; ptr_array->bl_floating_wakeup_latency= uca->bank.mat.blfloating_wakeup_t; ptr_array->bl_floating_wakeup_energy= uca->bank.mat.blfloating_wakeup_e.readOp.dynamic; } ptr_array->num_active_mats = uca->bank.dp.num_act_mats_hor_dir; ptr_array->num_submarray_mats = uca->bank.mat.num_subarrays_per_mat; // cout<<"array_leakage"<array_leakage<wl_leakage<cl_leakage<long_channel_leakage_reduction_periperal = uca->long_channel_leakage_reduction_periperal; ptr_array->long_channel_leakage_reduction_memcell = uca->long_channel_leakage_reduction_memcell; } bool check_uca_org(uca_org_t & u, min_values_t *minval) { if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) { return false; } if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > g_ip->dynamic_power_dev) { return false; } if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 > g_ip->leakage_power_dev) { return false; } if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > g_ip->cycle_time_dev) { return false; } if (((u.area - minval->min_area)/minval->min_area)*100 > g_ip->area_dev) { return false; } return true; } bool check_mem_org(mem_array & u, const min_values_t *minval) { if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) { return false; } if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > g_ip->dynamic_power_dev) { return false; } if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 > g_ip->leakage_power_dev) { return false; } if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > g_ip->cycle_time_dev) { return false; } if (((u.area - minval->min_area)/minval->min_area)*100 > g_ip->area_dev) { return false; } return true; } void find_optimal_uca(uca_org_t *res, min_values_t * minval, list & ulist) { double cost = 0; double min_cost = BIGNUM; float d, a, dp, lp, c; dp = g_ip->dynamic_power_wt; lp = g_ip->leakage_power_wt; a = g_ip->area_wt; d = g_ip->delay_wt; c = g_ip->cycle_time_wt; if (ulist.empty() == true) { cout << "ERROR: no valid cache organizations found" << endl; exit(0); } for (list::iterator niter = ulist.begin(); niter != ulist.end(); niter++) { if (g_ip->ed == 1) { cost = ((niter)->access_time/minval->min_delay) * ((niter)->power.readOp.dynamic/minval->min_dyn); if (min_cost > cost) { min_cost = cost; *res = (*(niter)); } } else if (g_ip->ed == 2) { cost = ((niter)->access_time/minval->min_delay)* ((niter)->access_time/minval->min_delay)* ((niter)->power.readOp.dynamic/minval->min_dyn); if (min_cost > cost) { min_cost = cost; *res = (*(niter)); } } else { /* * check whether the current organization * meets the input deviation constraints */ bool v = check_uca_org(*niter, minval); //if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling if (v) { cost = (d * ((niter)->access_time/minval->min_delay) + c * ((niter)->cycle_time/minval->min_cyc) + dp * ((niter)->power.readOp.dynamic/minval->min_dyn) + lp * ((niter)->power.readOp.leakage/minval->min_leakage) + a * ((niter)->area/minval->min_area)); //fprintf(stderr, "cost = %g\n", cost); if (min_cost > cost) { min_cost = cost; *res = (*(niter)); niter = ulist.erase(niter); if (niter!=ulist.begin()) niter--; } } else { niter = ulist.erase(niter); if (niter!=ulist.begin()) niter--; } } } if (min_cost == BIGNUM) { cout << "ERROR: no cache organizations met optimization criteria" << endl; exit(0); } } void filter_tag_arr(const min_values_t * min, list & list) { double cost = BIGNUM; double cur_cost; double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt, wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, wt_area = g_ip->area_wt; mem_array * res = NULL; if (list.empty() == true) { cout << "ERROR: no valid tag organizations found" << endl; exit(1); } while (list.empty() != true) { bool v = check_mem_org(*list.back(), min); if (v) { cur_cost = wt_delay * (list.back()->access_time/min->min_delay) + wt_dyn * (list.back()->power.readOp.dynamic/min->min_dyn) + wt_leakage * (list.back()->power.readOp.leakage/min->min_leakage) + wt_area * (list.back()->area/min->min_area) + wt_cyc * (list.back()->cycle_time/min->min_cyc); } else { cur_cost = BIGNUM; } if (cur_cost < cost) { if (res != NULL) { delete res; } cost = cur_cost; res = list.back(); } else { delete list.back(); } list.pop_back(); } if(!res) { cout << "ERROR: no valid tag organizations found" << endl; exit(0); } list.push_back(res); } void filter_data_arr(list & curr_list) { if (curr_list.empty() == true) { cout << "ERROR: no valid data array organizations found" << endl; exit(1); } list::iterator iter; for (iter = curr_list.begin(); iter != curr_list.end(); ++iter) { mem_array * m = *iter; if (m == NULL) exit(1); if(((m->access_time - m->arr_min->min_delay)/m->arr_min->min_delay > 0.5) && ((m->power.readOp.dynamic - m->arr_min->min_dyn)/m->arr_min->min_dyn > 0.5)) { delete m; iter = curr_list.erase(iter); iter --; } } } /* * Performs exhaustive search across different sub-array sizes, * wire types and aspect ratios to find an optimal UCA organization * 1. First different valid tag array organizations are calculated * and stored in tag_arr array * 2. The exhaustive search is repeated to find valid data array * organizations and stored in data_arr array * 3. Cache area, delay, power, and cycle time for different * cache organizations are calculated based on the * above results * 4. Cache model with least cost is picked from sol_list */ void solve(uca_org_t *fin_res) { bool is_dram = false; int pure_ram = g_ip->pure_ram; bool pure_cam = g_ip->pure_cam; init_tech_params(g_ip->F_sz_um, false); list tag_arr (0); list data_arr(0); list::iterator miter; list sol_list(1, uca_org_t()); fin_res->tag_array.access_time = 0; fin_res->tag_array.Ndwl = 0; fin_res->tag_array.Ndbl = 0; fin_res->tag_array.Nspd = 0; fin_res->tag_array.deg_bl_muxing = 0; fin_res->tag_array.Ndsam_lev_1 = 0; fin_res->tag_array.Ndsam_lev_2 = 0; // distribute calculate_time() execution to multiple threads calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads]; pthread_t threads[nthreads]; for (uint32_t t = 0; t < nthreads; t++) { calc_array[t].tid = t; calc_array[t].pure_ram = pure_ram; calc_array[t].pure_cam = pure_cam; calc_array[t].data_res = new min_values_t(); calc_array[t].tag_res = new min_values_t(); } bool is_tag; uint32_t ram_cell_tech_type; // If it's a cache, first calculate the area, delay and power for all tag array partitions. if (!(pure_ram||pure_cam||g_ip->fully_assoc)) { //cache is_tag = true; ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type; is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); init_tech_params(g_ip->F_sz_um, is_tag); for (uint32_t t = 0; t < nthreads; t++) { calc_array[t].is_tag = is_tag; calc_array[t].is_main_mem = false; calc_array[t].Nspd_min = 0.125; pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t]))); } for (uint32_t t = 0; t < nthreads; t++) { pthread_join(threads[t], NULL); } for (uint32_t t = 0; t < nthreads; t++) { calc_array[t].data_arr.sort(mem_array::lt); data_arr.merge(calc_array[t].data_arr, mem_array::lt); calc_array[t].tag_arr.sort(mem_array::lt); tag_arr.merge(calc_array[t].tag_arr, mem_array::lt); } } // calculate the area, delay and power for all data array partitions (for cache or plain RAM). // if (!g_ip->fully_assoc) // {//in the new cacti, cam, fully_associative cache are processed as single array in the data portion is_tag = false; ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type; is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); init_tech_params(g_ip->F_sz_um, is_tag); for (uint32_t t = 0; t < nthreads; t++) { calc_array[t].is_tag = is_tag; calc_array[t].is_main_mem = g_ip->is_main_mem; if (!(pure_cam||g_ip->fully_assoc)) { calc_array[t].Nspd_min = (double)(g_ip->out_w)/(double)(g_ip->block_sz*8); } else { calc_array[t].Nspd_min = 1; } pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t]))); } for (uint32_t t = 0; t < nthreads; t++) { pthread_join(threads[t], NULL); } data_arr.clear(); for (uint32_t t = 0; t < nthreads; t++) { calc_array[t].data_arr.sort(mem_array::lt); data_arr.merge(calc_array[t].data_arr, mem_array::lt); } // } min_values_t * d_min = new min_values_t(); min_values_t * t_min = new min_values_t(); min_values_t * cache_min = new min_values_t(); for (uint32_t t = 0; t < nthreads; t++) { d_min->update_min_values(calc_array[t].data_res); t_min->update_min_values(calc_array[t].tag_res); } for (miter = data_arr.begin(); miter != data_arr.end(); miter++) { (*miter)->arr_min = d_min; } //cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n"; filter_data_arr(data_arr); if(!(pure_ram||pure_cam||g_ip->fully_assoc)) { filter_tag_arr(t_min, tag_arr); } //cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n"; if (pure_ram||pure_cam||g_ip->fully_assoc) { for (miter = data_arr.begin(); miter != data_arr.end(); miter++) { uca_org_t & curr_org = sol_list.back(); //essentially adds value to sol_list, with no extra memory copying. curr_org.tag_array2 = NULL; curr_org.data_array2 = (*miter); curr_org.find_delay(); curr_org.find_energy(); curr_org.find_area(); curr_org.find_cyc(); //update min values for the entire cache cache_min->update_min_values(curr_org); sol_list.push_back(uca_org_t());//add a new node to the back } } else { while (tag_arr.empty() != true) { mem_array * arr_temp = (tag_arr.back()); //delete tag_arr.back(); tag_arr.pop_back();//this causes double free problem if uca_org_t has a destructor to release all contained pointers---when called by sol_list.clear(); so uca_org_t does not use destructor to delete contained pointers for (miter = data_arr.begin(); miter != data_arr.end(); miter++) { uca_org_t & curr_org = sol_list.back(); curr_org.tag_array2 = arr_temp; curr_org.data_array2 = (*miter); //try all combinations of tag and data array curr_org.find_delay(); curr_org.find_energy(); curr_org.find_area(); curr_org.find_cyc(); //update min values for the entire cache cache_min->update_min_values(curr_org); sol_list.push_back(uca_org_t()); } } } sol_list.pop_back();//delete the last unused node added in the loop above find_optimal_uca(fin_res, cache_min, sol_list); sol_list.clear(); for (miter = data_arr.begin(); miter != data_arr.end(); ++miter) { if (*miter != fin_res->data_array2) { delete *miter; } } data_arr.clear(); for (uint32_t t = 0; t < nthreads; t++) { delete calc_array[t].data_res; delete calc_array[t].tag_res; } delete [] calc_array; delete cache_min; delete d_min; delete t_min; } void update_dvs(uca_org_t *fin_res) { if(fin_res->tag_array2 || fin_res->data_array2) { // Wire::print_wire(); Wire winit;//init before changing dvs // fin_res->uca_q = vector(g_ip->dvs_voltage.size()); for (unsigned int i=0; i< g_ip->dvs_voltage.size(); i++) { fin_res->uca_q.push_back(new uca_org_t()); g_ip->hp_Vdd = g_ip->dvs_voltage[i]; g_ip->specific_hp_vdd = true; g_ip->lstp_Vdd = g_ip->dvs_voltage[i]; g_ip->specific_lstp_vdd = true; g_ip->lop_Vdd = g_ip->dvs_voltage[i]; g_ip->specific_lop_vdd = true; // g_ip->power_gating = false; // g_ip->bitline_floating = false; // g_ip->wl_power_gated = false; // g_ip->interconect_power_gated = false; // g_ip->cl_power_gated = false; // g_ip->array_power_gated = false; init_tech_params(g_ip->F_sz_um,true); winit.wire_dvs_update();//Wire::wire_dvs_update();//Wire winit (1,1, false); // Wire::print_wire(); if(fin_res->tag_array2) { DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem); if(tag_arr_dyn_p.is_valid) { UCA * tag_arr = new UCA(tag_arr_dyn_p); fin_res->uca_q[i]->tag_array2 = new mem_array(); collect_uca_results(fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, tag_arr, fin_res->uca_q[i]->tag_array2, g_ip->is_main_mem); delete tag_arr; } } DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem); if(data_arr_dyn_p.is_valid) { UCA * data_arr = new UCA(data_arr_dyn_p); fin_res->uca_q[i]->data_array2 = new mem_array(); collect_uca_results(fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, data_arr, fin_res->uca_q[i]->data_array2, g_ip->is_main_mem); delete data_arr; } fin_res->uca_q[i]->find_delay(); fin_res->uca_q[i]->find_energy(); fin_res->uca_q[i]->find_area(); fin_res->uca_q[i]->find_cyc(); // output_UCA(fin_res->uca_q[i]); // Wire::print_wire(); } //reset input to original values in *.cfg file g_ip->specific_hp_vdd = false; g_ip->specific_lstp_vdd = false; g_ip->specific_lop_vdd = false; init_tech_params(g_ip->F_sz_um,true); } else { cout << "ERROR: Cannot retrieve array structure for tag and data array" << endl; exit(1); } } void update_pg(uca_org_t *fin_res) { if(fin_res->tag_array2 || fin_res->data_array2) { Wire winit; fin_res->uca_pg_reference = new uca_org_t(); /* if (i == 0) {g_ip->hp_Vdd = 0.8; } else g_ip->hp_Vdd = 1.1; g_ip->specific_hp_vdd = true; cout<<"VDD=====" << g_ip->hp_Vdd <F_sz_um,true); winit.wire_dvs_update();//Wire::wire_dvs_update();//Wire winit (1,1, false); Wire::print_wire(); */ g_ip->array_power_gated = false; g_ip->bitline_floating = false; g_ip->wl_power_gated = false; g_ip->cl_power_gated = false; g_ip->interconect_power_gated = false; g_ip->power_gating = false; // winit.wire_dvs_update(); // Wire::print_wire(); // init_tech_params(g_ip->F_sz_um,true); // winit.wire_dvs_update();//Wire::wire_dvs_update();//Wire winit (1,1, false); // Wire::print_wire(); if(fin_res->tag_array2) { // init_tech_params(g_ip->F_sz_um,true); DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem); if(tag_arr_dyn_p.is_valid) { UCA * tag_arr = new UCA(tag_arr_dyn_p); fin_res->uca_pg_reference->tag_array2 = new mem_array(); collect_uca_results(fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, tag_arr, fin_res->uca_pg_reference->tag_array2, g_ip->is_main_mem); delete tag_arr; } } // init_tech_params(g_ip->F_sz_um,false); DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem); if(data_arr_dyn_p.is_valid) { UCA * data_arr = new UCA(data_arr_dyn_p); fin_res->uca_pg_reference->data_array2 = new mem_array(); collect_uca_results(fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, data_arr, fin_res->uca_pg_reference->data_array2, g_ip->is_main_mem); delete data_arr; } fin_res->uca_pg_reference->find_delay(); fin_res->uca_pg_reference->find_energy(); fin_res->uca_pg_reference->find_area(); fin_res->uca_pg_reference->find_cyc(); // output_UCA(fin_res->uca_pg_reference); // Wire::print_wire(); } else { cout << "ERROR: Cannot retrieve array structure for tag and data array" << endl; exit(1); } //reset input to original values in *.cfg file g_ip->array_power_gated = true; g_ip->bitline_floating = true; g_ip->wl_power_gated = true; g_ip->cl_power_gated = true; g_ip->interconect_power_gated = true; g_ip->power_gating = true; } /* update for thermal void update(uca_org_t *fin_res) { if(fin_res->tag_array2) { init_tech_params(g_ip->F_sz_um,true); DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem); if(tag_arr_dyn_p.is_valid) { UCA * tag_arr = new UCA(tag_arr_dyn_p); fin_res->tag_array2->power = tag_arr->power; } else { cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl; exit(1); } } init_tech_params(g_ip->F_sz_um,false); DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem); if(data_arr_dyn_p.is_valid) { UCA * data_arr = new UCA(data_arr_dyn_p); fin_res->data_array2->power = data_arr->power; } else { cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl; exit(1); } fin_res->find_energy(); } */