/***************************************************************************** * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer; * redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution; * neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” * ***************************************************************************/ #include "area.h" #include "decoder.h" #include "parameter.h" #include #include #include using namespace std; Decoder::Decoder( int _num_dec_signals, bool flag_way_select, double _C_ld_dec_out, double _R_wire_dec_out, bool fully_assoc_, bool is_dram_, bool is_wl_tr_, const Area & cell_, bool power_gating_, int nodes_DSTN_) :exist(false), C_ld_dec_out(_C_ld_dec_out), R_wire_dec_out(_R_wire_dec_out), num_gates(0), num_gates_min(2), delay(0), //power(), fully_assoc(fully_assoc_), is_dram(is_dram_), is_wl_tr(is_wl_tr_), total_driver_nwidth(0), total_driver_pwidth(0), cell(cell_), power_gating(power_gating_), nodes_DSTN(nodes_DSTN_), sleeptx(NULL) { for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { w_dec_n[i] = 0; w_dec_p[i] = 0; } /* * _num_dec_signals is the number of decoded signal as output * num_addr_bits_dec is the number of signal to be decoded * as the decoders input. */ int num_addr_bits_dec = _log2(_num_dec_signals); if (num_addr_bits_dec < 4) { if (flag_way_select) { exist = true; num_in_signals = 2; } else { num_in_signals = 0; } } else { exist = true; if (flag_way_select) { num_in_signals = 3; } else { num_in_signals = 2; } } assert(cell.h>0); assert(cell.w>0); // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; //area.h = 4 * cell.h; area.h = g_tp.h_dec * cell.h; compute_widths(); compute_area(); } void Decoder::compute_widths() { double F; double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr); double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); if (exist) { if (num_in_signals == 2 || fully_assoc) { w_dec_n[0] = 2 * g_tp.min_w_nmos_; w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; F = gnand2; } else { w_dec_n[0] = 3 * g_tp.min_w_nmos_; w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; F = gnand3; } F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) + gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)); num_gates = logical_effort( num_gates_min, num_in_signals == 2 ? gnand2 : gnand3, F, w_dec_n, w_dec_p, C_ld_dec_out, p_to_n_sz_ratio, is_dram, is_wl_tr, g_tp.max_w_nmos_dec); } } void Decoder::compute_area() { double cumulative_area = 0; double cumulative_curr = 0; // cumulative leakage current double cumulative_curr_Ig = 0; // cumulative leakage current if (exist) { // First check if this decoder exists if (num_in_signals == 2) { cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h); cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); } else if (num_in_signals == 3) { cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h); cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);; cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); } for (int i = 1; i < num_gates; i++) { cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h); cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); } power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; power.readOp.power_gated_leakage = cumulative_curr * g_tp.peri_global.Vcc_min; power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; area.w = (cumulative_area / area.h); if (power_gating) { compute_power_gating(); cumulative_area += sleeptx->area.get_area(); area.w = (cumulative_area / area.h); } } } void Decoder::compute_power_gating() { //For all driver chains there is only one sleep transistors to save area //Total transistor width for sleep tx calculation for (int i = 0; i power_gating) sleeptx = new Sleep_tx (g_ip->perfloss, Isat_subarray, is_footer, c_wakeup, detalV, nodes_DSTN, area); } double Decoder::compute_delays(double inrisetime) { if (exist) { double ret_val = 0; // outrisetime int i; double rd, tf, this_delay, c_load, c_intrinsic, Vpp; double Vdd = g_tp.peri_global.Vdd; if ((is_wl_tr) && (is_dram)) { Vpp = g_tp.vpp; } else if (is_wl_tr) { Vpp = g_tp.sram_cell.Vdd; } else { Vpp = g_tp.peri_global.Vdd; } // first check whether a decoder is required at all rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr); c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr); c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals + drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay += this_delay; inrisetime = this_delay / (1.0 - 0.5); power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; // cout<<"w_dec_n["<<0<<"] = "<blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) { driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + drv1->power_nand3_path.readOp.leakage + drv2->power_nand2_path.readOp.leakage + drv2->power_nand3_path.readOp.leakage; block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + blk1->power_nand3_path.readOp.leakage + blk1->power_L2.readOp.leakage + blk2->power_nand2_path.readOp.leakage + blk2->power_nand3_path.readOp.leakage + blk2->power_L2.readOp.leakage; driver_power.readOp.power_gated_leakage = drv1->power_nand2_path.readOp.power_gated_leakage + drv1->power_nand3_path.readOp.power_gated_leakage + drv2->power_nand2_path.readOp.power_gated_leakage + drv2->power_nand3_path.readOp.power_gated_leakage; block_power.readOp.power_gated_leakage = blk1->power_nand2_path.readOp.power_gated_leakage + blk1->power_nand3_path.readOp.power_gated_leakage + blk1->power_L2.readOp.power_gated_leakage + blk2->power_nand2_path.readOp.power_gated_leakage + blk2->power_nand3_path.readOp.power_gated_leakage + blk2->power_L2.readOp.power_gated_leakage; power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; power.readOp.power_gated_leakage = driver_power.readOp.power_gated_leakage + block_power.readOp.power_gated_leakage; driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + drv1->power_nand3_path.readOp.gate_leakage + drv2->power_nand2_path.readOp.gate_leakage + drv2->power_nand3_path.readOp.gate_leakage; block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + blk1->power_nand3_path.readOp.gate_leakage + blk1->power_L2.readOp.gate_leakage + blk2->power_nand2_path.readOp.gate_leakage + blk2->power_nand3_path.readOp.gate_leakage + blk2->power_L2.readOp.gate_leakage; power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; } void PredecBlkDrv::leakage_feedback(double temperature) { double leak_nand2_path = 0; double leak_nand3_path = 0; double gate_leak_nand2_path = 0; double gate_leak_nand3_path = 0; if (flag_driver_exists) { // first check whether a predecoder block driver is needed for (int i = 0; i < number_gates_nand2_path; ++i) { leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); } leak_nand2_path *= (num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + num_buffers_driving_4_nand2_load); gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + num_buffers_driving_4_nand2_load); for (int i = 0; i < number_gates_nand3_path; ++i) { leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); } leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; } } double Predec::compute_delays(double inrisetime) { // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block. pair tmp_pair1, tmp_pair2; tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime); tmp_pair1 = blk1->compute_delays(tmp_pair1); tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime); tmp_pair2 = blk2->compute_delays(tmp_pair2); tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2); driver_power.readOp.dynamic = drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; block_power.readOp.dynamic = blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + blk1->power_L2.readOp.dynamic + blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + blk2->power_L2.readOp.dynamic; power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic; delay = tmp_pair1.first; return tmp_pair1.second; } void Predec::leakage_feedback(double temperature) { drv1->leakage_feedback(temperature); drv2->leakage_feedback(temperature); blk1->leakage_feedback(temperature); blk2->leakage_feedback(temperature); driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + drv1->power_nand3_path.readOp.leakage + drv2->power_nand2_path.readOp.leakage + drv2->power_nand3_path.readOp.leakage; block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + blk1->power_nand3_path.readOp.leakage + blk1->power_L2.readOp.leakage + blk2->power_nand2_path.readOp.leakage + blk2->power_nand3_path.readOp.leakage + blk2->power_L2.readOp.leakage; power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + drv1->power_nand3_path.readOp.gate_leakage + drv2->power_nand2_path.readOp.gate_leakage + drv2->power_nand3_path.readOp.gate_leakage; block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + blk1->power_nand3_path.readOp.gate_leakage + blk1->power_L2.readOp.gate_leakage + blk2->power_nand2_path.readOp.gate_leakage + blk2->power_nand3_path.readOp.gate_leakage + blk2->power_L2.readOp.gate_leakage; power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; } // returns pair Predec::get_max_delay_before_decoder( pair input_pair1, pair input_pair2) { pair ret_val; double delay; delay = drv1->delay_nand2_path + blk1->delay_nand2_path; ret_val.first = delay; ret_val.second = input_pair1.first; delay = drv1->delay_nand3_path + blk1->delay_nand3_path; if (ret_val.first < delay) { ret_val.first = delay; ret_val.second = input_pair1.second; } delay = drv2->delay_nand2_path + blk2->delay_nand2_path; if (ret_val.first < delay) { ret_val.first = delay; ret_val.second = input_pair2.first; } delay = drv2->delay_nand3_path + blk2->delay_nand3_path; if (ret_val.first < delay) { ret_val.first = delay; ret_val.second = input_pair2.second; } return ret_val; } Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram, bool power_gating_, int nodes_DSTN_) :number_gates(0), min_number_gates(2), c_gate_load(c_gate_load_), c_wire_load(c_wire_load_), r_wire_load(r_wire_load_), delay(0), // power(), is_dram_(is_dram), total_driver_nwidth(0), total_driver_pwidth(0), power_gating(power_gating_), nodes_DSTN(nodes_DSTN_), sleeptx(NULL) { for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { width_n[i] = 0; width_p[i] = 0; } compute_widths(); compute_area(); } void Driver::compute_widths() { double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); double c_load = c_gate_load + c_wire_load; width_n[0] = g_tp.min_w_nmos_; width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_); number_gates = logical_effort( min_number_gates, 1, F, width_n, width_p, c_load, p_to_n_sz_ratio, is_dram_, false, g_tp.max_w_nmos_); } void Driver::compute_area() { double cumulative_area = 0; area.h = g_tp.cell_h_def; for (int i = 0; i < number_gates; i++) { cumulative_area += compute_gate_area(INV, 1, width_p[i], width_n[i], area.h); } area.w = (cumulative_area / area.h); if (power_gating) { compute_power_gating(); cumulative_area += sleeptx->area.get_area(); area.w = (cumulative_area / area.h); } } void Driver::compute_power_gating() { //For all driver chains there is only one sleep transistors to save area //Total transistor width for sleep tx calculation for (int i = 0; i power_gating) sleeptx = new Sleep_tx (g_ip->perfloss, Isat_subarray, is_footer, c_wakeup, detalV, nodes_DSTN,//default is 1 for drivers area); } double Driver::compute_delay(double inrisetime) { int i; double rd, c_load, c_intrinsic, tf; double this_delay = 0; for (i = 0; i < number_gates - 1; ++i) { rd = tr_R_on(width_n[i], NCH, 1, is_dram_); c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_); c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay += this_delay; inrisetime = this_delay / (1.0 - 0.5); power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd; power.readOp.power_gated_leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vcc_min; power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; } i = number_gates - 1; c_load = c_gate_load + c_wire_load; rd = tr_R_on(width_n[i], NCH, 1, is_dram_); c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load); this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay += this_delay; power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd; power.readOp.power_gated_leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vcc_min; power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; return this_delay / (1.0 - 0.5); } //TODO: add sleep tx in predec/predecblk/predecdriver