/***************************************************************************** * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer; * redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution; * neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” * ***************************************************************************/ #include #include #include #include #include #include #include #include #include "parameter.h" #include "array.h" #include "const.h" #include "basic_circuit.h" #include "XML_Parse.h" #include "processor.h" #include "version.h" Processor::Processor(ParseXML *XML_interface) :XML(XML_interface),//TODO: using one global copy may have problems. mc(0), niu(0), pcie(0), flashcontroller(0) { /* * placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm * There is no point to have heterogeneous memory controller on chip, * thus McPAT only support homogeneous memory controllers. */ int i; double pppm_t[4] = {1,1,1,1}; set_proc_param(); if (procdynp.homoCore) numCore = procdynp.numCore==0? 0:1; else numCore = procdynp.numCore; if (procdynp.homoL2) numL2 = procdynp.numL2==0? 0:1; else numL2 = procdynp.numL2; if (XML->sys.Private_L2 && numCore != numL2) { cout<<"Number of private L2 does not match number of cores"<computeEnergy(); cores[i]->computeEnergy(false); if (procdynp.homoCore){ core.area.set_area(core.area.get_area() + cores[i]->area.get_area()*procdynp.numCore); set_pppm(pppm_t,cores[i]->clockRate*procdynp.numCore, procdynp.numCore,procdynp.numCore,procdynp.numCore); core.power = core.power + cores[i]->power*pppm_t; set_pppm(pppm_t,1/cores[i]->executionTime, procdynp.numCore,procdynp.numCore,procdynp.numCore); core.rt_power = core.rt_power + cores[i]->rt_power*pppm_t; area.set_area(area.get_area() + core.area.get_area());//placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm power = power + core.power; rt_power = rt_power + core.rt_power; } else{ core.area.set_area(core.area.get_area() + cores[i]->area.get_area()); area.set_area(area.get_area() + cores[i]->area.get_area());//placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm set_pppm(pppm_t,cores[i]->clockRate, 1, 1, 1); core.power = core.power + cores[i]->power*pppm_t; power = power + cores[i]->power*pppm_t; set_pppm(pppm_t,1/cores[i]->executionTime, 1, 1, 1); core.rt_power = core.rt_power + cores[i]->rt_power*pppm_t; rt_power = rt_power + cores[i]->rt_power*pppm_t; } } if (!XML->sys.Private_L2) { if (numL2 >0) for (i = 0;i < numL2; i++) { l2array.push_back(new SharedCache(XML,i, &interface_ip)); l2array[i]->computeEnergy(); l2array[i]->computeEnergy(false); if (procdynp.homoL2){ l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()*procdynp.numL2); set_pppm(pppm_t,l2array[i]->cachep.clockRate*procdynp.numL2, procdynp.numL2,procdynp.numL2,procdynp.numL2); l2.power = l2.power + l2array[i]->power*pppm_t; set_pppm(pppm_t,1/l2array[i]->cachep.executionTime, procdynp.numL2,procdynp.numL2,procdynp.numL2); l2.rt_power = l2.rt_power + l2array[i]->rt_power*pppm_t; area.set_area(area.get_area() + l2.area.get_area());//placement and routing overhead is 10%, l2 scales worse than cache 40% is accumulated from 90 to 22nm power = power + l2.power; rt_power = rt_power + l2.rt_power; } else{ l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()); area.set_area(area.get_area() + l2array[i]->area.get_area());//placement and routing overhead is 10%, l2 scales worse than cache 40% is accumulated from 90 to 22nm set_pppm(pppm_t,l2array[i]->cachep.clockRate, 1, 1, 1); l2.power = l2.power + l2array[i]->power*pppm_t; power = power + l2array[i]->power*pppm_t;; set_pppm(pppm_t,1/l2array[i]->cachep.executionTime, 1, 1, 1); l2.rt_power = l2.rt_power + l2array[i]->rt_power*pppm_t; rt_power = rt_power + l2array[i]->rt_power*pppm_t; } } } if (numL3 >0) for (i = 0;i < numL3; i++) { l3array.push_back(new SharedCache(XML,i, &interface_ip, L3)); l3array[i]->computeEnergy(); l3array[i]->computeEnergy(false); if (procdynp.homoL3){ l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()*procdynp.numL3); set_pppm(pppm_t,l3array[i]->cachep.clockRate*procdynp.numL3, procdynp.numL3,procdynp.numL3,procdynp.numL3); l3.power = l3.power + l3array[i]->power*pppm_t; set_pppm(pppm_t,1/l3array[i]->cachep.executionTime, procdynp.numL3,procdynp.numL3,procdynp.numL3); l3.rt_power = l3.rt_power + l3array[i]->rt_power*pppm_t; area.set_area(area.get_area() + l3.area.get_area());//placement and routing overhead is 10%, l3 scales worse than cache 40% is accumulated from 90 to 22nm power = power + l3.power; rt_power = rt_power + l3.rt_power; } else{ l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()); area.set_area(area.get_area() + l3array[i]->area.get_area());//placement and routing overhead is 10%, l3 scales worse than cache 40% is accumulated from 90 to 22nm set_pppm(pppm_t,l3array[i]->cachep.clockRate, 1, 1, 1); l3.power = l3.power + l3array[i]->power*pppm_t; power = power + l3array[i]->power*pppm_t; set_pppm(pppm_t,1/l3array[i]->cachep.executionTime, 1, 1, 1); l3.rt_power = l3.rt_power + l3array[i]->rt_power*pppm_t; rt_power = rt_power + l3array[i]->rt_power*pppm_t; } } if (numL1Dir >0) for (i = 0;i < numL1Dir; i++) { l1dirarray.push_back(new SharedCache(XML,i, &interface_ip, L1Directory)); l1dirarray[i]->computeEnergy(); l1dirarray[i]->computeEnergy(false); if (procdynp.homoL1Dir){ l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area()*procdynp.numL1Dir); set_pppm(pppm_t,l1dirarray[i]->cachep.clockRate*procdynp.numL1Dir, procdynp.numL1Dir,procdynp.numL1Dir,procdynp.numL1Dir); l1dir.power = l1dir.power + l1dirarray[i]->power*pppm_t; set_pppm(pppm_t,1/l1dirarray[i]->cachep.executionTime, procdynp.numL1Dir,procdynp.numL1Dir,procdynp.numL1Dir); l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power*pppm_t; area.set_area(area.get_area() + l1dir.area.get_area());//placement and routing overhead is 10%, l1dir scales worse than cache 40% is accumulated from 90 to 22nm power = power + l1dir.power; rt_power = rt_power + l1dir.rt_power; } else{ l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area()); area.set_area(area.get_area() + l1dirarray[i]->area.get_area()); set_pppm(pppm_t,l1dirarray[i]->cachep.clockRate, 1, 1, 1); l1dir.power = l1dir.power + l1dirarray[i]->power*pppm_t; power = power + l1dirarray[i]->power; set_pppm(pppm_t,1/l1dirarray[i]->cachep.executionTime, 1, 1, 1); l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power*pppm_t; rt_power = rt_power + l1dirarray[i]->rt_power; } } if (numL2Dir >0) for (i = 0;i < numL2Dir; i++) { l2dirarray.push_back(new SharedCache(XML,i, &interface_ip, L2Directory)); l2dirarray[i]->computeEnergy(); l2dirarray[i]->computeEnergy(false); if (procdynp.homoL2Dir){ l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area()*procdynp.numL2Dir); set_pppm(pppm_t,l2dirarray[i]->cachep.clockRate*procdynp.numL2Dir, procdynp.numL2Dir,procdynp.numL2Dir,procdynp.numL2Dir); l2dir.power = l2dir.power + l2dirarray[i]->power*pppm_t; set_pppm(pppm_t,1/l2dirarray[i]->cachep.executionTime, procdynp.numL2Dir,procdynp.numL2Dir,procdynp.numL2Dir); l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power*pppm_t; area.set_area(area.get_area() + l2dir.area.get_area());//placement and routing overhead is 10%, l2dir scales worse than cache 40% is accumulated from 90 to 22nm power = power + l2dir.power; rt_power = rt_power + l2dir.rt_power; } else{ l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area()); area.set_area(area.get_area() + l2dirarray[i]->area.get_area()); set_pppm(pppm_t,l2dirarray[i]->cachep.clockRate, 1, 1, 1); l2dir.power = l2dir.power + l2dirarray[i]->power*pppm_t; power = power + l2dirarray[i]->power*pppm_t; set_pppm(pppm_t,1/l2dirarray[i]->cachep.executionTime, 1, 1, 1); l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power*pppm_t; rt_power = rt_power + l2dirarray[i]->rt_power*pppm_t; } } if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0) { mc = new MemoryController(XML, &interface_ip, MC); mc->computeEnergy(); mc->computeEnergy(false); mcs.area.set_area(mcs.area.get_area()+mc->area.get_area()*XML->sys.mc.number_mcs); area.set_area(area.get_area()+mc->area.get_area()*XML->sys.mc.number_mcs); set_pppm(pppm_t,XML->sys.mc.number_mcs*mc->mcp.clockRate, XML->sys.mc.number_mcs,XML->sys.mc.number_mcs,XML->sys.mc.number_mcs); mcs.power = mc->power*pppm_t; power = power + mcs.power; set_pppm(pppm_t,1/mc->mcp.executionTime, XML->sys.mc.number_mcs,XML->sys.mc.number_mcs,XML->sys.mc.number_mcs); mcs.rt_power = mc->rt_power*pppm_t; rt_power = rt_power + mcs.rt_power; } if (XML->sys.flashc.number_mcs >0 )//flash controller { flashcontroller = new FlashController(XML, &interface_ip); flashcontroller->computeEnergy(); flashcontroller->computeEnergy(false); double number_fcs = flashcontroller->fcp.num_mcs; flashcontrollers.area.set_area(flashcontrollers.area.get_area()+flashcontroller->area.get_area()*number_fcs); area.set_area(area.get_area()+flashcontrollers.area.get_area()); set_pppm(pppm_t,number_fcs, number_fcs ,number_fcs, number_fcs ); flashcontrollers.power = flashcontroller->power*pppm_t; power = power + flashcontrollers.power; set_pppm(pppm_t,number_fcs , number_fcs ,number_fcs ,number_fcs ); flashcontrollers.rt_power = flashcontroller->rt_power*pppm_t; rt_power = rt_power + flashcontrollers.rt_power; } if (XML->sys.niu.number_units >0) { niu = new NIUController(XML, &interface_ip); niu->computeEnergy(); niu->computeEnergy(false); nius.area.set_area(nius.area.get_area()+niu->area.get_area()*XML->sys.niu.number_units); area.set_area(area.get_area()+niu->area.get_area()*XML->sys.niu.number_units); set_pppm(pppm_t,XML->sys.niu.number_units*niu->niup.clockRate, XML->sys.niu.number_units,XML->sys.niu.number_units,XML->sys.niu.number_units); nius.power = niu->power*pppm_t; power = power + nius.power; set_pppm(pppm_t,XML->sys.niu.number_units*niu->niup.clockRate, XML->sys.niu.number_units,XML->sys.niu.number_units,XML->sys.niu.number_units); nius.rt_power = niu->rt_power*pppm_t; rt_power = rt_power + nius.rt_power; } if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels >0) { pcie = new PCIeController(XML, &interface_ip); pcie->computeEnergy(); pcie->computeEnergy(false); pcies.area.set_area(pcies.area.get_area()+pcie->area.get_area()*XML->sys.pcie.number_units); area.set_area(area.get_area()+pcie->area.get_area()*XML->sys.pcie.number_units); set_pppm(pppm_t,XML->sys.pcie.number_units*pcie->pciep.clockRate, XML->sys.pcie.number_units,XML->sys.pcie.number_units,XML->sys.pcie.number_units); pcies.power = pcie->power*pppm_t; power = power + pcies.power; set_pppm(pppm_t,XML->sys.pcie.number_units*pcie->pciep.clockRate, XML->sys.pcie.number_units,XML->sys.pcie.number_units,XML->sys.pcie.number_units); pcies.rt_power = pcie->rt_power*pppm_t; rt_power = rt_power + pcies.rt_power; } if (numNOC >0) { for (i = 0;i < numNOC; i++) { if (XML->sys.NoC[i].type) {//First add up area of routers if NoC is used nocs.push_back(new NoC(XML,i, &interface_ip, 1)); if (procdynp.homoNOC) { noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()*procdynp.numNOC); area.set_area(area.get_area() + noc.area.get_area()); } else { noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); area.set_area(area.get_area() + nocs[i]->area.get_area()); } } else {//Bus based interconnect nocs.push_back(new NoC(XML,i, &interface_ip, 1, sqrt(area.get_area()*XML->sys.NoC[i].chip_coverage))); if (procdynp.homoNOC){ noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()*procdynp.numNOC); area.set_area(area.get_area() + noc.area.get_area()); } else { noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); area.set_area(area.get_area() + nocs[i]->area.get_area()); } } } /* * Compute global links associated with each NOC, if any. This must be done at the end (even after the NOC router part) since the total chip * area must be obtain to decide the link routing */ for (i = 0;i < numNOC; i++) { if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type) { nocs[i]->init_link_bus(sqrt(area.get_area()*XML->sys.NoC[i].chip_coverage));//compute global links if (procdynp.homoNOC) { noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() * nocs[i]->nocdynp.total_nodes * procdynp.numNOC); area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() * nocs[i]->nocdynp.total_nodes * procdynp.numNOC); } else { noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() * nocs[i]->nocdynp.total_nodes); area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() * nocs[i]->nocdynp.total_nodes); } } } //Compute energy of NoC (w or w/o links) or buses for (i = 0;i < numNOC; i++) { nocs[i]->computeEnergy(); nocs[i]->computeEnergy(false); if (procdynp.homoNOC){ set_pppm(pppm_t,procdynp.numNOC*nocs[i]->nocdynp.clockRate, procdynp.numNOC,procdynp.numNOC,procdynp.numNOC); noc.power = noc.power + nocs[i]->power*pppm_t; set_pppm(pppm_t,1/nocs[i]->nocdynp.executionTime, procdynp.numNOC,procdynp.numNOC,procdynp.numNOC); noc.rt_power = noc.rt_power + nocs[i]->rt_power*pppm_t; power = power + noc.power; rt_power = rt_power + noc.rt_power; } else { set_pppm(pppm_t,nocs[i]->nocdynp.clockRate, 1, 1, 1); noc.power = noc.power + nocs[i]->power*pppm_t; power = power + nocs[i]->power*pppm_t; set_pppm(pppm_t,1/nocs[i]->nocdynp.executionTime, 1, 1, 1); noc.rt_power = noc.rt_power + nocs[i]->rt_power*pppm_t; rt_power = rt_power + nocs[i]->rt_power*pppm_t; } } } // //clock power // globalClock.init_wire_external(is_default, &interface_ip); // globalClock.clk_area =area*1e6; //change it from mm^2 to um^2 // globalClock.end_wiring_level =5;//toplevel metal // globalClock.start_wiring_level =5;//toplevel metal // globalClock.l_ip.with_clock_grid=false;//global clock does not drive local final nodes // globalClock.optimize_wire(); } void Processor::displayDeviceType(int device_type_, uint32_t indent) { string indent_str(indent, ' '); switch ( device_type_ ) { case 0 : cout <sys.longer_channel_device; bool power_gating = XML->sys.power_gating; string indent_str(indent, ' '); string indent_str_next(indent+2, ' '); if (is_tdp) { if (plevel<5) { cout<<"\nMcPAT (version "<< VER_MAJOR <<"."<< VER_MINOR << " of " << VER_UPDATE << ") results (current print level is "<< plevel <<", please increase print level to see the details in components): "<sys.core_tech_node<<" nm"<sys.core[0].clock_rate<0){ cout <sys.number_of_cores << " cores "<sys.device_type,indent); cout << indent_str_next << "Area = " << core.area.get_area()*1e-6<< " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " << core.power.readOp.dynamic << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel? core.power.readOp.longer_channel_leakage:core.power.readOp.leakage) <<" W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel? core.power.readOp.power_gated_with_long_channel_leakage : core.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next << "Gate Leakage = " << core.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " << core.rt_power.readOp.dynamic << " W" << endl; cout <sys.Private_L2) { if (numL2 >0){ cout <sys.L2[0].device_type,indent); cout << indent_str_next << "Area = " << l2.area.get_area()*1e-6<< " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " << l2.power.readOp.dynamic << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel? l2.power.readOp.longer_channel_leakage:l2.power.readOp.leakage) <<" W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel? l2.power.readOp.power_gated_with_long_channel_leakage : l2.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next << "Gate Leakage = " << l2.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " << l2.rt_power.readOp.dynamic << " W" << endl; cout <0){ cout <sys.L3[0].device_type, indent); cout << indent_str_next << "Area = " << l3.area.get_area()*1e-6<< " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " << l3.power.readOp.dynamic << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel? l3.power.readOp.longer_channel_leakage:l3.power.readOp.leakage) <<" W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel? l3.power.readOp.power_gated_with_long_channel_leakage : l3.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next << "Gate Leakage = " << l3.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " << l3.rt_power.readOp.dynamic << " W" << endl; cout <0){ cout <sys.L1Directory[0].device_type, indent); cout << indent_str_next << "Area = " << l1dir.area.get_area()*1e-6<< " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " << l1dir.power.readOp.dynamic << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel? l1dir.power.readOp.longer_channel_leakage:l1dir.power.readOp.leakage) <<" W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel? l1dir.power.readOp.power_gated_with_long_channel_leakage : l1dir.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next << "Gate Leakage = " << l1dir.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " << l1dir.rt_power.readOp.dynamic << " W" << endl; cout <0){ cout <sys.L1Directory[0].device_type, indent); cout << indent_str_next << "Area = " << l2dir.area.get_area()*1e-6<< " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " << l2dir.power.readOp.dynamic << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel? l2dir.power.readOp.longer_channel_leakage:l2dir.power.readOp.leakage) <<" W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel? l2dir.power.readOp.power_gated_with_long_channel_leakage : l2dir.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next << "Gate Leakage = " << l2dir.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " << l2dir.rt_power.readOp.dynamic << " W" << endl; cout <0){ cout <sys.device_type, indent); cout << indent_str_next << "Area = " << noc.area.get_area()*1e-6<< " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " << noc.power.readOp.dynamic << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel? noc.power.readOp.longer_channel_leakage:noc.power.readOp.leakage) <<" W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel? noc.power.readOp.power_gated_with_long_channel_leakage : noc.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next << "Gate Leakage = " << noc.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " << noc.rt_power.readOp.dynamic << " W" << endl; cout <sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0) { cout <sys.mc.number_mcs << " Memory Controllers "<sys.device_type, indent); cout << indent_str_next << "Area = " << mcs.area.get_area()*1e-6<< " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " << mcs.power.readOp.dynamic << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel? mcs.power.readOp.longer_channel_leakage:mcs.power.readOp.leakage) <<" W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel? mcs.power.readOp.power_gated_with_long_channel_leakage : mcs.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next << "Gate Leakage = " << mcs.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " << mcs.rt_power.readOp.dynamic << " W" << endl; cout <sys.flashc.number_mcs >0) { cout <fcp.num_mcs << " Flash/SSD Controllers "<sys.device_type, indent); cout << indent_str_next << "Area = " << flashcontrollers.area.get_area()*1e-6<< " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " << flashcontrollers.power.readOp.dynamic << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel? flashcontrollers.power.readOp.longer_channel_leakage:flashcontrollers.power.readOp.leakage) <<" W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel? flashcontrollers.power.readOp.power_gated_with_long_channel_leakage : flashcontrollers.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next << "Gate Leakage = " << flashcontrollers.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " << flashcontrollers.rt_power.readOp.dynamic << " W" << endl; cout <sys.niu.number_units >0 ) { cout <niup.num_units << " Network Interface Units "<sys.device_type, indent); cout << indent_str_next << "Area = " << nius.area.get_area()*1e-6<< " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " << nius.power.readOp.dynamic << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel? nius.power.readOp.longer_channel_leakage:nius.power.readOp.leakage) <<" W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel? nius.power.readOp.power_gated_with_long_channel_leakage : nius.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next << "Gate Leakage = " << nius.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " << nius.rt_power.readOp.dynamic << " W" << endl; cout <sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0) { cout <pciep.num_units << " PCIe Controllers "<sys.device_type, indent); cout << indent_str_next << "Area = " << pcies.area.get_area()*1e-6<< " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " << pcies.power.readOp.dynamic << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel? pcies.power.readOp.longer_channel_leakage:pcies.power.readOp.leakage) <<" W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel? pcies.power.readOp.power_gated_with_long_channel_leakage : pcies.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next << "Gate Leakage = " << pcies.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " << pcies.rt_power.readOp.dynamic << " W" << endl; cout <1) { for (i = 0;i < numCore; i++) { cores[i]->displayEnergy(indent+4,plevel,is_tdp); cout <<"*****************************************************************************************"<sys.Private_L2) { for (i = 0;i < numL2; i++) { l2array[i]->displayEnergy(indent+4,is_tdp); cout <<"*****************************************************************************************"<displayEnergy(indent+4,is_tdp); cout <<"*****************************************************************************************"<displayEnergy(indent+4,is_tdp); cout <<"*****************************************************************************************"<displayEnergy(indent+4,is_tdp); cout <<"*****************************************************************************************"<sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0) { mc->displayEnergy(indent+4,is_tdp); cout <<"*****************************************************************************************"<sys.flashc.number_mcs >0 && XML->sys.flashc.memory_channels_per_mc>0) { flashcontroller->displayEnergy(indent+4,is_tdp); cout <<"*****************************************************************************************"<sys.niu.number_units >0 ) { niu->displayEnergy(indent+4,is_tdp); cout <<"*****************************************************************************************"<sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0) { pcie->displayEnergy(indent+4,is_tdp); cout <<"*****************************************************************************************"<displayEnergy(indent+4,plevel,is_tdp); cout <<"*****************************************************************************************"<sys.homogeneous_cores); procdynp.homoL2 = bool(debug?1:XML->sys.homogeneous_L2s); procdynp.homoL3 = bool(debug?1:XML->sys.homogeneous_L3s); procdynp.homoNOC = bool(debug?1:XML->sys.homogeneous_NoCs); procdynp.homoL1Dir = bool(debug?1:XML->sys.homogeneous_L1Directories); procdynp.homoL2Dir = bool(debug?1:XML->sys.homogeneous_L2Directories); procdynp.numCore = XML->sys.number_of_cores; procdynp.numL2 = XML->sys.number_of_L2s; procdynp.numL3 = XML->sys.number_of_L3s; procdynp.numNOC = XML->sys.number_of_NoCs; procdynp.numL1Dir = XML->sys.number_of_L1Directories; procdynp.numL2Dir = XML->sys.number_of_L2Directories; procdynp.numMC = XML->sys.mc.number_mcs; procdynp.numMCChannel = XML->sys.mc.memory_channels_per_mc; // if (procdynp.numCore<1) // { // cout<<" The target processor should at least have one core on chip." <2) // { // cout <<"number of NOCs must be 1 (only global NOCs) or 2 (both global and local NOCs)"<sys.device_type; interface_ip.data_arr_peri_global_tech_type = debug?0:XML->sys.device_type; interface_ip.tag_arr_ram_cell_tech_type = debug?0:XML->sys.device_type; interface_ip.tag_arr_peri_global_tech_type = debug?0:XML->sys.device_type; interface_ip.specific_hp_vdd = false; interface_ip.specific_lop_vdd = false; interface_ip.specific_lstp_vdd = false; interface_ip.specific_vcc_min = false; interface_ip.ic_proj_type = debug?0:XML->sys.interconnect_projection_type; interface_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied. interface_ip.area_wt = 0;//Fixed number, This is used to exhaustive search for individual components. interface_ip.dynamic_power_wt = 100;//Fixed number, This is used to exhaustive search for individual components. interface_ip.leakage_power_wt = 0; interface_ip.cycle_time_wt = 0; interface_ip.delay_dev = 10000;//Fixed number, make sure timing can be satisfied. interface_ip.area_dev = 10000;//Fixed number, This is used to exhaustive search for individual components. interface_ip.dynamic_power_dev = 10000;//Fixed number, This is used to exhaustive search for individual components. interface_ip.leakage_power_dev = 10000; interface_ip.cycle_time_dev = 10000; interface_ip.ed = 2; interface_ip.burst_len = 1;//parameters are fixed for processor section, since memory is processed separately interface_ip.int_prefetch_w = 1; interface_ip.page_sz_bits = 0; interface_ip.temp = debug?360: XML->sys.temperature; interface_ip.F_sz_nm = debug?90:XML->sys.core_tech_node;//XML->sys.core_tech_node; interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000; //***********This section of code does not have real meaning, they are just to ensure all data will have initial value to prevent errors. //They will be overridden during each components initialization interface_ip.cache_sz =64; interface_ip.line_sz = 1; interface_ip.assoc = 1; interface_ip.nbanks = 1; interface_ip.out_w = interface_ip.line_sz*8; interface_ip.specific_tag = 1; interface_ip.tag_w = 64; interface_ip.access_mode = 2; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.is_main_mem = false; interface_ip.rpters_in_htree = true ; interface_ip.ver_htree_wires_over_array = 0; interface_ip.broadcast_addr_din_over_ver_htrees = 0; interface_ip.num_rw_ports = 1; interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 1; interface_ip.nuca = 0; interface_ip.nuca_bank_count = 0; interface_ip.is_cache =true; interface_ip.pure_ram =false; interface_ip.pure_cam =false; interface_ip.force_cache_config =false; interface_ip.power_gating =XML->sys.power_gating; if (XML->sys.Embedded) { interface_ip.wt =Global_30; interface_ip.wire_is_mat_type = 0; interface_ip.wire_os_mat_type = 0; } else { interface_ip.wt =Global; interface_ip.wire_is_mat_type = 2; interface_ip.wire_os_mat_type = 2; } interface_ip.force_wiretype = false; interface_ip.print_detail = 1; interface_ip.add_ecc_b_ =true; } Processor::~Processor(){ while (!cores.empty()) { delete cores.back(); cores.pop_back(); } while (!l2array.empty()) { delete l2array.back(); l2array.pop_back(); } while (!l3array.empty()) { delete l3array.back(); l3array.pop_back(); } while (!nocs.empty()) { delete nocs.back(); nocs.pop_back(); } while (!l1dirarray.empty()) { delete l1dirarray.back(); l1dirarray.pop_back(); } while (!l2dirarray.empty()) { delete l2dirarray.back(); l2dirarray.pop_back(); } if (mc) { delete mc; mc=0; } if (niu) { delete niu; niu =0; } if (pcie) { delete pcie; pcie=0; } if (flashcontroller) { delete flashcontroller; flashcontroller = 0; } };