# CACTI cache configuration.
# Conventions used in this file:
#   - a line starting with "-" is an active setting;
#   - a line starting with "//" is a disabled alternative value for a setting;
#   - a line starting with "#" is commentary.
# NOTE(review): this layout assumes the consuming parser reads one directive
# per line -- confirm against the CACTI input parser.

# Cache size
//-size (bytes) 1024
//-size (bytes) 2048
//-size (bytes) 4096
//-size (bytes) 32768
//-size (bytes) 131072
//-size (bytes) 262144
-size (bytes) 1048576
//-size (bytes) 2097152
//-size (bytes) 4194304
//-size (bytes) 8388608
//-size (bytes) 16777216
//-size (bytes) 33554432
//-size (bytes) 134217728
//-size (bytes) 67108864
//-size (bytes) 1073741824

# Power gating
//-Array Power Gating - "true"
//-WL Power Gating - "true"
//-CL Power Gating - "true"
//-Bitline floating - "true"
//-Interconnect Power Gating - "true"
-Power Gating - "true"
-Power Gating Performance Loss 0.01
//-CLDriver vertical - "false"

# Line size
//-block size (bytes) 8
-block size (bytes) 64

# To model a fully associative cache, set associativity to zero
//-associativity 0
-associativity 2
//-associativity 4
//-associativity 8
//-associativity 8

-read-write port 1
-exclusive read port 0
-exclusive write port 0
-single ended read ports 0
-search port 1

# Multiple banks connected using a bus
-UCA bank count 1
-technology (u) 0.022
//-technology (u) 0.040
//-technology (u) 0.032
//-technology (u) 0.090

# The following three parameters are meaningful only for main memories
-page size (bits) 8192
-burst length 8
-internal prefetch width 8

# The following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
-Data array cell type - "itrs-hp"
//-Data array cell type - "itrs-lstp"
//-Data array cell type - "itrs-lop"

# The following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
-Data array peripheral type - "itrs-hp"
//-Data array peripheral type - "itrs-lstp"
//-Data array peripheral type - "itrs-lop"

# The following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
-Tag array cell type - "itrs-hp"
//-Tag array cell type - "itrs-lstp"
//-Tag array cell type - "itrs-lop"

# The following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
-Tag array peripheral type - "itrs-hp"
//-Tag array peripheral type - "itrs-lstp"
//-Tag array peripheral type - "itrs-lop"

# Customized VDD; "default" means using the ITRS-defined value.
-hp Vdd (V) "default"
//-hp Vdd (V) 1.1
-lstp Vdd (V) "default"
-lop Vdd (V) "default"

# Dynamic Voltage Scaling, assuming the same voltages are applied to both the
# tag and data arrays regardless of the device types of each array
-DVS(V): 0.8 1.1 1.3 1.4 1.5

# Whether to use long (10%) channel devices ("true" or "false"); when "true",
# assumes 90% of the devices (non-time-critical) can be long-channel devices
-Long channel devices - "true"

# User-defined voltage for power-saving states, assuming the same sleep voltage
# for all components to simplify the user interface, whereas the default
# power-saving models use different sleep voltages for different components.
# "default" means using the technology's (ITRS-based) lowest value for
# state-retaining power gating.
# Please understand the implications when setting up the voltage for different
# sleep states: for example, when a deep sleep state is used (voltage lower than
# the technology-allowed state-retaining supply voltage), the effects of losing
# data and cold-start effects must be considered (beyond the scope of CACTI-P)
# when waking up the structure.
# Power gating and DVS cannot happen at the same time, because power gating
# happens when the circuit is idle, while DVS happens when the circuit is active.
# NOTE(review): the active value below is 0 V, which is presumably below any
# state-retaining voltage, so the deep-sleep caveat above likely applies -- confirm.
//-Powergating voltage (V) "default"
-Powergating voltage (V) 0

# Bus width includes data bits and address bits required by the decoder
//-output/input bus width 16
-output/input bus width 512

# Operating temperature: 300-400 in steps of 10
-operating temperature (K) 360

# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
# or main memory (no tag array and every access will happen at a page granularity; Ref: CACTI 5.3 report)
-cache type "cache"
//-cache type "ram"
//-cache type "cam"
//-cache type "main memory"

# To model special structures like branch target buffers, directories, etc.,
# change the tag size parameter.
# If you want CACTI to calculate the tag bits, set the tag size to "default".
-tag size (b) "default"
//-tag size (b) 22

# fast       - data and tag access happen in parallel
# sequential - data array is accessed after accessing the tag array
# normal     - data array lookup and tag access happen in parallel;
#              the final data block is broadcast in the data array h-tree
#              after getting the signal from the tag array
//-access mode (normal, sequential, fast) - "fast"
-access mode (normal, sequential, fast) - "normal"
//-access mode (normal, sequential, fast) - "sequential"

# DESIGN OBJECTIVE for UCA (or banks in NUCA)
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0

# Percentage deviation from the minimum value
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
# that compromises at most 10% delay.
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
# percentage values will not produce any valid organizations. For example,
# 0:0:100:100:100 will try to identify an organization that has both
# least delay and dynamic power. Since such an organization is not possible, CACTI will
# throw an error. Refer to the CACTI-6 technical report for more details.
-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000

# Objective for NUCA
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000

# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
# energy-delay or energy-delay-squared product.
# Note: Optimize tag will disable the weight or deviate values mentioned above.
# Set it to NONE to let the weight and deviate values determine the
# appropriate cache configuration.
# NOTE(review): the active value below is "ED^2", so per the note above the
# design objective / deviate settings in this file are disabled -- confirm this is intended.
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"

-Cache model (NUCA, UCA) - "UCA"
//-Cache model (NUCA, UCA) - "NUCA"

# In order for CACTI to find the optimal NUCA bank value, the following
# variable should be assigned 0.
-NUCA bank count 0

# NOTE: for NUCA, the network frequency is set to a default value of
# 5GHz in time.c. CACTI automatically
# calculates the maximum possible frequency and downgrades this value if necessary.

# By default CACTI considers both full-swing and low-swing
# wires to find an optimal configuration. However, it is possible to
# restrict the search space by changing the signalling from "default" to
# "fullswing" or "lowswing" type.
# NOTE(review): the active value below, "Global_30", is not one of the three
# names listed in this comment or in the directive text -- confirm it is a value
# the parser accepts and document its meaning here.
-Wire signalling (fullswing, lowswing, default) - "Global_30"
//-Wire signalling (fullswing, lowswing, default) - "default"
//-Wire signalling (fullswing, lowswing, default) - "lowswing"

//-Wire inside mat - "global"
-Wire inside mat - "semi-global"
//-Wire outside mat - "global"
-Wire outside mat - "semi-global"

-Interconnect projection - "conservative"
//-Interconnect projection - "aggressive"

# Contention in the network (which is a function of core count and cache level) is one of
# the critical factors used for deciding the optimal bank count value.
# Core count can be 4, 8, or 16.
//-Core count 4
-Core count 8
//-Core count 16
-Cache level (L2/L3) - "L3"

-Add ECC - "true"

//-Print level (DETAILED, CONCISE) - "CONCISE"
-Print level (DETAILED, CONCISE) - "DETAILED"

# For debugging
-Print input parameters - "true"
//-Print input parameters - "false"

# Force CACTI to model the cache with the
# following Ndbl, Ndwl, Nspd, Ndsam,
# and Ndcm values.
# NOTE(review): with "Force cache config" set to "false" below, the Nd* values
# are presumably not applied -- confirm.
//-Force cache config - "true"
-Force cache config - "false"
-Ndwl 1
-Ndbl 1
-Nspd 0
-Ndcm 1
-Ndsam1 0
-Ndsam2 0