#BEGIN_LEGAL
#
#Copyright (c) 2023 Intel Corporation
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
#END_LEGAL
from enum import Enum, auto
from typing import Dict, List
import math
import copy
import collections

import genutil
import ildutil
import ild_info
import opnds
import ild_phash
import ild_codegen
import ild_eosz
import ild_easz
import ild_nt
import ild
import actions_codegen
import actions
import verbosity
import tup2int
import operand_storage

# _token_2_module is for fields that might be modified in the pattern.
# if fields are modified in the pattern by some NT, then we must be
# consistent across buckets (legacy/vex/evex, opcode,map). This is
# only relevant for fields that would be used in the "dynamic decode
# part 1".
# EASZ is only rarely modified in experimental extensions.
_token_2_module = {'EOSZ': ild_eosz, 'EASZ': ild_easz}

_find_fn_pfx = 'xed3_phash_find'


def _log(f, s):
    # Write s to log file f, but only when ILD verbosity is enabled.
    if verbosity.vild():
        f.write(s)


def _set_state_space_from_ii(agi, ii, state_space):
    """state_space is a 2D dictionary, ii is generator.instruction_info_t.

    This function sets:
        state_space[OPNAME][OPVAL] = True
    for every operand decider or prebinding with name OPNAME and value
    OPVAL legal for the given ii.
    """
    # operand deciders with an equality test contribute their required value
    for bt in ii.ipattern.bits:
        if bt.is_operand_decider():
            if bt.test == 'eq':
                state_space[bt.token][bt.requirement] = True

    #look at prebindings too
    #for things like ZEROING that don't have all possible
    #values mentioned in patterns
    for (name, binding) in list(ii.prebindings.items()):
        bitnum = len(binding.bit_info_list)
        #dirty hack: we don't want big prebindings to explode
        #our dictionaries
        #FIXME: this assumes that all constraints used for
        #pattern dispatching (all constraints explicitly mentioned
        #in patterns) have bit widths up to 3 bits.
        #This is true now, but might change later. Should put an
        #assertion somewhere.
        #Also better to use a genutil.max_constraint_bitwidth than
        #3.
        if bitnum < 4:
            # small prebinding: splat every possible value of the field
            if name not in state_space:
                state_space[name] = {}
            for val in range(0, 2**bitnum):
                state_space[name][val] = True
        elif binding.is_constant():
            # wide, but constant: add just the single constant value
            val = int(binding.get_value(), 2)
            state_space[name][val] = True


def _set_space_from_operands(agi, operands, state_space):
    """Add legal values from instruction operands (imm_const bindings) to
    state_space. Macro operands are expanded through agi.common.state_bits
    first."""
    state_dict = agi.common.state_bits
    for op in operands:
        ops = []
        #if binding operand is a macro
        if op.name.lower() in state_dict:
            op_spec = state_dict[op.name.lower()].list_of_str
            for w in op_spec:
                expanded_op = opnds.parse_one_operand(w)
                ops.append(expanded_op)
        else:
            ops.append(op)
        for op in ops:
            # only constant immediate bindings for operands we already track
            if (op.bits and op.name in state_space and
                    op.type == 'imm_const'):
                op_val = int(op.bits, 16)
                state_space[op.name][op_val] = True


def get_all_constraints_state_space(agi):
    """Returns a 2D dictionary state_space.

    state_space[OPNAME][OPVAL] == True if there is an operand with name
    OPNAME and value OPVAL. The dictionary contains all legal values for
    operands in grammar. Only operands that appear as operand deciders,
    prebindings, or instruction operands are added to the returned
    dictionary.
    """
    state_space = collections.defaultdict(dict)
    for g in agi.generator_list:
        for ii in g.parser_output.instructions:
            _set_state_space_from_ii(agi, ii, state_space)

    #set state_space from operands
    #These are NTs partition tables right parts
    for g in agi.generator_list:
        ii = g.parser_output.instructions[0]
        if genutil.field_check(ii, 'iclass'):
            continue  #only real NTs, not instructions
        for ii in g.parser_output.instructions:
            _set_space_from_operands(agi, ii.operands, state_space)

    # in some configurations xed can be built without any AVX
    # instructions, in this case the operand VEXVALID will not be added.
    # the ild relies on this operand so we add it manually
    if 'VEXVALID' not in state_space:
        state_space['VEXVALID'][0] = True
    else:
        # AVX/EVEX builds...
        # 2014-10-10: when I got rid of the NTs for decoding the
        # VEX/EVEX/XOP prefixes, I ended up losing the only NTs that
        # mention ZEROING=1 and VLBAD (VL=3). So we add them here.
        # They are required for proper splattering of don't care
        # cases in the hash function generation. For example when,
        # EVEX.RC is rounding control and co-opting the EVEX.LL field,
        # we need to have the value of VL=3 because it is not
        # "corrected" when we are still picking an instruction (aka
        # 'static decode').
        state_space['ZEROING'][1] = True
        state_space['VL'][3] = True

    return state_space


def get_state_op_widths(agi, state_space):
    """Returns a dictionary from operand name to the operand's bit width."""
    widths_dict = {}
    for opname, val_dict in list(state_space.items()):
        if opname in agi.operand_storage.get_operands():
            # width declared in the operand storage takes precedence
            opnd = agi.operand_storage.get_operand(opname)
            widths_dict[opname] = int(opnd.bitwidth)
            continue
        # otherwise derive the width from the largest observed value
        maxval = max(val_dict.keys())
        if maxval == 0:
            #log doesn't work on 0 so well
            width = 1
        else:
            width = int(math.floor(math.log(maxval, 2))) + 1
        widths_dict[opname] = width

    #Special, "compressed" operands
    #FIXME: we can add these special operands widths in grammar
    widths_dict[_bin_MOD3] = 1
    widths_dict[_vd_token_7_ones] = 1
    widths_dict[_vd_token_1f_ones] = 1
    widths_dict[_rm_token_4] = 1
    widths_dict[_mask_token_n0] = 1
    widths_dict[_mask_token_zero] = 1
    #constraints on uimm0 operands are 8 bits width max
    widths_dict['UIMM0'] = 8
    return widths_dict


#Following functions are for operands compressing

_bin_MOD3 = 'MOD3'


def _is_binary_MOD3(ptrn_list):
    """True if every pattern in ptrn_list constrains MOD to be either =3
    or !=3, so MOD can be compressed into a single MOD3 bit."""
    mod3_eq = 'MOD=3'
    mod3_neq = 'MOD!=3'
    for ptrn in ptrn_list:
        if not (mod3_eq in ptrn.ptrn or mod3_neq in ptrn.ptrn):
            return False
    return True


def _replace_MOD_with_MOD3(cnames, ptrn_list):
    """Replace the 2-bit MOD constraint with the 1-bit MOD3 constraint."""
    cnames.remove('MOD')
    cnames.add(_bin_MOD3)
    for ptrn in ptrn_list:
        if 'MOD=3' in ptrn.ptrn:
            ptrn.constraints[_bin_MOD3] = {1: True}
        else:
            ptrn.constraints[_bin_MOD3] = {0: True}


_vd_token = 'VEXDEST'
_vd_token210 = f'{_vd_token}210'
_vd_token3 = f'{_vd_token}3'
_vd_token4 = f'{_vd_token}4'
_vd_token_7_ones = f'{_vd_token}210_7'
_vd_token_1f_ones = f'{_vd_token}_1F'


def _has_VEXDEST210_equals_7_restriction(cnames, ptrn_list):
    """Return true if some pattern in the list of input pattern_t's has a
    VVVV=1111 restriction. If that occurs, we can replace all VEXDEST210
    restrictions since there are no other kinds of VVVV restrictions.
    """
    if _vd_token210 not in cnames:
        return False
    for ptrn in ptrn_list:
        if _vd_token_7_ones in ptrn.constraints:
            genutil.die("XXX VEXDEST210_7 already exists in the patterns.")
    for ptrn in ptrn_list:
        if _vd_token210 in ptrn.constraints:
            cvals = ptrn.constraints[_vd_token210]
            if len(cvals) == 1 and 7 in cvals:
                return True
    return False


def _replace_VEXDEST210_with_VD2107(cnames, ptrn_list):
    ''' Replace VEXDEST210 with all-ones optimized constraint '''
    cnames.remove(_vd_token210)
    cnames.add(_vd_token_7_ones)
    for ptrn in ptrn_list:
        found = False
        for bt in ptrn.ii.ipattern.bits:
            if bt.token == _vd_token210:
                if bt.test == 'eq':
                    found = True
                    ptrn.constraints[_vd_token_7_ones] = {1: True}
                    break
        if not found:
            #vd7==0 says any VD
            ptrn.constraints[_vd_token_7_ones] = {0: True, 1: True}


# Bit ENUM representation
class bit(Enum):
    ONE = auto()
    ZERO = auto()
    DCARE = auto()  # Don't-care


def get_constraint_bit(constraints: Dict[str, dict],
                       op: str,
                       all_ops_widths: dict) -> bit:
    """Represent XED operand constraint as a bit, if possible.

    Args:
        constraints (Dict[str, dict]): All pattern constraints
        op (str): Convert this operand's constraints into bit representation
        all_ops_widths (dict): dict of operands width

    Returns:
        bit Enum: Bit representation of "op" pattern constraint. None if
        operand's constraint is not convertible to bit.
    """
    if all_ops_widths[op] == 1:  # One bit width operand
        if op not in constraints:
            # No operand constraints means don't-care
            return bit.DCARE
        constr: Dict[int, bool] = constraints[op]
        if constr == {1: True}:
            return bit.ONE
        elif constr == {0: True}:
            return bit.ZERO
        elif constr == {0: True, 1: True}:
            # Explicitly don't-care
            return bit.DCARE
    return None


def _has_VEXDEST_equals_1F_restriction(cnames: set,
                                       ptrn_list: 'List[ild.pattern_t]',
                                       all_ops_widths: dict):
    """Return true if all patterns include VVVVV=1F or VVVVV=any
    restriction.

    In this case, we can replace all VEXDEST210, VEXDEST3, VEXDEST4
    restrictions with one check since there are no other kinds of VVVVV
    restrictions.
    Assumption - This check is executed only after a completed
    VEXDEST210_7 optimization
    """
    if not set([_vd_token_7_ones, _vd_token3, _vd_token4]).issubset(cnames):
        return False
    for ptrn in ptrn_list:
        if _vd_token_1f_ones in ptrn.constraints:
            genutil.die(f"XXX {_vd_token_1f_ones} already exists in the patterns.")

        # Treat VEXDEST[0-2] as one bit, see VEXDEST210_7 assumption above
        bit210 = get_constraint_bit(ptrn.constraints, _vd_token_7_ones,
                                    all_ops_widths)
        bit3 = get_constraint_bit(ptrn.constraints, _vd_token3, all_ops_widths)
        bit4 = get_constraint_bit(ptrn.constraints, _vd_token4, all_ops_widths)

        # xed stores the inverted VEXDEST[4] bit value. store the encoding
        # bit value.
        # FIX: the two branches must be mutually exclusive (if/elif).
        # With two independent "if" statements a ONE was flipped to ZERO
        # and then immediately flipped back to ONE, so the inversion never
        # took effect for bit4==ONE and was wrong for bit4==ZERO.
        if bit4 == bit.ONE:
            bit4 = bit.ZERO
        elif bit4 == bit.ZERO:
            bit4 = bit.ONE

        vd_bits = [bit210, bit3, bit4]
        if all(b == bit.ONE for b in vd_bits):
            # Trivial VVVVV=11111 constraint
            continue
        elif all(b == bit.DCARE for b in vd_bits):
            # Trivial VVVVV=any (Don't-Care) constraint
            continue
        else:
            # Found non-trivial constraint (VVVVV not in [0x1F, don't-care])
            return False
    # At this point, all patterns restrictions are VVVVV trivial
    # (all_ones or don't-care)
    return True


def _replace_VEXDEST_with_VD1F(cnames: set, ptrn_list: 'List[ild.pattern_t]'):
    """Replace VEXDEST210, VEXDEST3, VEXDEST4 constraints with VEXDEST_1F."""
    cnames.remove(_vd_token_7_ones)
    cnames.remove(_vd_token3)
    cnames.remove(_vd_token4)
    cnames.add(_vd_token_1f_ones)
    for ptrn in ptrn_list:
        found = dict.fromkeys([_vd_token4, _vd_token3, _vd_token_7_ones],
                              False)
        if ptrn.constraints[_vd_token_7_ones] == {1: True}:
            found[_vd_token_7_ones] = True
        for bt in ptrn.ii.ipattern.bits:
            if bt.token == _vd_token3:
                if bt.test == 'eq':
                    found[_vd_token3] = True
            # FIX: copy-paste bug - this branch recorded found[_vd_token4]
            # but tested bt.token against _vd_token3 a second time, so the
            # VEXDEST4 restriction was never detected.
            if bt.token == _vd_token4:
                if bt.test == 'eq':
                    found[_vd_token4] = True
        if all(found.values()):
            # VVVVV=11111
            ptrn.constraints[_vd_token_1f_ones] = {1: True}
        elif any(found.values()):
            # non-trivial VVVV restriction
            error = f'XXX VEXDEST210_1F Found unexpected VVVVV restriction, should not reach here:\n'
            error += f'VEXDEST210_1F={found}'
            genutil.die(error)
        else:
            # VVVVV=any (don't-care)
            ptrn.constraints[_vd_token_1f_ones] = {1: True, 0: True}


_rm_token = 'RM'
_rm_token_4 = 'RM4'


def _is_binary_RM_4(cnames, ptrn_list):
    # ptrn_list is a list of ild.pattern_t. Returns True if all
    # patterns have just RM=4 as a constraint.
    if _rm_token not in cnames:
        return False
    for ptrn in ptrn_list:
        # not all patterns have the _rm_token and querying the
        # default-dictionary can add it. so we check first...
        if _rm_token in ptrn.constraints:
            cvals = ptrn.constraints[_rm_token]
            if len(cvals) != 1:
                # more than one constraint value
                return False
            elif 4 not in cvals:
                # one constraint value but it is not 4.
                return False
        else:
            # some pattern does not have an RM constraint
            return False
    # all have one constraint of RM=4.
    return True


def _replace_RM_with_RM4(cnames, ptrn_list):
    # When we call this we know that all the patterns in the pattern
    # list have RM=4 either from (a) RM[0b100] or (b) RM=4
    # constraints. The RM[0b100] is captured in prebindings (and the
    # constraints, which is how we got here) and 3 raw 1, 0, 0 bits
    # are present in the pattern. This function does remove the RM
    # from the cnames, but it doesn't really do anything to the
    # ipattern. So there is no need to search ipattern again
    # ptrn_list is a list of ild.pattern_t
    cnames.remove(_rm_token)
    cnames.add(_rm_token_4)
    for ptrn in ptrn_list:
        #print("B-ICLASS: {}".format(ptrn.ii.iclass))
        ptrn.constraints[_rm_token_4] = {1: True}


_mask_token = 'MASK'
_mask_token_n0 = 'MASK_NOT0'
_mask_token_zero = 'MASK_ZERO'


def _is_binary_MASK_NOT0(cnames, ptrn_list):
    """True if every pattern constrains MASK to the 7 nonzero values,
    so MASK can be compressed to the single bit MASK_NOT0."""
    if _mask_token not in cnames:
        return False
    for ptrn in ptrn_list:
        # check before indexing to avoid creating entry in
        # default-dict.
        if _mask_token in ptrn.constraints:
            cvals = ptrn.constraints[_mask_token]
            if len(cvals) != 7:
                return False
            elif 0 in cvals:
                # have 7 values but 0 is there so no good
                return False
        else:
            # some pattern does not have mask constraint
            return False
    return True


def _has_MASK_ZERO_restriction(cnames, ptrn_list):
    """Return true if all pattern_t in the list has MASK=000 or MASK=any
    restriction."""
    if _mask_token not in cnames:
        return False
    for ptrn in ptrn_list:
        if _mask_token in ptrn.constraints:
            cvals: Dict[int, bool] = ptrn.constraints[_mask_token]
            if cvals == {0: True}:
                for bt in ptrn.ii.ipattern.bits:
                    if bt.token == _mask_token:
                        if bt.test != 'eq':
                            # MASK!=000. Optimization is only for MASK=000
                            return False
            elif len(cvals.keys()) == 8 and all(cvals.values()):
                continue  # MASK=Any
            else:
                return False
    return True


def _replace_MASK_with_MASK_NOT0(cnames, ptrn_list):
    """Replace the 3-bit MASK constraint with the 1-bit MASK_NOT0."""
    cnames.remove(_mask_token)
    cnames.add(_mask_token_n0)
    for ptrn in ptrn_list:
        found = False
        for bt in ptrn.ii.ipattern.bits:
            if bt.token == _mask_token:
                if bt.test == 'ne':
                    found = True
                    ptrn.constraints[_mask_token_n0] = {1: True}
                    break
        if not found:
            #mask is not in the pattern, all values of MASK_NOT0 are valid
            ptrn.constraints[_mask_token_n0] = {0: True, 1: True}


def _replace_MASK_with_MASK_ZERO(cnames, ptrn_list):
    """Replace the 3-bit MASK constraint with the 1-bit MASK_ZERO."""
    cnames.remove(_mask_token)
    cnames.add(_mask_token_zero)
    for ptrn in ptrn_list:
        if _mask_token in ptrn.constraints and \
           ptrn.constraints[_mask_token] == {0: True}:
            ptrn.constraints[_mask_token_zero] = {1: True}
        else:
            # all values of MASK_ZERO are valid
            ptrn.constraints[_mask_token_zero] = {0: True, 1: True}


def get_compressed_op_getter_fn(opname):
    """Compressed operands are special - we do not capture them in ILD
    and do not derive them in NTs. (though we could.. FIXME: is it
    worthy?), hence in order to get their value we can not use regular
    xed3_operand_get_* function - we use special getters for them.
    get_compressed_op_getter_fn(opname) returns a name of the special
    getter for a given compressed operand name.
    FIXME: right now we just use the same operand naming scheme as for
    regular operands. Do we need this function?
    """
    return operand_storage.get_op_getter_fn(opname)


# Counters of how many (map,opcode) bins each compression replacement
# was applied to; reported by replacement_stats().
mod3_repl = 0
vd7_repl = 0
vd1f_repl = 0
rm4_repl = 0
masknot0_repl = 0
mask0_repl = 0


def replacement_stats():
    """Return a one-line summary of the constraint-replacement counters."""
    s = []
    for x, y in [('MOD3', mod3_repl),
                 ('VD7', vd7_repl),
                 ('VD1F', vd1f_repl),
                 ('RM4', rm4_repl),
                 ('MASK!=0', masknot0_repl),
                 ('MASK=0', mask0_repl)]:
        s.append('{} {}'.format(x, y))
    return ', '.join(s)


def _get_united_cdict(ptrn_list: list,
                      state_space: Dict[str, Dict[int, bool]],
                      vexvalid: str,
                      all_ops_widths: dict):
    """@param ptrn_list: list of ild.pattern_t
    @param state_space: all legal values for xed operands:
        state_space['REXW'][1] = True, state_space['REXW'][0] = True
    @param vexvalid: VEXVALID value we want to filter by. vexvalid=='0'
        will include only patterns with vexvalid=='0' constraint value.
    @param all_ops_widths: dict of operands to their bit widths.

    @return ild_cdict.constraint_dict_t which unites patterns constraint
    dicts

    This gets called with all the patterns for a specific map & opcode,
    but for all encoding spaces. So first we filter based on encoding
    space (vexvalid).
    """
    global mod3_repl, vd7_repl, vd1f_repl, rm4_repl, masknot0_repl, mask0_repl
    cnames = []

    # FIXME: 2019-10-30: patterns now know their vexvalid value and
    # encspace, and the maps are split by encspace as well, so we can
    # avoid the following filtering by vexvalid.

    #filter by encoding space (vexvalid)
    ptrns: List[ild.pattern_t] = []
    ivv = int(vexvalid)
    for ptrn in ptrn_list:
        #FIXME: 2019-10-30:
        #   if vexvalid in list(ptrn.special_constraints['VEXVALID'].keys()):
        if ivv == ptrn.vv:
            ptrns.append(ptrn)
    if len(ptrns) == 0:
        return None

    for ptrn in ptrns:
        cnames.extend(list(ptrn.constraints.keys()))
    cnames = set(cnames)

    if _is_binary_MOD3(ptrns):
        mod3_repl += 1
        _replace_MOD_with_MOD3(cnames, ptrns)
    if _has_VEXDEST210_equals_7_restriction(cnames, ptrns):
        vd7_repl += 1
        _replace_VEXDEST210_with_VD2107(cnames, ptrns)
        if _has_VEXDEST_equals_1F_restriction(cnames, ptrns, all_ops_widths):
            # upgrade the VD7 replacement to the wider VD1F replacement
            vd7_repl -= 1
            vd1f_repl += 1
            _replace_VEXDEST_with_VD1F(cnames, ptrns)
    if _is_binary_RM_4(cnames, ptrns):
        rm4_repl += 1
        _replace_RM_with_RM4(cnames, ptrns)
    # Hint for disabled optimization (Replaced with MASKNOT0())
    # if _is_binary_MASK_NOT0(cnames, ptrns):
    #     masknot0_repl += 1
    #     _replace_MASK_with_MASK_NOT0(cnames, ptrns)
    if _has_MASK_ZERO_restriction(cnames, ptrns):
        mask0_repl += 1
        _replace_MASK_with_MASK_ZERO(cnames, ptrns)

    # For each pattern we have a list of constraints. ptrn.constraints
    # is the legal values for those constraints. In each map opcode
    # bin, we have several patterns with different constraints. We
    # want to make one hash table for these different patterns. Thats
    # why we want to take the union of all the constraints and make
    # one dictionary (and ultimately a hash table). Need to add all
    # legal variations of all constraints, cross product. (dangerous)

    #For example if we have two patterns:
    #PATTERN1: MOD=1
    #PATTERN2: REG=2
    #then for PATTERN1 we will create a constraint dictionary with all
    #combinations (MOD=1 REG=0), (MOD=1, REG=1) ,..., (MOD=1, REG=7)
    #and for PATTERN2 we will have (MOD=0 REG=2), (MOD=1 REG=2), ...
    cdicts = []
    for ptrn in ptrns:
        cdict = constraint_dict_t(cnames, ptrn.constraints, state_space,
                                  ptrn)
        cdicts.append(cdict)
    insn_map = ptrns[0].insn_map
    opcode = ptrns[0].opcode
    msg = []
    msg.append("cdict conflict in pattern")
    msg.append('MAP:%s OPCODE:%s\n' % (insn_map, opcode))
    msg = "\n".join(msg)

    # now we unite (cross-product) after exploding/back-filling all the
    # constraints. All patterns now have same constraints.
    united_dict = constraint_dict_t.unite_dicts(cdicts, msg, cnames)

    #generate the int value for each tuple
    united_dict.create_tuple2int(all_ops_widths)
    #print "UNITED DICT: VV {} OPCODE {} MAP {}: tuples {}".format(
    #    vexvalid, opcode, insn_map, len(united_dict.tuple2rule) )

    #creating the default action that will be taken when we did not hit
    #a valid hash entry
    default_action = [actions.gen_return_action('0')]
    united_dict.action_codegen = actions_codegen.actions_codegen_t(
        united_dict.tuple2rule,
        default_action,
        united_dict.strings_dict)
    return united_dict


#FIXME: maybe it should contain tuple2int function?
#Now tuple2int is a part of phash object.
class constraint_dict_t(object):
    def __init__(self, cnames=None, state_space=None, all_state_space=None,
                 rule=None):
        """cnames is sorted list of constraint names. state_space is the
        constraints from the pattern_t. all_state_space is a dict w/legal
        values for all constraints in grammar. rule is the ild.py
        pattern_t object (essentially the instruction).
        """
        #cnames is sorted list of strings - constraints' names that we want
        #this cdict to have.
        if cnames:
            self.cnames = sorted(list(cnames))
        else:
            self.cnames = []

        self.strings_dict = ild_codegen._dec_strings

        #state_space is a dict with constraints' values we want
        #this cdict to represent.
        #For example if we want cdict to allow only MODE=0 we will
        #have state_space['MODE'][0] = True
        if state_space:
            self.state_space = state_space
        else:
            self.state_space = {}

        #all_state_space is a dict with all legal values that constraints
        #have in grammar.
        #For example:
        #all_state_space['REXW'][0]=True, all_state_space['REXW'][1]=True
        #It is used when state_space doesn't have a constraint from cnames.
        #We need this when we build united constraint dict for a set of
        #patterns:
        #first we build a separate constraint dict for each pattern, but
        #it includes all the cnames that set has, and then we unite those
        #cdicts. See _get_united_cdict() function
        if all_state_space:
            self.all_state_space = all_state_space
        else:
            self.all_state_space = {}

        # this is the ild.py:pattern_t
        self.rule = rule

        #tuple2int maps the same tuples as tuple2rule to hash key values.
        self.tuple2int = {}
        #reverse mapping from hash key to list of constraint value tuples.
        self.int2tuple = {}
        #dict of all operands -> bit width.
        self.op_widths = {}
        self.action_codegen = None

        #dict mapping tuples to rules.
        #tuples are the constraint values (without the constraint names).
        self.tuple2rule = {}  # ild.pattern_t
        if self.state_space:
            self.tuple2rule = self._initialize_tuple2rule(self.cnames, {})

    @staticmethod
    def unite_dicts(dict_list, err_msg, cnstr_names):
        """dict_list is a list of constraint dictionaries. The keys in
        the dictionary are the values of the constraints as tuples. If we
        see the same values in more than one pattern, we have a decoding
        conflict in the grammar. The dictionaries have been expanded so
        that they all have the same constraint names upon entry.
        """
        dlen = len(dict_list)
        if dlen == 0:
            return None
        if dlen == 1:
            return dict_list[0]
        res = constraint_dict_t(cnames=cnstr_names)
        for cdict in dict_list:
            for key in cdict.tuple2rule.keys():
                if key in res.tuple2rule:
                    # keys are tuples of constraint values
                    msg = []
                    msg.append("key: %s" % (key,))
                    msg.append("cdict:%s" % cdict)
                    msg.append("res:%s" % res)
                    msg = "\n".join(msg)
                    ildutil.ild_err(err_msg + msg)
                    return None
                else:
                    res.tuple2rule[key] = cdict.tuple2rule[key]
        return res

    def _initialize_tuple2rule(self, cnames, tuple2rule):  # recursive
        """look @ first cnames, get the possible values for it. Add an
        entry to the current tuple2rule dictionary with each possible
        value and the current rule (which is the ild.py pattern_t,
        pointed to by the set of constraints.). Each time we recursively
        call, we take the next constraint and add its values to the key
        tuple for the dictionary.
        """
        if len(cnames) == 0:
            # when we are out of constraints, we return the fully
            # constructed tuple.
            return tuple2rule

        # pick off the cnames one at a time
        name = cnames[0]
        if name in self.state_space:
            vals = sorted(self.state_space[name].keys())
        else:
            vals = sorted(self.all_state_space[name].keys())

        if len(tuple2rule) == 0:
            #initialize tuple2rule with singleton tuples
            for val in vals:
                tuple2rule[(val,)] = self.rule
            return self._initialize_tuple2rule(cnames[1:], tuple2rule)

        # extend every existing key tuple with every value of this cname
        new_tuple2rule = {}
        for key_tuple in tuple2rule.keys():
            for val in vals:
                new_key = key_tuple + (val,)
                new_tuple2rule[new_key] = self.rule
        return self._initialize_tuple2rule(cnames[1:], new_tuple2rule)

    def get_all_keys_by_val(self, val):
        """Return all constraint-value tuples that map to the given rule."""
        return [k for k, v in self.tuple2rule.items() if v == val]

    def create_tuple2int(self, all_ops_widths):
        '''create the mapping of tuple to its int value by CONCATENTATING
        all the input constraint values to make an integer that is
        ultimately the input to the hash function.
        '''
        tuple2int = {}
        int2tuple = {}
        for t in self.tuple2rule.keys():
            res = tup2int.tuple2int(t, self.cnames, all_ops_widths)
            if res in int2tuple:
                # FIX: the first conversion was a bare "%" (missing "s"),
                # which made the %-format itself raise ValueError instead
                # of reporting the colliding tuples.
                err = "the tuple %s and the tuple %s generate the same value:%d"
                genutil.die(err % (t, str(int2tuple[res]), res))
            else:
                tuple2int[t] = res
                int2tuple[res] = t
        #later using the op_widths for the code generation
        self.op_widths = all_ops_widths
        self.tuple2int = tuple2int
        self.int2tuple = int2tuple

    def get_ptrn(self, tup):
        ''' return the pattern that represents the given tuple '''
        return self.tuple2rule[tup].ptrn

    def filter_tuples(self, tuples):
        '''from all the dictionaries in self, remove the tuples that are
        not in the input tuples list. return new instance of cdict.

        This is used for re-grouping tuples when making the 2nd level of
        2-level hash tables.
        '''
        new_cdict = copy.copy(self)
        new_cdict.tuple2int = {}
        new_cdict.tuple2rule = {}
        for t in tuples:
            new_cdict.tuple2int[t] = self.tuple2int[t]
            new_cdict.tuple2rule[t] = self.tuple2rule[t]
        new_cdict.int2tuple = dict((i, t)
                                   for t, i in new_cdict.tuple2int.items())
        return new_cdict

    def get_operand_accessor(self, cname):
        ''' return a tuple of the operand accessor function and the
        constraint names that it represents '''
        ptrn_list = list(self.tuple2rule.values())
        if cname in list(_token_2_module.keys()):
            nt_module = _token_2_module[cname]  # name of python module!
            getter_fn = nt_module.get_getter_fn(ptrn_list)  # indirect module refs!
            if not getter_fn:  # -> error
                msg = 'Failed to resolve %s getter fn for '
                msg += 'MAP:%s OPCODE:%s'
                insn_map = ptrn_list[0].insn_map
                opcode = ptrn_list[0].opcode
                ildutil.ild_err(msg % (cname, insn_map, opcode))
            access_str = '%s(%s)' % (getter_fn, self.strings_dict['obj_str'])
            nt = ild_nt.get_nt_from_lufname(getter_fn)
            return access_str, nt
        else:
            access_str = ild_codegen.emit_ild_access_call(
                cname, self.strings_dict['obj_str'])
            return access_str, cname

    def __str__(self):
        rows = []
        size = len(self.tuple2rule)
        rows.append("cdict_size=%d" % size)
        if size >= 100:
            rows.append('HUGE!')
        elif size >= 50:
            rows.append('BIG!')
        legend = "{} -> VALUE".format(" ".join(self.cnames))
        rows.append(legend)
        if len(self.tuple2rule) == 0:
            rows.append("_ \t-> %s" % self.rule)

        # print all the decision values first with iclass, if any
        for key in sorted(self.tuple2rule.keys()):
            val = self.tuple2rule[key]  # ild.pattern_t
            s = []
            for cname, tval in zip(self.cnames, key):
                s.append("{}:{}".format(cname, tval))
            iclass = 'n/a'
            if hasattr(val, 'iclass'):
                iclass = val.iclass
            rows.append("{} -> {}".format(", ".join(s), iclass))
        rows.append("\n\n")

        # now print them again with their detailed information
        for key in sorted(self.tuple2rule.keys()):
            val = self.tuple2rule[key]  # ild.pattern_t
            s = []
            for cname, tval in zip(self.cnames, key):
                s.append("{}:{}".format(cname, tval))
            rows.append("{} \t-> {}".format(", ".join(s), str(val)))
        return "\n".join(rows)


def get_constraints_lu_table(agi,
                             ptrns_by_map_opcode,
                             state_space,
                             vexvalid,
                             all_ops_widths):
    """returns a tuple (cdict_by_map_opcode,cnames)

    cnames is a set of all constraint names used in patterns.
    cdict_by_map_opcode is a traditional 2D lookup dict from map,opcode
    to constraint_dict_t objects that represent the mapping from
    constraints values to different patterns of the corresponding
    (map,opcode,vexvalid) bin.

    These cdict objects can later be used for generating hash functions
    from constraint values to patterns (inums).
    """
    maps = ild_info.get_maps(agi)
    cdict_by_map_opcode = collections.defaultdict(dict)
    cnames = set()
    for insn_map in maps:
        for opcode in range(0, 256):
            opcode = hex(opcode)
            ptrns = ptrns_by_map_opcode[insn_map][opcode]
            cdict = _get_united_cdict(ptrns, state_space, vexvalid,
                                      all_ops_widths)
            cdict_by_map_opcode[insn_map][opcode] = cdict
            if cdict:
                cnames = cnames.union(set(cdict.cnames))
    return cdict_by_map_opcode, cnames


def gen_ph_fos(agi, cdict_by_map_opcode, log_fn, ptrn_dict, vv):
    """Returns a tuple (phash_lu, phash_fo_list, op_lu_list)

    * phash_lu: is a traditional 2D dict by (map, opcode) to a hash
      function name.
    * phash_fo_list: is a list of all phash function objects created (we
      might have fos that are not in lookup table - when we have 2-level
      hash functions).
    * op_lu_list: is a list for all the operands lookup functions

    Also writes log file for debugging.
    """
    maps = ild_info.get_maps(agi)
    log_f = open(log_fn, 'w')
    cnames = set()  # only for logging
    stats = {
        '0. #map-opcodes': 0,
        '1. #entries': 0,
        '2. #hentries': 0,
        '3. #hashes': 0,
        '4. #min_hashes': 0,
        '5. #cdict_size_1_to_10': 0,
        '6. #cdict_size_10_to_20': 0,
        '7. #cdict_size_20_to_100': 0,
        '8. #cdict_size_at_least_100': 0
    }
    lu_fo_list = []
    op_lu_map = {}  # fn name -> fn obj
    phash_lu = {}  # map, opcode -> fn name
    for insn_map in maps:
        phash_lu[insn_map] = {}
        zeros = 0
        for opcode in range(0, 256):
            opcode = hex(opcode)
            cdict = cdict_by_map_opcode[insn_map][opcode]
            if cdict:
                stats['0. #map-opcodes'] += 1
                stats['1. #entries'] += len(cdict.tuple2rule)
                cnames = cnames.union(set(cdict.cnames))
                _log(log_f, 'XYZ VV: {} MAP:{} OPCODE:{}:\n{}\n'.format(
                    vv, insn_map, opcode, cdict))
                phash = ild_phash.gen_hash(cdict)
                if phash:
                    _log(log_f, "%s" % phash)
                    phash_id = 'map%s_opcode%s_vv%d' % (insn_map, opcode, vv)
                    fname = "%s_%s" % (_find_fn_pfx, phash_id)
                    (fo_list, op_lu_fo) = phash.gen_find_fos(fname)
                    lu_fo_list.extend(fo_list)
                    #hold only one instance of each function
                    if op_lu_fo:
                        if op_lu_fo.function_name not in op_lu_map:
                            op_lu_map[op_lu_fo.function_name] = op_lu_fo
                    for fo in fo_list:
                        _log(log_f, '//find function:\n')
                        _log(log_f, fo.emit())
                        _log(log_f, '-----------------------------\n')
                    #FIXME: assumption: L2 function is last in the list
                    #maybe return dict or tuple to make a distinction between
                    #L2 and L1 functions?
                    phlu_fn = lu_fo_list[-1]
                    phash_lu[insn_map][opcode] = phlu_fn.function_name
                    phash.update_stats(stats)
                else:
                    _log(log_f, '---NOPHASH-----\n')
                    msg = "Failed to gen phash for map %s opcode %s"
                    ildutil.ild_err(msg % (insn_map, opcode))
            else:
                phash_lu[insn_map][opcode] = '(xed3_find_func_t)0'
                zeros = zeros + 1
        if zeros == 256:
            # all zero... shortcut to avoid scanning maps for "all-zeros"
            _log(log_f, "ZEROING phash_lu for map {} vv {}\n".format(
                insn_map, vv))
            phash_lu[insn_map] = None

    _log(log_f, "cnames: %s\n" % cnames)
    for key in sorted(stats.keys()):
        _log(log_f, "%s %s\n" % (key, stats[key]))
    _log(log_f, f'Restriction replacements stats:\n {replacement_stats()}')
    log_f.close()
    return phash_lu, lu_fo_list, list(op_lu_map.values())