#BEGIN_LEGAL # #Copyright (c) 2020 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # #END_LEGAL import re import copy import os import collections import shutil import genutil import ildutil import mbuild import ild_info import ild_storage import ild_eosz import ild_easz import ild_imm import ild_disp import ild_modrm import codegen import ild_codegen import ild_cdict import dec_dyn import actions import verbosity op_bin_pattern = re.compile(r'[_10]{2,}$') op_hex_pattern = re.compile(r'[0-9a-f]{2}$', flags=re.IGNORECASE) reg_binding_pattern = re.compile(r'REG[\[](?P0b[01_]+)]') mod_eq_pattern = re.compile(r'MOD=(?P[0123]{1})') mod_neq_pattern = re.compile(r'MOD(!=|=!)(?P[0123]{1})') #debugdir is updated in init_debug #the module. debugdir = '.' #the debug file debug = None _mode_token = 'MODE' #just to know how many there are and how hard it is #42 nested NTs.. #mostly modrm-related def _get_nested_nts(agi): nested_nts = set() for nt_name in agi.nonterminal_dict.keys(): g = agi.generator_dict[nt_name] ii = g.parser_output.instructions[0] if genutil.field_check(ii,'iclass'): continue #only real NTs, not instructions for rule in g.parser_output.instructions: for bt in rule.ipattern.bits: if bt.is_nonterminal(): nested_nts.add(nt_name) for op in rule.operands: if op.type == 'nt_lookup_fn': nested_nts.add(nt_name) return nested_nts ########################################################################### ## < Interface for generator > ########################################################################### #needed to init the debug directory def init_debug(agi): global debug global debugdir debugdir = agi.common.options.gendir debug = open(mbuild.join(debugdir, 'ild_debug.txt'), 'w') def gen_xed3(agi, ild_info, ild_patterns, all_state_space, ild_gendir, all_ops_widths): all_cnames = set() ptrn_dict = {} # map,opcode -> pattern maps = ild_info.get_maps(agi) for insn_map in maps: ptrn_dict[insn_map] = collections.defaultdict(list) for ptrn in ild_patterns: ptrn_dict[ptrn.insn_map][ptrn.opcode].append(ptrn) #FIXME:bad name vv_lu = {} # vexvalid-space -> ( ph_lu, lu_fo_list) #mapping between a operands to their look up function op_lu_map = {} # func name -> function (for unique-ifying) for vv in sorted(all_state_space['VEXVALID'].keys()): #cdict is a 2D dictionary: #cdict[map][opcode] = ild_cdict.constraint_dict_t #Each constraint_dict_t describes all the patterns that fall #into corresponding map-opcode. #cnames is a set of all constraint names from the patterns #in the given vv space cdict,cnames = ild_cdict.get_constraints_lu_table(agi, ptrn_dict, all_state_space, vv, all_ops_widths) all_cnames = all_cnames.union(cnames) _msg("vv%s cnames: %s" % (vv,cnames)) constraints_log_file = mbuild.join(ild_gendir, 'all_constraints_vv%s.txt' %vv) #now generate the C hash functions for the constraint #dictionaries. # # ph_lu map from map,opcode -> hash fn name # lu_fo_list list of all phash fn objects # operands_lu_list list of operands lookup fns ph_lu, lu_fo_list, operands_lu_list = ild_cdict.gen_ph_fos(agi, cdict, constraints_log_file, ptrn_dict, vv) #hold only one instance of each function for op in operands_lu_list: if op.function_name not in op_lu_map: op_lu_map[op.function_name] = op vv_lu[str(vv)] = (ph_lu,lu_fo_list) _msg("all cnames: %s" % all_cnames) #dump the (a) hash functions and (b) lookup tables for obtaining #these hash functions (at decode time). ** Static decode ** ild_codegen.gen_static_decode(agi, vv_lu, list(op_lu_map.values()), h_fn='xed3-phash.h') #dec_dyn.work(...) generates all the functions and lookup tables for # ** Dynamic decode ** dec_dyn.work(agi, all_state_space, all_ops_widths, ild_patterns) #Main entry point of the module def work(agi): ild_gendir = agi.common.options.gendir init_debug(agi) debug.write("state_space:\n %s" % agi.common.state_space) # Collect up interesting NT names. # We are going to use them when we generate pattern_t objects # and also when we build resolution functions. eosz_nts = ild_eosz.get_eosz_binding_nts(agi) easz_nts = ild_easz.get_easz_binding_nts(agi) imm_nts = ild_imm.get_imm_binding_nts(agi) disp_nts = ild_disp.get_disp_binding_nts(agi) brdisp_nts = ild_disp.get_brdisp_binding_nts(agi) #just for debugging _msg("EOSZ NTS:") for nt_name in eosz_nts: _msg(nt_name) _msg("\nEASZ NTS:") for nt_name in easz_nts: _msg(nt_name) _msg("\nIMMNTS:") for nt_name in imm_nts: _msg(nt_name) _msg("\nDISP NTS:") for nt_name in disp_nts: _msg(nt_name) _msg("\nBRDISP NTS:") for nt_name in brdisp_nts: _msg(nt_name) nested_nts = _get_nested_nts(agi) _msg("\nNESTED NTS:") for nt_name in nested_nts: _msg(nt_name) #Get dictionary with all legal values for all interesting operands all_state_space = ild_cdict.get_all_constraints_state_space(agi) _msg("ALL_STATE_SPACE:") for k,v in list(all_state_space.items()): _msg("%s: %s"% (k,v)) #Get widths for the operands all_ops_widths = ild_cdict.get_state_op_widths(agi, all_state_space) _msg("ALL_OPS_WIDTHS:") for k,v in list(all_ops_widths.items()): _msg("%s: %s"% (k,v)) #generate a list of pattern_t objects that describes the ISA. #This is the main data structure for XED3 ild_patterns = get_patterns(agi, eosz_nts, easz_nts, imm_nts, disp_nts, brdisp_nts, all_state_space) if ild_patterns: #get ild_storage_t object - the main data structure for ILD #essentially a 2D dictionary: #ild_tbl[map][opcode] == [ ild_info_t ] #the ild_info_t objects are obtained from the grammar priority = 0 ild_tbl = _get_info_storage(agi, ild_patterns, priority) #generate modrm lookup tables ild_modrm.work(agi, ild_tbl, debug) #dump_patterns is for debugging if verbosity.vild(): dump_patterns(ild_patterns, mbuild.join(ild_gendir, 'all_patterns.txt')) eosz_dict = ild_eosz.work(agi, ild_tbl, eosz_nts, ild_gendir, debug) easz_dict = ild_easz.work(agi, ild_tbl, easz_nts, ild_gendir, debug) #dump operand accessor functions agi.operand_storage.dump_operand_accessors(agi) if eosz_dict and easz_dict: ild_imm.work(agi, ild_tbl, imm_nts, ild_gendir, eosz_dict, debug) ild_disp.work(agi, ild_tbl, disp_nts, brdisp_nts, ild_gendir, eosz_dict, easz_dict, debug) # now handle the actual instructions gen_xed3(agi, ild_info, ild_patterns, all_state_space, ild_gendir, all_ops_widths) def get_patterns(agi, eosz_nts, easz_nts, imm_nts, disp_nts, brdisp_nts, all_state_space): """ This function generates the pattern_t objects that have all the necessary information for the ILD. Returns these objects as a list. """ machine_modes = agi.common.get_state_space_values('MODE') patterns = [] pattern_t.map_info_g = agi.map_info for g in agi.generator_list: ii = g.parser_output.instructions[0] if genutil.field_check(ii,'iclass'): for ii in g.parser_output.instructions: ptrn = pattern_t(ii, eosz_nts, easz_nts, imm_nts, disp_nts, brdisp_nts, machine_modes, all_state_space) patterns.append(ptrn) if ptrn.incomplete_opcode: expanded_ptrns = ptrn.expand_partial_opcode() patterns.extend(expanded_ptrns) return patterns def _get_info_storage(agi, ptrn_list, priority): """convert list of pattern_t objects to ild_storage_t object, store by map/opcode""" lookup = ild_storage.get_lookup(agi) storage = ild_storage.ild_storage_t(lookup) for p in ptrn_list: info = ild_info.ptrn_to_info(p, priority) # convert to ild_info_t if info not in storage.get_info_list(p.insn_map,p.opcode): storage.append_info(p.insn_map, p.opcode, info) return storage #this is for debugging mostly. Also a good source of info on instruction set. def dump_patterns(patterns, fname): f = open(fname, 'w') for pattern in patterns: f.write( '%s\n' % pattern) f.close() ########################################################################### ## ########################################################################### #Maybe pattern_t should inherit from instruction_info_t #Let it inherit from object for now. class pattern_t(object): map_info_g = None def __init__(self, ii, eosz_nts, easz_nts, imm_nts, disp_nts, brdisp_nts, mode_space, state_space): self.ptrn = ii.ipattern_input self.ptrn_wrds = self.ptrn.split() self.iclass = ii.iclass self.legal = True self.category = ii.category #FIXME: remove all members of ii stored directly as members self.ii = ii #incomplete_opcode is used for expanding opcodes that have registers #embedded in them self.incomplete_opcode = False #number of missing bits in incomplete opcode. usually 0 or 3 self.missing_bits = 0 self.insn_map = None self.opcode = None self.space = None # LEGACY|VEX|EVEX self.has_modrm = False self.imm_nt_seq = None self.disp_nt_seq = None #modrm.reg bits value, set only when it is explicitly #e.g. bounded: REG[010] self.ext_opcode = None #all legal values for MODE operand in this pattern self.mode = None #an ordered string of EOSZ setting NTs in the pattern #we will use it to create the eosz lookup table for the pattern self.eosz_nt_seq = None #same for EASZ self.easz_nt_seq = None #operand deciders of the pattern self.constraints = None self._set_constraints(ii, state_space) self.vv = None # vexvalid, integer self._set_vexvalid() self.encspace = None self._set_encoding_space() mi,insn_map,opcode = self._get_map_opcode() self.map_info = mi self.insn_map = insn_map self.opcode = opcode self.has_modrm = ild_modrm.get_hasmodrm(self.ptrn) self.set_ext_opcode() self.set_mode(ii, mode_space) self.eosz_nt_seq = ild_eosz.get_eosz_nt_seq(self.ptrn_wrds, eosz_nts) self.easz_nt_seq = ild_easz.get_easz_nt_seq(self.ptrn_wrds, easz_nts) self.imm_nt_seq = ild_imm.get_imm_nt_seq(self.ptrn_wrds, imm_nts) self.disp_nt_seq = ild_disp.get_disp_nt_seq(self.ptrn_wrds, disp_nts.union(brdisp_nts)) self.actions = [actions.gen_return_action(ii.inum)] def is_legal(self): return (self.legal and self.opcode != None and self.insn_map != None and self.space != None) def set_ext_opcode(self): #inline captures MOD[mm] REG[rrr] RM[nnn] #or REG[111] etc. -- can be constant for w in self.ptrn_wrds: pb = reg_binding_pattern.match(w) if pb: bits = pb.group('bits') self.ext_opcode = genutil.make_numeric(bits) return def _set_constraints(self, ii, state_space): #FIXME: this assumes, that there are no contradictions #between constraints on the same operand. #If there are, e.g. MOD=3 ... MOD=1, both values will be #set as legal.. check such things here? #set constraints that come from operands deciders self.constraints = dec_dyn.get_ii_constraints(ii, state_space) #print "CONSTRAINTS: {}".format(self.constraints) #special care for VEXVALID - it makes it easy to dispatch vex #and legacy instructions. For legacy we will explicitly set #VEXVALID=0. if 'VEXVALID' not in self.constraints: self.constraints['VEXVALID'] = {0:True} # since we dispatch at the high level on MAP and VEXVALID, we # should not include them in the standard constraints. self.special_constraints = {} for od in ['MAP','VEXVALID']: if od in self.constraints: self.special_constraints[od] = self.constraints[od] del self.constraints[od] def _set_vexvalid(self): # FIXME: could just look at the pattern... lst = list(self.special_constraints['VEXVALID'].keys()) if len(lst) != 1: genutil.die("Not one value for VEXVALID in {}: {}".format(self.iclass, self.ptrn)) self.vv = int(lst[0]) def _set_encoding_space(self): """returns a string""" vv = self.get_vexvalid() self.encspace = ild_info.vexvalid_to_encoding_space(vv) def get_vexvalid(self): return self.vv def get_encoding_space(self): return self.encspace def set_mode(self, ii, mode_space): for bit_info in ii.ipattern.bits: if bit_info.token == _mode_token: if bit_info.test == 'eq': self.mode = [bit_info.requirement] else: reduced_mode_space = copy.deepcopy(mode_space) reduced_mode_space.remove(bit_info.requirement) self.mode = reduced_mode_space return #when MODE is not mentioned, we assume that any value is #legal, it should not mess the conflict resolution. self.mode = mode_space def parse_opcode(self, op_str): # has side effects of settting self.missing_bits and self.incomplete val = None if genutil.numeric(op_str): val = genutil.make_numeric(op_str) # special check for partial binary numbers as opcodes if genutil.is_binary(op_str): bin_str = re.sub('^0b', '', op_str) bin_str = re.sub('_', '', bin_str) #if it is bin string, it might be incomplete, and we #assume that given bits are msb. Hence we have to #shift left if len(bin_str) > 8: ildutil.ild_err("Unexpectedly long binary opcode: %s" % bin_str) if len(bin_str) < 8: self.missing_bits = 8-len(bin_str) val = val << (self.missing_bits) self.incomplete_opcode = True _msg('incomplete opcode for iclass %s, pttrn %s' % (self.iclass, self.ptrn)) return val #if opcode is incomplete, than we have 0's in all the missing bits and #need to create copies of the pattern_t that have all other possible #variations of the opcode. For example PUSH instruction has opcode 0x50 #and in expand_partial_opcode method we will create a list of pattern_t objects #that have opcodes 0x51-0x57 #We don't need to create the 0x50 variant, because it already exists #(it is the current self) #That way we will cover all the legal opcodes for the given incomplete #opcode. def expand_partial_opcode(self): expanded = [] if self.incomplete_opcode: if 'RM[rrr]' in self.ptrn or 'REG[rrr]' in self.ptrn: _msg('Expanding opcode for %s' % self.iclass) #FIXME: MJC: was assuming hex for self.opcode (2012-06-19) opcode = genutil.make_numeric(self.opcode) #iterating through all legal variations of the incomplete #opcode. #Since the variant with all missing bits == 0 was already #created (it is the current self), we need to iterate through #1 to 2 ^ (self.missing_bits) for i in range(1, 2**self.missing_bits): new_ptrn = copy.deepcopy(self) new_ptrn.opcode = hex(opcode | i) expanded.append(new_ptrn) return expanded def get_opcode(self, tokens): opcode = None for token in tokens: opcode = self.parse_opcode(token) if opcode != None: break if opcode == None: ildutil.ild_err("Failed to parse op_str with " + "from tokens %s" %( tokens)) return hex(opcode) def _get_map_opcode(self): encspace = self.get_encoding_space() for mi in pattern_t.map_info_g: if mi.space == encspace: # if no search pattern we are on the last record for map 0 if mi.search_pattern == '' or self.ptrn.find(mi.search_pattern) != -1: insn_map = mi.map_name # different than the stuff we use now. try: opcode = self.ptrn.split()[mi.opcpos] except: genutil.die("Did not find any pos {} in [{}] for {}".format(mi.opcpos, self.ptrn.split(), mi)) parsed_opcode = self.parse_opcode(opcode) # 0x00 is also a value so we must explicitly test vs # None, and not use "not parsed_opcode" if parsed_opcode==None: genutil.die("Did failed to convert opcode {} from {} for map {}".format(opcode, self.ptrn, mi)) return mi, insn_map, hex(parsed_opcode) genutil.die("Did not find map / opcode for {}".format(self.ptrn)) def has_emit_action(self): ''' This function is needed in order to match the interface of rule_t it has no real meaning for the docoder ''' return False def __str__(self): printed_members = [] printed_members.append('ICLASS\t: %s' % self.iclass) printed_members.append('PATTERN\t: %s' % self.ptrn) #putting map and opcode in one line for easier grepping #of the pattern list dump - very handy for different kinds of analysis printed_members.append('MAP:%s OPCODE:%s' % (self.insn_map, self.opcode)) printed_members.append('EXT_OPCODE\t: %s' % self.ext_opcode) printed_members.append('MODE\t: %s' % self.mode) printed_members.append('INCOMPLETE_OPCODE\t: %s' % self.incomplete_opcode) printed_members.append('HAS_MODRM\t: %s' % self.has_modrm) printed_members.append('EOSZ_SEQ:\t %s' % self.eosz_nt_seq) printed_members.append('EASZ_SEQ:\t %s' % self.easz_nt_seq) printed_members.append('IMM_SEQ\t: %s' % self.imm_nt_seq) printed_members.append('DISP_SEQ\t: %s' % self.disp_nt_seq) printed_members.append('CONSTRAINTS\t: {}'.format(self.emit_constraints())) printed_members.append('INUM\t: %s' % self.ii.inum) return "{\n"+ ",\n".join(printed_members) + "\n}" def emit_constraints(self): sl = [] for k in self.constraints.keys(): v = self.constraints[k] # dict with always true-valued items. # Only keys of v are relevant. s = "{}:{} ".format(k,str(v.keys())) sl.append(s) return "".join(sl) def __eq__(self, other): return (other != None and self.ptrn == other.ptrn and self.iclass == other.iclass) def __ne__(self, other): return (other == None or self.ptrn != other.ptrn or self.iclass != other.iclass) def _msg(s): debug.write("%s\n" % (s))