#
# MIT License
#
# Copyright (c) 2020 Reto Achermann
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# SPDX-License-Identifier: MIT
#

import os
import re
import xml.etree.ElementTree as ET

from logger import logverbose, logok, logwarn, logerr, log


# ---------------------------------------------------------------------------------------
# Patterns for Extracting Values
# ---------------------------------------------------------------------------------------

# matches every run of characters that may not appear in a generated identifier
identpattern = re.compile('[^a-z0-9_]+')


def filterident(ident):
    """Turn `ident` into an identifier made of `[a-z0-9_]` only.

    Spaces become underscores, the text is lower-cased, and every remaining
    character outside `[a-z0-9_]` is dropped.
    """
    return identpattern.sub('', ident.replace(" ", "_").lower())


def parse_field_node(field_node, length):
    """Parse one `field` XML node into a dictionary describing the bit field.

    `length` is the bit length of the enclosing field set; it is only used to
    recognise the anonymous whole-register field, which is renamed to `val`.

    Returns a dict with the keys `id`, `name`, `msb`, `lsb`, `size` and
    `reserved`, or None if `field_node` is None.
    """
    # elements / attributes of the field node that are read here:
    #
    # <field id="...">
    #   <field_name>...</field_name>            (optional)
    #   <field_msb>63</field_msb>
    #   <field_lsb>32</field_lsb>
    #   <field_description>
    #     <para>...</para>                      (optional)
    #   </field_description>
    # </field>
    if field_node is None:
        logerr(" - expected a field node, but was None!")
        return None

    field_msb = int(field_node.find('field_msb').text)
    field_lsb = int(field_node.find('field_lsb').text)
    field_size = field_msb - field_lsb + 1

    field_id = filterident(field_node.attrib['id'])
    # a field whose id is `none_<length-1>_0` gets the generic name `val`
    if field_id == "none_{}_0".format(length - 1):
        field_id = 'val'

    # fall back to the (filtered) id if the field has no explicit name
    field_name_node = field_node.find('field_name')
    field_name = field_id if field_name_node is None else field_name_node.text

    # a field counts as reserved if its description starts with "Reserved"
    desc = field_node.find('field_description/para')
    is_reserved = (
        desc is not None
        and desc.text is not None
        and desc.text.startswith("Reserved")
    )

    logverbose(" - {} [{}..{}] ({} bits)".format(field_id, field_lsb, field_msb, field_size))

    return {
        "id": field_id,
        "name": field_name,
        "msb": field_msb,
        "lsb": field_lsb,
        "size": field_size,
        "reserved": is_reserved,
    }


def parse_reg_fieldsets_node(reg_fieldsets_node, is_register):
    """Parse the `reg_fieldsets` node and return `(length, fields)`.

    Only field sets that are not tied to AArch32 are considered; if several
    remain, a warning is logged and the first one is used.

    Returns
      * None if `reg_fieldsets_node` is None (an error is logged),
      * `(0, None)` if no usable field set was found (for registers an error
        is logged as well),
      * otherwise `(length, fields)` where `fields` is the list of dicts
        produced by `parse_field_node`.
    """
    # elements / attributes of the reg_fieldsets node that are read here:
    #
    # <reg_fieldsets>
    #   <fields length="64">
    #     <fields_condition>...</fields_condition>   (optional)
    #     <field id="...">
    #       <field_msb>63</field_msb>
    #       <field_lsb>32</field_lsb>
    #       ...
    #     </field>
    #     ...
    #   </fields>
    #   ...
    # </reg_fieldsets>
    if reg_fieldsets_node is None:
        logerr(" - expected a reg_fieldsets node, but was None!")
        return None

    # here there can be multiple fields/fieldset definitions. we take the one for aarch64
    fields_nodes = []
    for fields_node in reg_fieldsets_node.findall("fields"):
        fields_condition = fields_node.find("fields_condition")

        # unconditional field sets are always candidates
        if fields_condition is None or fields_condition.text is None:
            fields_nodes.append(fields_node)
            continue

        condition = fields_condition.text

        # do not take anything from AArch32
        if ("from AArch32" in condition
                or "to AArch32" in condition
                or "using AArch32" in condition):
            continue

        if condition.endswith("== 0"):
            continue

        if "AArch32" in condition:
            logwarn(' - had aarch32')

        fields_nodes.append(fields_node)

    # we have no fields, probably this is a cache
    if not fields_nodes:
        if is_register:
            logerr(" - expected fields_nodes to have exactly one element, had 0")
        # there is nothing to index below; report an empty field set instead
        # of failing with an IndexError
        return (0, None)

    if len(fields_nodes) != 1:
        logwarn(" - expected fields_nodes to have exactly one element, had {}".format(len(fields_nodes) ))

    fields_node = fields_nodes[0]
    length = int(fields_node.attrib["length"])

    fields = [
        parse_field_node(field_node, length)
        for field_node in fields_node.findall("field")
    ]

    return (length, fields)


def accessor_type(accessor):
    """Return the lower-cased first word of `accessor`, mapped to the key used
    for the known access instructions (`msr_reg` / `msr_imm` for the two MSR
    forms)."""
    fst = accessor.split(" ")[0].lower()

    # need to rewrite some of the access types to match known instructions
    if fst == "msrregister":
        return 'msr_reg'
    if fst == "msrimmediate":
        return "msr_imm"
    return fst


def parse_access_mechanisms_node(regname, access_mechanisms_node, instr):
    """Parse the `access_mechanisms` node of a register description.

    `regname` is the filtered register id, `instr` maps accessor types (see
    `accessor_type`) to access-instruction descriptions that have a `type` key.

    Returns None if `access_mechanisms_node` is None, otherwise the tuple
    `(is_writable, is_readable, access_mechanisms)` where `access_mechanisms`
    is a list of dicts with the keys `access_instruction`, `access_type` and
    `encoding`. Malformed `access_mechanism` entries are logged and skipped.
    """
    # elements / attributes of the access_mechanisms node that are read here:
    #
    # <access_mechanisms>
    #   <access_mechanism accessor="...">
    #     <encoding>
    #       <access_instruction>...</access_instruction>
    #       <enc n="..." v="..."/>
    #       ...
    #     </encoding>
    #   </access_mechanism>
    #   ...
    # </access_mechanisms>
    if access_mechanisms_node is None:
        logerr("expected a access_mechanisms_node node, but was None!")
        return None

    # the access identifiers use the `icc` spelling for `icv` registers
    regname = regname.replace("icv", 'icc')

    is_writable = False
    is_readable = False
    access_mechanisms = []

    for access_mechanism in access_mechanisms_node.findall("access_mechanism"):
        # getting the encoding node
        encoding_node = access_mechanism.find("encoding")
        if encoding_node is None:
            logerr("expected finding an encoding node, but was none")
            continue

        # get the accessor node and extract the accessor type
        accessor = access_mechanism.attrib["accessor"]
        accesstype = accessor_type(accessor)

        # the accessor type should be in the known instructions
        if accesstype not in instr:
            logerr("accessor type '{}' not found in {}".format(accesstype, instr.keys()))
            continue

        # check if the register is writable or readable; this is recorded
        # before the filters below, so skipped accessors count as well
        access_instr = instr[accesstype]
        if access_instr["type"] == "write":
            is_writable = True
        if access_instr["type"] == "read":
            is_readable = True

        # get the access_instruction node within
        access_instruction = encoding_node.find("access_instruction")
        if access_instruction is None:
            logerr("expected finding an access_instruction node, but was none")
            continue

        access_ident = filterident(access_instruction.text)

        # skip those accessors with two execution levels
        if "_el12" in access_ident or "_el02" in access_ident:
            continue

        # skipping the nxs variants of those accesses
        if "tlbi" in access_ident and "nxs" in access_ident:
            continue

        # for `_el` registers keep only accessors that mention the register
        if "_el" in regname and regname not in access_ident:
            continue

        encodings = {
            enc.attrib["n"].lower(): enc.attrib["v"]
            for enc in encoding_node.findall("enc")
        }

        access_mechanisms.append({
            "access_instruction": access_instruction.text,
            "access_type": accesstype,
            "encoding": encodings,
        })

    if not access_mechanisms:
        logerr("no access mechanisms present")

    return (is_writable, is_readable, access_mechanisms)


def parse_reg_groups_node(reg_groups_node):
    """Pick a single register group name from the `reg_groups` node.

    Returns None if `reg_groups_node` is None. A single listed group is
    returned as is. Otherwise the groups 'IMPLEMENTATION DEFINED' and
    'Virtualization registers' are set aside and the result is chosen from
    the remaining ones, with 'Virtualization registers' as the fallback.
    """
    # layout of the reg_groups node
    #
    # <reg_groups>
    #   <reg_group>...</reg_group>
    #   ...
    # </reg_groups>
    if reg_groups_node is None:
        logerr("expected a reg_groups node, but was None!")
        return None

    reg_groups = [reg_group.text for reg_group in reg_groups_node.findall("reg_group")]

    if len(reg_groups) == 1:
        return reg_groups[0]

    if not reg_groups:
        logerr(" - expected at least one reg group!")

    discardable = ('IMPLEMENTATION DEFINED', 'Virtualization registers')
    reg_groups_filtered = [g for g in reg_groups if g not in discardable]
    reg_groups_discarded = [g for g in reg_groups if g in discardable]

    if len(reg_groups_filtered) == 1:
        return reg_groups_filtered[0]

    # NOTE(review): with exactly two remaining groups this falls through to
    # the 'Virtualization registers' default -- confirm that `> 2` (rather
    # than `>= 2`) is intended.
    if len(reg_groups_filtered) > 2:
        print("filtered", reg_groups_filtered)
        print("discarded", reg_groups_discarded)
        return reg_groups_filtered[0]

    return 'Virtualization registers'


def parse_reg_purpose_node(reg_purpose_node):
    """Return the text of the first purpose paragraph of the `reg_purpose`
    node, or None (after logging an error) if the node or the paragraph is
    missing."""
    # layout of the reg_purpose node
    #
    # <reg_purpose>
    #   <purpose_text>
    #     <para>...</para>
    #   </purpose_text>
    # </reg_purpose>
    if reg_purpose_node is None:
        logerr(" - expected a reg_purpose node, but was None!")
        return None

    para_node = reg_purpose_node.find("purpose_text/para")
    if para_node is None:
        logerr(' - expected a `reg_purpose` node in the register description. found none.')
        # nothing to read the text from
        return None

    return para_node.text


def parse_register_node(cfg, args, instr, reg_node):
    """Parse one `register` XML node into a dictionary.

    `cfg` and `args` are accepted for interface compatibility and not used
    here; `instr` is the table of known access instructions that is handed
    to `parse_access_mechanisms_node`.

    Returns the register data (keys `id`, `name`, `length`, `is_register`,
    `is_writable`, `is_readable`, `purpose`, `group`, `access`, `fields`), or
    None if the node is not an AArch64 description or if its field sets or
    access mechanisms could not be parsed (an error is logged in each case).
    """
    # the structure of the register node is as follows:
    #
    # <register execution_state="AArch64" is_register="True">
    #   <reg_short_name>SPSR_EL1</reg_short_name>
    #   <reg_long_name>Saved Program Status Register (EL1)</reg_long_name>
    #   <reg_reset_value> .. </reg_reset_value>
    #   <reg_mappings> .. </reg_mappings>
    #   <reg_purpose> .. </reg_purpose>
    #   <reg_groups> .. </reg_groups>
    #   <reg_configuration> .. </reg_configuration>
    #   <reg_attributes> .. </reg_attributes>
    #   <reg_fieldsets> .. </reg_fieldsets>
    #   <access_mechanisms> .. </access_mechanisms>
    #   <arch_variants> .. </arch_variants>
    # </register>

    # extract the `reg_short_name` node text, remove all non-identifier-compatible args
    reg_id = filterident(reg_node.find("reg_short_name").text)

    # extract the long name from the register node
    reg_name = reg_node.find("reg_long_name").text

    if "execution_state" not in reg_node.attrib:
        logerr(" @ Register: {} ({}) has no execution state, skipping.".format(reg_name, reg_id))
        return None

    if reg_node.attrib["execution_state"] != "AArch64":
        logerr(" @ Register: {} ({}) is not AArch64, skipping.".format(reg_name, reg_id))
        return None

    # is this a register description or an instruction?
    is_register = (reg_node.attrib["is_register"] == "True")
    if is_register:
        log(" @ Parsing register description: {} ({})".format(reg_name, reg_id))
    else:
        log(" @ Parsing instruction description: {} ({})".format(reg_name, reg_id))

    # reg_reset_value seems to be never set, skipping it
    #if len (reg_node.find("reg_reset_value").getchildren()) > 0 :
    #    logerr("reg_reset_value was not empty")

    # reg_mappings won't be used later on as they describe mapping onto AArch32
    # if len (reg_node.find("reg_mappings").getchildren()) > 0 :
    #     logerr("reg_mappings was not empty")

    # extract the purpose of this register
    reg_purpose = parse_reg_purpose_node(reg_node.find("reg_purpose"))

    # get the register groups
    reg_group = parse_reg_groups_node(reg_node.find("reg_groups"))

    # reg_configuration won't be used
    # if len (reg_node.find("reg_configuration").getchildren()) > 0 :
    #     logerr("reg_configuration was not empty")

    # reg_attributes won't be used
    # if len (reg_node.find("reg_attributes").getchildren()) > 0 :
    #     logwarn("reg_attributes was not empty")

    # extract the reg_fieldsets
    parsed_fieldsets = parse_reg_fieldsets_node(reg_node.find("reg_fieldsets"), is_register)
    if parsed_fieldsets is None:
        # the error has been logged by the helper; nothing to unpack
        return None
    length, reg_fieldsets = parsed_fieldsets

    # extract the access_mechanisms
    parsed_access_mechanisms = parse_access_mechanisms_node(
        reg_id, reg_node.find("access_mechanisms"), instr)
    if parsed_access_mechanisms is None:
        # the error has been logged by the helper; nothing to unpack
        return None
    reg_writable, reg_readable, access_mechanisms = parsed_access_mechanisms

    # arch_variants won't be used they describe which architecture version e.g. 8.7a supports it
    # if len (reg_node.find("arch_variants").getchildren()) > 0 :
    #     logerr("arch_variants was not empty")

    log(" + Length: {}".format(length))
    log(" + Access: Writable: {}, Readable: {}".format(reg_writable, reg_readable))
    log(" + Register: {}".format("yes" if is_register else "no"))
    log(" + Group: {}".format(reg_group))

    # the parsed register data.
    regdata = {
        "id": reg_id,
        "name": reg_name,
        "length": length,
        "is_register": is_register,
        "is_writable": reg_writable,
        "is_readable": reg_readable,
        "purpose": reg_purpose,
        "group": reg_group,
        "access": access_mechanisms,
        "fields": reg_fieldsets,
    }

    return regdata


# parse an Arm Architecture System Registers description XML file
def parse_xml_file(cfg, args, instr, xmlfile):
    """Parse all `register` nodes of `xmlfile`.

    Each parsed entry is tagged with the base name of the file (key `file`).
    Nodes for which `parse_register_node` returns None are skipped.

    Returns the tuple `(parsed_registers, parsed_sysinstr)`: the entries that
    describe registers and those that describe system instructions.
    """
    log("\nRegister File: {}".format(xmlfile))

    # the parsed register list of the file
    parsed_registers = []
    parsed_sysinstr = []

    # parse the xml file
    root = ET.parse(xmlfile).getroot()

    file_name = os.path.basename(xmlfile)

    # the `register` nodes may be nested anywhere below the root element
    # loop over all XML nodes `register`, and parse them
    for reg_node in root.findall(".//register"):
        res = parse_register_node(cfg, args, instr, reg_node)
        if res is None:
            # skipped description, the reason has been logged already
            continue

        res["file"] = file_name
        if res["is_register"]:
            parsed_registers.append(res)
        else:
            parsed_sysinstr.append(res)

    # return the list of parsed registers
    return (parsed_registers, parsed_sysinstr)


# this function parses the instructions file to obtain the way the instructions are formatted
def parse_xml_instructions(cfg, args, xmlfile):
    """Build the table of access instructions.

    Starts from `cfg["accessinstructions"]` and, if `xmlfile` exists, adds or
    overrides one entry per AArch64 `access_instruction_def` node. `xmlfile`
    must provide `is_file()`; `args` is not used here.

    Note that the dictionary stored in `cfg` is updated in place and returned.
    Each added entry has the keys `name`, `type`, `fmt` and `syntax`.
    """
    # start off with the default access instructions from the config
    accessinstr = cfg["accessinstructions"]

    # if there is no instructions file, take the ones from the configurations
    if not xmlfile.is_file():
        log("Using default access instructions from config file")
        return accessinstr

    log("Parsing instructions file: {}".format(xmlfile))

    # elements / attributes of the instructions file that are read here:
    #
    # <access_instruction_def execution_state="AArch64" id="...">
    #   <name>MRS</name>
    #   <access_type type="..."/>                     (optional)
    #   <access_syntax name="..." [variant="..."]>
    #     <var n="..." [prefix="..."]/>
    #     ...
    #   </access_syntax>
    #   ...
    # </access_instruction_def>

    # variables that are substituted with a plain `{}` placeholder
    placeholder_vars = ("systemreg", "dc_op", "pstatefield", "ic_op", "at_op",
                        "tlbi_op", "cfp_op", "cpp_op", "dvp_op", "op1", "op2")

    # get the root of the XML tree
    root = ET.parse(xmlfile).getroot()

    for ai_node in root.findall(".//access_instruction_def"):
        # we only care about aarch64
        if ai_node.attrib["execution_state"].lower() != "aarch64":
            continue

        # access name should always be present
        access_name = ai_node.find("name").text.lower()

        # the id should always be present
        access_id = ai_node.attrib["id"].lower()

        # the access type might not be there (e.g. for TLBI instructions)
        acc_type_node = ai_node.find("access_type")
        if acc_type_node is not None:
            access_type = acc_type_node.attrib["type"].lower()
        else:
            access_type = None

        # getting the access syntax, there might be multiple variants;
        # the last one without a `variant` attribute wins
        as_fmt = None
        as_syntax = None
        for as_node in ai_node.findall("access_syntax"):
            # skip the no input variants
            if "variant" in as_node.attrib:
                continue

            # get the instruction name
            instr_name = as_node.attrib["name"].lower()
            as_fmt = [instr_name]
            as_syntax = [instr_name]

            # get the variables / parameters
            for var_node in as_node.findall("var"):
                n = var_node.attrib["n"]
                as_syntax.append(n)

                if n == 'Xt':
                    # if n = 'Xt' this is a generic register, this gets replaced by asm '$0'
                    n = "$0"
                elif n == 'imm':
                    n = "#{imm}"
                elif n in placeholder_vars:
                    # if n = 'systemreg' this means we need to replace it with the register name
                    n = "{}"
                elif n in ("Cn", "Cm"):
                    n = var_node.attrib["prefix"] + "{}"
                else:
                    # unknown variables are passed through unchanged
                    logwarn("Unhandled access method: {}".format(n))

                as_fmt.append(n)

        accessinstr[access_id] = {
            "name": access_name,
            "type": access_type,
            "fmt": as_fmt,
            "syntax": as_syntax,
        }

    return accessinstr