#!/usr/bin/env python3
#
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#

import argparse
import os
from pathlib import Path
import re
import shlex
import subprocess
import sys
from typing import Optional, Union

"""
This is a tool to convert addresses, which are from a call-stack dump generated
by iwasm, into line info for a wasm file.

When a wasm file is compiled with debug info, it is possible to transfer the
address to line info.

For example, there is a call-stack dump:

```
#00: 0x0a04 - $f18
#01: 0x08e4 - $f11
#02: 0x096f - $f12
#03: 0x01aa - _start
```

- store the call-stack dump into a file, e.g. call_stack.txt
- run the following command to convert the address into line info:
  ```
  $ cd test-tools/addr2line
  $ python3 addr2line.py --wasi-sdk WASI_SDK_DIR --wabt WABT_DIR --wasm-file WASM_FILE call_stack.txt
  ```
  The script will use *wasm-objdump* in wabt to transform address, then use
  *llvm-dwarfdump* to lookup the line info for each address in the call-stack
  dump.
- if addresses are not available in the stack trace (i.e. iwasm <= 1.3.2) or
  iwasm is used in fast interpreter mode, run the following command to convert
  the function index into line info (passing the `--no-addr` option):
  ```
  $ python3 addr2line.py --wasi-sdk WASI_SDK_DIR --wabt WABT_DIR --wasm-file WASM_FILE call_stack.txt --no-addr
  ```
  The script will use *wasm-objdump* in wabt to get the function names
  corresponding to function indexes, then use *llvm-dwarfdump* to lookup the
  line info for each function index in the call-stack dump.

If the wasm file has a sourceMappingURL section, *emsymbolizer* from the
directory given with `--emsdk` is used instead of *llvm-dwarfdump* for
address lookups.
"""

# "Code start=0x0000017c end=0x00004382 (size=0x00004206) count: 47"
_CODE_SECTION_RE = re.compile(r"Code\s+start=0x([0-9a-fA-F]+)")
# "Line info: file 'main.c', line 12, column 3"
_LINE_INFO_RE = re.compile(r"Line info: file '(.+)', line ([0-9]+), column ([0-9]+)")
# "path/to/file.c:12:3"
_SOURCE_LOCATION_RE = re.compile(r"(.*):(\d+):(\d+)")
# "#00: 0x0a04 - $f18"
_FRAME_WITH_ADDR_RE = re.compile(r"#([0-9]+): 0x([0-9a-f]+) - (\S+)")
# "#00 $f18"
_FRAME_WITHOUT_ADDR_RE = re.compile(r"#([0-9]+) (\S+)")
# " - func[12] sig=3 <name>"; anchoring on the first "<" keeps names that
# themselves contain "<" or ">" intact.
_FUNCTION_ENTRY_RE = re.compile(r"func\[([0-9]+)\][^<]*<(.*)>")


def _run_tool(cmd: list, *, check: bool, cwd: Optional[Path] = None) -> list[str]:
    """
    Run an external tool and return its standard output as a list of lines.

    The command is passed as an argument list (no shell, no re-splitting), so
    paths and symbol names containing spaces, quotes or backslashes reach the
    tool unchanged. When *check* is true a non-zero exit status raises
    subprocess.CalledProcessError.
    """
    completed = subprocess.run(
        [str(part) for part in cmd],
        check=check,
        capture_output=True,
        text=True,
        cwd=cwd,
    )
    # Text mode already normalizes newlines, so split on them directly rather
    # than on os.linesep.
    return completed.stdout.splitlines()


def locate_sourceMappingURL_section(wasm_objdump: Path, wasm_file: Path) -> bool:
    """
    Figure out if the wasm file has a sourceMappingURL section.

    Runs `wasm_objdump -h wasm_file` and reports whether any output line
    mentions "sourceMappingURL".
    """
    headers = _run_tool([wasm_objdump, "-h", wasm_file], check=True)
    return any("sourceMappingURL" in line for line in headers)


def get_code_section_start(wasm_objdump: Path, wasm_file: Path) -> int:
    """
    Find the start offset of Code section in a wasm file.

    if the code section header looks like:
      Code start=0x0000017c end=0x00004382 (size=0x00004206) count: 47

    the start offset is 0x0000017c

    Returns -1 when no such header line is found.
    """
    for line in _run_tool([wasm_objdump, "-h", wasm_file], check=True):
        # Only accept a line that really is the Code header; a bare substring
        # test would also hit unrelated lines that merely contain "Code".
        m = _CODE_SECTION_RE.match(line.strip())
        if m:
            return int(m.group(1), 16)

    return -1


def get_line_info_from_function_addr_dwarf(
    dwarf_dump: Path, wasm_file: Path, offset: int
) -> tuple[str, str, Union[str, int], Union[str, int]]:
    """
    Find the location info of a given offset in a wasm file.

    Runs `dwarf_dump --lookup=offset wasm_file` and scans its output for
    DW_AT_name, DW_AT_decl_file and "Line info" lines; when a tag appears
    several times the last occurrence wins.

    Returns (function name, file, line, column). Values that cannot be found
    stay at their defaults: "", "unknown", "?" and "?". Line and column are
    ints when they were parsed from a "Line info" line.
    """
    outputs = _run_tool([dwarf_dump, f"--lookup={offset}", wasm_file], check=False)

    function_name, function_file = "", "unknown"
    function_line, function_column = "?", "?"

    for line in outputs:
        line = line.strip()

        if "DW_AT_name" in line:
            function_name = get_dwarf_tag_value("DW_AT_name", line, function_name)

        if "DW_AT_decl_file" in line:
            function_file = get_dwarf_tag_value(
                "DW_AT_decl_file", line, function_file
            )

        if "Line info" in line:
            try:
                _, function_line, function_column = parse_line_info(line)
            except ValueError:
                # Unexpected "Line info" layout: keep what we have so far.
                pass

    return (function_name, function_file, function_line, function_column)


def get_dwarf_tag_value(tag: str, line: str, default: str = "") -> str:
    """
    Extract the value of a DWARF attribute from one line of dwarfdump output.

    The line must start with the tag, followed by either a quoted string in
    parentheses, e.g. `DW_AT_name ("foo")`, or a decimal integer in
    parentheses, e.g. `DW_AT_decl_line (12)`. The value is returned as a
    string in both cases. If the line has neither form, *default* is returned.
    """
    escaped_tag = re.escape(tag)

    # Try extracting value as string
    m = re.match(rf"{escaped_tag}\s+\(\"(.*)\"\)", line)
    if m:
        return m.group(1)

    # Try extracting value as integer
    m = re.match(rf"{escaped_tag}\s+\((\d+)\)", line)
    if m:
        return m.group(1)

    return default


def get_line_info_from_function_name_dwarf(
    dwarf_dump: Path, wasm_file: Path, function_name: str
) -> tuple[str, str, str]:
    """
    Find the location info of a given function in a wasm file.

    Runs `dwarf_dump --name=function_name wasm_file` and scans its output for
    DW_AT_name, DW_AT_decl_file and DW_AT_decl_line; the last occurrence of
    each wins.

    Returns (function name, file, line); values that cannot be found are "",
    "unknown" and "?".
    """
    outputs = _run_tool(
        [dwarf_dump, f"--name={function_name}", wasm_file], check=False
    )

    function_name, function_file = "", "unknown"
    function_line = "?"

    for line in outputs:
        line = line.strip()

        if "DW_AT_name" in line:
            function_name = get_dwarf_tag_value("DW_AT_name", line, function_name)

        if "DW_AT_decl_file" in line:
            function_file = get_dwarf_tag_value(
                "DW_AT_decl_file", line, function_file
            )

        if "DW_AT_decl_line" in line:
            function_line = get_dwarf_tag_value(
                "DW_AT_decl_line", line, function_line
            )

    return (function_name, function_file, function_line)


def get_line_info_from_function_addr_sourcemapping(
    emsymbolizer: Path, wasm_file: Path, offset: int
) -> tuple[str, str, str, str]:
    """
    Find the location info of a given offset in a wasm file which is compiled
    with emcc.

    {emsymbolizer} {wasm_file} {offset of file}

    there usually are two lines:
    ??
    relative path to source file:line:column

    The source map is looked up next to the wasm file, under the wasm file's
    name with ".map" appended.

    Returns (function name, file, line, column); values that cannot be found
    are "", "unknown", "?" and "?".
    """
    debug_info_source = wasm_file.with_name(f"{wasm_file.name}.map")
    outputs = _run_tool(
        [emsymbolizer, "-t", "code", "-f", debug_info_source, wasm_file, offset],
        check=False,
        cwd=Path.cwd(),
    )

    function_name, function_file = "", "unknown"
    function_line, function_column = "?", "?"

    for line in outputs:
        line = line.strip()
        if not line:
            continue

        m = _SOURCE_LOCATION_RE.match(line)
        if m:
            function_file, function_line, function_column = m.groups()
            continue

        # it's always ??, not sure about that
        if line != "??":
            function_name = line

    return (function_name, function_file, function_line, function_column)


def parse_line_info(line_info: str) -> tuple[str, int, int]:
    """
    line_info -> (file, line, column)

    Expects text containing
      Line info: file 'FILE', line LINE, column COLUMN
    and returns the file as a string and line/column as ints.

    Raises ValueError if the text does not contain that pattern.
    """
    m = _LINE_INFO_RE.search(line_info)
    if m is None:
        raise ValueError(f"unrecognized line info: {line_info!r}")

    file, line, column = m.groups()
    return (file, int(line), int(column))


def parse_call_stack_line(line: str) -> Optional[tuple[str, Optional[str], str]]:
    """
    Split one call-stack line into (frame number, address, function).

    The address is returned as hex digits without the "0x" prefix, or None
    when the line carries no address. None is returned for a line that
    matches neither format.

    New format (WAMR > 1.3.2):
        #00: 0x0a04 - $f18   => (00, 0a04, $f18)
    Old format:
        #00 $f18             => (00, None, $f18)
    Text format (-DWAMR_BUILD_LOAD_CUSTOM_SECTION=1 -DWAMR_BUILD_CUSTOM_NAME_SECTION=1):
        #02: 0x0200 - a      => (02, 0200, a)
    _start (always):
        #05: 0x011f - _start => (05, 011f, _start)
    """
    # New format and Text format and _start
    m = _FRAME_WITH_ADDR_RE.match(line)
    if m is not None:
        return m.groups()

    # Old format
    m = _FRAME_WITHOUT_ADDR_RE.match(line)
    if m is not None:
        return (m.group(1), None, m.group(2))

    return None


def parse_module_functions(wasm_objdump: Path, wasm_file: Path) -> dict[str, str]:
    """
    Build a mapping from function index to function name.

    Runs `wasm_objdump -x wasm_file --section=function` and reads every
    output line of the form `func[INDEX] ... <NAME>`. Indexes are kept as
    the decimal strings printed by the tool. Lines without a `<NAME>` part
    are skipped.
    """
    function_index_to_name = {}

    outputs = _run_tool(
        [wasm_objdump, "-x", wasm_file, "--section=function"], check=True
    )

    for line in outputs:
        m = _FUNCTION_ENTRY_RE.search(line)
        if m is None:
            continue

        index, name = m.groups()
        function_index_to_name[index] = name

    return function_index_to_name


def demangle(cxxfilt: Path, function_name: str) -> str:
    """
    Demangle a function name by running `cxxfilt -n function_name`.

    Returns the tool's output with surrounding whitespace removed. An empty
    name is returned unchanged without running the tool. Raises
    subprocess.CalledProcessError if the tool exits with a non-zero status.
    """
    if not function_name:
        return function_name

    return "\n".join(
        _run_tool([cxxfilt, "-n", function_name], check=True)
    ).strip()


def main() -> int:
    """
    Command-line entry point.

    Reads the call-stack file line by line and prints, for every frame that
    can be resolved, the demangled function name followed by its source
    location. Lines that are not call-stack frames are echoed unchanged.

    Returns 0 on success and -1 when a required tool or input is missing.
    """
    parser = argparse.ArgumentParser(description="addr2line for wasm")
    # These three are used unconditionally below, so make argparse report a
    # missing one instead of failing later on a None value.
    parser.add_argument(
        "--wasi-sdk", type=Path, required=True, help="path to wasi-sdk"
    )
    parser.add_argument("--wabt", type=Path, required=True, help="path to wabt")
    parser.add_argument(
        "--wasm-file", type=Path, required=True, help="path to wasm file"
    )
    parser.add_argument("call_stack_file", type=Path, help="path to a call stack file")
    parser.add_argument(
        "--no-addr",
        action="store_true",
        help="use call stack without addresses or from fast interpreter mode",
    )
    parser.add_argument("--emsdk", type=Path, help="path to emsdk")

    args = parser.parse_args()

    wasm_objdump = args.wabt.joinpath("bin/wasm-objdump")
    llvm_dwarf_dump = args.wasi_sdk.joinpath("bin/llvm-dwarfdump")
    llvm_cxxfilt = args.wasi_sdk.joinpath("bin/llvm-cxxfilt")

    # Explicit checks rather than assert: asserts vanish under `python -O`.
    for required in (wasm_objdump, llvm_dwarf_dump, llvm_cxxfilt, args.wasm_file):
        if not required.exists():
            print(f"{required} does not exist", file=sys.stderr)
            return -1

    emcc_production = locate_sourceMappingURL_section(wasm_objdump, args.wasm_file)
    emsymbolizer = None
    if emcc_production:
        if args.emsdk is None:
            print("Please provide the path to emsdk via --emsdk")
            return -1

        emsymbolizer = args.emsdk.joinpath("upstream/emscripten/emsymbolizer")
        if not emsymbolizer.exists():
            print(f"{emsymbolizer} does not exist", file=sys.stderr)
            return -1

    code_section_start = get_code_section_start(wasm_objdump, args.wasm_file)
    if code_section_start == -1:
        print(f"no Code section found in {args.wasm_file}", file=sys.stderr)
        return -1

    function_index_to_name = parse_module_functions(wasm_objdump, args.wasm_file)

    if not args.call_stack_file.exists():
        print(f"{args.call_stack_file} does not exist", file=sys.stderr)
        return -1

    with open(args.call_stack_file, "rt", encoding="ascii") as f:
        for i, line in enumerate(f):
            line = line.strip()
            if not line:
                continue

            splitted = parse_call_stack_line(line)
            if splitted is None:
                print(f"{line}")
                continue

            _, offset, index = splitted
            if args.no_addr:
                # FIXME: w/ emcc production
                if not index.startswith("$f"):  # E.g. _start or Text format
                    print(f"{i}: {index}")
                    continue
                index = index[2:]

                if index not in function_index_to_name:
                    print(f"{i}: {line}")
                    continue

                if not emcc_production:
                    _, function_file, function_line = (
                        get_line_info_from_function_name_dwarf(
                            llvm_dwarf_dump,
                            args.wasm_file,
                            function_index_to_name[index],
                        )
                    )
                else:
                    function_file, function_line = "unknown", "?"

                function_name = demangle(llvm_cxxfilt, function_index_to_name[index])
                print(f"{i}: {function_name}")
                print(f"\tat {function_file}:{function_line}")
            else:
                if offset is None:
                    # Old-format frame: there is no address to look up.
                    print(
                        f"line {i}: no address in frame, try --no-addr",
                        file=sys.stderr,
                    )
                    print(f"{i}: {line}")
                    continue

                offset = int(offset, 16)
                # match the algorithm in wasm_interp_create_call_stack()
                # either a *offset* to *code* section start
                # or a *offset* in a file
                if offset <= code_section_start:
                    print(
                        f"line {i}: address 0x{offset:x} is not past the code "
                        f"section start 0x{code_section_start:x}",
                        file=sys.stderr,
                    )
                    print(f"{i}: {line}")
                    continue
                offset = offset - code_section_start

                if emcc_production:
                    function_name, function_file, function_line, function_column = (
                        get_line_info_from_function_addr_sourcemapping(
                            emsymbolizer, args.wasm_file, offset
                        )
                    )
                else:
                    function_name, function_file, function_line, function_column = (
                        get_line_info_from_function_addr_dwarf(
                            llvm_dwarf_dump, args.wasm_file, offset
                        )
                    )

                # if can't parse function_name, use name section or the raw index
                if function_name == "":
                    if index.startswith("$f"):
                        function_name = function_index_to_name.get(index[2:], index)
                    else:
                        function_name = index

                function_name = demangle(llvm_cxxfilt, function_name)

                print(f"{i}: {function_name}")
                print(f"\tat {function_file}:{function_line}:{function_column}")

    return 0


if __name__ == "__main__":
    sys.exit(main())