#!/usr/bin/env python3
"""Amalgamate the Zydis headers and sources into one `.h` / `.c` pair.

Starting from `Zydis/Zydis.h`, every `#include <Zy...>` / `#include
<Generated...>` directive is replaced by the contents of the header it names,
each header being inlined at most once. The same is then done for all `.c`
files below `src`. The results are written to `amalgamated-dist/Zydis.h` and
`amalgamated-dist/Zydis.c` below the repository root.
"""

from pathlib import Path
from typing import List, Set
from shutil import rmtree
import os
import re


ZYDIS_ROOT = Path(__file__).resolve().parent.parent
PUBLIC_INCLUDE_PATHS = [
    ZYDIS_ROOT / 'include',
    ZYDIS_ROOT / 'dependencies' / 'zycore' / 'include',
]
INTERNAL_INCLUDE_PATHS = [ZYDIS_ROOT / 'src']
# Only angle-bracket includes whose path starts with `Zy` or `Generated` are
# inlined; every other include line is copied through untouched.
INCLUDE_REGEXP = re.compile(r'^#\s*include\s*<((?:Zy|Generated).*)>\s*$')
# Anchored at the end because `find_files` uses `Pattern.match`, which only
# anchors at the start: without `$`, names such as `foo.cpp` would match too.
SOURCE_FILE_REGEXP = re.compile(r'[\w-]+\.c$')
OUTPUT_DIR = ZYDIS_ROOT / 'amalgamated-dist'
FILE_HEADER = ['// DO NOT EDIT. This file is auto-generated by `amalgamate.py`.', '']


def _read_lines(path: Path) -> List[str]:
    """Return the lines of `path` with trailing whitespace stripped."""
    with path.open(encoding='utf-8') as f:
        return [line.rstrip() for line in f]


def _write_lines(path: Path, lines: List[str]) -> None:
    """Write `lines` to `path`, newline-separated and newline-terminated."""
    with path.open('w', encoding='utf-8') as f:
        # The trailing newline keeps compilers from complaining about a
        # missing newline at the end of the generated file.
        f.write('\n'.join(lines) + '\n')


def find_files(
    pattern: re.Pattern,
    root_dir: Path,
) -> List[Path]:
    """Recursively collect files below `root_dir` whose name matches.

    `pattern` is applied to the bare file name with `Pattern.match`, i.e. it
    is anchored at the start of the name only.

    Returns the absolute paths of all matches in sorted order.
    """
    root_dir = root_dir.resolve()
    paths = []
    for root, _, files in os.walk(root_dir):
        paths.extend(Path(root) / name for name in files if pattern.match(name))
    return sorted(paths)


def find_include_path(
    include: str,
    search_paths: List[Path],
) -> Path:
    """Resolve `include` against `search_paths`, first hit wins.

    Returns the absolute path of the first existing candidate.

    Raises:
        FileNotFoundError: if no search path contains `include`.
    """
    for search_path in search_paths:
        path = search_path / include
        if path.exists():
            return path.absolute()
    raise FileNotFoundError(f'can\'t find header: {include}')


def merge_headers(
    *,
    header: str,
    search_paths: List[Path],
    covered_headers: Set[str],
    stack: List[str],
) -> List[str]:
    """Return the lines of `header` with all matching includes inlined.

    Args:
        header: Include path as written between the angle brackets.
        search_paths: Directories searched, in order, for `header` and for
            every header it includes.
        covered_headers: Include paths that were already emitted. Updated in
            place; a header found in this set yields an empty list.
        stack: Include paths leading to `header`, outermost first. Only used
            for the comment banner that precedes the inlined contents.

    Raises:
        FileNotFoundError: if `header` or a header it includes can't be found.
    """
    # Resolve first so that an unresolvable include is always reported, even
    # when the header was covered before.
    path = find_include_path(header, search_paths)

    if header in covered_headers:
        return []

    print(f'Processing header "{header}"')
    # Mark as covered before recursing so include cycles terminate.
    covered_headers.add(header)

    lines = _read_lines(path)

    # Print the header we emit next & the include stack (if non-root).
    include_stack = []
    if stack:
        include_stack = [
            '//',
            '// Include stack:',
            *(f'//   - {x}' for x in stack)
        ]

    filtered = [
        '',
        '//',
        f'// Header: {header}',
        *include_stack,
        '//',
        '',
    ]

    # Copy over lines and recursively inline all headers.
    for line in lines:
        match = INCLUDE_REGEXP.match(line)
        if not match:
            filtered.append(line)
            continue

        # Recurse into includes.
        filtered += merge_headers(
            header=match.group(1),
            search_paths=search_paths,
            covered_headers=covered_headers,
            stack=stack + [header],
        )

    return filtered


def merge_sources(*, source_dir: Path, covered_headers: Set[str]) -> List[str]:
    """Return the concatenated lines of all `.c` files below `source_dir`.

    Includes matching `INCLUDE_REGEXP` are dropped when already present in
    `covered_headers` and inlined via `merge_headers` otherwise (searching the
    public and then the internal include paths). `covered_headers` is updated
    in place.

    Raises:
        FileNotFoundError: if an include that has to be inlined can't be found.
    """
    output = [
        # The amalgamated header is written as `Zydis.h` right next to the
        # amalgamated source file, so that is what the source has to include.
        '#include <Zydis.h>',
        '',
    ]

    for source_file in find_files(SOURCE_FILE_REGEXP, source_dir):
        print(f'Processing source file "{source_file}"')

        # Print some comments to show where the code is from.
        output += [
            '',
            '//',
            f'// Source file: {source_file}',
            '//',
            '',
        ]

        # Walk source file's lines. `find_files` already returns absolute
        # paths, so the file can be opened directly.
        for line in _read_lines(source_file):
            # Emit non-includes as-is.
            match = INCLUDE_REGEXP.match(line)
            if not match:
                output.append(line)
                continue

            path = match.group(1)
            if path in covered_headers:
                continue

            if 'Internal' not in path and 'Generated' not in path:
                print(
                    f'WARN: Including header that looks like it is public '
                    f'and should thus already be covered by `Zydis.h` '
                    f'during processing of source files: {path}'
                )

            print(f'Processing internal header "{path}"')
            output += merge_headers(
                header=path,
                search_paths=PUBLIC_INCLUDE_PATHS + INTERNAL_INCLUDE_PATHS,
                covered_headers=covered_headers,
                stack=[],
            )

    return output


def main():
    """Regenerate `OUTPUT_DIR` with the amalgamated `Zydis.h` and `Zydis.c`."""
    if OUTPUT_DIR.exists():
        print('Output directory exists. Deleting.')
        rmtree(OUTPUT_DIR)

    OUTPUT_DIR.mkdir()

    # Shared between both passes so that headers already inlined into
    # `Zydis.h` are not emitted a second time into `Zydis.c`.
    covered_headers = set()
    _write_lines(
        OUTPUT_DIR / 'Zydis.h',
        FILE_HEADER + merge_headers(
            header='Zydis/Zydis.h',
            search_paths=PUBLIC_INCLUDE_PATHS,
            covered_headers=covered_headers,
            stack=[],
        ),
    )

    _write_lines(
        OUTPUT_DIR / 'Zydis.c',
        FILE_HEADER + merge_sources(
            source_dir=ZYDIS_ROOT / 'src',
            covered_headers=covered_headers,
        ),
    )


if __name__ == '__main__':
    main()