"""Helper functions to parse C code in heavily constrained scenarios. Currently supported functionality: * read_function_declarations: read function declarations from a header file. """ # Copyright The Mbed TLS Contributors # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later ### WARNING: the code in this file has not been extensively reviewed yet. ### We do not think it is harmful, but it may be below our normal standards ### for robustness and maintainability. import re from typing import Dict, Iterable, Iterator, List, Optional, Tuple class ArgumentInfo: """Information about an argument to an API function.""" #pylint: disable=too-few-public-methods _KEYWORDS = [ 'const', 'register', 'restrict', 'int', 'long', 'short', 'signed', 'unsigned', ] _DECLARATION_RE = re.compile( r'(?P\w[\w\s*]*?)\s*' + r'(?!(?:' + r'|'.join(_KEYWORDS) + r'))(?P\b\w+\b)?' + r'\s*(?P\[[^][]*\])?\Z', re.A | re.S) @classmethod def normalize_type(cls, typ: str) -> str: """Normalize whitespace in a type.""" typ = re.sub(r'\s+', r' ', typ) typ = re.sub(r'\s*\*', r' *', typ) return typ def __init__(self, decl: str) -> None: self.decl = decl.strip() m = self._DECLARATION_RE.match(self.decl) if not m: raise ValueError(self.decl) self.type = self.normalize_type(m.group('type')) #type: str self.name = m.group('name') #type: Optional[str] self.suffix = m.group('suffix') if m.group('suffix') else '' #type: str class FunctionInfo: """Information about an API function.""" #pylint: disable=too-few-public-methods # Regex matching the declaration of a function that returns void. VOID_RE = re.compile(r'\s*\bvoid\s*\Z', re.A) def __init__(self, #pylint: disable=too-many-arguments filename: str, line_number: int, qualifiers: Iterable[str], return_type: str, name: str, arguments: List[str]) -> None: self.filename = filename self.line_number = line_number self.qualifiers = frozenset(qualifiers) self.return_type = return_type self.name = name self.arguments = [ArgumentInfo(arg) for arg in arguments] def returns_void(self) -> bool: """Whether the function returns void.""" return bool(self.VOID_RE.search(self.return_type)) # Match one C comment. # Note that we match both comment types, so things like // in a /*...*/ # comment are handled correctly. _C_COMMENT_RE = re.compile(r'//(?:[^\n]|\\\n)*|/\*.*?\*/', re.S) _NOT_NEWLINES_RE = re.compile(r'[^\n]+') def read_logical_lines(filename: str) -> Iterator[Tuple[int, str]]: """Read logical lines from a file. Logical lines are one or more physical line, with balanced parentheses. """ with open(filename, encoding='utf-8') as inp: content = inp.read() # Strip comments, but keep newlines for line numbering content = re.sub(_C_COMMENT_RE, lambda m: re.sub(_NOT_NEWLINES_RE, "", m.group(0)), content) lines = enumerate(content.splitlines(), 1) for line_number, line in lines: # Read a logical line, containing balanced parentheses. # We assume that parentheses are balanced (this should be ok # since comments have been stripped), otherwise there will be # a gigantic logical line at the end. paren_level = line.count('(') - line.count(')') while paren_level > 0: _, more = next(lines) #pylint: disable=stop-iteration-return paren_level += more.count('(') - more.count(')') line += '\n' + more yield line_number, line _C_FUNCTION_DECLARATION_RE = re.compile( r'(?P(?:(?:extern|inline|static)\b\s*)*)' r'(?P\w[\w\s*]*?)\s*' + r'\b(?P\w+)' + r'\s*\((?P.*)\)\s*;', re.A | re.S) def read_function_declarations(functions: Dict[str, FunctionInfo], filename: str) -> None: """Collect function declarations from a C header file.""" for line_number, line in read_logical_lines(filename): m = _C_FUNCTION_DECLARATION_RE.match(line) if not m: continue qualifiers = m.group('qualifiers').split() return_type = m.group('return_type') name = m.group('name') arguments = m.group('arguments').split(',') if len(arguments) == 1 and re.match(FunctionInfo.VOID_RE, arguments[0]): arguments = [] # Note: we replace any existing declaration for the same name. functions[name] = FunctionInfo(filename, line_number, qualifiers, return_type, name, arguments)