#!/usr/bin/env python3
# Copyright 2021 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Runs all matched benchmark suites on an Android device.

This script probes the Android phone via `adb` and uses the device information
to filter and run suitable benchmarks and optionally captures Tracy traces on
the Android phone.

It expects that `adb` is installed, and that IREE tools cross-compiled towards
Android are available. To capture traces, another set of tracing-enabled IREE
tools and the Tracy `capture` tool should be cross-compiled towards Android.

Example usages:

  # Without trace generation
  python3 run_benchmarks.py \
    --normal_benchmark_tool_dir=/path/to/normal/android/target/tools/dir \
    /path/to/host/build/dir

  # With trace generation
  python3 run_benchmarks.py \
    --normal_benchmark_tool_dir=/path/to/normal/android/target/tools/dir \
    --traced_benchmark_tool_dir=/path/to/tracy/android/target/tools/dir \
    --trace_capture_tool=/path/to/host/build/tracy/capture \
    /path/to/host/build/dir
"""

import sys
import pathlib

# Add build_tools python dir to the search path.
sys.path.insert(0, str(pathlib.Path(__file__).parent.with_name("python")))

import atexit
import json
import requests
import shutil
import socket
import struct
import subprocess
import tarfile
import time
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple

from common import benchmark_suite as benchmark_suite_module
from common.benchmark_config import BenchmarkConfig
from common.benchmark_driver import BenchmarkDriver
from common import benchmark_definition
from common.benchmark_definition import (
    execute_cmd,
    execute_cmd_and_get_stdout,
    execute_cmd_and_get_output,
    get_git_commit_hash,
    get_iree_benchmark_module_arguments,
    wait_for_iree_benchmark_module_start,
    parse_iree_benchmark_metrics,
)
from common.benchmark_suite import BenchmarkCase, BenchmarkSuite
from common.android_device_utils import (
    get_android_device_model,
    get_android_device_info,
    get_android_gpu_name,
)
import common.common_arguments
from e2e_test_artifacts import iree_artifacts
from e2e_test_framework.definitions import iree_definitions

# Root directory to perform benchmarks in on the Android device.
ANDROID_TMPDIR = pathlib.PurePosixPath("/data/local/tmp/iree-benchmarks")
ADB_SERVER_ADDR = ("localhost", 5037)
ANDROID_NORMAL_TOOL_DIR = ANDROID_TMPDIR / "normal-tools"
ANDROID_TRACED_TOOL_DIR = ANDROID_TMPDIR / "traced-tools"
ANDROID_TRACY_PORT = 8086

# All binary integers in the ADB sync protocol are little-endian 32-bit values,
# so the byte order is pinned instead of relying on the host's native order.
_ADB_SYNC_U32 = struct.Struct("<I")
# 64k bytes is the max chunk size for adb sync DATA packets.
_ADB_SYNC_MAX_CHUNK_SIZE = 64 * 1024


def _build_adb_shell_cmd(
    cmd_args: Sequence[Any], cwd: pathlib.PurePosixPath
) -> List[Any]:
    """Builds the `adb shell` argv that switches to `cwd` then runs `cmd_args`."""
    return ["adb", "shell", "cd", cwd, "&&", *cmd_args]


def adb_push_file(
    source: pathlib.Path,
    dest: pathlib.PurePosixPath,
    verbose: bool = False,
) -> pathlib.PurePosixPath:
    """Pushes content onto the Android device.

    Args:
      source: the path to the source file.
      dest: the full dest path on the device.
      verbose: passed through to the command executor.

    Returns:
      The full path to the content on the Android device.
    """
    # When the output is a TTY, keep the default progress info output.
    # In other cases, redirect progress info to null to avoid bloating log files.
    stdout_redirect = None if sys.stdout.isatty() else subprocess.DEVNULL
    execute_cmd(
        ["adb", "push", source.resolve(), dest],
        verbose=verbose,
        stdout=stdout_redirect,
    )
    return dest


def adb_execute_and_get_output(
    cmd_args: Sequence[str],
    cwd: pathlib.PurePosixPath = ANDROID_TMPDIR,
    verbose: bool = False,
) -> Tuple[str, str]:
    """Executes command with adb shell.

    Switches to `cwd` before executing. Waits for completion and returns the
    command stdout and stderr.

    Args:
      cmd_args: a list containing the command to execute and its parameters
      cwd: the directory to execute the command in
      verbose: passed through to the command executor.

    Returns:
      Strings for stdout and stderr.
    """
    cmd = _build_adb_shell_cmd(cmd_args, cwd)
    return execute_cmd_and_get_output(cmd, verbose=verbose)


def adb_execute(
    cmd_args: Sequence[str],
    cwd: pathlib.PurePosixPath = ANDROID_TMPDIR,
    verbose: bool = False,
) -> subprocess.CompletedProcess:
    """Executes command with adb shell.

    Switches to `cwd` before executing. Waits for completion. Output is streamed
    to the terminal.

    Args:
      cmd_args: a list containing the command to execute and its parameters
      cwd: the directory to execute the command in
      verbose: passed through to the command executor.

    Returns:
      The completed process.
    """
    cmd = _build_adb_shell_cmd(cmd_args, cwd)
    return execute_cmd(cmd, verbose=verbose)


def is_magisk_su() -> bool:
    """Returns true if the Android device has a Magisk SU binary."""
    stdout, _ = adb_execute_and_get_output(["su", "--help"])
    return "MagiskSU" in stdout


def adb_execute_as_root(cmd_args: Sequence[Any]) -> subprocess.CompletedProcess:
    """Executes the given command as root."""
    # Magisk's `su` takes the command via `-c`; otherwise `su root <cmd>` is used.
    cmd = ["su", "-c" if is_magisk_su() else "root"]
    cmd.extend(cmd_args)
    return adb_execute(cmd)


def adb_start_cmd(
    cmd_args: Sequence[str],
    cwd: pathlib.PurePosixPath = ANDROID_TMPDIR,
    verbose: bool = False,
) -> subprocess.Popen:
    """Executes command with adb shell in a directory and returns the handle
    without waiting for completion.

    Args:
      cmd_args: a list containing the command to execute and its parameters
      cwd: the directory to execute the command in
      verbose: print the full command before starting it.

    Returns:
      A Popen object for the started command. Its stdout is a text-mode pipe
      that the caller is responsible for draining.
    """
    cmd = _build_adb_shell_cmd(cmd_args, cwd)
    if verbose:
        print(f"cmd: {cmd}")
    return subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True)


def adb_path_exists(
    android_path: pathlib.PurePosixPath, verbose: bool = False
) -> bool:
    """Run stat to check if the path exists."""
    proc = adb_start_cmd(["stat", str(android_path)], verbose=verbose)
    # communicate() drains and closes the stdout pipe before reaping the
    # process, so the pipe is not leaked and can never fill up and block.
    proc.communicate()
    return proc.returncode == 0


def _adb_wait_ack_ok(sock: socket.socket) -> None:
    """Reads a 4-byte status from the adb server and raises unless it is OKAY."""
    buf = bytearray()
    while len(buf) < 4:
        data = sock.recv(4 - len(buf))
        if not data:
            # Connection closed early; the short buffer fails the check below.
            break
        buf += data
    # Compare raw bytes so that a garbage reply is reported as a communication
    # error rather than as a UnicodeDecodeError.
    if bytes(buf) != b"OKAY":
        raise RuntimeError(f"ADB communication error: {buf}")


def _adb_sync_send_stream(
    dest: pathlib.PurePosixPath, chunks: Iterable[bytes]
) -> None:
    """Streams `chunks` into the file `dest` on the device via ADB sync mode.

    This implements the small part of the ADB sync protocol needed to stream
    file chunks to the device, since the adb client tool doesn't support it.

    Alternatively we can use third-party library such as
    https://github.com/JeffLIrion/adb_shell. But the protocol we need is
    simple and fairly stable. This part can be replaced with other solutions
    if needed.

    To understand the details of the protocol, see
    https://cs.android.com/android/_/android/platform/packages/modules/adb/+/93c8e3c26e4de3a2b767a2394200bc0721bb1e24:OVERVIEW.TXT

    Args:
      dest: the full dest path on the device.
      chunks: byte chunks of the file content, each at most 64k bytes.

    Raises:
      RuntimeError: if the adb server does not acknowledge a request.
    """
    pack_u32 = _ADB_SYNC_U32.pack
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect(ADB_SERVER_ADDR)
        # Connect to any device (the first 4 hexadecimals is the following text
        # command length).
        sock.sendall(b"0012host:transport-any")
        _adb_wait_ack_ok(sock)
        # Switch to sync mode.
        sock.sendall(b"0005sync:")
        _adb_wait_ack_ok(sock)
        # Send the dest file path and file permissions 0644 (rw-r--r--). The
        # mode is formatted as a decimal number.
        file_attr = f"{dest},{0o644}".encode("utf-8")
        sock.sendall(b"SEND" + pack_u32(len(file_attr)) + file_attr)
        # Stream the file chunks.
        for data in chunks:
            if not data:
                # Nothing to transfer for an empty chunk.
                continue
            sock.sendall(b"DATA" + pack_u32(len(data)) + data)
        # End the file stream and set the creation time.
        sock.sendall(b"DONE" + pack_u32(int(time.time())))
        _adb_wait_ack_ok(sock)


def adb_fetch_and_push_file(
    source: benchmark_definition.ResourceLocation,
    dest: pathlib.PurePosixPath,
    verbose: bool = False,
) -> pathlib.PurePosixPath:
    """Fetch file from the path/URL and stream to the device.

    In the case of fetching, this method avoids the temporary file on the host
    and reduces the overhead when the file is large.

    Args:
      source: path/URL to fetch the file.
      dest: the full dest path on the device.
      verbose: output verbose message.

    Returns:
      File path on the device.

    Raises:
      RuntimeError: if the URL can't be fetched or the adb server rejects the
        transfer.
    """
    # Skip the transfer entirely when the file is already on the device.
    if adb_path_exists(dest, verbose):
        return dest

    # If the source is a local file, push directly.
    local_path = source.get_local_path()
    if local_path:
        return adb_push_file(local_path, dest, verbose=verbose)

    if verbose:
        print(f"Streaming file {source} to {dest}.")

    url = source.get_url()
    assert url is not None
    req = requests.get(url, stream=True, timeout=60)
    try:
        if not req.ok:
            raise RuntimeError(
                f"Failed to fetch {source}: {req.status_code} - {req.text}"
            )
        _adb_sync_send_stream(
            dest, req.iter_content(chunk_size=_ADB_SYNC_MAX_CHUNK_SIZE)
        )
    finally:
        # With stream=True the connection stays checked out until the response
        # is closed, so release it on both the success and the error path.
        req.close()

    return dest


class AndroidBenchmarkDriver(BenchmarkDriver):
    """Android benchmark driver."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Maps host tool paths to the device paths they were pushed to, so
        # each tool is pushed at most once per run.
        self.already_pushed_files: Dict[pathlib.Path, pathlib.PurePosixPath] = {}

    def run_benchmark_case(
        self,
        benchmark_case: BenchmarkCase,
        benchmark_results_filename: Optional[pathlib.Path],
        capture_filename: Optional[pathlib.Path],
    ) -> None:
        """Pushes the artifacts of a benchmark case to the device and runs it.

        Args:
          benchmark_case: the benchmark case to run.
          benchmark_results_filename: if not None, run the normal benchmark
            (preceded by output verification when enabled and expected outputs
            are available) and write the metrics to this host file.
          capture_filename: if not None, run the traced benchmark and write the
            captured trace to this host file.

        Raises:
          ValueError: if the tool directory or trace capture config needed by
            the requested run is missing.
        """
        module_rel_dir = iree_artifacts.get_module_dir_path(
            benchmark_case.run_config.module_generation_config
        )
        android_case_dir = ANDROID_TMPDIR / module_rel_dir

        module_path = benchmark_case.module_dir / iree_artifacts.MODULE_FILENAME
        module_device_path = adb_fetch_and_push_file(
            source=module_path,
            dest=android_case_dir / iree_artifacts.MODULE_FILENAME,
            verbose=self.verbose,
        )

        inputs_dir = None
        if benchmark_case.input_uri:
            inputs_dir = self.__fetch_and_unpack_npy(
                url=benchmark_case.input_uri,
                device_dir=android_case_dir / "inputs_npy",
            )
        expected_outputs_dir = None
        if self.config.verify and benchmark_case.expected_output_uri:
            expected_outputs_dir = self.__fetch_and_unpack_npy(
                url=benchmark_case.expected_output_uri,
                device_dir=android_case_dir / "expected_outputs_npy",
            )

        run_config = benchmark_case.run_config
        # TODO(#15452): Change to `--task_topology_cpu_ids` once we figure out
        # the right mapping.
        taskset = self.__deduce_taskset_from_run_config(run_config)
        run_args = run_config.materialize_run_flags(inputs_dir=inputs_dir)
        run_args.append(f"--module={module_device_path}")

        if benchmark_results_filename is not None:
            if self.config.normal_benchmark_tool_dir is None:
                raise ValueError("normal_benchmark_tool_dir can't be None.")

            if expected_outputs_dir:
                self.__run_verify(
                    host_tool_dir=self.config.normal_benchmark_tool_dir,
                    run_args=run_args,
                    expected_outputs_dir=expected_outputs_dir,
                    verify_params=benchmark_case.verify_params,
                    taskset=taskset,
                )

            self.__run_benchmark(
                host_tool_dir=self.config.normal_benchmark_tool_dir,
                benchmark_case=benchmark_case,
                run_args=run_args,
                results_filename=benchmark_results_filename,
                taskset=taskset,
            )

        if capture_filename is not None:
            capture_config = self.config.trace_capture_config
            if capture_config is None:
                raise ValueError("Trace capture config can't be None.")

            self.__run_capture(
                host_tool_dir=capture_config.traced_benchmark_tool_dir,
                trace_capture_tool=capture_config.trace_capture_tool,
                benchmark_case=benchmark_case,
                run_args=run_args,
                capture_filename=capture_filename,
                taskset=taskset,
            )

    def __run_verify(
        self,
        host_tool_dir: pathlib.Path,
        run_args: Sequence[str],
        expected_outputs_dir: pathlib.PurePosixPath,
        verify_params: Sequence[str],
        taskset: str,
    ):
        """Runs `iree-run-module` on the device against the expected output."""
        device_tool = self.__check_and_push_file(
            host_tool_dir / "iree-run-module", ANDROID_NORMAL_TOOL_DIR
        )
        cmd = ["taskset", taskset, device_tool]
        cmd += run_args
        # Currently only support single output.
        cmd.append(f'--expected_output=@{expected_outputs_dir / "output_0.npy"}')
        cmd += verify_params
        adb_execute(cmd, verbose=self.verbose)

    def __run_benchmark(
        self,
        host_tool_dir: pathlib.Path,
        benchmark_case: BenchmarkCase,
        run_args: Sequence[str],
        results_filename: pathlib.Path,
        taskset: str,
    ):
        """Runs the benchmark tool on the device and writes metrics as JSON."""
        tool_name = benchmark_case.benchmark_tool_name
        device_tool = self.__check_and_push_file(
            host_tool_dir / tool_name, ANDROID_NORMAL_TOOL_DIR
        )
        cmd = ["taskset", taskset, device_tool]
        cmd += run_args
        if tool_name == "iree-benchmark-module":
            cmd += get_iree_benchmark_module_arguments(
                driver_info=benchmark_case.driver_info,
                benchmark_min_time=self.config.benchmark_min_time,
            )

        benchmark_stdout, benchmark_stderr = adb_execute_and_get_output(
            cmd, verbose=self.verbose
        )
        benchmark_metrics = parse_iree_benchmark_metrics(
            benchmark_stdout, benchmark_stderr
        )
        if self.verbose:
            print(benchmark_metrics)
        results_filename.write_text(json.dumps(benchmark_metrics.to_json_object()))

    def __run_capture(
        self,
        host_tool_dir: pathlib.Path,
        trace_capture_tool: pathlib.Path,
        benchmark_case: BenchmarkCase,
        run_args: Sequence[str],
        capture_filename: pathlib.Path,
        taskset: str,
    ):
        """Runs the traced benchmark tool on the device and captures its trace."""
        tool_name = benchmark_case.benchmark_tool_name
        device_tool = self.__check_and_push_file(
            host_tool_dir / tool_name, ANDROID_TRACED_TOOL_DIR
        )
        run_cmd = [
            "TRACY_NO_EXIT=1",
            f"IREE_PRESERVE_DYLIB_TEMP_FILES={ANDROID_TMPDIR}",
            "taskset",
            taskset,
            device_tool,
        ]
        run_cmd += run_args
        if tool_name == "iree-benchmark-module":
            run_cmd += get_iree_benchmark_module_arguments(
                driver_info=benchmark_case.driver_info,
                benchmark_min_time=self.config.benchmark_min_time,
                dump_results=False,
                capture_mode=True,
            )

        # Just launch the traced benchmark tool with TRACY_NO_EXIT=1 without
        # waiting for the adb command to complete as that won't happen.
        process = adb_start_cmd(run_cmd, verbose=self.verbose)

        wait_for_iree_benchmark_module_start(process, self.verbose)

        # Now it's okay to collect the trace via the capture tool. This will
        # send the signal to let the previously waiting benchmark tool to
        # complete.
        capture_cmd = [trace_capture_tool, "-f", "-o", capture_filename]

        # If verbose, just let the subprocess print its output. The subprocess
        # may need to detect if the output is a TTY to decide whether to log
        # verbose progress info and use ANSI colors, so it's better to use
        # stdout redirection than to capture the output in a string.
        stdout_redirect = None if self.verbose else subprocess.DEVNULL
        execute_cmd(capture_cmd, verbose=self.verbose, stdout=stdout_redirect)

    def __deduce_taskset_from_run_config(
        self, run_config: iree_definitions.E2EModelRunConfig
    ) -> str:
        """Deduces the CPU mask according to device and execution config.

        Returns:
          The hexadecimal CPU affinity mask to pass to `taskset`.
        """
        cpu_params = run_config.target_device_spec.device_parameters.cpu_params
        if not cpu_params:
            # Assume the mobile CPUs have <= 16 cores.
            return "ffff"

        exec_config = run_config.module_execution_config
        pinned_cores = cpu_params.pinned_cores
        # Use the fastest cores in the spec for single-thread benchmarks.
        if (
            exec_config.driver == iree_definitions.RuntimeDriver.LOCAL_SYNC
            or "1-thread" in exec_config.tags
        ):
            pinned_cores = pinned_cores[-1:]

        # Build the mask from the (possibly narrowed) core list, not from the
        # full spec, so single-thread runs are pinned as selected above.
        cpu_mask = sum(1 << core_id for core_id in pinned_cores)
        return f"{cpu_mask:04x}"

    def __check_and_push_file(
        self, host_path: pathlib.Path, device_dir: pathlib.PurePosixPath
    ):
        """Checks if the file has been pushed and pushes it if not."""
        android_path = self.already_pushed_files.get(host_path)
        if android_path is not None:
            return android_path

        android_path = adb_push_file(
            host_path,
            device_dir / host_path.name,
            verbose=self.verbose,
        )
        self.already_pushed_files[host_path] = android_path
        return android_path

    def __fetch_and_unpack_npy(self, url: str, device_dir: pathlib.PurePosixPath):
        """Fetches the archive at `url` and unpacks it into `device_dir`.

        Nothing is fetched if `device_dir` already exists on the device.

        Returns:
          The directory on the device holding the unpacked files.
        """
        if adb_path_exists(device_dir, verbose=self.verbose):
            return device_dir

        archive_path = adb_fetch_and_push_file(
            source=benchmark_definition.ResourceLocation.build_url(url),
            dest=device_dir.with_suffix(".tgz"),
            verbose=self.verbose,
        )
        adb_execute(
            ["mkdir", "-p", str(device_dir)]
            + ["&&", "tar", "-xvf", str(archive_path), "-C", str(device_dir)],
            verbose=self.verbose,
        )
        return device_dir


def set_cpu_frequency_scaling_governor(governor: str):
    """Pushes the CPU governor script to the device and runs it as root."""
    git_root = execute_cmd_and_get_stdout(["git", "rev-parse", "--show-toplevel"])
    cpu_script = (
        pathlib.Path(git_root)
        / "build_tools"
        / "benchmarks"
        / "set_android_scaling_governor.sh"
    )
    android_path = adb_push_file(cpu_script, ANDROID_TMPDIR / cpu_script.name)
    adb_execute_as_root([android_path, governor])


def set_gpu_frequency_scaling_policy(policy: str):
    """Pushes the device-specific GPU policy script and runs it as root.

    Raises:
      RuntimeError: if there is no script for the connected device.
    """
    git_root = execute_cmd_and_get_stdout(["git", "rev-parse", "--show-toplevel"])
    device_model = get_android_device_model()
    gpu_name = get_android_gpu_name()
    benchmarks_tool_dir = pathlib.Path(git_root) / "build_tools" / "benchmarks"
    if device_model in ("Pixel-6", "Pixel-6-Pro"):
        gpu_script = benchmarks_tool_dir / "set_pixel6_gpu_scaling_policy.sh"
    elif gpu_name.lower().startswith("adreno"):
        gpu_script = benchmarks_tool_dir / "set_adreno_gpu_scaling_policy.sh"
    else:
        raise RuntimeError(
            f"Unsupported device '{device_model}' for setting GPU scaling policy"
        )
    android_path = adb_push_file(gpu_script, ANDROID_TMPDIR / gpu_script.name)
    adb_execute_as_root([android_path, policy])


def add_port_forwarding(port: int, verbose: bool):
    """Add adb port forwarding and register its removal at exit."""
    execute_cmd_and_get_stdout(
        ["adb", "forward", f"tcp:{port}", f"tcp:{port}"], verbose=verbose
    )
    atexit.register(
        execute_cmd_and_get_stdout,
        ["adb", "forward", "--remove", f"tcp:{port}"],
        verbose=verbose,
    )


def main(args):
    """Runs the benchmarks selected by the parsed command-line `args`.

    Raises:
      RuntimeError: if any benchmark reported an error.
    """
    device_info = get_android_device_info(args.verbose)
    if args.verbose:
        print(device_info)

    commit = get_git_commit_hash("HEAD")
    benchmark_config = BenchmarkConfig.build_from_args(args, commit)

    benchmark_groups = json.loads(args.execution_benchmark_config.read_text())
    run_configs = benchmark_suite_module.get_run_configs_by_target_and_shard(
        benchmark_groups, args.target_device_name, args.shard_index
    )

    benchmark_suite = BenchmarkSuite.load_from_run_configs(
        run_configs=run_configs, root_benchmark_dir=benchmark_config.root_benchmark_dir
    )

    benchmark_driver = AndroidBenchmarkDriver(
        device_info=device_info,
        benchmark_config=benchmark_config,
        benchmark_suite=benchmark_suite,
        benchmark_grace_time=1.0,
        verbose=args.verbose,
    )

    # Each pinned setting is reverted by an atexit handler.
    if args.pin_cpu_freq:
        set_cpu_frequency_scaling_governor("performance")
        atexit.register(set_cpu_frequency_scaling_governor, "schedutil")
    if args.pin_gpu_freq:
        set_gpu_frequency_scaling_policy("performance")
        atexit.register(set_gpu_frequency_scaling_policy, "default")

    # Clear the benchmark directory on the Android device first just in case
    # there are leftovers from manual or failed runs.
    adb_execute(
        ["rm", "-rf", str(ANDROID_TMPDIR), "&&", "mkdir", "-p", str(ANDROID_TMPDIR)],
        cwd=pathlib.PurePosixPath("/"),
        verbose=args.verbose,
    )

    if not args.no_clean:
        # Clear the benchmark directory on the Android device.
        atexit.register(
            execute_cmd_and_get_stdout,
            ["adb", "shell", "rm", "-rf", ANDROID_TMPDIR],
            verbose=args.verbose,
        )
        # Also clear temporary directory on the host device.
        atexit.register(shutil.rmtree, args.tmp_dir)

    trace_capture_config = benchmark_config.trace_capture_config
    if trace_capture_config:
        add_port_forwarding(port=ANDROID_TRACY_PORT, verbose=args.verbose)

    benchmark_driver.run()

    benchmark_results = benchmark_driver.get_benchmark_results()
    if args.output is not None:
        with open(args.output, "w") as f:
            f.write(benchmark_results.to_json_str())

    if args.verbose:
        print(benchmark_results.commit)
        print(benchmark_results.benchmarks)

    if trace_capture_config:
        # Put all captures in a tarball.
        with tarfile.open(trace_capture_config.capture_tarball, "w:gz") as tar:
            for capture_filename in benchmark_driver.get_capture_filenames():
                tar.add(capture_filename)

    benchmark_errors = benchmark_driver.get_benchmark_errors()
    if benchmark_errors:
        print("Benchmarking completed with errors", file=sys.stderr)
        raise RuntimeError(benchmark_errors)


if __name__ == "__main__":
    main(common.common_arguments.Parser().parse_args())