#!/usr/bin/env python3
# Copyright 2023 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Miscellaneous tool to help work with benchmark suite and benchmark CI."""

import pathlib
import sys

# Add build_tools python dir to the search path.
sys.path.insert(0, str(pathlib.Path(__file__).parent.with_name("python")))

import argparse
import json
import os
import shlex
import subprocess
from typing import Dict, List, Optional, Sequence
import functools

from e2e_test_artifacts import model_artifacts, iree_artifacts
from e2e_test_framework import serialization
from e2e_test_framework.definitions import iree_definitions

IREE_COMPILER_NAME = "iree-compile"


def _convert_to_cmd_string(cmds: Sequence[str]) -> str:
    if os.name == "nt":
        # list2cmdline is an undocumented method for Windows command lines. Python
        # doesn't provide an official method for quoting Windows command lines and
        # the correct implementation is slightly non-trivial. Use the undocumented
        # method for now and can be rewritten with our own implementation later.
        # See https://learn.microsoft.com/en-us/archive/blogs/twistylittlepassagesallalike/everyone-quotes-command-line-arguments-the-wrong-way
        return subprocess.list2cmdline(cmds)

    return " ".join(shlex.quote(cmd) for cmd in cmds)


def _dump_cmds_of_generation_config(
    gen_config: iree_definitions.ModuleGenerationConfig,
    root_path: pathlib.PurePath = pathlib.PurePath(),
):
    imported_model = gen_config.imported_model
    imported_model_path = iree_artifacts.get_imported_model_path(
        imported_model=imported_model, root_path=root_path
    )
    module_dir_path = iree_artifacts.get_module_dir_path(
        module_generation_config=gen_config, root_path=root_path
    )
    module_path = module_dir_path / iree_artifacts.MODULE_FILENAME
    compile_cmds = [
        IREE_COMPILER_NAME,
        str(imported_model_path),
        "-o",
        str(module_path),
    ]
    compile_cmds += gen_config.materialize_compile_flags(
        module_dir_path=module_dir_path
    )
    compile_cmd_str = _convert_to_cmd_string(compile_cmds)

    if imported_model.import_config.tool == iree_definitions.ImportTool.NONE:
        import_cmd_str = "# (Source model is already in MLIR)"
    else:
        source_model_path = model_artifacts.get_model_path(
            model=imported_model.model, root_path=root_path
        )
        import_cmds = [
            imported_model.import_config.tool.value,
            str(source_model_path),
            "-o",
            str(imported_model_path),
        ]
        import_cmds += imported_model.import_config.materialize_import_flags(
            model=imported_model.model
        )
        import_cmd_str = _convert_to_cmd_string(import_cmds)

    # Insert a blank line after each command to help read with line wrap.
    return ["Compile Module:", compile_cmd_str, "", "Import Model:", import_cmd_str, ""]


def _dump_cmds_from_run_config(
    run_config: iree_definitions.E2EModelRunConfig,
    root_path: pathlib.PurePath = pathlib.PurePath(),
):
    gen_config = run_config.module_generation_config
    module_path = (
        iree_artifacts.get_module_dir_path(
            module_generation_config=gen_config, root_path=root_path
        )
        / iree_artifacts.MODULE_FILENAME
    )

    run_cmds = [run_config.tool.value, f"--module={module_path}"]
    run_cmds += run_config.materialize_run_flags()
    # Insert a blank line after the command to help read with line wrap.
    lines = ["Run Module:", _convert_to_cmd_string(run_cmds), ""]
    lines += _dump_cmds_of_generation_config(gen_config=gen_config, root_path=root_path)
    return lines


def _dump_cmds_handler(
    e2e_test_artifacts_dir: pathlib.Path,
    execution_benchmark_config: Optional[pathlib.Path],
    compilation_benchmark_config: Optional[pathlib.Path],
    benchmark_id: Optional[str],
    **_unused_args,
):
    lines = []

    if execution_benchmark_config is not None:
        benchmark_groups = json.loads(execution_benchmark_config.read_text())
        for target_device, benchmark_group in benchmark_groups.items():
            shard_count = len(benchmark_group["shards"])
            for shard in benchmark_group["shards"]:
                run_configs = serialization.unpack_and_deserialize(
                    data=shard["run_configs"],
                    root_type=List[iree_definitions.E2EModelRunConfig],
                )
                for run_config in run_configs:
                    if (
                        benchmark_id is not None
                        and benchmark_id != run_config.composite_id
                    ):
                        continue

                    lines.append("################")
                    lines.append("")
                    lines.append(f"Execution Benchmark ID: {run_config.composite_id}")
                    lines.append(f"Name: {run_config}")
                    lines.append(f"Target Device: {target_device}")
                    lines.append(f"Shard: {shard['index']} / {shard_count}")
                    lines.append("")
                    lines += _dump_cmds_from_run_config(
                        run_config=run_config, root_path=e2e_test_artifacts_dir
                    )

    if compilation_benchmark_config is not None:
        benchmark_config = json.loads(compilation_benchmark_config.read_text())
        gen_configs = serialization.unpack_and_deserialize(
            data=benchmark_config["generation_configs"],
            root_type=List[iree_definitions.ModuleGenerationConfig],
        )
        for gen_config in gen_configs:
            if benchmark_id is not None and benchmark_id != gen_config.composite_id:
                continue

            lines.append("################")
            lines.append("")
            lines.append(f"Compilation Benchmark ID: {gen_config.composite_id}")
            lines.append(f"Name: {gen_config}")
            lines.append("")
            lines += _dump_cmds_of_generation_config(
                gen_config=gen_config, root_path=e2e_test_artifacts_dir
            )

    print(*lines, sep="\n")


# Represents a benchmark results file with the data already loaded from a JSON file.
class JSONBackedBenchmarkData:
    def __init__(self, source_filepath: pathlib.PurePath, data: Dict):
        if not isinstance(data, dict):
            raise ValueError(
                f"'{source_filepath}' seems not to be a valid benchmark-results-file (No JSON struct as root element)."
            )
        if "commit" not in data:
            raise ValueError(
                f"'{source_filepath}' seems not to be a valid benchmark-results-file ('commit' field not found)."
            )
        if "benchmarks" not in data:
            raise ValueError(
                f"'{source_filepath}' seems not to be a valid benchmark-results-file ('benchmarks' field not found)."
            )

        self.source_filepath: pathlib.PurePath = source_filepath
        self.data: Dict = data

    # Parses a JSON benchmark results file and makes some sanity checks
    @staticmethod
    def load_from_file(filepath: pathlib.Path):
        try:
            data = json.loads(filepath.read_bytes())
        except json.JSONDecodeError as e:
            raise ValueError(f"'{filepath}' seems not to be a valid JSON file: {e.msg}")
        return JSONBackedBenchmarkData(filepath, data)

    # A convenience wrapper around `loadFromFile` that accepts a sequence of paths and returns a sequence of JSONBackedBenchmarkData objects as a generator.
    @staticmethod
    def load_many_from_files(filepaths: Sequence[pathlib.Path]):
        return (
            JSONBackedBenchmarkData.load_from_file(filepath) for filepath in filepaths
        )


# Merges the benchmark results from `right` into `left` and returns the updated `left`
def _merge_two_resultsets(
    left: JSONBackedBenchmarkData, right: JSONBackedBenchmarkData
) -> JSONBackedBenchmarkData:
    if left.data["commit"] != right.data["commit"]:
        raise ValueError(
            f"'{right.source_filepath}' and the previous files are based on different commits ({left.data['commit']} != {right.data['commit']}). Merging not supported."
        )
    left.data["benchmarks"].extend(right.data["benchmarks"])
    return left


def merge_results(benchmark_results: Sequence[JSONBackedBenchmarkData]):
    return functools.reduce(_merge_two_resultsets, benchmark_results)


def _merge_results_handler(
    benchmark_results_files: Sequence[pathlib.Path], **_unused_args
):
    print(
        json.dumps(
            merge_results(
                JSONBackedBenchmarkData.load_many_from_files(benchmark_results_files)
            )
        )
    )


def _parse_arguments() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        description="Miscellaneous tool to help work with benchmark suite and benchmark CI."
    )

    subparser = parser.add_subparsers(
        required=True, title="operation", dest="operation"
    )
    dump_cmds_parser = subparser.add_parser(
        "dump-cmds", help="Dump the commands to compile and run benchmarks manually."
    )
    dump_cmds_parser.add_argument(
        "--e2e_test_artifacts_dir",
        type=pathlib.PurePath,
        default=pathlib.Path(),
        help="E2E test artifacts root path used in the outputs of artifact paths",
    )
    dump_cmds_parser.add_argument(
        "--benchmark_id", type=str, help="Only dump the benchmark with this id"
    )
    dump_cmds_parser.add_argument(
        "--execution_benchmark_config",
        type=pathlib.Path,
        help="Config file exported from export_benchmark_config.py execution",
    )
    dump_cmds_parser.add_argument(
        "--compilation_benchmark_config",
        type=pathlib.Path,
        help="Config file exported from export_benchmark_config.py compilation",
    )
    dump_cmds_parser.set_defaults(handler=_dump_cmds_handler)

    merge_results_parser = subparser.add_parser(
        "merge-results",
        help="Merges the results from multiple benchmark results JSON files into a single JSON structure.",
    )
    merge_results_parser.add_argument(
        "benchmark_results_files",
        type=pathlib.Path,
        nargs="+",
        help="One or more benchmark results JSON file paths",
    )
    merge_results_parser.set_defaults(handler=_merge_results_handler)

    args = parser.parse_args()
    if (
        args.operation == "dump-cmds"
        and args.execution_benchmark_config is None
        and args.compilation_benchmark_config is None
    ):
        parser.error(
            "At least one of --execution_benchmark_config or "
            "--compilation_benchmark_config must be set."
        )

    return args


def main(args: argparse.Namespace):
    args.handler(**vars(args))


if __name__ == "__main__":
    main(_parse_arguments())