# Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
r"""Script to generate a HTML summary comparing IREE and TFLite latencies and memory usage.
Example usage:
python parse_tflite_benchmarks.py \
--iree_version=20220924.276 \
--tflite_version=20220924.162 \
--platform=server \
--input_csv=server_results.csv \
--output_path=/tmp/server_summary.html
"""

import argparse
import pathlib
import sys
from datetime import date

import pandas as pd

# Add build_tools python dir to the search path.
sys.path.insert(0, str(pathlib.Path(__file__).parent / ".." / ".." / "python"))
from reporting.common import html_utils

# Supported platforms.
_PLATFORM_SERVER = "server"
_PLATFORM_MOBILE = "mobile"

# A map of model name to data type.
_MODEL_TO_DATA_TYPE = {
    "albert_lite_base_squadv1_1": "fp32",
    "albert_lite_base_squadv1_1_fp16": "fp16",
    "deeplabv3": "fp32",
    "deeplabv3_fp16": "fp16",
    "efficientnet_lite0_fp32_2": "fp32",
    "efficientnet_lite0_fp32_2_fp16": "fp16",
    "efficientnet_lite0_int8_2": "int8",
    "inception_v4_299_fp32": "fp32",
    "inception_v4_299_fp32_fp16": "fp16",
    "inception_v4_299_uint8": "uint8",
    "mobilebert-baseline-tf2-quant": "int8",
    "mobilebert_float_384_gpu": "fp32",
    "mobilebert_float_384_gpu_fp16": "fp16",
    "mobilenet_v2_1.0_224": "fp32",
    "mobilenet_v2_1.0_224_fp16": "fp16",
    "mobilenet_v2_224_1.0_uint8": "uint8",
    "person_detect": "int8",
    "resnet_v2_101_1_default_1": "fp32",
    "resnet_v2_101_1_default_1_fp16": "fp16",
    "ssd_mobilenet_v2_static_1.0_int8": "int8",
    "ssd_mobilenet_v2_fpnlite_fp32": "fp32",
    "ssd_mobilenet_v2_fpnlite_fp32_fp16": "fp16",
    "ssd_mobilenet_v2_fpnlite_uint8": "uint8",
}

# Column headers.
_MODEL = "model"
_DATA_TYPE = "data type"
_RUNTIME = "runtime"
_LATENCY = "latency (ms)"
_TASKSET = "taskset"
_MEMORY = "vmhwm (KB)"
_THREADS = "threads"
_CONFIG = "config"
_DRIVER = "driver/delegate"
_TFLITE_CONFIG = "TFLite config"
_IREE_CONFIG = "IREE config"
_IREE_LATENCY = "IREE latency (ms)"
_TFLITE_LATENCY = "TFLite latency (ms)"
_IREE_MEMORY = "IREE vmhwm (KB)"
_TFLITE_MEMORY = "TFLite vmhwm (KB)"
_IREE_VS_TFLITE_LATENCY = "IREE vs TFLite latency"
_IREE_VS_TFLITE_MEMORY = "IREE vs TFLite memory"
_PERF_COLUMNS = [_IREE_VS_TFLITE_LATENCY, _IREE_VS_TFLITE_MEMORY]
_NUMBER_COLUMNS = [_IREE_LATENCY, _TFLITE_LATENCY, _IREE_MEMORY, _TFLITE_MEMORY]
_CONFIG_COLUMNS = [_TFLITE_CONFIG, _IREE_CONFIG]


def get_tflite_model_list(df):
    """Retrieves the list of TFLite models, filtering out duplicates.

    The .csv file includes multiple entries for the same model, each under a
    different configuration (e.g. XNNPack enabled, XNNPack disabled).
    """
    df = df.loc[df.runtime == "tflite"]
    # Remove rows where the model name ends with `noxnn` since these are duplicates.
    df = df[~df.model.str.endswith("noxnn")]
    return df.model.unique()


def get_fastest_result(model, df):
    """Retrieves the lowest-latency result from multiple configurations.

    Benchmarks are run under different configurations (e.g. number of threads,
    big cores, LITTLE cores, etc.). This method retrieves the fastest
    configuration whilst ensuring apples-to-apples comparisons (e.g. FP16
    results are not considered when the model is FP32).

    Args:
        model: The model name.
        df: The dataframe to filter through.

    Returns:
        A single-row dataframe containing the lowest-latency result.
    """
    df = df[df.model.str.startswith(model)]
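    # If the model is not FP16, exclude FP16 variants so the comparison stays
    # apples-to-apples.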
if not model.endswith("fp16"):
df = df[~df[_MODEL].str.endswith("fp16")]
    df = df[df[_LATENCY] != 0]
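    # Keep the row with the minimum latency; head(1) below breaks ties.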
    df = df[df[_LATENCY] == df[_LATENCY].min()]
    return df.head(1)


def get_tflite_config(model, df):
    """Generates a configuration string from TFLite config variables."""
    config = []
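    # The taskset (CPU affinity) column is only present in some result sets,
    # e.g. mobile runs.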
    if _TASKSET in df.columns:
        taskset = df.taskset.iloc[0]
        config.append(f"taskset {taskset}")
    threads = df.threads.iloc[0]
    config.append(f"{threads} threads" if threads > 1 else f"{threads} thread")
    config.append("no xnnpack" if model.endswith("noxnn") else "xnnpack")
    return ", ".join(config)


def generate_tflite_summary(dataframe):
    """Generates a dataframe containing the fastest TFLite result for each model."""
    summary = pd.DataFrame(columns=[_MODEL, _LATENCY, _MEMORY, _CONFIG])
    tflite_df = dataframe[dataframe.runtime == "tflite"]
    model_list = get_tflite_model_list(dataframe)
    for model in model_list:
        df = get_fastest_result(model, tflite_df)
        if df.empty:
            print(f"Warning: TFLite results invalid for {model}.")
            continue
        latency = df[_LATENCY].iloc[0]
        full_model_name = df.model.iloc[0]
        memory = df[_MEMORY].iloc[0]
        config = get_tflite_config(full_model_name, df)
        summary.loc[len(summary)] = [model, latency, memory, config]
    return summary


def get_iree_model_list(df):
    """Retrieves the list of IREE models, filtering out duplicates.

    The .csv file includes multiple entries for the same model, each under a
    different configuration (e.g. mmt4d).
    """
    df = df.loc[df.runtime == "iree"]
df = df[~df.model.str.endswith("mmt4d")]
df = df[~df.model.str.endswith("padfuse")]
return df.model.unique()


def get_iree_config(model, df):
    """Generates a configuration string from IREE config variables.

    The configuration is embedded in the model name.
    """
    config = []
    if _TASKSET in df.columns:
        taskset = df.taskset.iloc[0]
        config.append(f"taskset {taskset}")
    threads = df.threads.iloc[0]
    config.append(f"{threads} threads" if threads > 1 else f"{threads} thread")
if model.endswith("im2col_mmt4d"):
config.append("im2col")
config.append("mmt4d")
elif model.endswith("mmt4d"):
config.append("mmt4d")
elif model.endswith("padfuse"):
config.append("fused pad")
return ", ".join(config)


def generate_iree_summary(dataframe):
    """Generates a dataframe containing the fastest IREE result for each model."""
    summary = pd.DataFrame(columns=[_MODEL, _LATENCY, _MEMORY, _CONFIG])
    iree_df = dataframe[dataframe.runtime == "iree"]
    model_list = get_iree_model_list(dataframe)
    for model in model_list:
        df = get_fastest_result(model, iree_df)
        if df.empty:
            print(f"Warning: IREE results invalid for {model}.")
            continue
        latency = df[_LATENCY].iloc[0]
        full_model_name = df.model.iloc[0]
        memory = df[_MEMORY].iloc[0]
        config = get_iree_config(full_model_name, df)
        summary.loc[len(summary)] = [model, latency, memory, config]
    return summary


def get_common_html_style(df, title):
    """Returns HTML style attributes common to both server and mobile."""
    st = df.style.set_table_styles(html_utils.get_table_css())
    st = st.hide(axis="index")
    st = st.set_caption(title)
    st = st.set_properties(
        subset=[_MODEL],
        **{
            "width": "300px",
            "text-align": "left",
        },
    )
    st = st.set_properties(
        subset=[_DATA_TYPE],
        **{
            "width": "100px",
            "text-align": "center",
        },
    )
    st = st.set_properties(
        subset=_NUMBER_COLUMNS,
        **{
            "width": "100px",
            "text-align": "right",
        },
    )
    st = st.set_properties(
        subset=_PERF_COLUMNS,
        **{"width": "150px", "text-align": "right", "color": "#ffffff"},
    )
    st = st.applymap(html_utils.style_latency, subset=[_IREE_VS_TFLITE_LATENCY])
    st = st.applymap(html_utils.style_memory, subset=[_IREE_VS_TFLITE_MEMORY])
    return st


def generate_summary(dataframe, title):
    """Generates a table comparing latencies and memory usage between IREE and TFLite.

    For each model, retrieves the lowest-latency configuration from both IREE
    and TFLite.

    Args:
        dataframe: The raw data to summarize.
        title: The title of the table.

    Returns:
        An HTML string containing the summarized report.
    """
    summary = pd.DataFrame(
        columns=[
            _MODEL,
            _DATA_TYPE,
            _TFLITE_CONFIG,
            _IREE_CONFIG,
            _TFLITE_LATENCY,
            _IREE_LATENCY,
            _IREE_VS_TFLITE_LATENCY,
            _TFLITE_MEMORY,
            _IREE_MEMORY,
            _IREE_VS_TFLITE_MEMORY,
        ]
    )
    tflite_df = generate_tflite_summary(dataframe)
    iree_df = generate_iree_summary(dataframe)
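    # The TFLite model list drives the comparison; models missing results on
    # either side are skipped with a warning.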
    model_list = tflite_df[_MODEL].unique()
    for model in model_list:
        tflite_results = tflite_df[tflite_df.model == model]
        iree_results = iree_df[iree_df.model == model]
        if tflite_results.empty:
            print(f"Warning: No TFLite results found for model {model}")
            continue
        if iree_results.empty:
            print(f"Warning: No IREE results found for model {model}")
            continue
        iree_latency = iree_results[_LATENCY].iloc[0]
        tflite_latency = tflite_results[_LATENCY].iloc[0]
        latency_comparison = html_utils.format_latency_comparison(
            iree_latency, tflite_latency
        )
        iree_memory = iree_results[_MEMORY].iloc[0]
        tflite_memory = tflite_results[_MEMORY].iloc[0]
        memory_comparison = html_utils.format_memory_comparison(
            iree_memory, tflite_memory
        )
        iree_config = iree_results.config.iloc[0]
        tflite_config = tflite_results.config.iloc[0]
        summary.loc[len(summary)] = [
            model,
            _MODEL_TO_DATA_TYPE[model],
            tflite_config,
            iree_config,
            f"{tflite_latency:.1f}",
            f"{iree_latency:.1f}",
            latency_comparison,
            f"{tflite_memory:,.0f}",
            f"{iree_memory:,.0f}",
            memory_comparison,
        ]
    summary = summary.round(2)
    st = get_common_html_style(summary, title)
    st = st.set_properties(
        subset=_CONFIG_COLUMNS,
        **{
            "width": "300px",
            "text-align": "left",
        },
    )
return st.to_html().replace("\\n", "
") + "
"


def generate_detail(dataframe, title, platform):
    """Generates a table comparing latencies and memory usage between IREE and TFLite.

    The generated table is more detailed than the one from `generate_summary`.
    It lists the latencies of all IREE configurations, using the fastest TFLite
    configuration as the baseline.

    Args:
        dataframe: The raw data to summarize.
        title: The title of the table.
        platform: Either `server` or `mobile`.

    Returns:
        An HTML string containing the detailed report.
    """
    summary = pd.DataFrame(
        columns=[
            _MODEL,
            _DATA_TYPE,
            _TFLITE_CONFIG,
            _IREE_CONFIG,
            _TASKSET,
            _THREADS,
            _TFLITE_LATENCY,
            _IREE_LATENCY,
            _IREE_VS_TFLITE_LATENCY,
            _TFLITE_MEMORY,
            _IREE_MEMORY,
            _IREE_VS_TFLITE_MEMORY,
        ]
    )
    model_list = get_tflite_model_list(dataframe)
    for model in model_list:
        df = dataframe[dataframe.model.str.startswith(model)]
        # If the model is not FP16, remove FP16 results from the dataframe to
        # maintain apples-to-apples comparisons.
        if not model.endswith("fp16"):
            df = df[~df.model.str.endswith("fp16")]
        if _TASKSET in df.columns:
            tasksets = df.taskset.unique()
        else:
            tasksets = ["none"]
        for taskset in tasksets:
            per_taskset_df = df if taskset == "none" else df[df.taskset == taskset]
            threads = per_taskset_df.threads.unique()
            for thread in threads:
                per_thread_df = per_taskset_df[per_taskset_df.threads == thread]
                tflite_df = get_fastest_result(
                    model, per_thread_df[per_thread_df.runtime == "tflite"]
                )
                if tflite_df.empty:
                    continue
                tflite_latency = tflite_df[_LATENCY].iloc[0]
                tflite_memory = tflite_df[_MEMORY].iloc[0]
                if tflite_latency == 0 or tflite_memory == 0:
                    continue
                full_model_name = tflite_df.model.iloc[0]
                # For the TFLite config, we only want to know whether XNNPack
                # was used; the other configuration settings are covered by
                # other columns.
                tflite_config = (
                    "no xnnpack" if full_model_name.endswith("noxnn") else "xnnpack"
                )
iree_df = per_thread_df[per_thread_df.runtime == "iree"]
for _, row in iree_df.iterrows():
iree_config = row[_DRIVER]
model_name = row[_MODEL]
if model_name.endswith("im2col_mmt4d"):
iree_config += ", im2col, mmt4d"
elif model_name.endswith("mmt4d"):
iree_config += ", mmt4d"
elif model_name.endswith("padfuse"):
iree_config += ", fused pad"
iree_latency = row[_LATENCY]
latency_comparison = html_utils.format_latency_comparison(
iree_latency, tflite_latency
)
iree_memory = row[_MEMORY]
memory_comparison = html_utils.format_memory_comparison(
iree_memory, tflite_memory
)
if iree_latency == 0 or iree_memory == 0:
continue
summary.loc[len(summary)] = [
model,
_MODEL_TO_DATA_TYPE[model],
tflite_config,
iree_config,
taskset,
thread,
f"{tflite_latency:.1f}",
f"{iree_latency:.1f}",
latency_comparison,
f"{tflite_memory:,.0f}",
f"{iree_memory:,.0f}",
memory_comparison,
]
    summary = summary.round(2)
    st = get_common_html_style(summary, title)
    st = st.set_properties(
        subset=[_TASKSET, _THREADS],
        **{
            "width": "100px",
            "text-align": "center",
        },
    )
    st = st.set_properties(
        subset=[_TFLITE_CONFIG],
        **{
            "width": "150px",
            "text-align": "left",
        },
    )
    st = st.set_properties(
        subset=[_IREE_CONFIG],
        **{
            "width": "300px",
            "text-align": "left",
        },
    )
    # The taskset column is only meaningful on mobile.
    if platform != _PLATFORM_MOBILE:
        st = st.hide(subset=[_TASKSET], axis="columns")
    return st.to_html().replace("\\n", "<br>") + "<br>"


def main(args):
    """Summarizes IREE vs TFLite benchmark results."""
    # CPU drivers are shared across platforms; GPU drivers differ.
    cpu_drivers = ["cpu", "local-task"]
    if args.platform == _PLATFORM_SERVER:
        gpu_drivers = ["gpu", "cuda"]
    else:
        gpu_drivers = ["gpu", "vulkan", "adreno"]
    version_html = (
        f"IREE version: {args.iree_version}<br>"
        f"TFLite version: {args.tflite_version}<br>"
        f"last updated: {date.today().isoformat()}<br>"
    )
    html = html_utils.generate_header_and_legend(version_html)
    df = pd.read_csv(args.input_csv)

    # Generate CPU Summary.
    results = df[df[_DRIVER].isin(cpu_drivers)]
    html += generate_summary(results, args.platform.capitalize() + " CPU Summary")

    # Generate GPU Summary.
    results = df[df[_DRIVER].isin(gpu_drivers)]
    html += generate_summary(results, args.platform.capitalize() + " GPU Summary")

    # Generate CPU Detailed View.
    results = df[df[_DRIVER].isin(cpu_drivers)]
    html += generate_detail(
        results, args.platform.capitalize() + " CPU Detailed", args.platform
    )

    # Generate GPU Detailed View.
    results = df[df[_DRIVER].isin(gpu_drivers)]
    html += generate_detail(
        results, args.platform.capitalize() + " GPU Detailed", args.platform
    )

    args.output_path.write_text(html)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--iree_version",
        type=str,
        default=None,
        required=True,
        help="The IREE version.",
    )
    parser.add_argument(
        "--tflite_version",
        type=str,
        default=None,
        required=True,
        help="The TFLite version.",
    )
    parser.add_argument(
        "--platform",
        action="store",
        type=str.lower,
        help="The platform the models were benchmarked on. Either `server` or `mobile`.",
        required=True,
        choices=[_PLATFORM_SERVER, _PLATFORM_MOBILE],
    )
    parser.add_argument(
        "--input_csv",
        type=str,
        default=None,
        required=True,
        help="The path to the CSV file containing benchmark results for both IREE and TFLite.",
    )
    parser.add_argument(
        "--output_path",
        type=pathlib.Path,
        default="/tmp/summary.html",
        help="The path to the output HTML file that summarizes results.",
    )
    return parser.parse_args()
if __name__ == "__main__":
main(parse_args())