"""Benchmark cftime_rs against cftime and plot the measured timings.

Three scenarios are timed for every array size in ``ITERATIONS``:

1. decoding, calling ``str()`` on every decoded date, and encoding;
2. decoding and encoding only;
3. decoding to / encoding from Python ``datetime`` objects.

Each scenario produces one PNG bar chart written next to this script.
"""

import os
import time
from typing import Callable, List, Tuple

import cftime
import cftime_rs
import matplotlib.pyplot as plt
import numpy as np

# Array sizes (number of time values) each benchmark is run with.
ITERATIONS = [1, 10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000, 500000, 1000000]
UNITS = "hours since 2000-01-01 00:00:00"
CALENDAR = "julian"


def _improvement_percent(cftime_rs_seconds: float, cftime_seconds: float) -> float:
    """Return how much slower cftime was than cftime_rs, in percent.

    Returns NaN when the cftime_rs duration is zero (the ratio is undefined)
    instead of raising ``ZeroDivisionError``.
    """
    if cftime_rs_seconds == 0:
        return float("nan")
    return ((cftime_seconds - cftime_rs_seconds) / cftime_rs_seconds) * 100


def performance_comparison_chart(
    cftime_rs_time: List[float],
    cftime_time: List[float],
    title: str,
    output_file: str,
) -> None:
    """
    Generate a bar chart to compare the performance of cftime_rs and cftime.

    Args:
        cftime_rs_time: cftime_rs durations in seconds, one per entry of
            ``ITERATIONS``.
        cftime_time: cftime durations in seconds, one per entry of
            ``ITERATIONS``.
        title: Title drawn above the chart.
        output_file: File name of the image; it is saved in the directory
            that contains this script.
    """
    # Use the arguments rather than same-named module globals so the function
    # also works when imported and called from other code.
    performance_improvement = [
        _improvement_percent(rs_seconds, py_seconds)
        for rs_seconds, py_seconds in zip(cftime_rs_time, cftime_time)
    ]

    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Bars are spaced evenly rather than at their true (roughly logarithmic)
    # x positions; the tick labels carry the real iteration counts.
    x_ticks = np.linspace(min(ITERATIONS), max(ITERATIONS), len(ITERATIONS))
    width = ITERATIONS[-1] / len(ITERATIONS) / 2

    ax1.bar(
        x_ticks - width / 2,
        cftime_rs_time,
        width=width,
        label="cftime_rs",
        color="royalblue",
        alpha=0.7,
    )
    ax1.bar(
        x_ticks + width / 2,
        cftime_time,
        width=width,
        label="cftime",
        color="orange",
        alpha=0.7,
    )

    ax1.set_xlabel("Number of Iterations")
    ax1.set_ylabel("Execution time (seconds)")
    ax1.set_xticks(x_ticks, [str(int(x)) for x in ITERATIONS])
    ax1.set_title(title)
    ax1.grid(axis="y", linestyle="--", alpha=0.7)

    # Create the secondary y-axis for performance improvement
    ax2 = ax1.twinx()
    ax2.plot(
        x_ticks,
        performance_improvement,
        marker="o",
        color="green",
        label="Performance Improvement (%)",
        alpha=0.4,
    )
    ax2.set_ylabel("Performance Improvement (%)")

    ax1.legend(loc="upper left")
    ax2.legend(loc="upper right")

    fig.savefig(os.path.join(os.path.dirname(__file__), output_file))
    # Release the figure; otherwise every chart stays registered in pyplot.
    plt.close(fig)


def cftime_rs_benchmark(arr: np.ndarray) -> float:
    """Time cftime_rs decoding ``arr``, stringifying each date, and encoding.

    Returns the elapsed time in seconds.
    """
    start = time.perf_counter()
    datetimes = cftime_rs.num2date(arr, UNITS, CALENDAR)
    for dt in datetimes:
        str(dt)
    _ = cftime_rs.date2num(datetimes, UNITS, CALENDAR, dtype="int")
    return time.perf_counter() - start


def cftime_benchmark(arr: np.ndarray) -> float:
    """Time cftime decoding ``arr``, stringifying each date, and encoding.

    Returns the elapsed time in seconds.
    """
    start = time.perf_counter()
    datetimes = cftime.num2date(arr, UNITS, CALENDAR)
    for dt in datetimes:
        str(dt)
    _ = cftime.date2num(datetimes, UNITS, CALENDAR)
    return time.perf_counter() - start


def cftime_rs_benchmark_without_str(arr: np.ndarray) -> float:
    """Time cftime_rs decoding ``arr`` and encoding the result back.

    Returns the elapsed time in seconds.
    """
    start = time.perf_counter()
    datetimes = cftime_rs.num2date(arr, UNITS, CALENDAR)
    _ = cftime_rs.date2num(datetimes, UNITS, CALENDAR, dtype="int")
    return time.perf_counter() - start


def cftime_benchmark_without_str(arr: np.ndarray) -> float:
    """Time cftime decoding ``arr`` and encoding the result back.

    Returns the elapsed time in seconds.
    """
    start = time.perf_counter()
    datetimes = cftime.num2date(arr, UNITS, CALENDAR)
    _ = cftime.date2num(datetimes, UNITS, CALENDAR)
    return time.perf_counter() - start


def cftime_rs_benchmark_pydatetime_without_str(arr: np.ndarray) -> float:
    """Time cftime_rs ``num2pydate`` followed by ``pydate2num`` on ``arr``.

    Returns the elapsed time in seconds.
    """
    start = time.perf_counter()
    datetimes = cftime_rs.num2pydate(arr, UNITS, CALENDAR)
    _ = cftime_rs.pydate2num(datetimes, UNITS, CALENDAR, dtype="int")
    return time.perf_counter() - start


def cftime_benchmark_pydatetime_without_str(arr: np.ndarray) -> float:
    """Time cftime decoding with ``only_use_python_datetimes=True`` and encoding.

    Returns the elapsed time in seconds.
    """
    start = time.perf_counter()
    datetimes = cftime.num2date(arr, UNITS, CALENDAR, only_use_python_datetimes=True)
    _ = cftime.date2num(datetimes, UNITS, CALENDAR)
    return time.perf_counter() - start


def _collect_timings(
    header: str,
    iteration_label: str,
    rs_benchmark: Callable[[np.ndarray], float],
    py_benchmark: Callable[[np.ndarray], float],
) -> Tuple[List[float], List[float]]:
    """Run both benchmarks for every size in ``ITERATIONS`` and report progress.

    Args:
        header: Line printed once before the runs start.
        iteration_label: Scenario description inserted in the per-size message.
        rs_benchmark: cftime_rs benchmark; takes the input array and returns
            seconds.
        py_benchmark: cftime benchmark with the same contract.

    Returns:
        ``(cftime_rs_times, cftime_times)``, each with one duration in
        seconds per entry of ``ITERATIONS``.
    """
    cftime_rs_times: List[float] = []
    cftime_times: List[float] = []
    print(header)
    for n in ITERATIONS:
        print(f"Number of Iterations {iteration_label}: {n}")
        arr = np.arange(n)
        cftime_rs_duration = rs_benchmark(arr)
        cftime_rs_times.append(cftime_rs_duration)
        cftime_duration = py_benchmark(arr)
        cftime_times.append(cftime_duration)
        print(f"cftime_rs : {cftime_rs_duration:.4f} seconds")
        print(f"cftime : {cftime_duration:.4f} seconds")
    return cftime_rs_times, cftime_times


def get_data_with_str() -> Tuple[List[float], List[float]]:
    """Collect timings for decoding, calling ``str()`` and encoding."""
    return _collect_timings(
        "cftime_rs_benchmark_with_str",
        "with str",
        cftime_rs_benchmark,
        cftime_benchmark,
    )


def get_data_without_str() -> Tuple[List[float], List[float]]:
    """Collect timings for decoding and encoding only."""
    return _collect_timings(
        "cftime_rs_benchmark_without_str",
        "without str",
        cftime_rs_benchmark_without_str,
        cftime_benchmark_without_str,
    )


def get_data_pydatetime_without_str() -> Tuple[List[float], List[float]]:
    """Collect timings for decoding/encoding through Python datetimes."""
    # Must use the pydatetime benchmarks; the plain without-str pair would
    # just repeat the previous scenario.
    return _collect_timings(
        "cftime_rs_benchmark_pydatetime_without_str",
        "pydatetime without str",
        cftime_rs_benchmark_pydatetime_without_str,
        cftime_benchmark_pydatetime_without_str,
    )


def main() -> None:
    """Run the three benchmark scenarios and write one chart for each."""
    cftime_rs_times, cftime_times = get_data_with_str()
    performance_comparison_chart(
        cftime_rs_times,
        cftime_times,
        title="Performance Comparison: cftime_rs vs. cftime.\nDecoding, calling __str__ and encoding. \nLower is better",
        output_file="performance_comparison_with_str.png",
    )

    cftime_rs_times, cftime_times = get_data_without_str()
    performance_comparison_chart(
        cftime_rs_times,
        cftime_times,
        title="Performance Comparison: cftime_rs vs. cftime.\nDecoding and encoding. \nLower is better",
        output_file="performance_comparison_without_str.png",
    )

    cftime_rs_times, cftime_times = get_data_pydatetime_without_str()
    performance_comparison_chart(
        cftime_rs_times,
        cftime_times,
        title="Performance Comparison: cftime_rs vs. cftime.\nDecoding and encoding by using python datetime \nLower is better",
        output_file="performance_comparison_pydatetime_without_str.png",
    )


if __name__ == "__main__":
    main()