import json import sys import math import matplotlib import matplotlib.pyplot as plt import numpy as np from matplotlib.ticker import MultipleLocator #matplotlib.use('tkagg') def median(lst): n = len(lst) s = sorted(lst) return (sum(s[n//2-1:n//2+1])/2.0, s[n//2])[n % 2] if n else None def graph_blocks_vs_time(dataset): for data in dataset.values(): times = [x[1] for x in data] block_counts = [x[2] + x[3] + x[4] + x[5] for x in data] plt.plot(block_counts, times, 'o') plt.show() #raise NotImplementedError def graph_funcs_vs_time(dataset): func_count = [] total_times = [] for data in dataset.values(): func_count.append(len(data)) total_time = sum([x[1] for x in data]) total_times.append(total_time) plt.plot(func_count, total_times, 'o') plt.show() def get_data(filenames): dataset = {} for filename in filenames: with open(filename) as f: data = json.load(f) dataset[filename] = data return dataset def get_aggregate_data(dataset): aggregate_data = [] #print(len(dataset.keys())) for name,data in dataset.items(): name = name.split('/')[-1].split(".")[0] times = [x[2] + x[3] + x[4] + x[5] + x[6] for x in data] average_t = sum(times) / len(times) median_t = median(times) max_t = max(times) min_t = min(times) total_t = sum(times) num_funcs = len(times) N = len(times) // 100 print("top 1% = ", N, " functions out of", len(times)) top_n = sorted(data, key=lambda x: (x[2] + x[3] + x[4] + x[5] + x[6]), reverse = True)[:N] top_percent = sum([x[2] + x[3] + x[4] + x[5] + x[6] for x in top_n]) / total_t top_percent_medians = median([x[1] for x in top_n]) cfg_percent = sum([x[2] for x in data]) / total_t stack_percent = sum([x[3] for x in data]) / total_t heap_percent = sum([x[4] for x in data]) / total_t call_percent = sum([x[5] for x in data]) / total_t locals_percent = sum([x[6] for x in data]) / total_t print(top_n, top_percent) median_blocks = median([x[1] for x in data]) aggregate_data.append( (name,average_t,median_t,max_t,min_t,num_funcs,total_t,top_percent, cfg_percent, stack_percent, heap_percent, call_percent, locals_percent, median_blocks, top_percent_medians)) return aggregate_data def generate_summary_table(aggregate_data): names_row = " &" average_row = "Average Function Validation Time (s) & " median_row = "Median Function Validation Time (s) & " max_row = "Max Function Validation Time (s) & " min_row = "Min Function Validation Time (s) & " num_funcs_row = "\\# Functions in Module & " total_row = "Total Validation Time (s) & " #for name,average_t,median_t,max_t,min_t in aggregate_data: names_row += " & ".join([str(d[0]) for d in aggregate_data]) + "\\\\" average_row += " & ".join([str(round(d[1],2)) for d in aggregate_data]) + "\\\\" median_row += " & ".join([str(round(d[2],2)) for d in aggregate_data]) + "\\\\" max_row += " & ".join([str(round(d[3],2)) for d in aggregate_data]) + "\\\\" min_row += " & ".join([str(round(d[4],2)) for d in aggregate_data]) + "\\\\" num_funcs_row += " & ".join([str(round(d[5],2)) for d in aggregate_data]) + "\\\\" total_row += " & ".join([str(round(d[6],2)) for d in aggregate_data]) + "\\\\" table_str = "\n".join([names_row, average_row, median_row, max_row, min_row, num_funcs_row, total_row]) + "\n" return table_str #print out some quick statistics def summarise_data(aggregate_data): medians = [round(d[2],2) for d in aggregate_data] maxes = [round(d[3],2) for d in aggregate_data] num_funcs = [round(d[5],2) for d in aggregate_data] times = [round(d[6],2) for d in aggregate_data] one_percent = [d[7] for d in aggregate_data] cfg_percent = [d[8] for d in aggregate_data] stack_percent = [d[9] for d in aggregate_data] heap_percent = [d[10] for d in aggregate_data] call_percent = [d[11] for d in aggregate_data] locals_percent = [d[12] for d in aggregate_data] median_blocks = [d[13] for d in aggregate_data] top_percent_median_blocks = [d[14] for d in aggregate_data] #print(averages) #medians = [round(d[2],2) for d in aggregate_data] #print(medians) print(f"Number of binaries = {len(times)}") print(f"Median function validation time: {median(medians)}") num_above_min = len([time for time in maxes if time > 60.0]) print(f"Number of binariess with a function that took > 1 minute to validate: {num_above_min}") print(f"Top 1% of functions account for (on average) {sum(one_percent) / len(one_percent) * 100}% of total execution time") print(f"{sum(cfg_percent) / len(one_percent) * 100}% of verification time spent making CFGs") print(f"{sum(stack_percent) / len(stack_percent) * 100}% of verification time spent checking stack") print(f"{sum(heap_percent) / len(heap_percent) * 100}% of verification time spent checking heap") print(f"{sum(call_percent) / len(call_percent) * 100}% of verification time spent checking calls") print(f"{sum(locals_percent) / len(locals_percent) * 100}% of verification time spent checking locals") print(f"Average Time = {sum(times) / len(times)}") #print(f"Average Max function Time = {sum(maxes) / len(maxes)}") print(f"Min Validation Time: {min(times)}") print(f"Max Validation Time: {max(times)}") print(f"Median Validation Time = {median(times)}") print(f"Min Functions: {min(num_funcs)}") print(f"Max Functions: {max(num_funcs)}") print(f"Median Functions: {median(num_funcs)}") print(f"Median of Median of blocks in modules: {median(median_blocks)}") print(f"Median of Median of blocks in top 1% of functions in modules: {median(top_percent_median_blocks)}") fig, ax = plt.subplots() ax.xaxis.set_minor_locator(MultipleLocator(5)) plt.xlabel('Module Validation Time (s)') plt.ylabel('# of Modules') plt.hist(times, bins= math.ceil((max(times) - min(times))/5) ) print("Histogram Created") plt.savefig("performance.pdf") print("Histogram Saved") def run(filenames): dataset = get_data(filenames) #graph_blocks_vs_time(dataset) #graph_funcs_vs_time(dataset) aggregate_data = get_aggregate_data(dataset) summarise_data(aggregate_data) table = generate_summary_table(aggregate_data) print(table) def main(): filename = sys.argv[1] print(sys.argv) filenames = sys.argv[1:] run(filenames) if __name__ == "__main__": main()