#!/usr/bin/env python
#
"""Plot librdkafka 0038_performance results per Kafka broker version.

Usage: <script> <outfile-prefix> [<report.json> ...]

Each report file is a JSON document. The 'producer' and 'consumer'
entries found under tests -> 0038_performance -> report are grouped by
the report's 'broker_version', and one PNG per (client type, counter)
is written to '<outfile-prefix>_<type>_<counter>.png'.
"""

import json
import sys
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np

# Versions are normalised to this many dotted components, each weighted
# by a power of _SEMVER_BASE, so that ordering is positional.
_SEMVER_PARTS = 4
_SEMVER_BASE = 1000

_PERF_TEST = '0038_performance'
_CLIENT_TYPES = ('producer', 'consumer')
_PERF_NAMES = ('mb_per_sec', 'records_per_sec')


def semver2int(semver):
    """Convert a dotted version string to an integer usable as a sort key.

    'trunk' is treated as '0.10.0.0'. The version is right-padded with
    zeros to four components (so '0.9.0' equals '0.9.0.0'); components
    beyond the fourth are ignored. Each component is assumed to be
    smaller than 1000.

    Strings that are not dotted integers (e.g. 'unknown') return -1 so
    that they sort before every real version instead of raising.
    """
    if semver == 'trunk':
        semver = '0.10.0.0'

    try:
        parts = [int(p) for p in semver.split('.')]
    except ValueError:
        return -1

    # Normalise to exactly _SEMVER_PARTS components.
    parts = (parts + [0] * _SEMVER_PARTS)[:_SEMVER_PARTS]

    vi = 0
    for part in parts:
        # Most significant component first: shift, then add.
        vi = vi * _SEMVER_BASE + part
    return vi


def get_perf_data(perfname, stats):
    """ Return [labels,x,y,errs] for counter `perfname` as numpy arrays.

    stats:  sequence of (broker version, counters) pairs, where
            counters[perfname] is a number
    labels: broker versions, in ascending version order
    x:      0 .. len(labels)-1 (one x position per version)
    y:      average of the perfname counter per version
    errs:   per version, the maximum sample minus the average
    """
    ver = defaultdict(list)

    # Accumulate values per version
    for sample in stats:
        ver[str(sample[0])].append(sample[1][perfname])
    print('%s is %s' % (perfname, ver))

    labels0 = sorted(ver.keys(), key=semver2int)
    y0 = []
    errs0 = []

    # Maintain order by using labels0
    for v in labels0:
        values = ver[v]
        avg = sum(values) / float(len(values))
        y0.append(avg)
        errs0.append(max(values) - avg)

    labels = np.array(labels0)
    y1 = np.array(y0)
    x1 = np.arange(len(labels))
    errs = np.array(errs0)
    return [labels, x1, y1, errs]


def plot(description, name, stats, perfname, outfile=None):
    """Plot counter `perfname` against broker version with error bars.

    description, name: used, together with perfname, for the plot title
    stats:             samples, as accepted by get_perf_data()
    outfile:           file to save the plot to; when None the plot is
                       shown interactively instead
    """
    labels, x, y, errs = get_perf_data(perfname, stats)

    # Draw on a fresh figure: pyplot otherwise keeps drawing on the
    # current one, so successive calls would pile their curves up in
    # the same image.
    fig = plt.figure()
    plt.title('%s: %s %s' % (description, name, perfname))
    plt.xlabel('Kafka version')
    plt.ylabel(perfname)
    plt.errorbar(x, y, yerr=errs, alpha=0.5)
    plt.xticks(x, labels, rotation='vertical')
    plt.margins(0.2)
    plt.subplots_adjust(bottom=0.2)

    if outfile is None:
        plt.show()
    else:
        plt.savefig(outfile, bbox_inches='tight')
        # Release the figure once it has been written.
        plt.close(fig)
    return


def _load_reports(paths):
    """Parse each JSON report file in `paths` and return the documents."""
    reports = []
    for rf in paths:
        with open(rf) as f:
            reports.append(json.load(f))
    return reports


def _extract_stats(reports):
    """Group (broker version, counters) samples by client type."""
    stats = defaultdict(list)

    for rep in reports:
        # Use dict defaults at every level so that a report without the
        # performance test is skipped rather than raising.
        perfs = rep.get('tests', {}).get(_PERF_TEST, {}).get('report', None)
        if perfs is None:
            continue

        for perf in perfs:
            for n in _CLIENT_TYPES:
                o = perf.get(n, None)
                if o is None:
                    print('no %s in %s' % (n, perf))
                    continue

                stats[n].append((rep.get('broker_version', 'unknown'), o))

    return stats


def _main(argv):
    """Load the reports named in `argv` and write one plot per counter."""
    if len(argv) < 2:
        sys.stderr.write('Usage: %s <outfile-prefix> [<report.json> ...]\n'
                         % argv[0])
        return 1

    outfile = argv[1]
    reports = _load_reports(argv[2:])
    stats = _extract_stats(reports)

    for t in _CLIENT_TYPES:
        for perfname in _PERF_NAMES:
            plot('librdkafka 0038_performance test: %s (%d samples)' %
                 (outfile, len(reports)),
                 t, stats[t], perfname,
                 outfile='%s_%s_%s.png' % (outfile, t, perfname))
    return 0


if __name__ == '__main__':
    sys.exit(_main(sys.argv))