diff -pruN a/benchtests/scripts/compare_bench.py b/benchtests/scripts/compare_bench.py
--- a/benchtests/scripts/compare_bench.py	1970-01-01 05:30:00.000000000 +0530
+++ b/benchtests/scripts/compare_bench.py	2015-05-07 15:32:41.843584024 +0530
@@ -0,0 +1,184 @@
+#!/usr/bin/python
+# Copyright (C) 2015 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+"""Compare two benchmark results
+
+Given two benchmark result files and a threshold, this script compares the
+benchmark results and flags differences in performance beyond a given
+threshold.
+"""
+import sys
+import os
+import pylab
+import import_bench as bench
+
+def do_compare(func, var, tl1, tl2, par, threshold):
+    """Compare one of the aggregate measurements
+
+    Helper function to compare one of the aggregate measurements of a function
+    variant.
+
+    Args:
+        func: Function name
+        var: Function variant name
+        tl1: The first timings list
+        tl2: The second timings list
+        par: The aggregate to measure
+        threshold: The threshold for differences, beyond which the script should
+        print a warning.
+    """
+    d = abs(tl2[par] - tl1[par]) * 100 / tl1[str(par)]
+    if d > threshold:
+        if tl1[par] > tl2[par]:
+            ind = '+++'
+        else:
+            ind = '---'
+        print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g' %
+                (ind, func, var, par, d, tl1[par], tl2[par]))
+
+
+def compare_runs(pts1, pts2, threshold):
+    """Compare two benchmark runs
+
+    Args:
+        pts1: Timing data from first machine
+        pts2: Timing data from second machine
+    """
+
+    # XXX We assume that the two benchmarks have identical functions and
+    # variants. We cannot compare two benchmarks that may have different
+    # functions or variants. Maybe that is something for the future.
+    for func in pts1['functions'].keys():
+        for var in pts1['functions'][func].keys():
+            tl1 = pts1['functions'][func][var]
+            tl2 = pts2['functions'][func][var]
+
+            # Compare the consolidated numbers
+            # do_compare(func, var, tl1, tl2, 'max', threshold)
+            do_compare(func, var, tl1, tl2, 'min', threshold)
+            do_compare(func, var, tl1, tl2, 'mean', threshold)
+
+            # Skip over to the next variant or function if there is no detailed
+            # timing info for the function variant.
+            if 'timings' not in pts1['functions'][func][var].keys() or \
+                'timings' not in pts2['functions'][func][var].keys():
+                continue
+
+            # If two lists do not have the same length then it is likely that
+            # the performance characteristics of the function have changed.
+            # XXX: It is also likely that there was some measurement that
+            # strayed outside the usual range. Such outliers should not
+            # happen on an idle machine with identical hardware and
+            # configuration, but ideal environments are hard to come by.
+            if len(tl1['timings']) != len(tl2['timings']):
+                print('* %s(%s): Timing characteristics changed' %
+                        (func, var))
+                print('\tBefore: [%s]' %
+                        ', '.join([str(x) for x in tl1['timings']]))
+                print('\tAfter: [%s]' %
+                        ', '.join([str(x) for x in tl2['timings']]))
+                continue
+
+            # Collect numbers whose differences cross the threshold we have
+            # set.
+            issues = [(x, y) for x, y in zip(tl1['timings'], tl2['timings']) \
+                        if abs(y - x) * 100 / x > threshold]
+
+            # Now print them.
+            for t1, t2 in issues:
+                d = abs(t2 - t1) * 100 / t1
+                if t2 > t1:
+                    ind = '-'
+                else:
+                    ind = '+'
+
+                print("%s %s(%s): (%.2lf%%) from %g to %g" %
+                        (ind, func, var, d, t1, t2))
+
+
+def plot_graphs(bench1, bench2):
+    """Plot graphs for functions
+
+    Make scatter plots for the functions and their variants.
+
+    Args:
+        bench1: Set of points from the first machine
+        bench2: Set of points from the second machine.
+    """
+    for func in bench1['functions'].keys():
+        for var in bench1['functions'][func].keys():
+            # No point trying to print a graph if there are no detailed
+            # timings.
+            if u'timings' not in bench1['functions'][func][var].keys():
+                print('Skipping graph for %s(%s)' % (func, var))
+                continue
+
+            pylab.clf()
+            pylab.ylabel('Time (cycles)')
+
+            # First set of points
+            length = len(bench1['functions'][func][var]['timings'])
+            X = [float(x) for x in range(length)]
+            lines = pylab.scatter(X, bench1['functions'][func][var]['timings'],
+                    1.5 + 100 / length)
+            pylab.setp(lines, 'color', 'r')
+
+            # Second set of points
+            length = len(bench2['functions'][func][var]['timings'])
+            X = [float(x) for x in range(length)]
+            lines = pylab.scatter(X, bench2['functions'][func][var]['timings'],
+                    1.5 + 100 / length)
+            pylab.setp(lines, 'color', 'g')
+
+            if var:
+                filename = "%s-%s.png" % (func, var)
+            else:
+                filename = "%s.png" % func
+            print('Writing out %s' % filename)
+            pylab.savefig(filename)
+
+
+def main(args):
+    """Program Entry Point
+
+    Take two benchmark output files and compare their timings.
+    """
+    if len(args) > 4 or len(args) < 3:
+        print('Usage: %s <schema> <file1> <file2> [threshold in %%]' % sys.argv[0])
+        sys.exit(os.EX_USAGE)
+
+    bench1 = bench.parse_bench(args[1], args[0])
+    bench2 = bench.parse_bench(args[2], args[0])
+    if len(args) == 4:
+        threshold = float(args[3])
+    else:
+        threshold = 10.0
+
+    if (bench1['timing_type'] != bench2['timing_type']):
+        print('Cannot compare benchmark outputs: timing types are different')
+        return
+
+    plot_graphs(bench1, bench2)
+
+    bench.compress_timings(bench1)
+    bench.compress_timings(bench2)
+
+    compare_runs(bench1, bench2, threshold)
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff -pruN a/benchtests/scripts/import_bench.py b/benchtests/scripts/import_bench.py
--- a/benchtests/scripts/import_bench.py	1970-01-01 05:30:00.000000000 +0530
+++ b/benchtests/scripts/import_bench.py	2015-05-07 15:32:41.844584032 +0530
@@ -0,0 +1,141 @@
+#!/usr/bin/python
+# Copyright (C) 2015 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+"""Functions to import benchmark data and process it"""
+
+import json
+try:
+    import jsonschema as validator
+except ImportError:
+    print('Could not find jsonschema module.')
+    raise
+
+
+def mean(lst):
+    """Compute and return mean of numbers in a list
+
+    The numpy average function has horrible performance, so implement our
+    own mean function.
+
+    Args:
+        lst: The list of numbers to average.
+    Return:
+        The mean of members in the list.
+    """
+    return sum(lst) / len(lst)
+
+
+def split_list(bench, func, var):
+    """ Split the list into a smaller set of more distinct points
+
+    Group together points such that the difference between the smallest
+    point and the mean is less than 1/3rd of the mean. This means that
+    the mean is at most 1.5x the smallest member of that group.
+
+    mean - xmin < mean / 3
+    i.e. 2 * mean / 3 < xmin
+    i.e. mean < 3 * xmin / 2
+
+    For an evenly distributed group, the largest member will be less than
+    twice the smallest member of the group.
+    Derivation:
+
+    An evenly distributed series would be xmin, xmin + d, xmin + 2d...
+
+    mean = (2 * n * xmin + n * (n - 1) * d) / (2 * n)
+    and max element is xmin + (n - 1) * d
+
+    Now, mean < 3 * xmin / 2
+
+    3 * xmin > 2 * mean
+    3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
+    3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
+    n * xmin > n * (n - 1) * d
+    xmin > (n - 1) * d
+    2 * xmin > xmin + (n - 1) * d
+    2 * xmin > xmax
+
+    Hence, proved.
+
+    Similarly, it is trivial to prove that for a similar aggregation by using
+    the maximum element, the maximum element in the group must be at most 4/3
+    times the mean.
+
+    Args:
+        bench: The benchmark object
+        func: The function name
+        var: The function variant name
+    """
+    means = []
+    lst = bench['functions'][func][var]['timings']
+    last = len(lst) - 1
+    while lst:
+        for i in range(last + 1):
+            avg = mean(lst[i:])
+            if avg > 0.75 * lst[last]:
+                means.insert(0, avg)
+                lst = lst[:i]
+                last = i - 1
+                break
+    bench['functions'][func][var]['timings'] = means
+
+
+def do_for_all_timings(bench, callback):
+    """Call a function for all timing objects for each function and its
+    variants.
+
+    Args:
+        bench: The benchmark object
+        callback: The callback function
+    """
+    for func in bench['functions'].keys():
+        for k in bench['functions'][func].keys():
+            if 'timings' not in bench['functions'][func][k].keys():
+                continue
+
+            callback(bench, func, k)
+
+
+def compress_timings(points):
+    """Club points with close enough values into a single mean value
+
+    See split_list for details on how the clubbing is done.
+
+    Args:
+        points: The set of points.
+    """
+    do_for_all_timings(points, split_list)
+
+
+def parse_bench(filename, schema_filename):
+    """Parse the input file
+
+    Parse and validate the json file containing the benchmark outputs. Return
+    the resulting object.
+    Args:
+        filename: Name of the benchmark output file.
+    Return:
+        The bench dictionary.
+    """
+    with open(schema_filename, 'r') as schemafile:
+        schema = json.load(schemafile)
+        with open(filename, 'r') as benchfile:
+            bench = json.load(benchfile)
+            validator.validate(bench, schema)
+            do_for_all_timings(bench, lambda b, f, v:
+                    b['functions'][f][v]['timings'].sort())
+            return bench
diff -pruN a/benchtests/scripts/validate_benchout.py b/benchtests/scripts/validate_benchout.py
--- a/benchtests/scripts/validate_benchout.py	2015-05-07 11:58:40.000000000 +0530
+++ b/benchtests/scripts/validate_benchout.py	2015-05-07 15:32:41.844584032 +0530
@@ -27,37 +27,26 @@ import sys
 import os
 try:
-    import jsonschema
+    import import_bench as bench
 except ImportError:
-    print('Could not find jsonschema module. Output not validated.')
+    print('Import Error: Output will not be validated.')
     # Return success because we don't want the bench target to fail just
     # because the jsonschema module was not found.
     sys.exit(os.EX_OK)
 
 
-def validate_bench(benchfile, schemafile):
-    """Validate benchmark file
-
-    Validate a benchmark output file against a JSON schema.
+def print_and_exit(message, exitcode):
+    """Prints message to stderr and returns the exit code.
 
     Args:
-        benchfile: The file name of the bench.out file.
-        schemafile: The file name of the JSON schema file to validate
-        bench.out against.
+        message: The message to print
+        exitcode: The exit code to return
 
-    Exceptions:
-        jsonschema.ValidationError: When bench.out is not valid
-        jsonschema.SchemaError: When the JSON schema is not valid
-        IOError: If any of the files are not found.
+    Returns:
+        The passed exit code
     """
-    with open(benchfile, 'r') as bfile:
-        with open(schemafile, 'r') as sfile:
-            bench = json.load(bfile)
-            schema = json.load(sfile)
-            jsonschema.validate(bench, schema)
-
-    # If we reach here, we're all good.
-    print("Benchmark output in %s is valid." % benchfile)
+    print(message, file=sys.stderr)
+    return exitcode
 
 
 def main(args):
@@ -73,11 +62,23 @@ def main(args):
         Exceptions thrown by validate_bench
     """
     if len(args) != 2:
-        print("Usage: %s <bench.out file> <schema file>" % sys.argv[0],
-              file=sys.stderr)
-        return os.EX_USAGE
+        return print_and_exit("Usage: %s <bench.out file> <schema file>"
+                              % sys.argv[0], os.EX_USAGE)
+
+    try:
+        bench.parse_bench(args[0], args[1])
+    except IOError as e:
+        return print_and_exit("IOError(%d): %s" % (e.errno, e.strerror),
+                              os.EX_OSFILE)
+
+    except bench.validator.ValidationError as e:
+        return print_and_exit("Invalid benchmark output: %s" % e.message,
+                              os.EX_DATAERR)
+
+    except bench.validator.SchemaError as e:
+        return print_and_exit("Invalid schema: %s" % e.message, os.EX_DATAERR)
 
-    validate_bench(args[0], args[1])
+    print("Benchmark output in %s is valid." % args[0])
     return os.EX_OK
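
As a usage sketch (not something the patch itself adds): the snippet below shows one way the new import_bench module could be driven from another script. The file names bench.out and benchout.schema.json are placeholders, not names taken from this patch; the calls (parse_bench, compress_timings, mean) are the functions added above, and the loop reads only the keys that compare_bench.py itself reads ('functions', the per-function variants, and their 'timings').

    #!/usr/bin/python
    """Hypothetical driver: load one benchmark result, compress its timings
    and print a per-variant summary."""
    import import_bench as bench

    # Placeholder file names; substitute the real benchmark output and the
    # JSON schema it should be validated against.
    result = bench.parse_bench('bench.out', 'benchout.schema.json')

    # Club nearby timings into their means, as compare_bench.py does before
    # comparing two runs.
    bench.compress_timings(result)

    for func in result['functions'].keys():
        for var in result['functions'][func].keys():
            props = result['functions'][func][var]
            if 'timings' not in props.keys() or not props['timings']:
                continue
            print('%s(%s): %d timing groups, overall mean %g'
                  % (func, var, len(props['timings']),
                     bench.mean(props['timings'])))

compare_bench.py itself would then be invoked along the lines of

    python compare_bench.py benchout.schema.json old/bench.out new/bench.out 10

where old/bench.out and new/bench.out stand for the two result files and the trailing argument is the optional threshold percentage (10.0 is used when it is omitted). As far as these scripts read it, the benchmark output is a JSON object with a top-level 'timing_type' and a 'functions' map keyed by function name and variant, each variant carrying 'mean', 'min' and optionally a 'timings' list.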