diff -r cd083843b67a pypy/interpreter/pycode.py --- a/pypy/interpreter/pycode.py Mon Dec 20 17:17:45 2010 +0100 +++ b/pypy/interpreter/pycode.py Wed Jan 05 16:14:35 2011 -0500 @@ -14,6 +14,7 @@ from pypy.interpreter.astcompiler.consts import (CO_OPTIMIZED, CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS, CO_NESTED, CO_GENERATOR, CO_CONTAINSGLOBALS) +from pypy.interpreter.pytraceback import offset2lineno from pypy.rlib.rarithmetic import intmask from pypy.rlib.debug import make_sure_not_resized from pypy.rlib import jit @@ -81,6 +82,7 @@ self.hidden_applevel = hidden_applevel self.magic = magic self._signature = cpython_code_signature(self) + self._cached_source = None self._initialize() def _initialize(self): @@ -403,3 +405,25 @@ def repr(self, space): return space.wrap(self.get_repr()) repr.unwrap_spec = ['self', ObjSpace] + + def get_linenum_for_offset(self, offset): + # Given a bytecode offset, return a 1-based index into the lines of the + # source code + return offset2lineno(self, offset) + + def _ensure_source(self): + # Lazily grab the source lines into self._cached_source (or raise + # an IOError) + if not self._cached_source: + f = open(self.co_filename, 'r') + source = [line.rstrip() for line in f.readlines()] + f.close() + self._cached_source = source + + def get_source_text(self, linenum): + # Given a 1-based index, get the corresponding line of source code (or + # raise an IOError) + self._ensure_source() + return self._cached_source[linenum - 1] + + diff -r cd083843b67a pypy/objspace/flow/model.py --- a/pypy/objspace/flow/model.py Mon Dec 20 17:17:45 2010 +0100 +++ b/pypy/objspace/flow/model.py Wed Jan 05 16:14:35 2011 -0500 @@ -31,6 +31,120 @@ __metaclass__ = type +class SourceLoc(object): + # A srcloc is a specific location within the RPython source code, + # intended for human display + __slots__ = ('code', # code object + 'linenum' # 1-based index, as displayed to a user + ) + def __init__(self, code, linenum): + self.code = code + self.linenum = linenum + + def get_text(self): + # Get the actual source text of this line + return self.code.get_source_text(self.linenum) + + def __eq__(self, other): + return self.code == other.code and self.linenum == other.linenum + + def __ne__(self, other): + if other: + return self.code != other.code or self.linenum != other.linenum + else: + return True + +class CodeLoc(object): + # A codeloc is a specific location within the RPython bytecode + __slots__ = ('code', # code object + 'offset' # int index into bytecode, or -1 + ) + + def __init__(self, code, offset): + self.code = code + self.offset = offset + + def __str__(self): + if self.offset >= 0: + return "%s@%d" % (self.code.co_name, self.offset) + else: + return "" + + def __ne__(self, other): + if other: + return self.code != other.code or self.offset != other.offset + else: + return True + + def __cmp__(self, other): + # Partial ordering, for those locations that have an offset: + if other: + if self.offset >= 0 and other.offset >= 0: + return self.offset - other.offset + return 0 + + def get_source_loc(self): + # Convert to a SourceLoc: + return SourceLoc(self.code, self.code.get_linenum_for_offset(self.offset)) + +class OperationLoc(object): + # An oploc is the location within the RPython source code of a given + # operation + # + # This is a list consisting of CodeLoc instances, some of which may be None + # + # For the simple case, this is list of length 1 with a single CodeLoc + # + # For an operation inside an inlined callsite, we have a list of length 2: + # [codeloc of callsite, + # codeloc of operation within inlined body] + # + # For more interesting inlined cases, we have a chain of source locations: + # [codeloc of callsite, + # codeloc of inner callsite, + # ... , + # codeloc of innermost inlined callsite, + # codeloc of operation within inlined body] + # + + __slots__ = ('codelocs', ) + + def __init__(self, codelocs): + self.codelocs = codelocs + + def __str__(self): + return '[' + ' > '.join(str(codeloc) for codeloc in self.codelocs) + ']' + + def __cmp__(self, other): + return cmp(self.codelocs, other.codelocs) + +def block_comparator(blk0, blk1): + ''' + Sort function for blocks, putting them in an ordering that attempts to + maximize readability of the generated C code + ''' + # print 'comparing %r and %r' % (blk0, blk1) + # Put the start/end block at the top/bottom: + if blk0.isstartblock: + return -1 + + if blk1.isstartblock: + return 1 + + # Order blocks by the offset, where present: + if blk0.operations: + if blk1.operations: + return cmp(blk0.operations[0].oploc, blk1.operations[0].oploc) + else: + return -1 + else: + if blk1.operations: + return 1 + else: + return 0 + +def edge_comparator(edge0, edge1): + return block_comparator(edge0.target, edge1.target) class FunctionGraph(object): __slots__ = ['startblock', 'returnblock', 'exceptblock', '__dict__'] @@ -94,6 +208,21 @@ seen[block] = True stack += block.exits[::-1] + def iterblocks_by_source(self): + # Try to preserve logical source ordering in the blocks + block = self.startblock + yield block + seen = {block: True} + stack = list(block.exits[::-1]) + stack.sort(edge_comparator) + while stack: + block = stack.pop().target + if block not in seen: + yield block + seen[block] = True + stack += block.exits[::-1] + stack.sort(edge_comparator) + def iterlinks(self): block = self.startblock seen = {block: True} @@ -183,14 +312,14 @@ self.exits = [] # list of Link(s) def at(self): - if self.operations and self.operations[0].offset >= 0: - return "@%d" % self.operations[0].offset + if self.operations: + return str(self.operations[0].oploc) else: return "" def __str__(self): if self.operations: - txt = "block@%d" % self.operations[0].offset + txt = "block%s" % self.operations[0].oploc else: if (not self.exits) and len(self.inputargs) == 1: txt = "return block" @@ -245,6 +374,21 @@ from pypy.translator.tool.graphpage import try_show try_show(self) + def isreturnblock(self): + return (not self.operations) and (not self.exits) and len(self.inputargs) == 1 + + def get_base_label(self, blocknum): + # Generate a more friendly C label for this block + if self.operations: + txt = "block" + elif (not self.exits) and len(self.inputargs) == 1: + txt = "return_block" + elif (not self.exits) and len(self.inputargs) == 2: + txt = "raise_block" + else: + txt = "codeless_block" + return '%s%d' % (txt, blocknum) + class Variable(object): __slots__ = ["_name", "_nr", "concretetype"] @@ -331,13 +475,15 @@ class SpaceOperation(object): - __slots__ = "opname args result offset".split() + __slots__ = "opname args result oploc".split() - def __init__(self, opname, args, result, offset=-1): + def __init__(self, opname, args, result, oploc=None): self.opname = intern(opname) # operation name self.args = list(args) # mixed list of var/const self.result = result # either Variable or Constant instance - self.offset = offset # offset in code string + if oploc is None: + oploc = OperationLoc([None]) + self.oploc = oploc def __eq__(self, other): return (self.__class__ is other.__class__ and @@ -352,8 +498,9 @@ return hash((self.opname,tuple(self.args),self.result)) def __repr__(self): - return "%r = %s(%s)" % (self.result, self.opname, - ", ".join(map(repr, self.args))) + return "%r = %s(%s) (%s)" % (self.result, self.opname, + ", ".join(map(repr, self.args)), + self.oploc) class Atom(object): def __init__(self, name): @@ -448,8 +595,7 @@ for op in oplist: copyop = SpaceOperation(op.opname, [copyvar(v) for v in op.args], - copyvar(op.result), op.offset) - #copyop.offset = op.offset + copyvar(op.result), op.oploc) result.append(copyop) return result newblock.operations = copyoplist(block.operations) diff -r cd083843b67a pypy/objspace/flow/objspace.py --- a/pypy/objspace/flow/objspace.py Mon Dec 20 17:17:45 2010 +0100 +++ b/pypy/objspace/flow/objspace.py Wed Jan 05 16:14:35 2011 -0500 @@ -310,7 +310,9 @@ def do_operation(self, name, *args_w): spaceop = SpaceOperation(name, args_w, Variable()) if hasattr(self, 'executioncontext'): # not here during bootstrapping - spaceop.offset = self.executioncontext.crnt_offset + codeloc = CodeLoc(self.executioncontext.code, + self.executioncontext.crnt_offset) + spaceop.oploc = OperationLoc([codeloc]) self.executioncontext.recorder.append(spaceop) return spaceop.result diff -r cd083843b67a pypy/objspace/flow/test/test_model.py --- a/pypy/objspace/flow/test/test_model.py Mon Dec 20 17:17:45 2010 +0100 +++ b/pypy/objspace/flow/test/test_model.py Wed Jan 05 16:14:35 2011 -0500 @@ -132,3 +132,25 @@ assert v2.renamed assert v2.name.startswith("foobar_") and v2.name != v.name assert v2.name.split('_', 1)[1].isdigit() + +def test_source_locations(): + # Invent some random offsets into the code: + co = sample_function.__code__ + codelocA = CodeLoc(co, 42) + codelocB = CodeLoc(co, 87) + + assert str(codelocA) == 'sample_function@42' + assert str(codelocB) == 'sample_function@87' + + assert cmp(codelocA, codelocB) < 0 + assert cmp(codelocB, codelocA) > 0 + + oplocA = OperationLoc([codelocA]) + oplocB = OperationLoc([codelocB]) + + assert str(oplocA) == '[sample_function@42]' + assert str(oplocB) == '[sample_function@87]' + + assert cmp(oplocA, oplocB) < 0 + assert cmp(oplocB, oplocA) > 0 + diff -r cd083843b67a pypy/rpython/rtyper.py --- a/pypy/rpython/rtyper.py Mon Dec 20 17:17:45 2010 +0100 +++ b/pypy/rpython/rtyper.py Wed Jan 05 16:14:35 2011 -0500 @@ -800,7 +800,7 @@ return vars def genop(self, opname, args_v, resulttype=None): - return self.llops.genop(opname, args_v, resulttype) + return self.llops.genop(opname, args_v, resulttype, self.spaceop.oploc) def gendirectcall(self, ll_function, *args_v): return self.llops.gendirectcall(ll_function, *args_v) @@ -935,7 +935,7 @@ v.concretetype)) return v - def genop(self, opname, args_v, resulttype=None): + def genop(self, opname, args_v, resulttype=None, oploc=None): try: for v in args_v: v.concretetype @@ -944,7 +944,7 @@ " and pass its result to genop()," " never hop.args_v directly.") vresult = Variable() - self.append(SpaceOperation(opname, args_v, vresult)) + self.append(SpaceOperation(opname, args_v, vresult, oploc)) if resulttype is None: vresult.concretetype = Void return None diff -r cd083843b67a pypy/translator/backendopt/inline.py --- a/pypy/translator/backendopt/inline.py Mon Dec 20 17:17:45 2010 +0100 +++ b/pypy/translator/backendopt/inline.py Wed Jan 05 16:14:35 2011 -0500 @@ -4,6 +4,7 @@ from pypy.translator.unsimplify import copyvar from pypy.objspace.flow.model import Variable, Constant, Block, Link from pypy.objspace.flow.model import SpaceOperation, c_last_exception +from pypy.objspace.flow.model import OperationLoc from pypy.objspace.flow.model import FunctionGraph from pypy.objspace.flow.model import traverse, mkentrymap, checkgraph from pypy.annotation import model as annmodel @@ -231,6 +232,7 @@ self.varmap = {} self._copied_blocks = {} self.op = block.operations[index_operation] + self.callsite_oploc = self.op.oploc self.graph_to_inline = self.get_graph_from_op(self.op) self.exception_guarded = False if (block.exitswitch == c_last_exception and @@ -297,7 +299,9 @@ def copy_operation(self, op): args = [self.get_new_name(arg) for arg in op.args] - result = SpaceOperation(op.opname, args, self.get_new_name(op.result)) + new_oploc = OperationLoc(self.callsite_oploc.codelocs[:] + op.oploc.codelocs[:]) + result = SpaceOperation(op.opname, args, self.get_new_name(op.result), + new_oploc) return result def copy_block(self, block): diff -r cd083843b67a pypy/translator/c/funcgen.py --- a/pypy/translator/c/funcgen.py Mon Dec 20 17:17:45 2010 +0100 +++ b/pypy/translator/c/funcgen.py Wed Jan 05 16:14:35 2011 -0500 @@ -1,4 +1,6 @@ import sys +import inspect +import dis from pypy.translator.c.support import USESLOTS # set to False if necessary while refactoring from pypy.translator.c.support import cdecl from pypy.translator.c.support import llvalue_from_constant, gen_assignments @@ -22,6 +24,38 @@ KEEP_INLINED_GRAPHS = False +def block_comparator(blk0, blk1): + ''' + Sort function for blocks, putting them in an ordering that attempts to + maximize readability of the generated C code + ''' + # print 'comparing %r and %r' % (blk0, blk1) + # Put the start/end block at the top/bottom: + if blk0.isstartblock: + return -1 + + if blk1.isstartblock: + return 1 + + # Order blocks by the offset, where present: + if blk0.operations: + if blk1.operations: + return cmp(blk0.operations[0].oploc, blk1.operations[0].oploc) + else: + return -1 + else: + if blk1.operations: + return 1 + else: + return 0 + +def escape_c_comments(py_src): + # Escape C comments within RPython source, to avoid generating bogus + # comments in our generated C source: + py_src = py_src.replace('/*', '') + py_src = py_src.replace('*/', '') + return py_src + class FunctionCodeGenerator(object): """ Collects information about a function which we have to generate @@ -210,14 +244,57 @@ def cfunction_body(self): graph = self.graph - yield 'goto block0;' # to avoid a warning "this label is not used" + # Try to print python source code: + if hasattr(graph, 'func'): + filename = inspect.getfile(graph.func) + #yield '/* name: %r */' % filename + try: + src, startline = inspect.getsourcelines(graph.func) + except IOError: + pass # No source found + except IndexError: + pass # Bulletproofing + else: + yield '/* Python source %r' % filename + for i, line in enumerate(src): + line = line.rstrip() + line = escape_c_comments(line) + # FuncNode.funcgen_implementation treats lines ending in ':' + # as C blocks, which messes up the formatting. + # Work around this: + if line.endswith(':'): + line += ' ' + yield ' * %4d : %s' % (startline + i, line) + yield ' */' + + label = graph.startblock.get_base_label(self.blocknum[graph.startblock]) + yield 'goto %s;' % label # to avoid a warning "this label is not used" + + # Sort the blocks into a (hopefully) readable order: + blocks = list(graph.iterblocks_by_source()) + blocks.sort(block_comparator) # generate the body of each block - for block in graph.iterblocks(): + for block in blocks: + cursrcloc = None myblocknum = self.blocknum[block] yield '' - yield 'block%d:' % myblocknum + yield '%s:' % block.get_base_label(myblocknum) + #yield "/* repr(block): %r */" % (block, ) + #yield "/* type(block): %r */" % (type(block), ) for i, op in enumerate(block.operations): + #yield "/* type(op): %r */" % (type(op), ) + #yield "/* op.oploc: %s */" % (op.oploc, ) + codeloc = op.oploc.codelocs[-1] + if codeloc: + srcloc = codeloc.get_source_loc() + if srcloc != cursrcloc: + try: + yield "/* %s:%d : %s */" % (codeloc.code.co_name, srcloc.linenum, escape_c_comments(srcloc.get_text())) + cursrcloc = srcloc + except IOError: + pass + for line in self.gen_op(op): yield line if len(block.exits) == 0: @@ -310,7 +387,7 @@ assignments.append((a2typename, dest, src)) for line in gen_assignments(assignments): yield line - label = 'block%d' % self.blocknum[link.target] + label = link.target.get_base_label(self.blocknum[link.target]) if link.target in self.innerloops: loop = self.innerloops[link.target] if link is loop.links[-1]: # link that ends a loop diff -r cd083843b67a pypy/translator/c/test/test_genc.py --- a/pypy/translator/c/test/test_genc.py Mon Dec 20 17:17:45 2010 +0100 +++ b/pypy/translator/c/test/test_genc.py Wed Jan 05 16:14:35 2011 -0500 @@ -1,4 +1,5 @@ import autopath, sys, os, py +import re from pypy.rpython.lltypesystem.lltype import * from pypy.annotation import model as annmodel from pypy.translator.translator import TranslationContext @@ -498,3 +499,130 @@ else: assert 0, "the call was not found in the C source" assert 'PYPY_INHIBIT_TAIL_CALL();' in lines[i+1] + +def get_generated_c_source(fn, types): + # Return a (optimized fn, c source code, c source filename) 3-tuple + t = Translation(fn) + t.annotate(types) + c_filename_path = t.source_c() + h = c_filename_path.open() + src = h.read() + h.close() + c_fn = t.compile_c() + return (c_fn, src, c_filename_path) + +def extract_c_function(c_src, fname): + # Extract the source for a given C function out of a the given src string + # Makes assumptions about the layout of the source + pattern = '^(.+) \**%s\(.*\) {$' % fname + within_fn = False + result = '' + for line in c_src.splitlines(): + if within_fn: + result += line + '\n' + if line.startswith('}'): + return result + else: + m = re.match(pattern, line) + if m: + within_fn = True + result += line + '\n' + return result + + + +def test_generated_c_source(): + # Verify that generate C source "looks good" + # We'll use is_perfect_number, as it contains a loop and a conditional + + # Generate C source code + from pypy.translator.test.snippet import is_perfect_number + c_fn, c_src, c_filename_path = get_generated_c_source(is_perfect_number, + [int]) + + # Locate the C source for the type-specialized function: + c_fn_src = extract_c_function(c_src, 'pypy_g_is_perfect_number') + + # Verify that the C source contains embedded comments containing the lines + # of the python source: + expected_comment_lines = [ + '/* is_perfect_number:31 : while div < n: */', + '/* is_perfect_number:32 : if n % div == 0: */', + '/* is_perfect_number:33 : sum += div */', + '/* is_perfect_number:34 : div += 1 */', + '/* is_perfect_number:35 : return n == sum */'] + for exp_line in expected_comment_lines: + assert exp_line in c_fn_src + + # Verify that the lines occur in the correct order + # ...we do this by filtering the function's generated C source to just + # those lines containing our comments (and dropping whitespace): + lines = c_fn_src.splitlines() + lines = [line.strip() + for line in lines + if '/* is_perfect_number:' in line] + + # ...we should now have exact equality: the ordering should be as expected, + # and each comment should appear exactly once: + assert lines == expected_comment_lines + + # Ensure that the generated C function does the right thing: + assert c_fn(5) == False + assert c_fn(6) == True + assert c_fn(7) == False + + assert c_fn(5.0) == False + assert c_fn(6.0) == True + assert c_fn(7.0) == False + + assert c_fn(5L) == False + assert c_fn(6L) == True + assert c_fn(7L) == False + + try: + c_fn('hello world') + except: + pass + else: + raise 'Was expected exception' + +def test_escaping_c_comments(): + # Ensure that c comments within RPython code get escaped when we generate + # our .c code (to avoid generating bogus C) + # See e.g. pypy.module.cpyext.dictobject's PyDict_Next, which has a + # docstring embedding a C comment + def c_style_comment(a, b): + '''Here is a C-style comment within an RPython docstring: + /* hello world */ + ''' + # and here's one in a string literal: + return '/* hello world a:%s b:%s */' % (a, b) + + def cplusplus_style_comment(a, b): + '''Here is a C++-style comment within an RPython docstring: + // hello world + ''' + # and here are some in string literals, and one as the floor division + # operator: + return '// hello world: a // b = %s' % (a // b) + + for fn_name, exp_output in [('c_style_comment', + '/* hello world a:6 b:3 */'), + ('cplusplus_style_comment', + '// hello world: a // b = 2')]: + fn = locals()[fn_name] + + c_fn, c_src, c_filename_path = get_generated_c_source(fn, [int, int]) + # If the above survived, then the C compiler managed to handle + # the generated C code + + # Verify that the generated code works (i.e. that we didn't + # accidentally change the meaning): + assert c_fn(6, 3) == exp_output + + # Ensure that at least part of the docstrings made it into the C + # code: + c_fn_src = extract_c_function(c_src, 'pypy_g_' + fn_name) + assert 'Here is a ' in c_fn_src + assert 'style comment within an RPython docstring' in c_fn_src + diff -r cd083843b67a pypy/translator/driver.py --- a/pypy/translator/driver.py Mon Dec 20 17:17:45 2010 +0100 +++ b/pypy/translator/driver.py Wed Jan 05 16:14:35 2011 -0500 @@ -539,6 +539,7 @@ dstname = self.compute_exe_name() + '.staticdata.info' shutil.copy(str(fname), str(dstname)) self.log.info('Static data info written to %s' % dstname) + return c_source_filename # task_source_c = taskdef(task_source_c, ['database_c'], "Generating c source") diff -r cd083843b67a pypy/translator/gensupp.py --- a/pypy/translator/gensupp.py Mon Dec 20 17:17:45 2010 +0100 +++ b/pypy/translator/gensupp.py Wed Jan 05 16:14:35 2011 -0500 @@ -16,8 +16,8 @@ def visit(block): if isinstance(block, Block): # first we order by offset in the code string - if block.operations: - ofs = block.operations[0].offset + if block.operations and block.operations[0].oploc.codelocs[0]: + ofs = block.operations[0].oploc.codelocs[0].offset else: ofs = sys.maxint # then we order by input variable name or value diff -r cd083843b67a pypy/translator/interactive.py --- a/pypy/translator/interactive.py Mon Dec 20 17:17:45 2010 +0100 +++ b/pypy/translator/interactive.py Wed Jan 05 16:14:35 2011 -0500 @@ -138,7 +138,7 @@ def source_c(self, argtypes=None, **kwds): self.update_options(argtypes, kwds) self.ensure_backend('c') - self.driver.source_c() + return self.driver.source_c() def source_cl(self, argtypes=None, **kwds): self.update_options(argtypes, kwds) diff -r cd083843b67a pypy/translator/llsupport/wrapper.py --- a/pypy/translator/llsupport/wrapper.py Mon Dec 20 17:17:45 2010 +0100 +++ b/pypy/translator/llsupport/wrapper.py Wed Jan 05 16:14:35 2011 -0500 @@ -59,6 +59,8 @@ # "return result" block = Block(wrapper_inputargs) wgraph = FunctionGraph('pyfn_' + (newname or func.func_name), block) + if hasattr(graph, 'func'): + wgraph.func = graph.func translator.update_call_graph(wgraph, graph, object()) translator.graphs.append(wgraph) block.operations[:] = newops diff -r cd083843b67a pypy/translator/simplify.py --- a/pypy/translator/simplify.py Mon Dec 20 17:17:45 2010 +0100 +++ b/pypy/translator/simplify.py Wed Jan 05 16:14:35 2011 -0500 @@ -294,7 +294,7 @@ return renaming.get(v, v) def rename_op(op): args = [rename(a) for a in op.args] - op = SpaceOperation(op.opname, args, rename(op.result), op.offset) + op = SpaceOperation(op.opname, args, rename(op.result), op.oploc) # special case... if op.opname == 'indirect_call': if isinstance(op.args[0], Constant):