From 1098b2c170ae0c83fd8878c3e2f2adcefe96a79b Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Wed, 28 Jun 2023 19:02:48 +0200
Subject: [PATCH 1/2] Fix issues due to breaking tokenize changes in 3.12
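
Python 3.12 rewrote the tokenize module on top of the C tokenizer. Inputs
that older versions reported with an ERRORTOKEN (for example an unterminated
string literal) now abort tokenization with tokenize.TokenError, and implicit
newlines are added at the end of the input. A rough illustration of the
behaviour difference (the sample input below is only for demonstration, not
taken from the test suite):

    import io
    import tokenize

    src = "x = 'abc\n"  # unterminated string literal
    try:
        for tok in tokenize.generate_tokens(io.StringIO(src).readline):
            print(tok)  # before 3.12: an ERRORTOKEN is yielded for the quote
    except tokenize.TokenError as exc:
        # 3.12 and later: tokenization aborts with a message such as
        # "unterminated string literal (detected at line 1)"
        print(exc.args[0])

To keep input splitting and transformation working on both sides of the
change, introduce IPython.utils.tokenutil.generate_tokens_catch_errors, which
catches these TokenErrors and yields a synthetic ERRORTOKEN instead, and teach
the input transformers to expect ERRORTOKEN and the trailing implicit
newlines.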

---
 IPython/core/inputsplitter.py                | 13 +++++-
 IPython/core/inputtransformer.py             |  7 ++--
 IPython/core/inputtransformer2.py            | 42 ++++++++++++++++----
 IPython/core/tests/test_inputtransformer2.py |  8 +++-
 IPython/utils/tests/test_pycolorize.py       |  5 ++-
 IPython/utils/tokenutil.py                   | 27 ++++++++++++-
 6 files changed, 83 insertions(+), 19 deletions(-)

diff --git a/IPython/core/inputsplitter.py b/IPython/core/inputsplitter.py
index 10707d3d6b6..d33bd5707a9 100644
--- a/IPython/core/inputsplitter.py
+++ b/IPython/core/inputsplitter.py
@@ -44,6 +44,7 @@
                                            assign_from_system,
                                            assemble_python_lines,
                                            )
+from IPython.utils import tokenutil
 
 # These are available in this module for backwards compatibility.
 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
@@ -128,7 +129,7 @@ def partial_tokens(s):
     readline = io.StringIO(s).readline
     token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
     try:
-        for token in tokenize.generate_tokens(readline):
+        for token in tokenutil.generate_tokens_catch_errors(readline):
             yield token
     except tokenize.TokenError as e:
         # catch EOF error
@@ -150,9 +151,17 @@ def find_next_indent(code):
         tokens.pop()
     if not tokens:
         return 0
-    while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}):
+
+    while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT, tokenize.ERRORTOKEN}):
         tokens.pop()
 
+    # Starting in Python 3.12, the tokenize module adds implicit newlines at the end
+    # of input. We need to remove those if we're in a multiline statement
+    if tokens[-1].type == IN_MULTILINE_STATEMENT:
+        while tokens[-2].type in {tokenize.NL}:
+            tokens.pop(-2)
+
+
     if tokens[-1].type == INCOMPLETE_STRING:
         # Inside a multiline string
         return 0
diff --git a/IPython/core/inputtransformer.py b/IPython/core/inputtransformer.py
index 77f69f388f8..81cd1fa08c3 100644
--- a/IPython/core/inputtransformer.py
+++ b/IPython/core/inputtransformer.py
@@ -9,10 +9,11 @@
 import functools
 import re
 import tokenize
-from tokenize import generate_tokens, untokenize, TokenError
+from tokenize import untokenize, TokenError
 from io import StringIO
 
 from IPython.core.splitinput import LineInfo
+from IPython.utils import tokenutil
 
 #-----------------------------------------------------------------------------
 # Globals
@@ -127,7 +128,7 @@ def __init__(self, func):
 
     def reset_tokenizer(self):
         it = iter(self.buf)
-        self.tokenizer = generate_tokens(it.__next__)
+        self.tokenizer = tokenutil.generate_tokens_catch_errors(it.__next__)
 
     def push(self, line):
         self.buf.append(line + '\n')
@@ -295,7 +296,7 @@ def _line_tokens(line):
     readline = StringIO(line).readline
     toktypes = set()
     try:
-        for t in generate_tokens(readline):
+        for t in tokenutil.generate_tokens_catch_errors(readline):
             toktypes.add(t[0])
     except TokenError as e:
         # There are only two cases where a TokenError is raised.
diff --git a/IPython/core/inputtransformer2.py b/IPython/core/inputtransformer2.py
index 37f0e7699c4..7e22e26a028 100644
--- a/IPython/core/inputtransformer2.py
+++ b/IPython/core/inputtransformer2.py
@@ -13,10 +13,13 @@
 import ast
 from codeop import CommandCompiler, Compile
 import re
+import sys
 import tokenize
 from typing import List, Tuple, Optional, Any
 import warnings
 
+from IPython.utils import tokenutil
+
 _indent_re = re.compile(r'^[ \t]+')
 
 def leading_empty_lines(lines):
@@ -269,9 +272,7 @@ def transform(self, lines: List[str]):
 class SystemAssign(TokenTransformBase):
     """Transformer for assignments from system commands (a = !foo)"""
     @classmethod
-    def find(cls, tokens_by_line):
-        """Find the first system assignment (a = !foo) in the cell.
-        """
+    def find_pre_312(cls, tokens_by_line):
         for line in tokens_by_line:
             assign_ix = _find_assign_op(line)
             if (assign_ix is not None) \
@@ -287,6 +288,25 @@ def find(cls, tokens_by_line):
                         break
                     ix += 1
 
+    @classmethod
+    def find_post_312(cls, tokens_by_line):
+        for line in tokens_by_line:
+            assign_ix = _find_assign_op(line)
+            if (assign_ix is not None) \
+                    and not line[assign_ix].line.strip().startswith('=') \
+                    and (len(line) >= assign_ix + 2) \
+                    and (line[assign_ix + 1].type == tokenize.OP) \
+                    and (line[assign_ix + 1].string == '!'):
+                return cls(line[assign_ix + 1].start)
+
+    @classmethod
+    def find(cls, tokens_by_line):
+        """Find the first system assignment (a = !foo) in the cell.
+        """
+        if sys.version_info < (3, 12):
+            return cls.find_pre_312(tokens_by_line)
+        return cls.find_post_312(tokens_by_line)
+
     def transform(self, lines: List[str]):
         """Transform a system assignment found by the ``find()`` classmethod.
         """
@@ -511,7 +531,8 @@ def make_tokens_by_line(lines:List[str]):
         )
     parenlev = 0
     try:
-        for token in tokenize.generate_tokens(iter(lines).__next__):
+        for token in tokenutil.generate_tokens_catch_errors(iter(lines).__next__,
+                                                            extra_errors_to_catch=['expected EOF']):
             tokens_by_line[-1].append(token)
             if (token.type == NEWLINE) \
                     or ((token.type == NL) and (parenlev <= 0)):
@@ -677,9 +698,13 @@ def check_complete(self, cell: str):
         if not lines:
             return 'complete', None
 
-        if lines[-1].endswith('\\'):
-            # Explicit backslash continuation
-            return 'incomplete', find_last_indent(lines)
+        for line in reversed(lines):
+            if not line.strip():
+                continue
+            elif line.strip('\n').endswith('\\'):
+                return 'incomplete', find_last_indent(lines)
+            else:
+                break
 
         try:
             for transform in self.cleanup_transforms:
@@ -717,7 +742,8 @@ def check_complete(self, cell: str):
         if not tokens_by_line:
             return 'incomplete', find_last_indent(lines)
 
-        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
+        if (tokens_by_line[-1][-1].type != tokenize.ENDMARKER
+                and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN):
             # We're in a multiline string or expression
             return 'incomplete', find_last_indent(lines)
 
diff --git a/IPython/core/tests/test_inputtransformer2.py b/IPython/core/tests/test_inputtransformer2.py
index cddb32f7138..0792f7ccda7 100644
--- a/IPython/core/tests/test_inputtransformer2.py
+++ b/IPython/core/tests/test_inputtransformer2.py
@@ -297,14 +297,18 @@ def __init__(self, s):
         _find_assign_op([Tk(s) for s in ("", "(", "a", "=", "b", ")", "=", "5")]) == 6
     )
 
-
+extra_closing_paren_param = (
+    pytest.param("(\n))", "invalid", None)
+    if sys.version_info >= (3, 12)
+    else pytest.param("(\n))", "incomplete", 0)
+)
 examples = [
     pytest.param("a = 1", "complete", None),
     pytest.param("for a in range(5):", "incomplete", 4),
     pytest.param("for a in range(5):\n    if a > 0:", "incomplete", 8),
     pytest.param("raise = 2", "invalid", None),
     pytest.param("a = [1,\n2,", "incomplete", 0),
-    pytest.param("(\n))", "incomplete", 0),
+    extra_closing_paren_param,
     pytest.param("\\\r\n", "incomplete", 0),
     pytest.param("a = '''\n   hi", "incomplete", 3),
     pytest.param("def a():\n x=1\n global x", "invalid", None),
diff --git a/IPython/utils/tests/test_pycolorize.py b/IPython/utils/tests/test_pycolorize.py
index 986b9178800..df2acd035b0 100644
--- a/IPython/utils/tests/test_pycolorize.py
+++ b/IPython/utils/tests/test_pycolorize.py
@@ -18,6 +18,7 @@
 #-----------------------------------------------------------------------------
 
 # our own
+import sys
 from IPython.utils.PyColorize import Parser
 import io
 import pytest
@@ -40,7 +41,7 @@ def function(arg, *args, kwarg=True, **kwargs):
     False == None
 
     with io.open(ru'unicode', encoding='utf-8'):
-        raise ValueError("\n escape \r sequence")
+        raise ValueError("escape \r sequence")
 
     print("wěird ünicoðe")
 
@@ -64,6 +65,6 @@ def test_parse_sample(style):
 
 def test_parse_error(style):
     p = Parser(style=style)
-    f1 = p.format(")", "str")
+    f1 = p.format(r"\ " if sys.version_info >= (3, 12) else ")", "str")
     if style != "NoColor":
         assert "ERROR" in f1
diff --git a/IPython/utils/tokenutil.py b/IPython/utils/tokenutil.py
index 697d2b504a1..c9228dc6121 100644
--- a/IPython/utils/tokenutil.py
+++ b/IPython/utils/tokenutil.py
@@ -21,6 +21,31 @@ def generate_tokens(readline):
         # catch EOF error
         return
 
+def generate_tokens_catch_errors(readline, extra_errors_to_catch=None):
+    default_errors_to_catch = ['unterminated string literal', 'invalid non-printable character',
+                               'after line continuation character']
+    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
+    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])
+
+    tokens = []
+    try:
+        for token in tokenize.generate_tokens(readline):
+            tokens.append(token)
+            yield token
+    except tokenize.TokenError as exc:
+        if any(error in exc.args[0] for error in errors_to_catch):
+            if tokens:
+                start = tokens[-1].start[0], tokens[-1].end[0]
+                end = start
+                line = tokens[-1].line
+            else:
+                start = end = (1, 0)
+                line = ''
+            yield tokenize.TokenInfo(tokenize.ERRORTOKEN, '', start, end, line)
+        else:
+            # Catch EOF
+            raise
+
 def line_at_cursor(cell, cursor_pos=0):
     """Return the line in a cell at a given cursor position
 
@@ -123,5 +148,3 @@ def token_at_cursor(cell, cursor_pos=0):
         return names[-1]
     else:
         return ''
-    
-

From 124787ee1e64eaa219edaeca19efe1b65bec720f Mon Sep 17 00:00:00 2001
From: Matthias Bussonnier <bussonniermatthias@gmail.com>
Date: Wed, 5 Jul 2023 09:51:55 +0200
Subject: [PATCH 2/2] run formatter

---
 IPython/core/inputsplitter.py                |  7 ++++-
 IPython/core/inputtransformer2.py            | 30 +++++++++++---------
 IPython/core/tests/test_inputtransformer2.py |  1 +
 IPython/utils/tokenutil.py                   | 13 ++++++---
 4 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/IPython/core/inputsplitter.py b/IPython/core/inputsplitter.py
index d33bd5707a9..a4401184bdd 100644
--- a/IPython/core/inputsplitter.py
+++ b/IPython/core/inputsplitter.py
@@ -152,7 +152,12 @@ def find_next_indent(code):
     if not tokens:
         return 0
 
-    while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT, tokenize.ERRORTOKEN}):
+    while tokens[-1].type in {
+        tokenize.DEDENT,
+        tokenize.NEWLINE,
+        tokenize.COMMENT,
+        tokenize.ERRORTOKEN,
+    }:
         tokens.pop()
 
     # Starting in Python 3.12, the tokenize module adds implicit newlines at the end
diff --git a/IPython/core/inputtransformer2.py b/IPython/core/inputtransformer2.py
index 7e22e26a028..949cf383e27 100644
--- a/IPython/core/inputtransformer2.py
+++ b/IPython/core/inputtransformer2.py
@@ -292,17 +292,18 @@ def find_pre_312(cls, tokens_by_line):
     def find_post_312(cls, tokens_by_line):
         for line in tokens_by_line:
             assign_ix = _find_assign_op(line)
-            if (assign_ix is not None) \
-                    and not line[assign_ix].line.strip().startswith('=') \
-                    and (len(line) >= assign_ix + 2) \
-                    and (line[assign_ix + 1].type == tokenize.OP) \
-                    and (line[assign_ix + 1].string == '!'):
+            if (
+                (assign_ix is not None)
+                and not line[assign_ix].line.strip().startswith("=")
+                and (len(line) >= assign_ix + 2)
+                and (line[assign_ix + 1].type == tokenize.OP)
+                and (line[assign_ix + 1].string == "!")
+            ):
                 return cls(line[assign_ix + 1].start)
 
     @classmethod
     def find(cls, tokens_by_line):
-        """Find the first system assignment (a = !foo) in the cell.
-        """
+        """Find the first system assignment (a = !foo) in the cell."""
         if sys.version_info < (3, 12):
             return cls.find_pre_312(tokens_by_line)
         return cls.find_post_312(tokens_by_line)
@@ -531,8 +532,9 @@ def make_tokens_by_line(lines:List[str]):
         )
     parenlev = 0
     try:
-        for token in tokenutil.generate_tokens_catch_errors(iter(lines).__next__,
-                                                            extra_errors_to_catch=['expected EOF']):
+        for token in tokenutil.generate_tokens_catch_errors(
+            iter(lines).__next__, extra_errors_to_catch=["expected EOF"]
+        ):
             tokens_by_line[-1].append(token)
             if (token.type == NEWLINE) \
                     or ((token.type == NL) and (parenlev <= 0)):
@@ -701,8 +703,8 @@ def check_complete(self, cell: str):
         for line in reversed(lines):
             if not line.strip():
                 continue
-            elif line.strip('\n').endswith('\\'):
-                return 'incomplete', find_last_indent(lines)
+            elif line.strip("\n").endswith("\\"):
+                return "incomplete", find_last_indent(lines)
             else:
                 break
 
@@ -742,8 +744,10 @@ def check_complete(self, cell: str):
         if not tokens_by_line:
             return 'incomplete', find_last_indent(lines)
 
-        if (tokens_by_line[-1][-1].type != tokenize.ENDMARKER
-                and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN):
+        if (
+            tokens_by_line[-1][-1].type != tokenize.ENDMARKER
+            and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN
+        ):
             # We're in a multiline string or expression
             return 'incomplete', find_last_indent(lines)
 
diff --git a/IPython/core/tests/test_inputtransformer2.py b/IPython/core/tests/test_inputtransformer2.py
index 0792f7ccda7..ec7cb91a9bb 100644
--- a/IPython/core/tests/test_inputtransformer2.py
+++ b/IPython/core/tests/test_inputtransformer2.py
@@ -297,6 +297,7 @@ def __init__(self, s):
         _find_assign_op([Tk(s) for s in ("", "(", "a", "=", "b", ")", "=", "5")]) == 6
     )
 
+
 extra_closing_paren_param = (
     pytest.param("(\n))", "invalid", None)
     if sys.version_info >= (3, 12)
diff --git a/IPython/utils/tokenutil.py b/IPython/utils/tokenutil.py
index c9228dc6121..5fd8a1fbe1b 100644
--- a/IPython/utils/tokenutil.py
+++ b/IPython/utils/tokenutil.py
@@ -21,9 +21,13 @@ def generate_tokens(readline):
         # catch EOF error
         return
 
+
 def generate_tokens_catch_errors(readline, extra_errors_to_catch=None):
-    default_errors_to_catch = ['unterminated string literal', 'invalid non-printable character',
-                               'after line continuation character']
+    default_errors_to_catch = [
+        "unterminated string literal",
+        "invalid non-printable character",
+        "after line continuation character",
+    ]
     assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
     errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])
 
@@ -40,12 +44,13 @@ def generate_tokens_catch_errors(readline, extra_errors_to_catch=None):
                 line = tokens[-1].line
             else:
                 start = end = (1, 0)
-                line = ''
-            yield tokenize.TokenInfo(tokenize.ERRORTOKEN, '', start, end, line)
+                line = ""
+            yield tokenize.TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
         else:
             # Catch EOF
             raise
 
+
 def line_at_cursor(cell, cursor_pos=0):
     """Return the line in a cell at a given cursor position