From 1098b2c170ae0c83fd8878c3e2f2adcefe96a79b Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Wed, 28 Jun 2023 19:02:48 +0200
Subject: [PATCH 1/2] Fix issues due to breaking tokenize changes in 3.12
---
IPython/core/inputsplitter.py | 13 +++++-
IPython/core/inputtransformer.py | 7 ++--
IPython/core/inputtransformer2.py | 42 ++++++++++++++++----
IPython/core/tests/test_inputtransformer2.py | 8 +++-
IPython/utils/tests/test_pycolorize.py | 5 ++-
IPython/utils/tokenutil.py | 27 ++++++++++++-
6 files changed, 83 insertions(+), 19 deletions(-)
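Illustrative sketch (not part of the patch): the breaking change this series works around is that Python 3.12 rebuilt the tokenize module on top of the C tokenizer, so some inputs that previously came back as ERRORTOKEN tokens now raise tokenize.TokenError while iterating. A minimal reproduction, runnable on any version:

    import io
    import tokenize

    # Unterminated single-quoted string: before 3.12 this yields an ERRORTOKEN
    # for the quote and keeps tokenizing; on 3.12+ the generator raises instead.
    try:
        for tok in tokenize.generate_tokens(io.StringIO("x = 'abc").readline):
            print(tokenize.tok_name[tok.type], repr(tok.string))
    except tokenize.TokenError as exc:
        print("TokenError:", exc.args[0])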
diff --git a/IPython/core/inputsplitter.py b/IPython/core/inputsplitter.py
index 10707d3d6b6..d33bd5707a9 100644
--- a/IPython/core/inputsplitter.py
+++ b/IPython/core/inputsplitter.py
@@ -44,6 +44,7 @@
assign_from_system,
assemble_python_lines,
)
+from IPython.utils import tokenutil
# These are available in this module for backwards compatibility.
from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
@@ -128,7 +129,7 @@ def partial_tokens(s):
readline = io.StringIO(s).readline
token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
try:
- for token in tokenize.generate_tokens(readline):
+ for token in tokenutil.generate_tokens_catch_errors(readline):
yield token
except tokenize.TokenError as e:
# catch EOF error
@@ -150,9 +151,17 @@ def find_next_indent(code):
tokens.pop()
if not tokens:
return 0
- while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}):
+
+ while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT, tokenize.ERRORTOKEN}):
tokens.pop()
+ # Starting in Python 3.12, the tokenize module adds implicit newlines at the end
+ # of input. We need to remove those if we're in a multiline statement.
+ if tokens[-1].type == IN_MULTILINE_STATEMENT:
+ while tokens[-2].type in {tokenize.NL}:
+ tokens.pop(-2)
+
+
if tokens[-1].type == INCOMPLETE_STRING:
# Inside a multiline string
return 0
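Illustrative sketch (not part of the patch): the pop(-2) loop added above is needed because, per the comment, 3.12 can emit trailing NL tokens for the implicit newline at the end of an unfinished bracketed statement, so the synthetic IN_MULTILINE_STATEMENT marker that partial_tokens() appends no longer sits directly after the last meaningful token. A rough way to see the raw token tail it has to clean up:

    import io
    import tokenize

    toks = []
    try:
        for tok in tokenize.generate_tokens(io.StringIO("a = [1,\n2,").readline):
            toks.append(tok)
    except tokenize.TokenError:
        pass  # EOF inside an open bracket; partial_tokens() appends its marker here
    # On 3.12 the tail may include an implicit NL for the missing final newline;
    # find_next_indent() strips those before inspecting the marker.
    print([tokenize.tok_name[t.type] for t in toks])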
diff --git a/IPython/core/inputtransformer.py b/IPython/core/inputtransformer.py
index 77f69f388f8..81cd1fa08c3 100644
--- a/IPython/core/inputtransformer.py
+++ b/IPython/core/inputtransformer.py
@@ -9,10 +9,11 @@
import functools
import re
import tokenize
-from tokenize import generate_tokens, untokenize, TokenError
+from tokenize import untokenize, TokenError
from io import StringIO
from IPython.core.splitinput import LineInfo
+from IPython.utils import tokenutil
#-----------------------------------------------------------------------------
# Globals
@@ -127,7 +128,7 @@ def __init__(self, func):
def reset_tokenizer(self):
it = iter(self.buf)
- self.tokenizer = generate_tokens(it.__next__)
+ self.tokenizer = tokenutil.generate_tokens_catch_errors(it.__next__)
def push(self, line):
self.buf.append(line + '\n')
@@ -295,7 +296,7 @@ def _line_tokens(line):
readline = StringIO(line).readline
toktypes = set()
try:
- for t in generate_tokens(readline):
+ for t in tokenutil.generate_tokens_catch_errors(readline):
toktypes.add(t[0])
except TokenError as e:
# There are only two cases where a TokenError is raised.
diff --git a/IPython/core/inputtransformer2.py b/IPython/core/inputtransformer2.py
index 37f0e7699c4..7e22e26a028 100644
--- a/IPython/core/inputtransformer2.py
+++ b/IPython/core/inputtransformer2.py
@@ -13,10 +13,13 @@
import ast
from codeop import CommandCompiler, Compile
import re
+import sys
import tokenize
from typing import List, Tuple, Optional, Any
import warnings
+from IPython.utils import tokenutil
+
_indent_re = re.compile(r'^[ \t]+')
def leading_empty_lines(lines):
@@ -269,9 +272,7 @@ def transform(self, lines: List[str]):
class SystemAssign(TokenTransformBase):
"""Transformer for assignments from system commands (a = !foo)"""
@classmethod
- def find(cls, tokens_by_line):
- """Find the first system assignment (a = !foo) in the cell.
- """
+ def find_pre_312(cls, tokens_by_line):
for line in tokens_by_line:
assign_ix = _find_assign_op(line)
if (assign_ix is not None) \
@@ -287,6 +288,25 @@ def find(cls, tokens_by_line):
break
ix += 1
+ @classmethod
+ def find_post_312(cls, tokens_by_line):
+ for line in tokens_by_line:
+ assign_ix = _find_assign_op(line)
+ if (assign_ix is not None) \
+ and not line[assign_ix].line.strip().startswith('=') \
+ and (len(line) >= assign_ix + 2) \
+ and (line[assign_ix + 1].type == tokenize.OP) \
+ and (line[assign_ix + 1].string == '!'):
+ return cls(line[assign_ix + 1].start)
+
+ @classmethod
+ def find(cls, tokens_by_line):
+ """Find the first system assignment (a = !foo) in the cell.
+ """
+ if sys.version_info < (3, 12):
+ return cls.find_pre_312(tokens_by_line)
+ return cls.find_post_312(tokens_by_line)
+
def transform(self, lines: List[str]):
"""Transform a system assignment found by the ``find()`` classmethod.
"""
@@ -511,7 +531,8 @@ def make_tokens_by_line(lines:List[str]):
)
parenlev = 0
try:
- for token in tokenize.generate_tokens(iter(lines).__next__):
+ for token in tokenutil.generate_tokens_catch_errors(iter(lines).__next__,
+ extra_errors_to_catch=['expected EOF']):
tokens_by_line[-1].append(token)
if (token.type == NEWLINE) \
or ((token.type == NL) and (parenlev <= 0)):
@@ -677,9 +698,13 @@ def check_complete(self, cell: str):
if not lines:
return 'complete', None
- if lines[-1].endswith('\\'):
- # Explicit backslash continuation
- return 'incomplete', find_last_indent(lines)
+ for line in reversed(lines):
+ if not line.strip():
+ continue
+ elif line.strip('\n').endswith('\\'):
+ return 'incomplete', find_last_indent(lines)
+ else:
+ break
try:
for transform in self.cleanup_transforms:
@@ -717,7 +742,8 @@ def check_complete(self, cell: str):
if not tokens_by_line:
return 'incomplete', find_last_indent(lines)
- if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
+ if (tokens_by_line[-1][-1].type != tokenize.ENDMARKER
+ and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN):
# We're in a multiline string or expression
return 'incomplete', find_last_indent(lines)
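Illustrative usage sketch (not part of the patch): with ERRORTOKEN treated like ENDMARKER above, check_complete() classifies error-producing input instead of reporting it as incomplete forever. The stray-closing-paren case below is the one the test parametrization later in this patch pins down per version:

    from IPython.core.inputtransformer2 import TransformerManager

    tm = TransformerManager()
    print(tm.check_complete("a = [1,\n2,"))  # ('incomplete', 0) on all supported versions
    # ('incomplete', 0) before 3.12; ('invalid', None) on 3.12+, where the extra
    # ')' makes the tokenizer error out instead of waiting for more input.
    print(tm.check_complete("(\n))"))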
diff --git a/IPython/core/tests/test_inputtransformer2.py b/IPython/core/tests/test_inputtransformer2.py
index cddb32f7138..0792f7ccda7 100644
--- a/IPython/core/tests/test_inputtransformer2.py
+++ b/IPython/core/tests/test_inputtransformer2.py
@@ -297,14 +297,18 @@ def __init__(self, s):
_find_assign_op([Tk(s) for s in ("", "(", "a", "=", "b", ")", "=", "5")]) == 6
)
-
+extra_closing_paren_param = (
+ pytest.param("(\n))", "invalid", None)
+ if sys.version_info >= (3, 12)
+ else pytest.param("(\n))", "incomplete", 0)
+)
examples = [
pytest.param("a = 1", "complete", None),
pytest.param("for a in range(5):", "incomplete", 4),
pytest.param("for a in range(5):\n if a > 0:", "incomplete", 8),
pytest.param("raise = 2", "invalid", None),
pytest.param("a = [1,\n2,", "incomplete", 0),
- pytest.param("(\n))", "incomplete", 0),
+ extra_closing_paren_param,
pytest.param("\\\r\n", "incomplete", 0),
pytest.param("a = '''\n hi", "incomplete", 3),
pytest.param("def a():\n x=1\n global x", "invalid", None),
diff --git a/IPython/utils/tests/test_pycolorize.py b/IPython/utils/tests/test_pycolorize.py
index 986b9178800..df2acd035b0 100644
--- a/IPython/utils/tests/test_pycolorize.py
+++ b/IPython/utils/tests/test_pycolorize.py
@@ -18,6 +18,7 @@
#-----------------------------------------------------------------------------
# our own
+import sys
from IPython.utils.PyColorize import Parser
import io
import pytest
@@ -40,7 +41,7 @@ def function(arg, *args, kwarg=True, **kwargs):
False == None
with io.open(ru'unicode', encoding='utf-8'):
- raise ValueError("\n escape \r sequence")
+ raise ValueError("escape \r sequence")
print("wěird ünicoðe")
@@ -64,6 +65,6 @@ def test_parse_sample(style):
def test_parse_error(style):
p = Parser(style=style)
- f1 = p.format(")", "str")
+ f1 = p.format(r"\ " if sys.version_info >= (3, 12) else ")", "str")
if style != "NoColor":
assert "ERROR" in f1
diff --git a/IPython/utils/tokenutil.py b/IPython/utils/tokenutil.py
index 697d2b504a1..c9228dc6121 100644
--- a/IPython/utils/tokenutil.py
+++ b/IPython/utils/tokenutil.py
@@ -21,6 +21,31 @@ def generate_tokens(readline):
# catch EOF error
return
+def generate_tokens_catch_errors(readline, extra_errors_to_catch=None):
+ default_errors_to_catch = ['unterminated string literal', 'invalid non-printable character',
+ 'after line continuation character']
+ assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
+ errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])
+
+ tokens = []
+ try:
+ for token in tokenize.generate_tokens(readline):
+ tokens.append(token)
+ yield token
+ except tokenize.TokenError as exc:
+ if any(error in exc.args[0] for error in errors_to_catch):
+ if tokens:
+ start = tokens[-1].start[0], tokens[-1].end[0]
+ end = start
+ line = tokens[-1].line
+ else:
+ start = end = (1, 0)
+ line = ''
+ yield tokenize.TokenInfo(tokenize.ERRORTOKEN, '', start, end, line)
+ else:
+ # Catch EOF
+ raise
+
def line_at_cursor(cell, cursor_pos=0):
"""Return the line in a cell at a given cursor position
@@ -123,5 +148,3 @@ def token_at_cursor(cell, cursor_pos=0):
return names[-1]
else:
return ''
-
-
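Illustrative usage sketch (not part of the patch): the helper added to tokenutil.py yields whatever tokens it managed to read plus a trailing ERRORTOKEN when the tokenizer fails with one of the listed messages, instead of letting TokenError escape to the caller. For example:

    import io
    import tokenize

    from IPython.utils.tokenutil import generate_tokens_catch_errors

    # 'unterminated string literal' is one of the default messages caught.
    toks = list(generate_tokens_catch_errors(io.StringIO("x = 'abc").readline))
    print([tokenize.tok_name[t.type] for t in toks])
    # On 3.12 the list ends with 'ERRORTOKEN'; other TokenErrors (plain EOF)
    # still propagate, so existing except-clauses keep working.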
From 124787ee1e64eaa219edaeca19efe1b65bec720f Mon Sep 17 00:00:00 2001
From: Matthias Bussonnier <bussonniermatthias@gmail.com>
Date: Wed, 5 Jul 2023 09:51:55 +0200
Subject: [PATCH 2/2] run formatter
---
IPython/core/inputsplitter.py | 7 ++++-
IPython/core/inputtransformer2.py | 30 +++++++++++---------
IPython/core/tests/test_inputtransformer2.py | 1 +
IPython/utils/tokenutil.py | 13 ++++++---
4 files changed, 33 insertions(+), 18 deletions(-)
diff --git a/IPython/core/inputsplitter.py b/IPython/core/inputsplitter.py
index d33bd5707a9..a4401184bdd 100644
--- a/IPython/core/inputsplitter.py
+++ b/IPython/core/inputsplitter.py
@@ -152,7 +152,12 @@ def find_next_indent(code):
if not tokens:
return 0
- while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT, tokenize.ERRORTOKEN}):
+ while tokens[-1].type in {
+ tokenize.DEDENT,
+ tokenize.NEWLINE,
+ tokenize.COMMENT,
+ tokenize.ERRORTOKEN,
+ }:
tokens.pop()
# Starting in Python 3.12, the tokenize module adds implicit newlines at the end
diff --git a/IPython/core/inputtransformer2.py b/IPython/core/inputtransformer2.py
index 7e22e26a028..949cf383e27 100644
--- a/IPython/core/inputtransformer2.py
+++ b/IPython/core/inputtransformer2.py
@@ -292,17 +292,18 @@ def find_pre_312(cls, tokens_by_line):
def find_post_312(cls, tokens_by_line):
for line in tokens_by_line:
assign_ix = _find_assign_op(line)
- if (assign_ix is not None) \
- and not line[assign_ix].line.strip().startswith('=') \
- and (len(line) >= assign_ix + 2) \
- and (line[assign_ix + 1].type == tokenize.OP) \
- and (line[assign_ix + 1].string == '!'):
+ if (
+ (assign_ix is not None)
+ and not line[assign_ix].line.strip().startswith("=")
+ and (len(line) >= assign_ix + 2)
+ and (line[assign_ix + 1].type == tokenize.OP)
+ and (line[assign_ix + 1].string == "!")
+ ):
return cls(line[assign_ix + 1].start)
@classmethod
def find(cls, tokens_by_line):
- """Find the first system assignment (a = !foo) in the cell.
- """
+ """Find the first system assignment (a = !foo) in the cell."""
if sys.version_info < (3, 12):
return cls.find_pre_312(tokens_by_line)
return cls.find_post_312(tokens_by_line)
@@ -531,8 +532,9 @@ def make_tokens_by_line(lines:List[str]):
)
parenlev = 0
try:
- for token in tokenutil.generate_tokens_catch_errors(iter(lines).__next__,
- extra_errors_to_catch=['expected EOF']):
+ for token in tokenutil.generate_tokens_catch_errors(
+ iter(lines).__next__, extra_errors_to_catch=["expected EOF"]
+ ):
tokens_by_line[-1].append(token)
if (token.type == NEWLINE) \
or ((token.type == NL) and (parenlev <= 0)):
@@ -701,8 +703,8 @@ def check_complete(self, cell: str):
for line in reversed(lines):
if not line.strip():
continue
- elif line.strip('\n').endswith('\\'):
- return 'incomplete', find_last_indent(lines)
+ elif line.strip("\n").endswith("\\"):
+ return "incomplete", find_last_indent(lines)
else:
break
@@ -742,8 +744,10 @@ def check_complete(self, cell: str):
if not tokens_by_line:
return 'incomplete', find_last_indent(lines)
- if (tokens_by_line[-1][-1].type != tokenize.ENDMARKER
- and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN):
+ if (
+ tokens_by_line[-1][-1].type != tokenize.ENDMARKER
+ and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN
+ ):
# We're in a multiline string or expression
return 'incomplete', find_last_indent(lines)
diff --git a/IPython/core/tests/test_inputtransformer2.py b/IPython/core/tests/test_inputtransformer2.py
index 0792f7ccda7..ec7cb91a9bb 100644
--- a/IPython/core/tests/test_inputtransformer2.py
+++ b/IPython/core/tests/test_inputtransformer2.py
@@ -297,6 +297,7 @@ def __init__(self, s):
_find_assign_op([Tk(s) for s in ("", "(", "a", "=", "b", ")", "=", "5")]) == 6
)
+
extra_closing_paren_param = (
pytest.param("(\n))", "invalid", None)
if sys.version_info >= (3, 12)
diff --git a/IPython/utils/tokenutil.py b/IPython/utils/tokenutil.py
index c9228dc6121..5fd8a1fbe1b 100644
--- a/IPython/utils/tokenutil.py
+++ b/IPython/utils/tokenutil.py
@@ -21,9 +21,13 @@ def generate_tokens(readline):
# catch EOF error
return
+
def generate_tokens_catch_errors(readline, extra_errors_to_catch=None):
- default_errors_to_catch = ['unterminated string literal', 'invalid non-printable character',
- 'after line continuation character']
+ default_errors_to_catch = [
+ "unterminated string literal",
+ "invalid non-printable character",
+ "after line continuation character",
+ ]
assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])
@@ -40,12 +44,13 @@ def generate_tokens_catch_errors(readline, extra_errors_to_catch=None):
line = tokens[-1].line
else:
start = end = (1, 0)
- line = ''
- yield tokenize.TokenInfo(tokenize.ERRORTOKEN, '', start, end, line)
+ line = ""
+ yield tokenize.TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
else:
# Catch EOF
raise
+
def line_at_cursor(cell, cursor_pos=0):
"""Return the line in a cell at a given cursor position