Blob Blame History Raw
diff -up texlive-base-20200327/source/texk/texlive/linked_scripts/pygmentex/pygmentex.py.10 texlive-base-20200327/source/texk/texlive/linked_scripts/pygmentex/pygmentex.py
--- texlive-base-20200327/source/texk/texlive/linked_scripts/pygmentex/pygmentex.py.10	2014-08-20 17:53:18.000000000 -0400
+++ texlive-base-20200327/source/texk/texlive/linked_scripts/pygmentex/pygmentex.py	2021-03-18 14:34:36.879392389 -0400
@@ -1,4 +1,4 @@
-#! /usr/bin/env python2
+#! /usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 """
@@ -8,11 +8,11 @@
     PygmenTeX is a converter that do syntax highlighting of snippets of
     source code extracted from a LaTeX file.
 
-    :copyright: Copyright 2014 by José Romildo Malaquias
+    :copyright: Copyright 2020 by José Romildo Malaquias
     :license: BSD, see LICENSE for details
 """
 
-__version__ = '0.8'
+__version__ = '0.10'
 __docformat__ = 'restructuredtext'
 
 import sys
@@ -27,6 +27,7 @@ from pygments.formatters.latex import La
 from pygments.util import get_bool_opt, get_int_opt
 from pygments.lexer import Lexer
 from pygments.token import Token
+from pygments.util import guess_decode
 
 ###################################################
 # The following code is in >=pygments-2.0
@@ -56,24 +57,24 @@ class EnhancedLatexFormatter(LatexFormat
             realoutfile = outfile
             outfile = StringIO()
 
-        outfile.write(u'\\begin{Verbatim}[commandchars=\\\\\\{\\}')
+        outfile.write(r'\begin{Verbatim}[commandchars=\\\{\}')
         if self.linenos:
             start, step = self.linenostart, self.linenostep
-            outfile.write(u',numbers=left' +
-                          (start and u',firstnumber=%d' % start or u'') +
-                          (step and u',stepnumber=%d' % step or u''))
+            outfile.write(',numbers=left' +
+                          (start and ',firstnumber=%d' % start or '') +
+                          (step and ',stepnumber=%d' % step or ''))
         if self.mathescape or self.texcomments or self.escapeinside:
-            outfile.write(u',codes={\\catcode`\\$=3\\catcode`\\^=7\\catcode`\\_=8}')
+            outfile.write(r',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}')
         if self.verboptions:
-            outfile.write(u',' + self.verboptions)
-        outfile.write(u']\n')
+            outfile.write(',' + self.verboptions)
+        outfile.write(']\n')
 
         for ttype, value in tokensource:
             if ttype in Token.Comment:
                 if self.texcomments:
                     # Try to guess comment starting lexeme and escape it ...
                     start = value[0:1]
-                    for i in xrange(1, len(value)):
+                    for i in range(1, len(value)):
                         if start[0] != value[i]:
                             break
                         start += value[i]
@@ -129,7 +130,7 @@ class EnhancedLatexFormatter(LatexFormat
             else:
                 outfile.write(value)
 
-        outfile.write(u'\\end{Verbatim}\n')
+        outfile.write('\\end{Verbatim}\n')
 
         if self.full:
             realoutfile.write(DOC_TEMPLATE %
@@ -232,7 +233,7 @@ DISPLAY_LINENOS_SNIPPET_TEMPLATE = r'''
 '''
 
 
-def pyg(outfile, n, opts, extra_opts, text, usedstyles, inline_delim = ''):
+def pyg(outfile, outencoding, n, opts, extra_opts, text, usedstyles, inline_delim = ''):
     try:
         lexer = get_lexer_by_name(opts['lang'])
     except ClassNotFound as err:
@@ -260,27 +261,8 @@ def pyg(outfile, n, opts, extra_opts, te
     if tabsize:
         lexer.tabsize = tabsize
 
-    encoding = opts['encoding']
-    if encoding == 'guess':
-        try:
-            import chardet
-        except ImportError:
-            try:
-                text = text.decode('utf-8')
-                if text.startswith(u'\ufeff'):
-                    text = text[len(u'\ufeff'):]
-                    encoding = 'utf-8'
-            except UnicodeDecodeError:
-                text = text.decode('latin1')
-                encoding = 'latin1'
-        else:
-            encoding = chardet.detect(text)['encoding']
-            text = text.decode(encoding)
-    else:
-        text = text.decode(encoding)
-
     lexer.encoding = ''
-    _fmter.encoding = encoding
+    # _fmter.encoding = outencoding
 
     stylename = opts['sty']
 
@@ -367,7 +349,7 @@ _re_input = re.compile(
     r'^<@@pygmented@input@(\d+)\n(.*)\n([\s\S]*?)\n>@@pygmented@input@\1$',
     re.MULTILINE)
 
-def convert(code, outfile):
+def convert(code, outfile, outencoding):
     """
     Convert ``code``
     """
@@ -393,6 +375,7 @@ def convert(code, outfile):
         m = _re_inline.match(code, pos)
         if m:
             pyg(outfile,
+                outencoding,
                 m.group(1),
                 parse_opts(opts.copy(), m.group(2)),
                 '',
@@ -405,6 +388,7 @@ def convert(code, outfile):
         m = _re_display.match(code, pos)
         if m:
             pyg(outfile,
+                outencoding,
                 m.group(1),
                 parse_opts(opts.copy(), m.group(2)),
                 '',
@@ -415,15 +399,16 @@ def convert(code, outfile):
 
         m = _re_input.match(code, pos)
         if m:
+            opts_new = parse_opts(opts, m.group(2))
             try:
-                filecontents = open(m.group(3), 'rb').read()
+                filecontents, inencoding = read_input(m.group(3), opts_new['encoding'])
             except Exception as err:
-                sys.stderr.write('Error: cannot read input file: ')
-                sys.stderr.write(str(err))
+                print('Error: cannot read input file: ', err, file=sys.stderr)
             else:
                 pyg(outfile,
+                    outencoding,
                     m.group(1),
-                    parse_opts(opts, m.group(2)),
+                    opts_new,
                     "",
                     filecontents,
                     usedstyles)
@@ -435,6 +420,16 @@ def convert(code, outfile):
 
     outfile.write(GENERIC_DEFINITIONS_2)
 
+def read_input(filename, encoding):
+    with open(filename, 'rb') as infp:
+        code = infp.read()
+
+    if not encoding or encoding == 'guess':
+        code, encoding = guess_decode(code)
+    else:
+        code = code.decode(encoding)
+
+    return code, encoding
 
 
 USAGE = """\
@@ -486,7 +481,7 @@ def main(args = sys.argv):
         return 0
 
     if opts.pop('-V', None) is not None:
-        print('PygmenTeX version %s, (c) 2010 by José Romildo.' % __version__)
+        print('PygmenTeX version %s, (c) 2020 by José Romildo.' % __version__)
         return 0
  
     if len(args) != 1:
@@ -494,10 +489,9 @@ def main(args = sys.argv):
         return 2
     infn = args[0]
     try:
-        code = open(infn, 'rb').read()
+        code, inencoding = read_input(infn, "guess")
     except Exception as err:
-        sys.stderr.write('Error: cannot read input file: ')
-        sys.stderr.write(str(err))
+        print('Error: cannot read input file: ', err, file=sys.stderr)
         return 1
 
     outfn = opts.pop('-o', None)
@@ -507,11 +501,10 @@ def main(args = sys.argv):
     try:
         outfile = open(outfn, 'w')
     except Exception as err:
-        sys.stderr.write('Error: cannot open output file: ')
-        sys.stderr.write(str(err))
+        print('Error: cannot open output file: ', err, file=sys.stderr)
         return 1
 
-    convert(code, outfile)
+    convert(code, outfile, inencoding)
 
     return 0