Blame texlive-20200327-dviasm-py3.patch

876c81a
diff -up texlive-base-20200327/source/texk/texlive/linked_scripts/dviasm/dviasm.py.py3fix texlive-base-20200327/source/texk/texlive/linked_scripts/dviasm/dviasm.py
876c81a
--- texlive-base-20200327/source/texk/texlive/linked_scripts/dviasm/dviasm.py.py3fix	2019-11-27 17:03:14.000000000 -0500
876c81a
+++ texlive-base-20200327/source/texk/texlive/linked_scripts/dviasm/dviasm.py	2020-09-18 17:30:44.000000000 -0400
876c81a
@@ -5,8 +5,8 @@
876c81a
 #
876c81a
 # Copyright (C) 2007-2008 by Jin-Hwan Cho <chofchof@ktug.or.kr>
876c81a
 # Copyright (C) 2011-2017 by Khaled Hosny <khaledhosny@eglug.org>
876c81a
-# Copyright (C) 2019 by Arthur Reutenauer <arthur@reutenauer.eu>
876c81a
-# Copyright (C) 2019 by Hironobu Yamashita <h.y.acetaminophen@gmail.com>
876c81a
+# Copyright (C) 2019      by Arthur Reutenauer <arthur@reutenauer.eu>
876c81a
+# Copyright (C) 2019-2020 by Hironobu Yamashita <h.y.acetaminophen@gmail.com>
876c81a
 #
876c81a
 # This program is free software: you can redistribute it and/or modify
876c81a
 # it under the terms of the GNU General Public License as published by
876c81a
@@ -180,22 +180,26 @@ def GetInt(s):
876c81a
   except: return -1
876c81a
 
876c81a
 def GetStrASCII(s): # used in Parse()
876c81a
-  if len(s) > 1 and ((s[0] == "'" and s[-1] == "'") or (s[0] == '"' and s[-1] == '"')): return [ord(c) for c in s[1:-1].decode('unicode_escape')]
876c81a
+  if len(s) > 1 and ((s[0] == "'" and s[-1] == "'") or (s[0] == '"' and s[-1] == '"')):
876c81a
+    return [ord(c) for c in s[1:-1].decode('unicode_escape')]
876c81a
   else: return ''
876c81a
 
876c81a
 def UCS2toJIS(c):
876c81a
-  s = c.encode('iso2022-jp')
876c81a
+  try:
876c81a
+    s = c.encode('iso2022-jp')
876c81a
+  except UnicodeEncodeError:
876c81a
+    s = c.encode('raw_unicode_escape')
876c81a
   if len(s) == 1: return ord(s)
876c81a
   else:           return (s[3] << 8) + s[4]
876c81a
 
876c81a
 def GetStrUTF8(s): # used in Parse()
876c81a
   if len(s) > 1 and ((s[0] == "'" and s[-1] == "'") or (s[0] == '"' and s[-1] == '"')):
876c81a
-    t = s[1:-1]
876c81a
+    t = s[1:-1].encode('raw_unicode_escape').decode('unicode_escape')
876c81a
     if is_ptex: return [UCS2toJIS(c) for c in t]
876c81a
     else:       return [ord(c)       for c in t]
876c81a
   else:         return ''
876c81a
 
876c81a
-def PutStrASCII(t): # unsed in Dump()
876c81a
+def PutStrASCII(t): # used in Dump()
876c81a
   s = ''
876c81a
   for o in t:
876c81a
     if o == 92:         s += '\\\\'
876c81a
@@ -206,7 +210,7 @@ def PutStrASCII(t): # unsed in Dump()
876c81a
       warning('Not support characters > 65535; may skip %d.\n' % o)
876c81a
   return "'%s'" % s
876c81a
 
876c81a
-def PutStrLatin1(t): # unsed in Dump()
876c81a
+def PutStrLatin1(t): # used in Dump()
876c81a
   s = ''
876c81a
   for o in t:
876c81a
     if o == 92:                           s += '\\\\'
876c81a
@@ -221,14 +225,14 @@ def DecodeISO2022JP(c):
876c81a
   try:
876c81a
     s = bytes.fromhex("1b 24 42 %02x %02x" % (c//256, c%256)).decode('iso2022-jp')
876c81a
   except UnicodeDecodeError:
876c81a
-    s = ''
876c81a
+    s = chr(c)
876c81a
   return s
876c81a
 
876c81a
-def PutStrUTF8(t): # unsed in Dump()
876c81a
+def PutStrUTF8(t): # used in Dump()
876c81a
   s = ''
876c81a
   if is_subfont:
876c81a
     for o in t:
876c81a
-      s += chr((subfont_idx << 8) + o).encode('utf8')
876c81a
+      s += chr((subfont_idx << 8) + o)
876c81a
   else: # not the case of subfont
876c81a
     for o in t:
876c81a
       if o == 92:         s += '\\\\'
876c81a
@@ -239,16 +243,6 @@ def PutStrUTF8(t): # unsed in Dump()
876c81a
       else:               s += chr(o)
876c81a
   return "'%s'" % s
876c81a
 
876c81a
-def PutStrSJIS(t): # unsed in Dump()
876c81a
-  s = ''
876c81a
-  for o in t:
876c81a
-    if o == 92:         s += '\\\\'
876c81a
-    elif 32 <= o < 127: s += chr(o)
876c81a
-    elif o < 128:       s += ('\\x%02x' % o)
876c81a
-    else:
876c81a
-      s += DecodeISO2022JP(o).encode('sjis')
876c81a
-  return "'%s'" % s
876c81a
-
876c81a
 def IsFontChanged(f, z):
876c81a
   global cur_font, cur_ssize, subfont_idx, is_subfont
876c81a
   for n in subfont_list:
876c81a
@@ -461,7 +455,7 @@ class DVI(object):
876c81a
       if o == SET_RULE:
876c81a
         s.append([SET_RULE, [p, SignedQuad(fp)]])
876c81a
       elif o in (PUT1, PUT2, PUT3, PUT4):
876c81a
-        s.append([PUT1, p])
876c81a
+        s.append([PUT1, [p]])
876c81a
       elif o == PUT_RULE:
876c81a
         s.append([PUT_RULE, [p, SignedQuad(fp)]])
876c81a
       elif o == NOP:
876c81a
@@ -498,7 +492,7 @@ class DVI(object):
876c81a
       elif o < FNT_NUM_0 + 64 or o in (FNT1, FNT2, FNT3, FNT4):
876c81a
         s.append([FNT1, p])
876c81a
       elif o in (XXX1, XXX2, XXX3, XXX4):
876c81a
-        q = fp.read(p).decode('utf8')
876c81a
+        q = fp.read(p)
876c81a
         s.append([XXX1, q])
876c81a
       elif o in (FNT_DEF1, FNT_DEF2, FNT_DEF3, FNT_DEF4):
876c81a
         self.DefineFont(p, fp)
876c81a
@@ -622,11 +616,11 @@ class DVI(object):
876c81a
         if cmd[0] == SET1:
876c81a
           for o in cmd[1]:
876c81a
             if o < 128: s.append(bytes.fromhex('%02x' % (SET_CHAR_0 + o)))
876c81a
-            else:       s.append(self.CmdPair([SET1, o]))
876c81a
+            else:       s.append(self.CmdPairU([SET1, o]))
876c81a
         elif cmd[0] in (SET_RULE, PUT_RULE):
876c81a
           s.append(bytes.fromhex('%02x' % cmd[0]) + PutSignedQuad(cmd[1][0]) + PutSignedQuad(cmd[1][1]))
876c81a
         elif cmd[0] == PUT1:
876c81a
-          s.append(self.CmdPair([PUT1, cmd[1][0]]))
876c81a
+          s.append(self.CmdPairU([PUT1, cmd[1][0]]))
876c81a
         elif cmd[0] in (RIGHT1, DOWN1):
876c81a
           s.append(self.CmdPair(cmd))
876c81a
         elif cmd[0] in (W0, X0, Y0, Z0):
876c81a
@@ -648,12 +642,22 @@ class DVI(object):
876c81a
           z = cmd[1]; s.append(self.CmdPair(cmd))
876c81a
         elif cmd[0] == FNT1:
876c81a
           if cmd[1] < 64: s.append(bytes.fromhex('%02x' % (FNT_NUM_0 + cmd[1])))
876c81a
-          else:           s.append(self.CmdPair(cmd))
876c81a
+          else:           s.append(self.CmdPairU(cmd))
876c81a
         elif cmd[0] == XXX1:
876c81a
-          cmd1 = cmd[1].encode('utf8')
876c81a
-          l = len(cmd[1])
876c81a
-          if l < 256: s.append(bytes.fromhex('%02x' % XXX1) + bytes.fromhex('%02x' % l) + cmd1)
876c81a
-          else:       s.append(bytes.fromhex('%02x' % XXX4) + PutSignedQuad(l) + cmd1)
876c81a
+          if options.xxx_encoding == "none":
876c81a
+            l = len(cmd[1]) # leave encoding untouched
876c81a
+          else:
876c81a
+            cmd1 = cmd[1].encode(options.xxx_encoding)
876c81a
+            l = len(cmd1)
876c81a
+          if l < 256:
876c81a
+            s.append(bytes.fromhex('%02x' % XXX1) + bytes.fromhex('%02x' % l))
876c81a
+          else:
876c81a
+            s.append(bytes.fromhex('%02x' % XXX4) + PutSignedQuad(l))
876c81a
+          if options.xxx_encoding == "none":
876c81a
+            for o in cmd[1]:
876c81a
+              s.append(bytes.fromhex('%02x' % ord(o)))
876c81a
+          else:
876c81a
+              s.append(cmd1)
876c81a
         elif cmd[0] == DIR:
876c81a
           s.append(bytes.fromhex('%02x' % DIR) + bytes.fromhex('%02x' % cmd[1]))
876c81a
         elif cmd[0] == BEGIN_REFLECT:
876c81a
@@ -685,7 +689,8 @@ class DVI(object):
876c81a
   def WriteFontDefinitions(self, fp):
876c81a
     s = []
876c81a
     for e in sorted(self.font_def.keys()):
876c81a
-      if self.font_def[e]['native']:
876c81a
+      try:
876c81a
+        self.font_def[e]['native']
876c81a
         flags = self.font_def[e]['flags']
876c81a
         s.append(PutByte(NATIVE_FONT_DEF))
876c81a
         s.append(PutSignedQuad(e))
876c81a
@@ -698,7 +703,7 @@ class DVI(object):
876c81a
         if flags & XDV_FLAG_EXTEND: s.append(PutSignedQuad(self.font_def[e]['extend']))
876c81a
         if flags & XDV_FLAG_SLANT: s.append(PutSignedQuad(self.font_def[e]['slant']))
876c81a
         if flags & XDV_FLAG_EMBOLDEN: s.append(PutSignedQuad(self.font_def[e]['embolden']))
876c81a
-      else:
876c81a
+      except KeyError:
876c81a
         l, q = PutUnsigned(e)
876c81a
         s.append(PutByte(FNT_DEF1 + l))
876c81a
         s.append(q)
876c81a
@@ -710,6 +715,10 @@ class DVI(object):
876c81a
         s.append(self.font_def[e]['name'].encode('utf8'))
876c81a
     fp.write(b''.join(s))
876c81a
 
876c81a
+  def CmdPairU(self, cmd):
876c81a
+    l, q = PutUnsigned(cmd[1])
876c81a
+    return bytes.fromhex('%02x' % (cmd[0] + l)) + q
876c81a
+
876c81a
   def CmdPair(self, cmd):
876c81a
     l, q = PutSigned(cmd[1])
876c81a
     return bytes.fromhex('%02x' % (cmd[0] + l)) + q
876c81a
@@ -718,7 +727,7 @@ class DVI(object):
876c81a
   # Parse: Text -> Internal Format
876c81a
   ##########################################################
876c81a
   def Parse(self, fn, encoding=''):
876c81a
-    fp = open(fn, 'r')
876c81a
+    fp = open(fn, 'r', encoding=encoding)
876c81a
     s = fp.read()
876c81a
     fp.close()
876c81a
     self.ParseFromString(s, encoding=encoding)
876c81a
@@ -807,7 +816,10 @@ class DVI(object):
876c81a
         else:
876c81a
           self.cur_page.append([SET1, ol])
876c81a
       elif key == 'put':
876c81a
-        self.cur_page.append([PUT1, GetStr(val)])
876c81a
+        ol = GetStr(val)
876c81a
+        if len(ol) != 1:
876c81a
+          warning('only one character is allowed for put!')
876c81a
+        self.cur_page.append([PUT1, ol])
876c81a
       elif key == 'setrule':
876c81a
         v = val.split(' ')
876c81a
         if len(v) != 2:
876c81a
@@ -895,7 +907,7 @@ class DVI(object):
876c81a
   # Dump: Internal Format -> Text
876c81a
   ##########################################################
876c81a
   def Dump(self, fn, tabsize=2, encoding=''):
876c81a
-    fp = open(fn, 'w')
876c81a
+    fp = open(fn, 'w', encoding=encoding)
876c81a
     self.DumpToFile(fp, tabsize=tabsize, encoding=encoding)
876c81a
     fp.close()
876c81a
 
876c81a
@@ -903,7 +915,6 @@ class DVI(object):
876c81a
     global PutStr
876c81a
     if   encoding == 'ascii':  PutStr = PutStrASCII
876c81a
     elif encoding == 'latin1': PutStr = PutStrLatin1
876c81a
-    elif encoding == 'sjis':   PutStr = PutStrSJIS
876c81a
     else:                      PutStr = PutStrUTF8
876c81a
     # DumpPreamble
876c81a
     fp.write("[preamble]\n")
876c81a
@@ -921,10 +932,10 @@ class DVI(object):
876c81a
     # DumpFontDefinitions
876c81a
     fp.write("\n[font definitions]\n")
876c81a
     for e in sorted(self.font_def.keys()):
876c81a
-      fp.write("fntdef: %s" % self.font_def[e]['name'])
876c81a
+      fp.write("fntdef: %s " % self.font_def[e]['name'])
876c81a
       if self.font_def[e]['design_size'] != self.font_def[e]['scaled_size']:
876c81a
-        fp.write(" (%s) " % self.byconv(self.font_def[e]['design_size']))
876c81a
-      fp.write(" at %s\n" % self.byconv(self.font_def[e]['scaled_size']))
876c81a
+        fp.write("(%s) " % self.byconv(self.font_def[e]['design_size']))
876c81a
+      fp.write("at %s\n" % self.byconv(self.font_def[e]['scaled_size']))
876c81a
     # DumpPages
876c81a
     for page in self.pages:
876c81a
       fp.write("\n[page" + (" %d"*10 % tuple(page['count'])) + "]\n")
876c81a
@@ -939,7 +950,10 @@ class DVI(object):
876c81a
           fp.write("push:\n")
876c81a
           indent += tabsize
876c81a
         elif cmd[0] == XXX1:
876c81a
-          fp.write("xxx: %s\n" % repr(cmd[1]))
876c81a
+          if options.xxx_encoding == "none":
876c81a
+            fp.write("xxx: %s\n" % PutStrASCII(cmd[1])) # leave encoding untouched
876c81a
+          else:
876c81a
+            fp.write("xxx: '%s'\n" % cmd[1].decode(options.xxx_encoding))
876c81a
         elif cmd[0] == DIR:
876c81a
           fp.write("dir: %d\n" % cmd[1])
876c81a
         elif cmd[0] == BEGIN_REFLECT:
876c81a
@@ -1092,7 +1106,6 @@ class DVI(object):
876c81a
       f['slant'] = slant
876c81a
       f['embolden'] = embolden
876c81a
     else:
876c81a
-      f['native'] = False
876c81a
       f['name'] = n
876c81a
 
876c81a
     if q[:2] == "at": q = q[2:]
876c81a
@@ -1144,16 +1157,18 @@ def ProcessOptions():
876c81a
 DVIasm is a Python script to support changing or creating DVI files
876c81a
 via disassembling into text, editing, and then reassembling into
876c81a
 binary format. It is fully documented at
876c81a
+  http://tug.org/TUGboat/Articles/tb28-2/tb89cho.pdf
876c81a
+  http://ajt.ktug.kr/assets/2008/5/1/0201cho.pdf
876c81a
 
876c81a
-http://tug.org/TUGboat/Articles/tb28-2/tb89cho.pdf 
876c81a
-http://ajt.ktug.kr/assets/2008/5/1/0201cho.pdf"""
876c81a
+Please report bugs to
876c81a
+  https://github.com/aminophen/dviasm"""
876c81a
+
876c81a
+  version = """This is %prog-20200918
876c81a
 
876c81a
-  version = """This is %prog-20191126
876c81a
-  
876c81a
 Copyright (C) 2007-2008 by Jin-Hwan Cho <chofchof@ktug.or.kr>
876c81a
 Copyright (C) 2011-2017 by Khaled Hosny <khaledhosny@eglug.org>
876c81a
-Copyright (C) 2019 by Arthur Reutenauer <arthur@reutenauer.eu>
876c81a
-Copyright (C) 2019 by Hironobu Yamashita <h.y.acetaminophen@gmail.com>
876c81a
+Copyright (C) 2019      by Arthur Reutenauer <arthur@reutenauer.eu>
876c81a
+Copyright (C) 2019-2020 by Hironobu Yamashita <h.y.acetaminophen@gmail.com>
876c81a
 
876c81a
 This is free software; you can redistribute it and/or modify
876c81a
 it under the terms of the GNU General Public License as published by
876c81a
@@ -1173,6 +1188,10 @@ the Free Software Foundation, either ver
876c81a
                     action="store", type="string", dest="encoding",
876c81a
                     metavar="STR",
876c81a
                     help="encoding for input/output [default=%default]")
876c81a
+  parser.add_option("-x", "--xxx-encoding",
876c81a
+                    action="store", type="string", dest="xxx_encoding",
876c81a
+                    metavar="STR",
876c81a
+                    help="encoding for interpreting xxx strings [default=%default]")
876c81a
   parser.add_option("-t", "--tabsize",
876c81a
                     action="store", type="int", dest="tabsize",
876c81a
                     metavar="INT",
876c81a
@@ -1184,18 +1203,20 @@ the Free Software Foundation, either ver
876c81a
                     action="append", type="string", dest="subfont",
876c81a
                     metavar="STR",
876c81a
                     help="the list of fonts with UCS2 subfont scheme (comma separated); disable internal subfont list if STR is empty")
876c81a
-  parser.set_defaults(unit='pt', encoding='utf8', tabsize=2)
876c81a
+  parser.set_defaults(unit='pt', encoding='utf8', xxx_encoding='none', tabsize=2)
876c81a
   (options, args) = parser.parse_args()
876c81a
   if not options.unit in ['sp', 'pt', 'bp', 'mm', 'cm', 'in']:
876c81a
     parser.error("invalid unit name '%s'!" % options.unit)
876c81a
-  if options.tabsize < 0: 
876c81a
+  if options.tabsize < 0:
876c81a
     parser.error("negative tabsize!")
876c81a
-  if not options.encoding in ['ascii', 'latin1', 'utf8', 'sjis']:
876c81a
+  if not options.xxx_encoding in ['none', 'utf8', 'sjis', 'eucjp']:
876c81a
+    parser.error("invalid xxx-encoding '%s'!" % options.xxx_encoding)
876c81a
+  if not options.encoding in ['ascii', 'latin1', 'utf8', 'sjis', 'eucjp']:
876c81a
     parser.error("invalid encoding '%s'!" % options.encoding)
876c81a
   if options.ptex:
876c81a
     global is_ptex
876c81a
     is_ptex = True
876c81a
-    if not options.encoding in ['utf8', 'sjis']:
876c81a
+    if not options.encoding in ['utf8', 'sjis', 'eucjp']:
876c81a
       parser.error("invalid encoding '%s' for Japanese pTeX!" % options.encoding)
876c81a
   if options.subfont:
876c81a
     global subfont_list
876c81a
@@ -1235,4 +1256,4 @@ if __name__ == '__main__':
876c81a
   else: # dump -> dvi
876c81a
     aDVI.Parse(args[0], encoding=options.encoding)
876c81a
     if options.output: aDVI.Save(options.output)
876c81a
-    else:              aDVI.SaveToFile(sys.stdout)
876c81a
+    else:              aDVI.SaveToFile(sys.stdout.buffer)