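Add separate helper functions that convert a Python 2 binary string / Python 3 bytes object to ASCII text or a hex dump.
This fixes the --print-header and --print-directory options, whose formatting code called ord() on values that are already integers in Python 3.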
--- officeparser-42c2d40372fe271f2039ca1adc145d2aef8c9545/officeparser.py 2019-10-26 19:54:01.632237407 +0200
+++ officeparser-42c2d40372fe271f2039ca1adc145d2aef8c9545.new/officeparser.py 2019-10-26 19:53:55.603999504 +0200
@@ -56,6 +56,23 @@
maximum_length = (0xFFFF >> bit_count) + 3
return length_mask, offset_mask, bit_count, maximum_length
+
+def to_hex(data):
+    """Return data as space-separated, uppercase, two-digit hex bytes.
+
+    bytearray() yields ints on Python 2 and 3 alike, so no PY3 branch is needed.
+    """
+    return ' '.join('{0:02X}'.format(b) for b in bytearray(data))
+
+def to_ascii(data):
+    """Return data as a text string with every NUL byte dropped.
+
+    Callers use this to flatten wide-char names to ASCII. bytearray() yields
+    ints on both Python 2 and Python 3, so a single code path serves both.
+    """
+    # chr() turns each remaining byte value back into a one-character string
+    return ''.join(chr(b) for b in bytearray(data) if b != 0)
+
def decompress_stream(compressed_container):
# MS-OVBA
# 2.4.1.2
@@ -342,8 +359,8 @@
_csectMiniFat = {15}
_sectDifStart = {16}
_csectDif = {17}""".format(
- ' '.join(['{0:02X}'.format(ord(x)) for x in self._abSig]),
- ' '.join(['{0:02X}'.format(ord(x)) for x in self._clid]),
+ to_hex(self._abSig),
+ to_hex(self._clid),
'{0:04X}'.format(self._uMinorVersion),
'{0}'.format(self._uDllVersion),
'{0:04X}'.format(self._uByteOrder),
@@ -409,12 +426,7 @@
self._ab = self.directory[0]
self._cb = self.directory[1]
# convert wide chars into ASCII
- if PY3:
- # In Python 3 we have numbers we need to convert to chars
- self.name = ''.join([chr(x) for x in self._ab[0:self._cb] if x != 0])
- else:
- # In Python 2 we have chars we need to convert to numbers to check them
- self.name = ''.join([x for x in self._ab[0:self._cb] if ord(x) != 0])
+ self.name = to_ascii(self._ab[0:self._cb])
self._mse = self.directory[2]
self._bflags = self.directory[3]
self._sidLeftSib = self.directory[4]
@@ -444,8 +456,7 @@
_sectStart = {11}
_ulSize = {12}
_dptPropType = {13}""".format(
- "{0}\n {1}".format(self.name,
- ' '.join(['{0:02X}'.format(ord(x)) for x in self._ab[0:self._cb]])),
+ "{0}\n {1}".format(self.name,to_hex(self._ab[0:self._cb])),
#unicode(self._ab).encode('us-ascii', 'ignore'),
'{0:04X}'.format(self._cb),
stgty_to_str(self._mse),
@@ -453,7 +464,7 @@
'{0:04X}'.format(self._sidLeftSib),
'{0:04X}'.format(self._sidRightSib),
'{0:04X}'.format(self._sidChild),
- ' '.join(['{0:02X}'.format(ord(x)) for x in self._clsId]),
+ to_hex(self._clsId),
'{0:04X}'.format(self._dwUserFlags),
'{0}'.format(self._time[0]),
'{0}'.format(self._time[1]),