From 2839b3e6c412ca2f1cb593da75043e1e358a4fca Mon Sep 17 00:00:00 2001 From: Valentina Mukhamedzhanova Date: Oct 03 2014 08:28:14 +0000 Subject: Revert porting to Python 3. --- diff --git a/port-tests-to-python3.patch b/port-tests-to-python3.patch deleted file mode 100644 index 61ce736..0000000 --- a/port-tests-to-python3.patch +++ /dev/null @@ -1,658 +0,0 @@ -commit 8560a386c3ea1e868a8e294c1e318a6ee5319580 -Author: Tomas Radej -Date: Wed Aug 20 13:32:32 2014 +0200 - - Ported test suite - -diff --git a/test/grabberperf.py b/test/grabberperf.py -index 820da2c..d9142fa 100644 ---- a/test/grabberperf.py -+++ b/test/grabberperf.py -@@ -21,11 +21,15 @@ - - import sys - import os --from os.path import dirname, join as joinpath - import tempfile - import time -+import six -+ -+# Hack for Python 3 -+sys.path.insert(0, os.path.expandvars(os.path.abspath('..'))) - --import urlgrabber.grabber as grabber -+from os.path import dirname, join as joinpath -+from urlgrabber import grabber - from urlgrabber.grabber import URLGrabber, urlgrab, urlopen, urlread - from urlgrabber.progress import text_progress_meter - -@@ -48,7 +52,7 @@ def main(): - os.unlink(tempdst) - - def setuptemp(size): -- if DEBUG: print 'writing %d KB to temporary file (%s).' % (size / 1024, tempsrc) -+ if DEBUG: print('writing %d KB to temporary file (%s).' % (size / 1024, tempsrc)) - file = open(tempsrc, 'w', 1024) - chars = '0123456789' - for i in range(size): -@@ -65,9 +69,9 @@ def speedtest(size): - - try: - from urlgrabber.progress import text_progress_meter -- except ImportError, e: -+ except ImportError as e: - tpm = None -- print 'not using progress meter' -+ print('not using progress meter') - else: - tpm = text_progress_meter(fo=open('/dev/null', 'w')) - -@@ -83,15 +87,15 @@ def speedtest(size): - # module. - - # get it nicely cached before we start comparing -- if DEBUG: print 'pre-caching' -+ if DEBUG: print('pre-caching') - for i in range(100): - urlgrab(tempsrc, tempdst, copy_local=1, throttle=None, proxies=proxies) - -- if DEBUG: print 'running speed test.' -+ if DEBUG: print('running speed test.') - reps = 500 - for i in range(reps): - if DEBUG: -- print '\r%4i/%-4i' % (i+1, reps), -+ six.print_('\r%4i/%-4i' % (i+1, reps), end=' ') - sys.stdout.flush() - t = time.time() - urlgrab(tempsrc, tempdst, -@@ -111,14 +115,14 @@ def speedtest(size): - while 1: - s = in_fo.read(1024 * 8) - if not s: break -- out_fo.write(s) -+ out_fo.write(s if not six.PY3 else s.encode('utf-8')) - in_fo.close() - out_fo.close() - none_times.append(1000 * (time.time() - t)) - -- if DEBUG: print '\r' -+ if DEBUG: print('\r') - -- print "%d KB Results:" % (size / 1024) -+ print("%d KB Results:" % (size / 1024)) - print_result('full', full_times) - print_result('raw', raw_times) - print_result('none', none_times) -@@ -131,7 +135,7 @@ def print_result(label, result_list): - for i in result_list: mean += i - mean = mean/len(result_list) - median = result_list[int(len(result_list)/2)] -- print format % (label, mean, median, result_list[0], result_list[-1]) -+ print(format % (label, mean, median, result_list[0], result_list[-1])) - - if __name__ == '__main__': - main() -diff --git a/test/munittest.py b/test/munittest.py -index 16a61ae..7e7969e 100644 ---- a/test/munittest.py -+++ b/test/munittest.py -@@ -103,9 +103,9 @@ SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. - import time - import sys - import traceback --import string - import os - import types -+import collections - - ############################################################################## - # Exported classes and functions -@@ -190,7 +190,7 @@ class TestResult: - - def _exc_info_to_string(self, err): - """Converts a sys.exc_info()-style tuple of values into a string.""" -- return string.join(traceback.format_exception(*err), '') -+ return ''.join(traceback.format_exception(*err)) - - def __repr__(self): - return "<%s run=%i errors=%i failures=%i>" % \ -@@ -251,8 +251,8 @@ class TestCase: - testMethod = getattr(self, methodName) - self._testMethodDoc = testMethod.__doc__ - except AttributeError: -- raise ValueError, "no such test method in %s: %s" % \ -- (self.__class__, methodName) -+ raise ValueError("no such test method in %s: %s" % \ -+ (self.__class__, methodName)) - - def setUp(self): - "Hook method for setting up the test fixture before exercising it." -@@ -276,7 +276,7 @@ class TestCase: - the specified test method's docstring. - """ - doc = self._testMethodDoc -- return doc and string.strip(string.split(doc, "\n")[0]) or None -+ return doc and doc.split("\n")[0].strip() or None - - def id(self): - return "%s.%s" % (_strclass(self.__class__), self._testMethodName) -@@ -361,15 +361,15 @@ class TestCase: - - def fail(self, msg=None): - """Fail immediately, with the given message.""" -- raise self.failureException, msg -+ raise self.failureException(msg) - - def failIf(self, expr, msg=None): - "Fail the test if the expression is true." -- if expr: raise self.failureException, msg -+ if expr: raise self.failureException(msg) - - def failUnless(self, expr, msg=None): - """Fail the test unless the expression is true.""" -- if not expr: raise self.failureException, msg -+ if not expr: raise self.failureException(msg) - - def failUnlessRaises(self, excClass, callableObj, *args, **kwargs): - """Fail unless an exception of class excClass is thrown -@@ -386,23 +386,21 @@ class TestCase: - else: - if hasattr(excClass,'__name__'): excName = excClass.__name__ - else: excName = str(excClass) -- raise self.failureException, excName -+ raise self.failureException(excName) - - def failUnlessEqual(self, first, second, msg=None): - """Fail if the two objects are unequal as determined by the '==' - operator. - """ - if not first == second: -- raise self.failureException, \ -- (msg or '%s != %s' % (`first`, `second`)) -+ raise self.failureException(msg or '%s != %s' % (repr(first), repr(second))) - - def failIfEqual(self, first, second, msg=None): - """Fail if the two objects are equal as determined by the '==' - operator. - """ - if first == second: -- raise self.failureException, \ -- (msg or '%s == %s' % (`first`, `second`)) -+ raise self.failureException(msg or '%s == %s' % (repr(first), repr(second))) - - def failUnlessAlmostEqual(self, first, second, places=7, msg=None): - """Fail if the two objects are unequal as determined by their -@@ -413,8 +411,7 @@ class TestCase: - as significant digits (measured from the most significant digit). - """ - if round(second-first, places) != 0: -- raise self.failureException, \ -- (msg or '%s != %s within %s places' % (`first`, `second`, `places` )) -+ raise self.failureException(msg or '%s != %s within %s places' % (repr(first), repr(second), repr(places) )) - - def failIfAlmostEqual(self, first, second, places=7, msg=None): - """Fail if the two objects are equal as determined by their -@@ -425,8 +422,7 @@ class TestCase: - as significant digits (measured from the most significant digit). - """ - if round(second-first, places) == 0: -- raise self.failureException, \ -- (msg or '%s == %s within %s places' % (`first`, `second`, `places`)) -+ raise self.failureException(msg or '%s == %s within %s places' % (repr(first), repr(second), repr(places))) - - assertEqual = assertEquals = failUnlessEqual - -@@ -442,15 +438,15 @@ class TestCase: - - def skip(self, msg=None): - """Skip the test""" -- raise self.skipException, msg -+ raise self.skipException(msg) - - def skipIf(self, expr, msg=None): - "Skip the test if the expression is true." -- if expr: raise self.skipException, msg -+ if expr: raise self.skipException(msg) - - def skipUnless(self, expr, msg=None): - """Skip the test unless the expression is true.""" -- if not expr: raise self.skipException, msg -+ if not expr: raise self.skipException(msg) - - - -@@ -554,7 +550,7 @@ class FunctionTestCase(TestCase): - def shortDescription(self): - if self._description is not None: return self._description - doc = self._testFunc.__doc__ -- return doc and string.strip(string.split(doc, "\n")[0]) or None -+ return doc and doc.split("\n")[0].strip() or None - - - -@@ -567,13 +563,12 @@ class TestLoader: - criteria and returning them wrapped in a Test - """ - testMethodPrefix = 'test' -- sortTestMethodsUsing = cmp - suiteClass = TestSuite - - def loadTestsFromTestCase(self, testCaseClass): - """Return a suite of all tests cases contained in testCaseClass""" - name_list = self.getTestCaseNames(testCaseClass) -- instance_list = map(testCaseClass, name_list) -+ instance_list = list(map(testCaseClass, name_list)) - description = getattr(testCaseClass, '__doc__') \ - or testCaseClass.__name__ - description = (description.splitlines()[0]).strip() -@@ -585,7 +580,7 @@ class TestLoader: - tests = [] - for name in dir(module): - obj = getattr(module, name) -- if (isinstance(obj, (type, types.ClassType)) and -+ if (isinstance(obj, type) and - issubclass(obj, TestCase) and - not obj in [TestCase, FunctionTestCase]): - tests.append(self.loadTestsFromTestCase(obj)) -@@ -603,15 +598,15 @@ class TestLoader: - - The method optionally resolves the names relative to a given module. - """ -- parts = string.split(name, '.') -+ parts = name.split('.') - if module is None: - if not parts: -- raise ValueError, "incomplete test name: %s" % name -+ raise ValueError("incomplete test name: %s" % name) - else: - parts_copy = parts[:] - while parts_copy: - try: -- module = __import__(string.join(parts_copy,'.')) -+ module = __import__('.'.join(parts_copy,)) - break - except ImportError: - del parts_copy[-1] -@@ -624,20 +619,19 @@ class TestLoader: - import unittest - if type(obj) == types.ModuleType: - return self.loadTestsFromModule(obj) -- elif (isinstance(obj, (type, types.ClassType)) and -+ elif (isinstance(obj, type) and - issubclass(obj, unittest.TestCase)): - return self.loadTestsFromTestCase(obj) - elif type(obj) == types.UnboundMethodType: -- return obj.im_class(obj.__name__) -- elif callable(obj): -+ return obj.__self__.__class__(obj.__name__) -+ elif isinstance(obj, collections.Callable): - test = obj() - if not isinstance(test, unittest.TestCase) and \ - not isinstance(test, unittest.TestSuite): -- raise ValueError, \ -- "calling %s returned %s, not a test" % (obj,test) -+ raise ValueError("calling %s returned %s, not a test" % (obj,test)) - return test - else: -- raise ValueError, "don't know how to make test from: %s" % obj -+ raise ValueError("don't know how to make test from: %s" % obj) - - def loadTestsFromNames(self, names, module=None): - """Return a suite of all tests cases found using the given sequence -@@ -651,14 +645,13 @@ class TestLoader: - def getTestCaseNames(self, testCaseClass): - """Return a sorted sequence of method names found within testCaseClass - """ -- testFnNames = filter(lambda n,p=self.testMethodPrefix: n[:len(p)] == p, -- dir(testCaseClass)) -+ testFnNames = list(filter(lambda n,p=self.testMethodPrefix: n[:len(p)] == p, -+ dir(testCaseClass))) - for baseclass in testCaseClass.__bases__: - for testFnName in self.getTestCaseNames(baseclass): - if testFnName not in testFnNames: # handle overridden methods - testFnNames.append(testFnName) -- if self.sortTestMethodsUsing: -- testFnNames.sort(self.sortTestMethodsUsing) -+ testFnNames.sort() - return testFnNames - - -@@ -670,21 +663,20 @@ defaultTestLoader = TestLoader() - # Patches for old functions: these functions should be considered obsolete - ############################################################################## - --def _makeLoader(prefix, sortUsing, suiteClass=None): -+def _makeLoader(prefix, suiteClass=None): - loader = TestLoader() -- loader.sortTestMethodsUsing = sortUsing - loader.testMethodPrefix = prefix - if suiteClass: loader.suiteClass = suiteClass - return loader - --def getTestCaseNames(testCaseClass, prefix, sortUsing=cmp): -- return _makeLoader(prefix, sortUsing).getTestCaseNames(testCaseClass) -+def getTestCaseNames(testCaseClass, prefix): -+ return _makeLoader(prefix).getTestCaseNames(testCaseClass) - --def makeSuite(testCaseClass, prefix='test', sortUsing=cmp, suiteClass=TestSuite): -- return _makeLoader(prefix, sortUsing, suiteClass).loadTestsFromTestCase(testCaseClass) -+def makeSuite(testCaseClass, prefix='test',suiteClass=TestSuite): -+ return _makeLoader(prefix, suiteClass).loadTestsFromTestCase(testCaseClass) - --def findTestCases(module, prefix='test', sortUsing=cmp, suiteClass=TestSuite): -- return _makeLoader(prefix, sortUsing, suiteClass).loadTestsFromModule(module) -+def findTestCases(module, prefix='test',suiteClass=TestSuite): -+ return _makeLoader(prefix, suiteClass).loadTestsFromModule(module) - - - ############################################################################## -@@ -825,8 +817,8 @@ class TextTestRunner: - self.stream.writeln() - if not result.wasSuccessful(): - self.stream.write("FAILED (") -- failed, errored, skipped = map(len, \ -- (result.failures, result.errors, result.skipped)) -+ failed, errored, skipped = list(map(len, \ -+ (result.failures, result.errors, result.skipped))) - if failed: - self.stream.write("failures=%d" % failed) - if errored: -@@ -871,7 +863,7 @@ Examples: - argv=None, testRunner=None, testLoader=defaultTestLoader): - if type(module) == type(''): - self.module = __import__(module) -- for part in string.split(module,'.')[1:]: -+ for part in module.split('.')[1:]: - self.module = getattr(self.module, part) - else: - self.module = module -@@ -886,8 +878,8 @@ Examples: - self.runTests() - - def usageExit(self, msg=None): -- if msg: print msg -- print self.USAGE % self.__dict__ -+ if msg: print(msg) -+ print(self.USAGE % self.__dict__) - sys.exit(2) - - def parseArgs(self, argv): -@@ -910,7 +902,7 @@ Examples: - else: - self.testNames = (self.defaultTest,) - self.createTests() -- except getopt.error, msg: -+ except getopt.error as msg: - self.usageExit(msg) - - def createTests(self): -diff --git a/test/runtests.py b/test/runtests.py -index c48bd1d..78a5974 100644 ---- a/test/runtests.py -+++ b/test/runtests.py -@@ -54,7 +54,7 @@ def parse_args(): - return (descriptions,verbosity) - - def usage(): -- print __doc__ -+ print(__doc__) - - if __name__ == '__main__': - main() -diff --git a/test/test_byterange.py b/test/test_byterange.py -index 0f75807..0863be8 100644 ---- a/test/test_byterange.py -+++ b/test/test_byterange.py -@@ -24,8 +24,11 @@ - # $Id: test_byterange.py,v 1.6 2004/03/31 17:02:00 mstenner Exp $ - - import sys -+import six - --from cStringIO import StringIO -+from io import StringIO -+ -+import urlgrabber - from urlgrabber.byterange import RangeableFileObject - - from base_test_code import * -@@ -37,7 +40,7 @@ class RangeableFileObjectTestCase(TestCase): - # 0 1 2 3 4 5 6 7 8 9 - # 0123456789012345678901234567890123456789012345678901234567 890123456789012345678901234567890 - self.test = 'Why cannot we write the entire 24 volumes of Encyclopaedia\nBrittanica on the head of a pin?\n' -- self.fo = StringIO(self.test) -+ self.fo = StringIO(unicode(self.test) if not six.PY3 else self.test) - self.rfo = RangeableFileObject(self.fo, (20,69)) - - def tearDown(self): -@@ -61,7 +64,8 @@ class RangeableFileObjectTestCase(TestCase): - - def test_readall(self): - """RangeableFileObject.read(): to end of file.""" -- rfo = RangeableFileObject(StringIO(self.test),(11,)) -+ text_compat = unicode(self.test) if not six.PY3 else self.test -+ rfo = RangeableFileObject(StringIO(text_compat),(11,)) - self.assertEquals(self.test[11:],rfo.read()) - - def test_readline(self): -diff --git a/test/test_grabber.py b/test/test_grabber.py -index 8e45d25..bd36d66 100644 ---- a/test/test_grabber.py -+++ b/test/test_grabber.py -@@ -24,11 +24,13 @@ - # $Id: test_grabber.py,v 1.31 2006/12/08 00:14:16 mstenner Exp $ - - import sys -+import six - import os --import string, tempfile, random, cStringIO, os --import urllib2 -+import tempfile, random, os -+from six.moves import urllib - import socket - -+from io import StringIO - from base_test_code import * - - import urlgrabber -@@ -41,12 +43,12 @@ class FileObjectTests(TestCase): - - def setUp(self): - self.filename = tempfile.mktemp() -- fo = file(self.filename, 'wb') -- fo.write(reference_data) -+ fo = open(self.filename, 'wb') -+ fo.write(reference_data.encode('utf-8')) - fo.close() - -- self.fo_input = cStringIO.StringIO(reference_data) -- self.fo_output = cStringIO.StringIO() -+ self.fo_input = StringIO(unicode(reference_data) if not six.PY3 else reference_data) -+ self.fo_output = StringIO() - (url, parts) = grabber.default_grabber.opts.urlparser.parse( - self.filename, grabber.default_grabber.opts) - self.wrapper = grabber.PyCurlFileObject( -@@ -73,7 +75,7 @@ class FileObjectTests(TestCase): - def test_readlines(self): - "PyCurlFileObject .readlines() method" - li = self.wrapper.readlines() -- self.fo_output.write(string.join(li, '')) -+ self.fo_output.write(''.join(li)) - self.assert_(reference_data == self.fo_output.getvalue()) - - def test_smallread(self): -@@ -90,7 +92,7 @@ class HTTPTests(TestCase): - filename = tempfile.mktemp() - grabber.urlgrab(ref_http, filename) - -- fo = file(filename, 'rb') -+ fo = open(filename, 'rb' if not six.PY3 else 'r') - contents = fo.read() - fo.close() - -@@ -136,7 +138,7 @@ class URLGrabberTestCase(TestCase): - - def setUp(self): - -- self.meter = text_progress_meter( fo=cStringIO.StringIO() ) -+ self.meter = text_progress_meter( fo=StringIO() ) - pass - - def tearDown(self): -@@ -149,7 +151,7 @@ class URLGrabberTestCase(TestCase): - values into the URLGrabber constructor and checks that - they've been set properly. - """ -- opener = urllib2.OpenerDirector() -+ opener = urllib.request.OpenerDirector() - g = URLGrabber( progress_obj=self.meter, - throttle=0.9, - bandwidth=20, -@@ -225,13 +227,13 @@ class URLParserTestCase(TestCase): - self.assertEquals(parts, urllist[2]) - else: - if url == urllist[1] and parts == urllist[2]: -- print 'OK: %s' % urllist[0] -+ print('OK: %s' % urllist[0]) - else: -- print 'ERROR: %s' % urllist[0] -- print ' ' + urllist[1] -- print ' ' + url -- print ' ' + urllist[2] -- print ' ' + parts -+ print('ERROR: %s' % urllist[0]) -+ print(' ' + urllist[1]) -+ print(' ' + url) -+ print(' ' + urllist[2]) -+ print(' ' + parts) - - - url_tests_all = ( -@@ -380,7 +382,7 @@ class CheckfuncTestCase(TestCase): - - if hasattr(obj, 'filename'): - # we used urlgrab -- fo = file(obj.filename) -+ fo = open(obj.filename) - data = fo.read() - fo.close() - else: -@@ -447,12 +449,12 @@ class RegetTestBase: - except: pass - - def _make_half_zero_file(self): -- fo = file(self.filename, 'wb') -- fo.write('0'*self.hl) -+ fo = open(self.filename, 'wb' if not six.PY3 else 'w') -+ fo.write('0'*int(self.hl)) - fo.close() - - def _read_file(self): -- fo = file(self.filename, 'rb') -+ fo = open(self.filename, 'rb' if not six.PY3 else 'r') - data = fo.read() - fo.close() - return data -@@ -470,7 +472,7 @@ class FTPRegetTests(RegetTestBase, TestCase): - # this tests to see if the server is available. If it's not, - # then these tests will be skipped - try: -- fo = urllib2.urlopen(self.url).close() -+ fo = urllib.request.urlopen(self.url).close() - except IOError: - self.skip() - -@@ -480,8 +482,8 @@ class FTPRegetTests(RegetTestBase, TestCase): - self.grabber.urlgrab(self.url, self.filename, reget='simple') - data = self._read_file() - -- self.assertEquals(data[:self.hl], '0'*self.hl) -- self.assertEquals(data[self.hl:], self.ref[self.hl:]) -+ self.assertEquals(data[:int(self.hl)], '0'*int(self.hl)) -+ self.assertEquals(data[int(self.hl):], self.ref[int(self.hl):]) - - class HTTPRegetTests(FTPRegetTests): - def setUp(self): -@@ -498,8 +500,8 @@ class HTTPRegetTests(FTPRegetTests): - self.grabber.urlgrab(self.url, self.filename, reget='check_timestamp') - data = self._read_file() - -- self.assertEquals(data[:self.hl], '0'*self.hl) -- self.assertEquals(data[self.hl:], self.ref[self.hl:]) -+ self.assertEquals(data[:int(self.hl)], '0'*int(self.hl)) -+ self.assertEquals(data[int(self.hl):], self.ref[int(self.hl):]) - except NotImplementedError: - self.skip() - -@@ -521,7 +523,7 @@ class FileRegetTests(HTTPRegetTests): - def setUp(self): - self.ref = short_reference_data - tmp = tempfile.mktemp() -- tmpfo = file(tmp, 'wb') -+ tmpfo = open(tmp, 'wb' if not six.PY3 else 'w') - tmpfo.write(self.ref) - tmpfo.close() - self.tmp = tmp -@@ -545,7 +547,7 @@ class ProFTPDSucksTests(TestCase): - def setUp(self): - self.url = ref_proftp - try: -- fo = urllib2.urlopen(self.url).close() -+ fo = urllib.request.urlopen(self.url).close() - except IOError: - self.skip() - -@@ -592,7 +594,7 @@ class ProxyFTPAuthTests(ProxyHTTPAuthTests): - if not self.have_proxy(): - self.skip() - try: -- fo = urllib2.urlopen(self.url).close() -+ fo = urllib.request.urlopen(self.url).close() - except IOError: - self.skip() - self.g = URLGrabber() -diff --git a/test/test_mirror.py b/test/test_mirror.py -index 7f493d0..c46cd33 100644 ---- a/test/test_mirror.py -+++ b/test/test_mirror.py -@@ -24,8 +24,9 @@ - # $Id: test_mirror.py,v 1.12 2005/10/22 21:57:27 mstenner Exp $ - - import sys -+import six - import os --import string, tempfile, random, cStringIO, os -+import string, tempfile, random, os - - import urlgrabber.grabber - from urlgrabber.grabber import URLGrabber, URLGrabError, URLGrabberOptions -@@ -268,7 +269,8 @@ class ActionTests(TestCase): - self.assertEquals(self.g.calls, expected_calls) - self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs) - --import thread, socket -+from six.moves import _thread as thread -+import socket - LOCALPORT = 'localhost', 2000 - - class HttpReplyCode(TestCase): -@@ -282,11 +284,14 @@ class HttpReplyCode(TestCase): - while 1: - c, a = s.accept() - if self.exit: c.close(); break -- while not c.recv(4096).endswith('\r\n\r\n'): pass -- c.sendall('HTTP/1.1 %d %s\r\n' % self.reply) -+ ending_compat = '\r\n\r\n' if not six.PY3 else b'\r\n\r\n' -+ while not c.recv(4096).endswith(ending_compat): pass -+ http_compat = 'HTTP/1.1 %d %s\r\n' % self.reply -+ c.sendall(http_compat if not six.PY3 else http_compat.encode('utf-8')) - if self.content is not None: -- c.sendall('Content-Length: %d\r\n\r\n' % len(self.content)) -- c.sendall(self.content) -+ cont_length_compat = 'Content-Length: %d\r\n\r\n' % len(self.content) -+ c.sendall(cont_length_compat if not six.PY3 else cont_length_compat.encode('utf-8')) -+ c.sendall(self.content if not six.PY3 else self.content.encode('utf-8')) - c.close() - s.close() - self.exit = False diff --git a/port-to-python3.patch b/port-to-python3.patch deleted file mode 100644 index 41b6742..0000000 --- a/port-to-python3.patch +++ /dev/null @@ -1,1175 +0,0 @@ -commit 7d6b90e17d333535549e2d3ec1cf41845a9b876f -Author: Tomas Radej -Date: Wed Aug 20 13:32:18 2014 +0200 - - Ported main code - -diff --git a/urlgrabber/__init__.py b/urlgrabber/__init__.py -index b3047b0..636849c 100644 ---- a/urlgrabber/__init__.py -+++ b/urlgrabber/__init__.py -@@ -52,4 +52,4 @@ __author__ = 'Michael D. Stenner , ' \ - 'Zdenek Pavlas ' - __url__ = 'http://urlgrabber.baseurl.org/' - --from grabber import urlgrab, urlopen, urlread -+from urlgrabber.grabber import urlgrab, urlopen, urlread -diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py -index 5efa160..ffaed8e 100644 ---- a/urlgrabber/byterange.py -+++ b/urlgrabber/byterange.py -@@ -18,24 +18,22 @@ - # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko - - -+import email -+import mimetypes - import os -+import six - import stat --import urllib --import urllib2 --import rfc822 -+from six.moves import urllib - - DEBUG = None - --try: -- from cStringIO import StringIO --except ImportError, msg: -- from StringIO import StringIO -+from io import StringIO - - class RangeError(IOError): - """Error raised when an unsatisfiable range is requested.""" - pass - --class HTTPRangeHandler(urllib2.BaseHandler): -+class HTTPRangeHandler(urllib.request.BaseHandler): - """Handler that enables HTTP Range headers. - - This was extremely simple. The Range header is a HTTP feature to -@@ -120,7 +118,7 @@ class RangeableFileObject: - in self.fo. This includes methods.""" - if hasattr(self.fo, name): - return getattr(self.fo, name) -- raise AttributeError, name -+ raise AttributeError(name) - - def tell(self): - """Return the position within the range. -@@ -211,37 +209,36 @@ class RangeableFileObject: - raise RangeError(9, 'Requested Range Not Satisfiable') - pos+= bufsize - --class FileRangeHandler(urllib2.FileHandler): -+class FileRangeHandler(urllib.request.FileHandler): - """FileHandler subclass that adds Range support. - This class handles Range headers exactly like an HTTP - server would. - """ - def open_local_file(self, req): -- import mimetypes -- import mimetools - host = req.get_host() - file = req.get_selector() -- localfile = urllib.url2pathname(file) -+ localfile = urllib.request.url2pathname(file) - stats = os.stat(localfile) - size = stats[stat.ST_SIZE] -- modified = rfc822.formatdate(stats[stat.ST_MTIME]) -+ modified = email.utils.formatdate(stats[stat.ST_MTIME]) - mtype = mimetypes.guess_type(file)[0] - if host: -- host, port = urllib.splitport(host) -+ host, port = urllib.parse.splitport(host) - if port or socket.gethostbyname(host) not in self.get_names(): -- raise urllib2.URLError('file not on local host') -+ raise urllib.error.URLError('file not on local host') - fo = open(localfile,'rb') - brange = req.headers.get('Range',None) - brange = range_header_to_tuple(brange) - assert brange != () - if brange: - (fb,lb) = brange -- if lb == '': lb = size -+ if lb == '': -+ lb = size - if fb < 0 or fb > size or lb > size: - raise RangeError(9, 'Requested Range Not Satisfiable') - size = (lb - fb) - fo = RangeableFileObject(fo, (fb,lb)) -- headers = mimetools.Message(StringIO( -+ headers = email.message.Message(StringIO( - 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % - (mtype or 'text/plain', size, modified))) - return urllib.addinfourl(fo, headers, 'file:'+file) -@@ -254,42 +251,39 @@ class FileRangeHandler(urllib2.FileHandler): - # follows: - # -- range support modifications start/end here - --from urllib import splitport, splituser, splitpasswd, splitattr, \ -- unquote, addclosehook, addinfourl - import ftplib - import socket - import sys --import mimetypes --import mimetools -+from six.moves.urllib.parse import urlparse, unquote -+ -+# Very old functions and classes, undocumented in current Python releases -+if six.PY3: -+ from urllib.request import splitattr -+ from urllib.response import addinfourl -+else: -+ from urllib import splitattr -+ from urllib import addinfourl - --class FTPRangeHandler(urllib2.FTPHandler): -+ -+class FTPRangeHandler(urllib.request.FTPHandler): - def ftp_open(self, req): - host = req.get_host() - if not host: -- raise IOError, ('ftp error', 'no host given') -- host, port = splitport(host) -- if port is None: -- port = ftplib.FTP_PORT -- else: -- port = int(port) -+ raise IOError('ftp error', 'no host given') - -- # username/password handling -- user, host = splituser(host) -- if user: -- user, passwd = splitpasswd(user) -- else: -- passwd = None -+ parsed = urlparse(host) -+ port = parsed.port or ftplib.FTP_PORT -+ user = unquote(parsed.username or '') -+ passwd = unquote(parsed.passwd or '') - host = unquote(host) -- user = unquote(user or '') -- passwd = unquote(passwd or '') - - try: - host = socket.gethostbyname(host) -- except socket.error, msg: -- raise urllib2.URLError(msg) -+ except socket.error as msg: -+ raise urllib.error.URLError(msg) - path, attrs = splitattr(req.get_selector()) - dirs = path.split('/') -- dirs = map(unquote, dirs) -+ dirs = list(map(unquote, dirs)) - dirs, file = dirs[:-1], dirs[-1] - if dirs and not dirs[0]: - dirs = dirs[1:] -@@ -336,24 +330,36 @@ class FTPRangeHandler(urllib2.FTPHandler): - if retrlen is not None and retrlen >= 0: - headers += "Content-Length: %d\n" % retrlen - sf = StringIO(headers) -- headers = mimetools.Message(sf) -+ headers = email.message.Message(sf) - return addinfourl(fp, headers, req.get_full_url()) -- except ftplib.all_errors, msg: -- raise IOError, ('ftp error', msg), sys.exc_info()[2] -+ except ftplib.all_errors as msg: -+ error = IOError('ftp error', msg) -+ six.reraise(error.__class__, error, sys.exc_info()[2]) - - def connect_ftp(self, user, passwd, host, port, dirs): - fw = ftpwrapper(user, passwd, host, port, dirs) - return fw - --class ftpwrapper(urllib.ftpwrapper): -+# Very old functions and classes, undocumented in current Python releases -+if six.PY3: -+ from urllib.request import ftpwrapper, addclosehook -+else: -+ from urllib import ftpwrapper, addclosehook -+ -+ -+class ftpwrapper(ftpwrapper): - # range support note: - # this ftpwrapper code is copied directly from - # urllib. The only enhancement is to add the rest - # argument and pass it on to ftp.ntransfercmd - def retrfile(self, file, type, rest=None): - self.endtransfer() -- if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 -- else: cmd = 'TYPE ' + type; isdir = 0 -+ if type in ('d', 'D'): -+ cmd = 'TYPE A' -+ isdir = 1 -+ else: -+ cmd = 'TYPE ' + type -+ isdir = 0 - try: - self.ftp.voidcmd(cmd) - except ftplib.all_errors: -@@ -364,22 +370,23 @@ class ftpwrapper(urllib.ftpwrapper): - # Use nlst to see if the file exists at all - try: - self.ftp.nlst(file) -- except ftplib.error_perm, reason: -- raise IOError, ('ftp error', reason), sys.exc_info()[2] -+ except ftplib.error_perm as reason: -+ error = IOError('ftp error', reason) -+ six.reraise(error.__class__, error, sys.exc_info()[2]) - # Restore the transfer mode! - self.ftp.voidcmd(cmd) - # Try to retrieve as a file - try: - cmd = 'RETR ' + file - conn = self.ftp.ntransfercmd(cmd, rest) -- except ftplib.error_perm, reason: -+ except ftplib.error_perm as reason: - if str(reason)[:3] == '501': - # workaround for REST not supported error - fp, retrlen = self.retrfile(file, type) - fp = RangeableFileObject(fp, (rest,'')) - return (fp, retrlen) - elif str(reason)[:3] != '550': -- raise IOError, ('ftp error', reason), sys.exc_info()[2] -+ six.reraise(IOError, ('ftp error', reason), sys.exc_info()[2]) - if not conn: - # Set transfer mode to ASCII! - self.ftp.voidcmd('TYPE A') -@@ -458,6 +465,7 @@ def range_tuple_normalize(range_tup): - # check if range is over the entire file - if (fb,lb) == (0,''): return None - # check that the range is valid -- if lb < fb: raise RangeError(9, 'Invalid byte range: %s-%s' % (fb,lb)) -+ if lb != '' and lb < fb: -+ raise RangeError(9, 'Invalid byte range: %s-%s' % (fb, lb)) - return (fb,lb) - -diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py -index f8deeb8..35c091e 100644 ---- a/urlgrabber/grabber.py -+++ b/urlgrabber/grabber.py -@@ -499,22 +499,24 @@ BANDWIDTH THROTTLING - - import os - import sys --import urlparse - import time -+import collections -+import fcntl -+import pycurl -+import select -+import six -+import socket -+import stat - import string --import urllib --import urllib2 --from httplib import responses --import mimetools --import thread - import types --import stat --import pycurl -+from email.message import Message - from ftplib import parse150 --from StringIO import StringIO --from httplib import HTTPException --import socket, select, fcntl --from byterange import range_tuple_normalize, range_tuple_to_header, RangeError -+from six.moves import _thread as thread -+from six.moves import urllib -+from six.moves.http_client import responses, HTTPException -+from urlgrabber.byterange import range_tuple_normalize, range_tuple_to_header, RangeError -+ -+from io import StringIO - - try: - import xattr -@@ -535,7 +537,7 @@ except: - try: - # this part isn't going to do much - need to talk to gettext - from i18n import _ --except ImportError, msg: -+except ImportError as msg: - def _(st): return st - - ######################################################################## -@@ -635,6 +637,8 @@ def _(st): - - def _to_utf8(obj, errors='replace'): - '''convert 'unicode' to an encoded utf-8 byte string ''' -+ if six.PY3: -+ return obj - # stolen from yum.i18n - if isinstance(obj, unicode): - obj = obj.encode('utf-8', errors) -@@ -791,14 +795,14 @@ class URLParser: - if opts.prefix: - url = self.add_prefix(url, opts.prefix) - -- parts = urlparse.urlparse(url) -+ parts = urllib.parse.urlparse(url) - (scheme, host, path, parm, query, frag) = parts - -- if not scheme or (len(scheme) == 1 and scheme in string.letters): -+ if not scheme or (len(scheme) == 1 and scheme in string.ascii_letters): - # if a scheme isn't specified, we guess that it's "file:" - if url[0] not in '/\\': url = os.path.abspath(url) -- url = 'file:' + urllib.pathname2url(url) -- parts = urlparse.urlparse(url) -+ url = 'file:' + urllib.request.pathname2url(url) -+ parts = urllib.parse.urlparse(url) - quote = 0 # pathname2url quotes, so we won't do it again - - if scheme in ['http', 'https']: -@@ -809,7 +813,7 @@ class URLParser: - if quote: - parts = self.quote(parts) - -- url = urlparse.urlunparse(parts) -+ url = urllib.parse.urlunparse(parts) - return url, parts - - def add_prefix(self, url, prefix): -@@ -833,7 +837,7 @@ class URLParser: - passing into urlgrabber. - """ - (scheme, host, path, parm, query, frag) = parts -- path = urllib.quote(path) -+ path = urllib.parse.quote(path) - return (scheme, host, path, parm, query, frag) - - hexvals = '0123456789ABCDEF' -@@ -850,7 +854,7 @@ class URLParser: - (scheme, host, path, parm, query, frag) = parts - if ' ' in path: - return 1 -- ind = string.find(path, '%') -+ ind = path.find('%') - if ind > -1: - while ind > -1: - if len(path) < ind+3: -@@ -859,7 +863,7 @@ class URLParser: - if code[0] not in self.hexvals or \ - code[1] not in self.hexvals: - return 1 -- ind = string.find(path, '%', ind+1) -+ ind = path.find('%', ind+1) - return 0 - return 1 - -@@ -879,13 +883,13 @@ class URLGrabberOptions: - def __getattr__(self, name): - if self.delegate and hasattr(self.delegate, name): - return getattr(self.delegate, name) -- raise AttributeError, name -+ raise AttributeError(name) - - def raw_throttle(self): - """Calculate raw throttle value from throttle and bandwidth - values. - """ -- if self.throttle <= 0: -+ if self.throttle is None or self.throttle <= 0: - return 0 - elif type(self.throttle) == type(0): - return float(self.throttle) -@@ -937,7 +941,7 @@ class URLGrabberOptions: - def _set_attributes(self, **kwargs): - """Update object attributes with those provided in kwargs.""" - self.__dict__.update(kwargs) -- if kwargs.has_key('range'): -+ if 'range' in kwargs: - # normalize the supplied range value - self.range = range_tuple_normalize(self.range) - if not self.reget in [None, 'simple', 'check_timestamp']: -@@ -1006,7 +1010,7 @@ class URLGrabberOptions: - return self.format() - - def format(self, indent=' '): -- keys = self.__dict__.keys() -+ keys = list(self.__dict__.keys()) - if self.delegate is not None: - keys.remove('delegate') - keys.sort() -@@ -1026,7 +1030,7 @@ def _do_raise(obj): - def _run_callback(cb, obj): - if not cb: - return -- if callable(cb): -+ if isinstance(cb, collections.Callable): - return cb(obj) - cb, arg, karg = cb - return cb(obj, *arg, **karg) -@@ -1058,16 +1062,15 @@ class URLGrabber(object): - tries = tries + 1 - exception = None - callback = None -- if DEBUG: DEBUG.info('attempt %i/%s: %s', -- tries, opts.retry, args[0]) -+ if DEBUG: DEBUG.info('attempt %i/%s: %s', tries, opts.retry, args[0]) - try: -- r = apply(func, (opts,) + args, {}) -+ r = func(*(opts,) + args, **{}) - if DEBUG: DEBUG.info('success') - return r -- except URLGrabError, e: -+ except URLGrabError as e: - exception = e - callback = opts.failure_callback -- except KeyboardInterrupt, e: -+ except KeyboardInterrupt as e: - exception = e - callback = opts.interrupt_callback - if not callback: -@@ -1082,13 +1085,13 @@ class URLGrabber(object): - - if (opts.retry is None) or (tries == opts.retry): - if DEBUG: DEBUG.info('retries exceeded, re-raising') -- raise -+ raise exception - - retrycode = getattr(exception, 'errno', None) - if (retrycode is not None) and (retrycode not in opts.retrycodes): - if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising', - retrycode, opts.retrycodes) -- raise -+ raise exception - - def urlopen(self, url, opts=None, **kwargs): - """open the url and return a file object -@@ -1119,14 +1122,14 @@ class URLGrabber(object): - (scheme, host, path, parm, query, frag) = parts - opts.find_proxy(url, scheme) - if filename is None: -- filename = os.path.basename( urllib.unquote(path) ) -+ filename = os.path.basename( urllib.parse.unquote(path) ) - if not filename: - # This is better than nothing. - filename = 'index.html' - if scheme == 'file' and not opts.copy_local: - # just return the name of the local file - don't make a - # copy currently -- path = urllib.url2pathname(path) -+ path = urllib.request.url2pathname(path) - if host: - path = os.path.normpath('//' + host + path) - if not os.path.exists(path): -@@ -1170,7 +1173,7 @@ class URLGrabber(object): - - try: - return self._retry(opts, retryfunc, url, filename) -- except URLGrabError, e: -+ except URLGrabError as e: - _TH.update(url, 0, 0, e) - opts.exception = e - return _run_callback(opts.failfunc, opts) -@@ -1219,7 +1222,7 @@ class URLGrabber(object): - - def _make_callback(self, callback_obj): - # not used, left for compatibility -- if callable(callback_obj): -+ if isinstance(callback_obj, collections.Callable): - return callback_obj, (), {} - else: - return callback_obj -@@ -1235,13 +1238,13 @@ class PyCurlFileObject(object): - self._hdr_dump = '' - self._parsed_hdr = None - self.url = url -- self.scheme = urlparse.urlsplit(self.url)[0] -+ self.scheme = urllib.parse.urlsplit(self.url)[0] - self.filename = filename - self.append = False - self.reget_time = None - self.opts = opts - if self.opts.reget == 'check_timestamp': -- raise NotImplementedError, "check_timestamp regets are not implemented in this ver of urlgrabber. Please report this." -+ raise NotImplementedError("check_timestamp regets are not implemented in this ver of urlgrabber. Please report this.") - self._complete = False - self._rbuf = '' - self._rbufsize = 1024*8 -@@ -1266,7 +1269,7 @@ class PyCurlFileObject(object): - - if hasattr(self.fo, name): - return getattr(self.fo, name) -- raise AttributeError, name -+ raise AttributeError(name) - - def _retrieve(self, buf): - try: -@@ -1280,7 +1283,7 @@ class PyCurlFileObject(object): - if self.opts.progress_obj: - size = self.size + self._reget_length - self.opts.progress_obj.start(self._prog_reportname, -- urllib.unquote(self.url), -+ urllib.parse.unquote(self.url), - self._prog_basename, - size=size, - text=self.opts.text) -@@ -1295,10 +1298,16 @@ class PyCurlFileObject(object): - start = self._range[0] - pos - stop = self._range[1] - pos - if start < len(buf) and stop > 0: -- self.fo.write(buf[max(start, 0):stop]) -+ if not six.PY3 or isinstance(self.fo, StringIO): -+ self.fo.write(buf[max(start, 0):stop].decode('utf-8')) -+ else: -+ self.fo.write(buf[max(start, 0):stop]) - else: -- self.fo.write(buf) -- except IOError, e: -+ if not six.PY3 or isinstance(self.fo, StringIO): -+ self.fo.write(buf.decode('utf-8')) -+ else: -+ self.fo.write(buf) -+ except IOError as e: - self._cb_error = URLGrabError(16, exception2msg(e)) - return -1 - return len(buf) -@@ -1319,10 +1328,12 @@ class PyCurlFileObject(object): - # but we can't do that w/o making it do 2 connects, which sucks - # so we cheat and stuff it in here in the hdr_retrieve - if self.scheme in ['http','https']: -- if buf.lower().find('content-length:') != -1: -- length = buf.split(':')[1] -+ content_length_str = 'content-length:' if not six.PY3 else b'content-length:' -+ if buf.lower().find(content_length_str) != -1: -+ split_str = ':' if not six.PY3 else b':' -+ length = buf.split(split_str)[1] - self.size = int(length) -- elif (self.append or self.opts.range) and self._hdr_dump == '' and ' 200 ' in buf: -+ elif (self.append or self.opts.range) and self._hdr_dump == '' and b' 200 ' in buf: - # reget was attempted but server sends it all - # undo what we did in _build_range() - self.append = False -@@ -1333,23 +1344,26 @@ class PyCurlFileObject(object): - self.fo.truncate(0) - elif self.scheme in ['ftp']: - s = None -- if buf.startswith('213 '): -+ if buf.startswith(b'213 '): - s = buf[3:].strip() - if len(s) >= 14: - s = None # ignore MDTM responses -- elif buf.startswith('150 '): -- s = parse150(buf) -+ elif buf.startswith(b'150 '): -+ s = parse150(buf if not six.PY3 else buf.decode('utf-8')) - if s: - self.size = int(s) - -- if buf.lower().find('location') != -1: -- location = ':'.join(buf.split(':')[1:]) -+ location_str = 'location' if not six.PY3 else b'location' -+ if buf.lower().find(location_str) != -1: -+ buf_compat = buf if not six.PY3 else buf.decode('utf-8') -+ location = ':'.join(buf_compat.split(':')[1:]) - location = location.strip() -- self.scheme = urlparse.urlsplit(location)[0] -+ self.scheme = urllib.parse.urlsplit(location)[0] - self.url = location - -- self._hdr_dump += buf -- if len(self._hdr_dump) != 0 and buf == '\r\n': -+ self._hdr_dump += buf if not six.PY3 else buf.decode('utf-8') -+ end_str = '\r\n' if not six.PY3 else b'\r\n' -+ if len(self._hdr_dump) != 0 and buf == end_str: - self._hdr_ended = True - if DEBUG: DEBUG.debug('header ended:') - -@@ -1365,7 +1379,7 @@ class PyCurlFileObject(object): - hdrfp = StringIO() - hdrfp.write(self._hdr_dump[statusend:]) - hdrfp.seek(0) -- self._parsed_hdr = mimetools.Message(hdrfp) -+ self._parsed_hdr = Message(hdrfp) - return self._parsed_hdr - - hdr = property(_return_hdr_obj) -@@ -1490,7 +1504,7 @@ class PyCurlFileObject(object): - - try: - self.curl_obj.perform() -- except pycurl.error, e: -+ except pycurl.error as e: - # XXX - break some of these out a bit more clearly - # to other URLGrabErrors from - # http://curl.haxx.se/libcurl/c/libcurl-errors.html -@@ -1498,7 +1512,7 @@ class PyCurlFileObject(object): - - code = self.http_code - errcode = e.args[0] -- errurl = urllib.unquote(self.url) -+ errurl = urllib.parse.unquote(self.url) - - if self._error[0]: - errcode = self._error[0] -@@ -1588,7 +1602,7 @@ class PyCurlFileObject(object): - if self._error[1]: - msg = self._error[1] - err = URLGrabError(14, msg) -- err.url = urllib.unquote(self.url) -+ err.url = urllib.parse.unquote(self.url) - raise err - - def _do_open(self): -@@ -1605,7 +1619,7 @@ class PyCurlFileObject(object): - def _build_range(self): - reget_length = 0 - rt = None -- if self.opts.reget and type(self.filename) in types.StringTypes: -+ if self.opts.reget and type(self.filename) in (type(str()), six.text_type): - # we have reget turned on and we're dumping to a file - try: - s = os.stat(self.filename) -@@ -1655,22 +1669,22 @@ class PyCurlFileObject(object): - else: - fo = opener.open(req) - hdr = fo.info() -- except ValueError, e: -+ except ValueError as e: - err = URLGrabError(1, _('Bad URL: %s : %s') % (self.url, e, )) - err.url = self.url - raise err - -- except RangeError, e: -+ except RangeError as e: - err = URLGrabError(9, _('%s on %s') % (e, self.url)) - err.url = self.url - raise err -- except urllib2.HTTPError, e: -+ except urllib.error.HTTPError as e: - new_e = URLGrabError(14, _('%s on %s') % (e, self.url)) - new_e.code = e.code - new_e.exception = e - new_e.url = self.url - raise new_e -- except IOError, e: -+ except IOError as e: - if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout): - err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e)) - err.url = self.url -@@ -1680,12 +1694,12 @@ class PyCurlFileObject(object): - err.url = self.url - raise err - -- except OSError, e: -+ except OSError as e: - err = URLGrabError(5, _('%s on %s') % (e, self.url)) - err.url = self.url - raise err - -- except HTTPException, e: -+ except HTTPException as e: - err = URLGrabError(7, _('HTTP Exception (%s) on %s: %s') % \ - (e.__class__.__name__, self.url, e)) - err.url = self.url -@@ -1700,19 +1714,21 @@ class PyCurlFileObject(object): - if self._complete: - return - _was_filename = False -- if type(self.filename) in types.StringTypes and self.filename: -+ if self.filename and type(self.filename) in (type(str()), six.text_type): - _was_filename = True - self._prog_reportname = str(self.filename) - self._prog_basename = os.path.basename(self.filename) - -- if self.append: mode = 'ab' -- else: mode = 'wb' -+ if self.append: -+ mode = 'ab' -+ else: -+ mode = 'wb' - -- if DEBUG: DEBUG.info('opening local file "%s" with mode %s' % \ -- (self.filename, mode)) -+ if DEBUG: -+ DEBUG.info('opening local file "%s" with mode %s' % (self.filename, mode)) - try: - self.fo = open(self.filename, mode) -- except IOError, e: -+ except IOError as e: - err = URLGrabError(16, _(\ - 'error opening local file from %s, IOError: %s') % (self.url, e)) - err.url = self.url -@@ -1731,7 +1747,7 @@ class PyCurlFileObject(object): - - try: - self._do_perform() -- except URLGrabError, e: -+ except URLGrabError as e: - self.fo.flush() - self.fo.close() - raise e -@@ -1754,7 +1770,7 @@ class PyCurlFileObject(object): - if mod_time != -1: - try: - os.utime(self.filename, (mod_time, mod_time)) -- except OSError, e: -+ except OSError as e: - err = URLGrabError(16, _(\ - 'error setting timestamp on file %s from %s, OSError: %s') - % (self.filename, self.url, e)) -@@ -1763,7 +1779,7 @@ class PyCurlFileObject(object): - # re open it - try: - self.fo = open(self.filename, 'r') -- except IOError, e: -+ except IOError as e: - err = URLGrabError(16, _(\ - 'error opening file from %s, IOError: %s') % (self.url, e)) - err.url = self.url -@@ -1809,25 +1825,27 @@ class PyCurlFileObject(object): - else: readamount = min(amt, self._rbufsize) - try: - new = self.fo.read(readamount) -- except socket.error, e: -+ except socket.error as e: - err = URLGrabError(4, _('Socket Error on %s: %s') % (self.url, e)) - err.url = self.url - raise err - -- except socket.timeout, e: -+ except socket.timeout as e: - raise URLGrabError(12, _('Timeout on %s: %s') % (self.url, e)) - err.url = self.url - raise err - -- except IOError, e: -+ except IOError as e: - raise URLGrabError(4, _('IOError on %s: %s') %(self.url, e)) - err.url = self.url - raise err - - newsize = len(new) -- if not newsize: break # no more to read -+ if not newsize: -+ break # no more to read - -- if amt: amt = amt - newsize -+ if amt: -+ amt = amt - newsize - buf.append(new) - bufsize = bufsize + newsize - self._tsize = newsize -@@ -1835,7 +1853,7 @@ class PyCurlFileObject(object): - #if self.opts.progress_obj: - # self.opts.progress_obj.update(self._amount_read) - -- self._rbuf = string.join(buf, '') -+ self._rbuf = ''.join(buf) - return - - def _progress_update(self, download_total, downloaded, upload_total, uploaded): -@@ -1879,12 +1897,12 @@ class PyCurlFileObject(object): - if not self._complete: self._do_grab() - return self.fo.readline() - -- i = string.find(self._rbuf, '\n') -+ i = self._rbuf.find('\n') - while i < 0 and not (0 < limit <= len(self._rbuf)): - L = len(self._rbuf) - self._fill_buffer(L + self._rbufsize) - if not len(self._rbuf) > L: break -- i = string.find(self._rbuf, '\n', L) -+ i = self._rbuf.find('\n', L) - - if i < 0: i = len(self._rbuf) - else: i = i+1 -@@ -1968,9 +1986,9 @@ def _dumps(v): - if v is None: return 'None' - if v is True: return 'True' - if v is False: return 'False' -- if type(v) in (int, long, float): -+ if type(v) in six.integer_types + (float,): - return str(v) -- if type(v) == unicode: -+ if not six.PY3 and type(v) == unicode: - v = v.encode('UTF8') - if type(v) == str: - def quoter(c): return _quoter_map.get(c, c) -@@ -1979,17 +1997,21 @@ def _dumps(v): - return "(%s)" % ','.join(map(_dumps, v)) - if type(v) == list: - return "[%s]" % ','.join(map(_dumps, v)) -- raise TypeError, 'Can\'t serialize %s' % v -+ raise TypeError('Can\'t serialize %s' % v) - - def _loads(s): - def decode(v): - if v == 'None': return None - if v == 'True': return True - if v == 'False': return False -- try: return int(v) -- except ValueError: pass -- try: return float(v) -- except ValueError: pass -+ try: -+ return int(v) -+ except ValueError: -+ pass -+ try: -+ return float(v) -+ except ValueError: -+ pass - if len(v) >= 2 and v[0] == v[-1] == "'": - ret = []; i = 1 - while True: -@@ -2033,9 +2055,11 @@ def _readlines(fd): - buf = os.read(fd, 4096) - if not buf: return None - # whole lines only, no buffering -- while buf[-1] != '\n': -+ buf_compat = buf if not six.PY3 else buf.decode('utf-8') -+ while buf_compat[-1] != '\n': - buf += os.read(fd, 4096) -- return buf[:-1].split('\n') -+ buf_compat = buf if not six.PY3 else buf.decode('utf-8') -+ return buf_compat[:-1].split('\n') - - import subprocess - -@@ -2071,7 +2095,8 @@ class _ExternalDownloader: - arg = [] - for k in self._options: - v = getattr(opts, k) -- if v is None: continue -+ if v is None: -+ continue - arg.append('%s=%s' % (k, _dumps(v))) - if opts.progress_obj and opts.multi_progress_obj: - arg.append('progress_obj=True') -@@ -2080,7 +2105,8 @@ class _ExternalDownloader: - - self.cnt += 1 - self.running[self.cnt] = opts -- os.write(self.stdin, arg +'\n') -+ result = arg +'\n' -+ os.write(self.stdin, result if not six.PY3 else result.encode('utf-8')) - - def perform(self): - ret = [] -@@ -2091,7 +2117,7 @@ class _ExternalDownloader: - for line in lines: - # parse downloader output - line = line.split(' ', 6) -- _id, size = map(int, line[:2]) -+ _id, size = list(map(int, line[:2])) - if len(line) == 2: - self.running[_id]._progress.update(size) - continue -@@ -2121,7 +2147,7 @@ class _ExternalDownloaderPool: - self.cache = {} - - def start(self, opts): -- host = urlparse.urlsplit(opts.url).netloc -+ host = urllib.parse.urlsplit(opts.url).netloc - dl = self.cache.pop(host, None) - if not dl: - dl = _ExternalDownloader() -@@ -2144,8 +2170,9 @@ class _ExternalDownloaderPool: - ret.extend(done) - - # dl finished, move it to the cache -- host = urlparse.urlsplit(done[0][0].url).netloc -- if host in self.cache: self.cache[host].abort() -+ host = urllib.parse.urlsplit(done[0][0].url).netloc -+ if host in self.cache: -+ self.cache[host].abort() - self.epoll.unregister(fd) - self.cache[host] = self.running.pop(fd) - return ret -@@ -2189,7 +2216,7 @@ def parallel_wait(meter=None): - opts.tries = tries - try: - dl.start(opts) -- except OSError, e: -+ except OSError as e: - # can't spawn downloader, give up immediately - opts.exception = URLGrabError(5, exception2msg(e)) - _run_callback(opts.failfunc, opts) -@@ -2212,7 +2239,8 @@ def parallel_wait(meter=None): - if ug_err is None: - if opts.checkfunc: - try: _run_callback(opts.checkfunc, opts) -- except URLGrabError, ug_err: pass -+ except URLGrabError: -+ pass - - if opts.progress_obj: - if opts.multi_progress_obj: -@@ -2242,8 +2270,9 @@ def parallel_wait(meter=None): - retry = opts.retry or 0 - if opts.failure_callback: - opts.exception = ug_err -- try: _run_callback(opts.failure_callback, opts) -- except URLGrabError, ug_err: -+ try: -+ _run_callback(opts.failure_callback, opts) -+ except URLGrabError: - retry = 0 # no retries - if opts.tries < retry and ug_err.errno in opts.retrycodes: - start(opts, opts.tries + 1) # simple retry -@@ -2293,8 +2322,7 @@ def parallel_wait(meter=None): - # check global limit - while len(dl.running) >= default_grabber.opts.max_connections: - perform() -- if DEBUG: -- DEBUG.info('max_connections: %d/%d', len(dl.running), default_grabber.opts.max_connections) -+ if DEBUG: DEBUG.info('max_connections: %d/%d', len(dl.running), default_grabber.opts.max_connections) - - if opts.mirror_group: - mg, errors, failed, removed = opts.mirror_group -@@ -2345,12 +2373,12 @@ def parallel_wait(meter=None): - limit = 1 - while host_con.get(key, 0) >= (limit or 2): - perform() -- if DEBUG: -- DEBUG.info('max_connections(%s): %d/%s', key, host_con.get(key, 0), limit) -+ if DEBUG: DEBUG.info('max_connections(%s): %d/%s', key, host_con.get(key, 0), limit) - - start(opts, 1) -- except IOError, e: -- if e.errno != 4: raise -+ except IOError as e: -+ if e.errno != 4: -+ raise - raise KeyboardInterrupt - - finally: -@@ -2399,7 +2427,7 @@ class _TH: - def update(url, dl_size, dl_time, ug_err, baseurl=None): - # Use hostname from URL. If it's a file:// URL, use baseurl. - # If no baseurl, do not update timedhosts. -- host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl -+ host = urllib.parse.urlsplit(url).netloc.split('@')[-1] or baseurl - if not host: return - - _TH.load() -@@ -2431,7 +2459,7 @@ class _TH: - _TH.load() - - # Use just the hostname, unless it's a file:// baseurl. -- host = urlparse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl -+ host = urllib.parse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl - - default_speed = default_grabber.opts.default_speed - try: speed, fail, ts = _TH.hosts[host] -@@ -2447,68 +2475,67 @@ class _TH: - def _main_test(): - try: url, filename = sys.argv[1:3] - except ValueError: -- print 'usage:', sys.argv[0], \ -- ' [copy_local=0|1] [close_connection=0|1]' -+ print('usage:', sys.argv[0], \ -+ ' [copy_local=0|1] [close_connection=0|1]') - sys.exit() - - kwargs = {} - for a in sys.argv[3:]: -- k, v = string.split(a, '=', 1) -+ k, v = a.split('=', 1) - kwargs[k] = int(v) - - set_throttle(1.0) - set_bandwidth(32 * 1024) -- print "throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle, -- default_grabber.bandwidth) -+ print("throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle, -+ default_grabber.bandwidth)) - -- try: from progress import text_progress_meter -- except ImportError, e: pass -+ try: from .progress import text_progress_meter -+ except ImportError: pass - else: kwargs['progress_obj'] = text_progress_meter() - -- try: name = apply(urlgrab, (url, filename), kwargs) -- except URLGrabError, e: print e -- else: print 'LOCAL FILE:', name -+ try: name = urlgrab(*(url, filename), **kwargs) -+ except URLGrabError as e: print(e) -+ else: print('LOCAL FILE:', name) - - - def _retry_test(): - try: url, filename = sys.argv[1:3] - except ValueError: -- print 'usage:', sys.argv[0], \ -- ' [copy_local=0|1] [close_connection=0|1]' -+ print('usage:', sys.argv[0], \ -+ ' [copy_local=0|1] [close_connection=0|1]') - sys.exit() - - kwargs = {} - for a in sys.argv[3:]: -- k, v = string.split(a, '=', 1) -+ k, v = a.split('=', 1) - kwargs[k] = int(v) - -- try: from progress import text_progress_meter -- except ImportError, e: pass -+ try: from .progress import text_progress_meter -+ except ImportError: pass - else: kwargs['progress_obj'] = text_progress_meter() - - def cfunc(filename, hello, there='foo'): -- print hello, there -+ print(hello, there) - import random - rnum = random.random() - if rnum < .5: -- print 'forcing retry' -+ print('forcing retry') - raise URLGrabError(-1, 'forcing retry') - if rnum < .75: -- print 'forcing failure' -+ print('forcing failure') - raise URLGrabError(-2, 'forcing immediate failure') -- print 'success' -+ print('success') - return - - kwargs['checkfunc'] = (cfunc, ('hello',), {'there':'there'}) -- try: name = apply(retrygrab, (url, filename), kwargs) -- except URLGrabError, e: print e -- else: print 'LOCAL FILE:', name -+ try: name = retrygrab(*(url, filename), **kwargs) -+ except URLGrabError as e: print(e) -+ else: print('LOCAL FILE:', name) - - def _file_object_test(filename=None): -- import cStringIO - if filename is None: - filename = __file__ -- print 'using file "%s" for comparisons' % filename -+ print('using file "%s" for comparisons' % filename) - fo = open(filename) - s_input = fo.read() - fo.close() -@@ -2517,14 +2544,13 @@ def _file_object_test(filename=None): - _test_file_object_readall, - _test_file_object_readline, - _test_file_object_readlines]: -- fo_input = cStringIO.StringIO(s_input) -- fo_output = cStringIO.StringIO() -+ fo_input = StringIO(s_input) -+ fo_output = StringIO() - wrapper = PyCurlFileObject(fo_input, None, 0) -- print 'testing %-30s ' % testfunc.__name__, -- testfunc(wrapper, fo_output) -+ print('testing %-30s ' % testfunc.__name__, testfunc(wrapper, fo_output)) - s_output = fo_output.getvalue() -- if s_output == s_input: print 'passed' -- else: print 'FAILED' -+ if s_output == s_input: print('passed') -+ else: print('FAILED') - - def _test_file_object_smallread(wrapper, fo_output): - while 1: -@@ -2544,7 +2570,7 @@ def _test_file_object_readline(wrapper, fo_output): - - def _test_file_object_readlines(wrapper, fo_output): - li = wrapper.readlines() -- fo_output.write(string.join(li, '')) -+ fo_output.write(''.join(li)) - - if __name__ == '__main__': - _main_test() -diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py -index 988a309..f3c2664 100644 ---- a/urlgrabber/mirror.py -+++ b/urlgrabber/mirror.py -@@ -92,13 +92,14 @@ CUSTOMIZATION - - - import sys -+import six - import random --import thread # needed for locking to make this threadsafe -+from six.moves import _thread as thread # needed for locking to make this threadsafe - --from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8 --from grabber import _run_callback, _do_raise --from grabber import exception2msg --from grabber import _TH -+from urlgrabber.grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8 -+from urlgrabber.grabber import _run_callback, _do_raise -+from urlgrabber.grabber import exception2msg -+from urlgrabber.grabber import _TH - - def _(st): - return st -@@ -286,7 +287,7 @@ class MirrorGroup: - def _parse_mirrors(self, mirrors): - parsed_mirrors = [] - for m in mirrors: -- if isinstance(m, basestring): -+ if isinstance(m, six.string_types): - m = {'mirror': _to_utf8(m)} - parsed_mirrors.append(m) - return parsed_mirrors -@@ -423,7 +424,7 @@ class MirrorGroup: - if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl) - try: - return func_ref( *(fullurl,), opts=opts, **kw ) -- except URLGrabError, e: -+ except URLGrabError as e: - if DEBUG: DEBUG.info('MIRROR: failed') - gr.errors.append((fullurl, exception2msg(e))) - obj = CallbackObject() -@@ -446,7 +447,7 @@ class MirrorGroup: - func = 'urlgrab' - try: - return self._mirror_try(func, url, kw) -- except URLGrabError, e: -+ except URLGrabError as e: - obj = CallbackObject(url=url, filename=filename, exception=e, **kwargs) - return _run_callback(kwargs.get('failfunc', _do_raise), obj) - -diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py -index 9b77c54..2235397 100644 ---- a/urlgrabber/progress.py -+++ b/urlgrabber/progress.py -@@ -19,9 +19,10 @@ - - - import sys -+import six - import time - import math --import thread -+from six.moves import _thread as thread - import fcntl - import struct - import termios -@@ -606,7 +607,7 @@ class TextMultiFileMeter(MultiFileMeter): - try: - format = "%-30.30s %6.6s %s" - fn = meter.text or meter.basename -- if type(message) in (type(''), type(u'')): -+ if type(message) in (type(''), type('')): - message = message.splitlines() - if not message: message = [''] - out = '%-79s' % (format % (fn, 'FAILED', message[0] or '')) -@@ -778,7 +779,7 @@ def format_number(number, SI=0, space=' '): - depth = depth + 1 - number = number / step - -- if type(number) == type(1) or type(number) == type(1L): -+ if type(number) in six.integer_types: - # it's an int or a long, which means it didn't get divided, - # which means it's already short enough - format = '%i%s%s' -@@ -806,7 +807,7 @@ def _tst(fn, cur, tot, beg, size, *args): - tm.end(size) - - def _mtst(datas, *args): -- print '-' * 79 -+ print('-' * 79) - tm = TextMultiFileMeter(threaded=False) - - dl_sizes = {} diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index 353327a..13d78a6 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -3,22 +3,19 @@ Summary: A high-level cross-protocol url-grabber Name: python-urlgrabber Version: 3.10.1 -Release: 5%{?dist} +Release: 6%{?dist} Source0: http://urlgrabber.baseurl.org/download/urlgrabber-%{version}.tar.gz Patch1: urlgrabber-HEAD.patch Patch2: BZ-1051554-speed-on-404-mirror.patch -Patch3: port-to-python3.patch -Patch4: port-tests-to-python3.patch -Patch5: urlgrabber-stringio.patch License: LGPLv2+ Group: Development/Libraries BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root BuildArch: noarch -BuildRequires: python-devel, python-pycurl, python-six +BuildRequires: python-devel, python-pycurl Url: http://urlgrabber.baseurl.org/ Provides: urlgrabber = %{version}-%{release} -Requires: python-pycurl, python-six +Requires: python-pycurl %description A high-level cross-protocol url-grabber for python supporting HTTP, FTP @@ -29,9 +26,6 @@ authentication, proxies and more. %setup -q -n urlgrabber-%{version} %patch1 -p1 %patch2 -p1 -%patch3 -p1 -%patch4 -p1 -%patch5 -p1 %build python setup.py build @@ -54,6 +48,9 @@ rm -rf $RPM_BUILD_ROOT %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down %changelog +* Fri Oct 03 2014 Valentina Mukhamedzhanova - 3.10.1-6 +- Revert porting to Python 3. + * Tue Sep 09 2014 Tomas Radej - 3.10.1-5 - Really fixed UTF behaviour diff --git a/urlgrabber-stringio.patch b/urlgrabber-stringio.patch deleted file mode 100644 index 0560a49..0000000 --- a/urlgrabber-stringio.patch +++ /dev/null @@ -1,433 +0,0 @@ -diff --git a/test/test_grabber.py b/test/test_grabber.py -index bd36d66..bd54329 100644 ---- a/test/test_grabber.py -+++ b/test/test_grabber.py -@@ -42,7 +42,7 @@ from urlgrabber.progress import text_progress_meter - class FileObjectTests(TestCase): - - def setUp(self): -- self.filename = tempfile.mktemp() -+ _, self.filename = tempfile.mkstemp() - fo = open(self.filename, 'wb') - fo.write(reference_data.encode('utf-8')) - fo.close() -@@ -61,35 +61,36 @@ class FileObjectTests(TestCase): - def test_readall(self): - "PYCurlFileObject .read() method" - s = self.wrapper.read() -- self.fo_output.write(s) -+ self.fo_output.write(unicode(s) if not six.PY3 else s) - self.assert_(reference_data == self.fo_output.getvalue()) - - def test_readline(self): - "PyCurlFileObject .readline() method" - while 1: - s = self.wrapper.readline() -- self.fo_output.write(s) -+ self.fo_output.write(unicode(s) if not six.PY3 else s) - if not s: break - self.assert_(reference_data == self.fo_output.getvalue()) - - def test_readlines(self): - "PyCurlFileObject .readlines() method" - li = self.wrapper.readlines() -- self.fo_output.write(''.join(li)) -+ out = ''.join(li) -+ self.fo_output.write(unicode(out) if not six.PY3 else out) - self.assert_(reference_data == self.fo_output.getvalue()) - - def test_smallread(self): - "PyCurlFileObject .read(N) with small N" - while 1: - s = self.wrapper.read(23) -- self.fo_output.write(s) -+ self.fo_output.write(unicode(s) if not six.PY3 else s) - if not s: break - self.assert_(reference_data == self.fo_output.getvalue()) - - class HTTPTests(TestCase): - def test_reference_file(self): - "download reference file via HTTP" -- filename = tempfile.mktemp() -+ _, filename = tempfile.mkstemp() - grabber.urlgrab(ref_http, filename) - - fo = open(filename, 'rb' if not six.PY3 else 'r') -@@ -123,7 +124,7 @@ class URLGrabberModuleTestCase(TestCase): - - def test_urlgrab(self): - "module-level urlgrab() function" -- outfile = tempfile.mktemp() -+ _, outfile = tempfile.mkstemp() - filename = urlgrabber.urlgrab('http://www.python.org', - filename=outfile) - os.unlink(outfile) -@@ -367,7 +368,7 @@ class CheckfuncTestCase(TestCase): - def setUp(self): - cf = (self._checkfunc, ('foo',), {'bar': 'baz'}) - self.g = grabber.URLGrabber(checkfunc=cf) -- self.filename = tempfile.mktemp() -+ _, self.filename = tempfile.mkstemp() - self.data = short_reference_data - - def tearDown(self): -@@ -440,7 +441,7 @@ class RegetTestBase: - def setUp(self): - self.ref = short_reference_data - self.grabber = grabber.URLGrabber(reget='check_timestamp') -- self.filename = tempfile.mktemp() -+ _, self.filename = tempfile.mkstemp() - self.hl = len(self.ref) / 2 - self.url = 'OVERRIDE THIS' - -@@ -522,7 +523,7 @@ class HTTPRegetTests(FTPRegetTests): - class FileRegetTests(HTTPRegetTests): - def setUp(self): - self.ref = short_reference_data -- tmp = tempfile.mktemp() -+ _, tmp = tempfile.mkstemp() - tmpfo = open(tmp, 'wb' if not six.PY3 else 'w') - tmpfo.write(self.ref) - tmpfo.close() -@@ -534,7 +535,7 @@ class FileRegetTests(HTTPRegetTests): - - self.grabber = grabber.URLGrabber(reget='check_timestamp', - copy_local=1) -- self.filename = tempfile.mktemp() -+ _, self.filename = tempfile.mkstemp() - self.hl = len(self.ref) / 2 - - def tearDown(self): -diff --git a/test/test_mirror.py b/test/test_mirror.py -index c46cd33..b923dd1 100644 ---- a/test/test_mirror.py -+++ b/test/test_mirror.py -@@ -50,7 +50,7 @@ class BasicTests(TestCase): - - def test_urlgrab(self): - """MirrorGroup.urlgrab""" -- filename = tempfile.mktemp() -+ _, filename = tempfile.mkstemp() - url = 'short_reference' - self.mg.urlgrab(url, filename) - -@@ -84,7 +84,7 @@ class SubclassTests(TestCase): - def fetchwith(self, mgclass): - self.mg = mgclass(self.g, self.fullmirrors) - -- filename = tempfile.mktemp() -+ _, filename = tempfile.mkstemp() - url = 'short_reference' - self.mg.urlgrab(url, filename) - -@@ -137,7 +137,7 @@ class BadMirrorTests(TestCase): - - def test_simple_grab(self): - """test that a bad mirror raises URLGrabError""" -- filename = tempfile.mktemp() -+ _, filename = tempfile.mkstemp() - url = 'reference' - self.assertRaises(URLGrabError, self.mg.urlgrab, url, filename) - -@@ -150,7 +150,7 @@ class FailoverTests(TestCase): - - def test_simple_grab(self): - """test that a the MG fails over past a bad mirror""" -- filename = tempfile.mktemp() -+ _, filename = tempfile.mkstemp() - url = 'reference' - elist = [] - def cb(e, elist=elist): elist.append(e) -diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py -index ffaed8e..95287fc 100644 ---- a/urlgrabber/byterange.py -+++ b/urlgrabber/byterange.py -@@ -27,7 +27,7 @@ from six.moves import urllib - - DEBUG = None - --from io import StringIO -+from io import BytesIO - - class RangeError(IOError): - """Error raised when an unsatisfiable range is requested.""" -@@ -238,8 +238,8 @@ class FileRangeHandler(urllib.request.FileHandler): - raise RangeError(9, 'Requested Range Not Satisfiable') - size = (lb - fb) - fo = RangeableFileObject(fo, (fb,lb)) -- headers = email.message.Message(StringIO( -- 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % -+ headers = email.message.Message(BytesIO( -+ b'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % - (mtype or 'text/plain', size, modified))) - return urllib.addinfourl(fo, headers, 'file:'+file) - -@@ -323,13 +323,13 @@ class FTPRangeHandler(urllib.request.FTPHandler): - fp = RangeableFileObject(fp, (0,retrlen)) - # -- range support modifications end here - -- headers = "" -+ headers = b"" - mtype = mimetypes.guess_type(req.get_full_url())[0] - if mtype: -- headers += "Content-Type: %s\n" % mtype -+ headers += b"Content-Type: %s\n" % mtype - if retrlen is not None and retrlen >= 0: -- headers += "Content-Length: %d\n" % retrlen -- sf = StringIO(headers) -+ headers += b"Content-Length: %d\n" % retrlen -+ sf = BytesIO(headers) - headers = email.message.Message(sf) - return addinfourl(fp, headers, req.get_full_url()) - except ftplib.all_errors as msg: -diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py -index 35c091e..69cd113 100644 ---- a/urlgrabber/grabber.py -+++ b/urlgrabber/grabber.py -@@ -516,7 +516,7 @@ from six.moves import urllib - from six.moves.http_client import responses, HTTPException - from urlgrabber.byterange import range_tuple_normalize, range_tuple_to_header, RangeError - --from io import StringIO -+from io import BytesIO - - try: - import xattr -@@ -1235,7 +1235,7 @@ default_grabber = URLGrabber() - class PyCurlFileObject(object): - def __init__(self, url, filename, opts): - self.fo = None -- self._hdr_dump = '' -+ self._hdr_dump = b'' - self._parsed_hdr = None - self.url = url - self.scheme = urllib.parse.urlsplit(self.url)[0] -@@ -1246,7 +1246,7 @@ class PyCurlFileObject(object): - if self.opts.reget == 'check_timestamp': - raise NotImplementedError("check_timestamp regets are not implemented in this ver of urlgrabber. Please report this.") - self._complete = False -- self._rbuf = '' -+ self._rbuf = b'' - self._rbufsize = 1024*8 - self._ttime = time.time() - self._tsize = 0 -@@ -1298,15 +1298,9 @@ class PyCurlFileObject(object): - start = self._range[0] - pos - stop = self._range[1] - pos - if start < len(buf) and stop > 0: -- if not six.PY3 or isinstance(self.fo, StringIO): -- self.fo.write(buf[max(start, 0):stop].decode('utf-8')) -- else: -- self.fo.write(buf[max(start, 0):stop]) -+ self.fo.write(buf[max(start, 0):stop]) - else: -- if not six.PY3 or isinstance(self.fo, StringIO): -- self.fo.write(buf.decode('utf-8')) -- else: -- self.fo.write(buf) -+ self.fo.write(buf) - except IOError as e: - self._cb_error = URLGrabError(16, exception2msg(e)) - return -1 -@@ -1316,7 +1310,7 @@ class PyCurlFileObject(object): - - def _hdr_retrieve(self, buf): - if self._hdr_ended: -- self._hdr_dump = '' -+ self._hdr_dump = b'' - self.size = 0 - self._hdr_ended = False - -@@ -1328,12 +1322,12 @@ class PyCurlFileObject(object): - # but we can't do that w/o making it do 2 connects, which sucks - # so we cheat and stuff it in here in the hdr_retrieve - if self.scheme in ['http','https']: -- content_length_str = 'content-length:' if not six.PY3 else b'content-length:' -+ content_length_str = b'content-length:' - if buf.lower().find(content_length_str) != -1: -- split_str = ':' if not six.PY3 else b':' -+ split_str = b':' - length = buf.split(split_str)[1] - self.size = int(length) -- elif (self.append or self.opts.range) and self._hdr_dump == '' and b' 200 ' in buf: -+ elif (self.append or self.opts.range) and self._hdr_dump == b'' and b' 200 ' in buf: - # reget was attempted but server sends it all - # undo what we did in _build_range() - self.append = False -@@ -1349,20 +1343,19 @@ class PyCurlFileObject(object): - if len(s) >= 14: - s = None # ignore MDTM responses - elif buf.startswith(b'150 '): -- s = parse150(buf if not six.PY3 else buf.decode('utf-8')) -+ s = parse150(buf.decode('utf-8')) # Necessary in Python 3, doesn't hurt in Python 2 - if s: - self.size = int(s) - -- location_str = 'location' if not six.PY3 else b'location' -+ location_str = b'location' - if buf.lower().find(location_str) != -1: -- buf_compat = buf if not six.PY3 else buf.decode('utf-8') -- location = ':'.join(buf_compat.split(':')[1:]) -+ location = b':'.join(buf.split(b':')[1:]) - location = location.strip() - self.scheme = urllib.parse.urlsplit(location)[0] - self.url = location - -- self._hdr_dump += buf if not six.PY3 else buf.decode('utf-8') -- end_str = '\r\n' if not six.PY3 else b'\r\n' -+ self._hdr_dump += buf -+ end_str = b'\r\n' - if len(self._hdr_dump) != 0 and buf == end_str: - self._hdr_ended = True - if DEBUG: DEBUG.debug('header ended:') -@@ -1374,12 +1367,12 @@ class PyCurlFileObject(object): - def _return_hdr_obj(self): - if self._parsed_hdr: - return self._parsed_hdr -- statusend = self._hdr_dump.find('\n') -+ statusend = self._hdr_dump.find(b'\n') - statusend += 1 # ridiculous as it may seem. -- hdrfp = StringIO() -+ hdrfp = BytesIO() - hdrfp.write(self._hdr_dump[statusend:]) - hdrfp.seek(0) -- self._parsed_hdr = Message(hdrfp) -+ self._parsed_hdr = Message(hdrfp) - return self._parsed_hdr - - hdr = property(_return_hdr_obj) -@@ -1709,7 +1702,7 @@ class PyCurlFileObject(object): - return (fo, hdr) - - def _do_grab(self): -- """dump the file to a filename or StringIO buffer""" -+ """dump the file to a filename or BytesIO buffer""" - - if self._complete: - return -@@ -1739,7 +1732,7 @@ class PyCurlFileObject(object): - self._prog_basename = 'MEMORY' - - -- self.fo = StringIO() -+ self.fo = BytesIO() - # if this is to be a tempfile instead.... - # it just makes crap in the tempdir - #fh, self._temp_name = mkstemp() -@@ -1778,7 +1771,7 @@ class PyCurlFileObject(object): - raise err - # re open it - try: -- self.fo = open(self.filename, 'r') -+ self.fo = open(self.filename, 'rb') - except IOError as e: - err = URLGrabError(16, _(\ - 'error opening file from %s, IOError: %s') % (self.url, e)) -@@ -1853,7 +1846,7 @@ class PyCurlFileObject(object): - #if self.opts.progress_obj: - # self.opts.progress_obj.update(self._amount_read) - -- self._rbuf = ''.join(buf) -+ self._rbuf = b''.join(buf) - return - - def _progress_update(self, download_total, downloaded, upload_total, uploaded): -@@ -1888,28 +1881,40 @@ class PyCurlFileObject(object): - def read(self, amt=None): - self._fill_buffer(amt) - if amt is None: -- s, self._rbuf = self._rbuf, '' -+ s, self._rbuf = self._rbuf, b'' - else: - s, self._rbuf = self._rbuf[:amt], self._rbuf[amt:] -- return s -+ return s if not six.PY3 else s.decode('utf-8') - - def readline(self, limit=-1): - if not self._complete: self._do_grab() -- return self.fo.readline() -+ return self.fo.readline() if not six.PY3 else self.fo.readline().decode('utf-8') - -- i = self._rbuf.find('\n') -+ i = self._rbuf.find(b'\n') - while i < 0 and not (0 < limit <= len(self._rbuf)): - L = len(self._rbuf) - self._fill_buffer(L + self._rbufsize) - if not len(self._rbuf) > L: break -- i = self._rbuf.find('\n', L) -+ i = self._rbuf.find(b'\n', L) - - if i < 0: i = len(self._rbuf) - else: i = i+1 - if 0 <= limit < len(self._rbuf): i = limit - - s, self._rbuf = self._rbuf[:i], self._rbuf[i:] -- return s -+ return s if not six.PY3 else s.decode('utf-8') -+ -+ # This was added here because we need to wrap self.fo readlines (which will -+ # always return bytes) in correct decoding -+ def readlines(self, *args, **kwargs): -+ if not six.PY3: -+ return [line for line in self.fo.readlines(*args, **kwargs)] -+ else: -+ return self._py3readlines(*args, **kwargs) -+ -+ def _py3readlines(self, *args, **kwargs): -+ for line in self.fo.readlines(*args, **kwargs): -+ yield line.decode('utf-8') - - def close(self): - if self._prog_running: -@@ -2055,11 +2060,9 @@ def _readlines(fd): - buf = os.read(fd, 4096) - if not buf: return None - # whole lines only, no buffering -- buf_compat = buf if not six.PY3 else buf.decode('utf-8') -- while buf_compat[-1] != '\n': -+ while buf.decode('utf-8')[-1] != '\n': - buf += os.read(fd, 4096) -- buf_compat = buf if not six.PY3 else buf.decode('utf-8') -- return buf_compat[:-1].split('\n') -+ return buf.decode('utf-8')[:-1].split('\n') - - import subprocess - -@@ -2403,7 +2406,7 @@ class _TH: - if filename and _TH.dirty is None: - try: - now = int(time.time()) -- for line in open(filename): -+ for line in open(filename, 'rb'): - host, speed, fail, ts = line.rsplit(' ', 3) - _TH.hosts[host] = int(speed), int(fail), min(int(ts), now) - except IOError: pass -@@ -2415,7 +2418,7 @@ class _TH: - if filename and _TH.dirty is True: - tmp = '%s.%d' % (filename, os.getpid()) - try: -- f = open(tmp, 'w') -+ f = open(tmp, 'wb') - for host in _TH.hosts: - f.write(host + ' %d %d %d\n' % _TH.hosts[host]) - f.close() -@@ -2536,7 +2539,7 @@ def _file_object_test(filename=None): - if filename is None: - filename = __file__ - print('using file "%s" for comparisons' % filename) -- fo = open(filename) -+ fo = open(filename, 'rb') - s_input = fo.read() - fo.close() - -@@ -2544,8 +2547,8 @@ def _file_object_test(filename=None): - _test_file_object_readall, - _test_file_object_readline, - _test_file_object_readlines]: -- fo_input = StringIO(s_input) -- fo_output = StringIO() -+ fo_input = BytesIO(s_input) -+ fo_output = BytesIO() - wrapper = PyCurlFileObject(fo_input, None, 0) - print('testing %-30s ' % testfunc.__name__, testfunc(wrapper, fo_output)) - s_output = fo_output.getvalue()