From c508ad399b4837b823c9d12a980c6ecf1e7a65d7 Mon Sep 17 00:00:00 2001
From: Zdenek Pavlas
Date: Mar 07 2013 12:28:20 +0000
Subject: - Update to latest HEAD.
- fix some test cases that were failing. BZ 918658
- exit(1) on /bin/urlgrabber failures. BZ 918613
- clamp timestamps from the future. BZ 894630
- enable GSSNEGOTIATE if implemented correctly.
- make error messages more verbose.

---

diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec
index 6ba1d8d..43666d9 100644
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,7 +3,7 @@
 Summary: A high-level cross-protocol url-grabber
 Name: python-urlgrabber
 Version: 3.9.1
-Release: 24%{?dist}
+Release: 25%{?dist}
 Source0: urlgrabber-%{version}.tar.gz
 Patch1: urlgrabber-HEAD.patch
@@ -44,6 +44,14 @@ rm -rf $RPM_BUILD_ROOT
 %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down
 
 %changelog
+* Thu Mar 7 2013 Zdeněk Pavlas - 3.9.1-25
+- Update to latest HEAD.
+- fix some test cases that were failing. BZ 918658
+- exit(1) on /bin/urlgrabber failures. BZ 918613
+- clamp timestamps from the future. BZ 894630
+- enable GSSNEGOTIATE if implemented correctly.
+- make error messages more verbose.
+
 * Thu Feb 14 2013 Fedora Release Engineering - 3.9.1-24
 - Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild
 
diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch
index aaf9cbc..4633455 100644
--- a/urlgrabber-HEAD.patch
+++ b/urlgrabber-HEAD.patch
@@ -12,7 +12,7 @@ index 0000000..1ffe416
 +*.kateproject
 +ipython.log*
 diff --git a/scripts/urlgrabber b/scripts/urlgrabber
-index 518e512..09cd896 100644
+index 518e512..07881b3 100644
 --- a/scripts/urlgrabber
 +++ b/scripts/urlgrabber
 @@ -115,6 +115,7 @@ options:
@@ -71,6 +71,14 @@ index 518e512..09cd896 100644
      def help_doc(self):
          print __doc__
 
+@@ -294,6 +301,7 @@ class ugclient:
+             if self.op.localfile: print f
+         except URLGrabError, e:
+             print e
++            sys.exit(1)
+ 
+     def set_debug_logger(self, dbspec):
+         try:
 diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down
 new file mode 100755
 index 0000000..3dafb12
@@ -181,6 +189,37 @@ index 50c6348..5fb43f9 100644
  base_ftp = 'ftp://localhost/test/'
 
  # set to a proftp server only. we're working around a couple of
+diff --git a/test/test_mirror.py b/test/test_mirror.py
+index 70fe069..cb63a41 100644
+--- a/test/test_mirror.py
++++ b/test/test_mirror.py
+@@ -28,7 +28,7 @@ import os
+ import string, tempfile, random, cStringIO, os
+ 
+ import urlgrabber.grabber
+-from urlgrabber.grabber import URLGrabber, URLGrabError
++from urlgrabber.grabber import URLGrabber, URLGrabError, URLGrabberOptions
+ import urlgrabber.mirror
+ from urlgrabber.mirror import MirrorGroup, MGRandomStart, MGRandomOrder
+ 
+@@ -106,6 +106,9 @@ class CallbackTests(TestCase):
+         self.g = URLGrabber()
+         fullmirrors = [base_mirror_url + m + '/' for m in \
+                        (bad_mirrors + good_mirrors)]
++        if hasattr(urlgrabber.grabber, '_TH'):
++            # test assumes mirrors are not re-ordered
++            urlgrabber.grabber._TH.hosts.clear()
+         self.mg = MirrorGroup(self.g, fullmirrors)
+ 
+     def test_failure_callback(self):
+@@ -168,6 +171,7 @@ class FakeGrabber:
+         self.resultlist = resultlist or []
+         self.index = 0
+         self.calls = []
++        self.opts = URLGrabberOptions()
+ 
+     def urlgrab(self, url, filename=None, **kwargs):
+         self.calls.append( (url, filename) )
 diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
 index 3e5f3b7..8eeaeda 100644
 --- a/urlgrabber/byterange.py
@@ -236,7 +275,7 @@ index 3e5f3b7..8eeaeda 100644
      return (fb,lb)
 diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..6ce9861 100644
+index e090e90..1afb2c5 100644
 --- a/urlgrabber/grabber.py
 +++ b/urlgrabber/grabber.py
 @@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
@@ -958,7 +997,7 @@ index e090e90..6ce9861 100644
          if opts.ssl_cert_type:
              self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
          if opts.ssl_key_pass:
-@@ -1187,28 +1414,26 @@ class PyCurlFileObject():
+@@ -1187,28 +1414,28 @@ class PyCurlFileObject():
          if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
              self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE,
                                   int(opts.raw_throttle()))
@@ -982,9 +1021,11 @@ index e090e90..6ce9861 100644
 +        # proxy
 +        if opts.proxy is not None:
 +            self.curl_obj.setopt(pycurl.PROXY, opts.proxy)
-+            self.curl_obj.setopt(pycurl.PROXYAUTH,
-+                # All but Kerberos. BZ 769254
-+                pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE)
++            auth = pycurl.HTTPAUTH_ANY
++            if pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0):
++                # BZ 769254: work around a bug in curl < 7.28.0
++                auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE
++            self.curl_obj.setopt(pycurl.PROXYAUTH, auth)
+
+        if opts.username and opts.password:
+            if self.scheme in ('http', 'https'):
@@ -1003,7 +1044,7 @@ index e090e90..6ce9861 100644
          # our url
          self.curl_obj.setopt(pycurl.URL, self.url)
 
-@@ -1228,39 +1453,36 @@ class PyCurlFileObject():
+@@ -1228,39 +1455,26 @@ class PyCurlFileObject():
          code = self.http_code
          errcode = e.args[0]
+         errurl = urllib.unquote(self.url)
 
          elif errcode == 28:
-             err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
-             err.url = self.url
-             raise err
-         elif errcode == 35:
-             msg = _("problem making ssl connection")
-             err = URLGrabError(14, msg)
-             err.url = self.url
-             raise err
-         elif errcode == 37:
-             msg = _("Could not open/read %s") % (self.url)
-             err = URLGrabError(14, msg)
-             err.url = self.url
+             err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
+             err.url = errurl
              raise err
 
          elif errcode == 42:
              # this is probably wrong but ultimately this is what happens
              # we have a legit http code and a pycurl 'writer failed' code
              # which almost always means something aborted it from outside
@@ -1269,40 +1483,76 @@ PyCurlFileObject():
              # figure out what aborted the pycurl process FIXME
              raise KeyboardInterrupt
 
-         elif errcode == 58:
-             msg = _("problem with the local client certificate")
-             err = URLGrabError(14, msg)
-             err.url = self.url
-             raise err
+         else:
+             pyerr2str = { 5 : _("Couldn't resolve proxy"),
+                           6 : _("Couldn't resolve host"),
+                           7 : _("Couldn't connect"),
+                           8 : _("Bad reply to FTP server"),
+                           9 : _("Access denied"),
+                          11 : _("Bad reply to FTP pass"),
+                          13 : _("Bad reply to FTP pasv"),
+                          14 : _("Bad reply to FTP 227"),
+                          15 : _("Couldn't get FTP host"),
+                          17 : _("Couldn't set FTP type"),
+                          18 : _("Partial file"),
+                          19 : _("FTP RETR command failed"),
+                          22 : _("HTTP returned error"),
+                          23 : _("Write error"),
+                          25 : _("Upload failed"),
+                          26 : _("Read error"),
+                          27 : _("Out of Memory"),
+                          28 : _("Operation timed out"),
+                          30 : _("Bad reply to FTP PORT"),
+                          31 : _("FTP REST command failed"),
+                          33 : _("Range failed"),
+                          34 : _("HTTP POST failed"),
+                          35 : _("SSL CONNECT failed"),
+                          36 : _("Couldn't resume download"),
+                          37 : _("Couldn't read file"),
+                          42 : _("Aborted by callback"),
+                          47 : _("Too many redirects"),
+                          51 : _("Peer certificate failed verification"),
+                          53 : _("SSL engine not found"),
+                          54 : _("SSL engine set failed"),
+                          55 : _("Network error send()"),
+                          56 : _("Network error recv()"),
+                          58 : _("Local certificate failed"),
+                          59 : _("SSL set cipher failed"),
+                          60 : _("Local CA certificate failed"),
+                          61 : _("HTTP bad transfer encoding"),
+                          63 : _("Maximum file size exceeded"),
+                          64 : _("FTP SSL failed"),
+                          67 : _("Login denied"),
70 : _("Out of disk space on server"), + 73 : _("Remove file exists"), + } -+ errstr = str(e.args[1]) -+ if not errstr: -+ errstr = pyerr2str.get(errcode, '') -+ msg = 'curl#%s - "%s"' % (errcode, errstr) - code = errcode - err = URLGrabError(14, msg) - err.code = code - err.exception = e - raise err ++ errstr = str(e.args[1]) or pyerr2str.get(errcode, '') ++ if code and not 200 <= code <= 299: ++ msg = '%s Error %d - %s' % (self.scheme.upper(), code, ++ self.scheme in ('http', 'https') ++ and responses.get(code) or errstr) ++ else: ++ msg = 'curl#%s - "%s"' % (errcode, errstr) ++ code = errcode + +- elif errcode == 60: +- msg = _("client cert cannot be verified or client cert incorrect") + err = URLGrabError(14, msg) +- err.url = self.url ++ err.url = errurl ++ err.code = code + raise err +- +- elif errcode == 63: +- if self._error[1]: +- msg = self._error[1] +- else: +- msg = _("Max download size exceeded on %s") % (self.url) ++ + else: + if self._error[1]: + msg = self._error[1] -+ err = URLGrabError(14, msg) + err = URLGrabError(14, msg) +- err.url = self.url + err.url = urllib.unquote(self.url) -+ raise err + raise err +- +- elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it +- msg = 'HTTP Error %s : %s ' % (self.http_code, self.url) +- else: +- msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1])) +- code = errcode +- err = URLGrabError(14, msg) +- err.code = code +- err.exception = e +- raise err def _do_open(self): self.curl_obj = _curl_cache -@@ -1333,7 +1616,11 @@ class PyCurlFileObject(): +- self.curl_obj.reset() # reset all old settings away, just in case ++ # reset() clears PYCURL_ERRORBUFFER, and there's no way ++ # to reinitialize it, so better don't do that. BZ 896025 ++ #self.curl_obj.reset() # reset all old settings away, just in case + # setup any ranges + self._set_opts() + self._do_grab() +@@ -1333,7 +1583,11 @@ class PyCurlFileObject(): if self.opts.range: rt = self.opts.range @@ -1168,7 +1206,7 @@ index e090e90..6ce9861 100644 if rt: header = range_tuple_to_header(rt) -@@ -1434,21 +1721,46 @@ class PyCurlFileObject(): +@@ -1434,21 +1688,46 @@ class PyCurlFileObject(): #fh, self._temp_name = mkstemp() #self.fo = open(self._temp_name, 'wb') @@ -1222,7 +1260,7 @@ index e090e90..6ce9861 100644 else: #self.fo = open(self._temp_name, 'r') self.fo.seek(0) -@@ -1526,17 +1838,20 @@ class PyCurlFileObject(): +@@ -1526,17 +1805,20 @@ class PyCurlFileObject(): if self._prog_running: downloaded += self._reget_length self.opts.progress_obj.update(downloaded) @@ -1248,7 +1286,7 @@ index e090e90..6ce9861 100644 msg = _("Downloaded more than max size for %s: %s > %s") \ % (self.url, cur, max_size) -@@ -1544,13 +1859,6 @@ class PyCurlFileObject(): +@@ -1544,13 +1826,6 @@ class PyCurlFileObject(): return True return False @@ -1262,7 +1300,7 @@ index e090e90..6ce9861 100644 def read(self, amt=None): self._fill_buffer(amt) if amt is None: -@@ -1582,9 +1890,21 @@ class PyCurlFileObject(): +@@ -1582,9 +1857,21 @@ class PyCurlFileObject(): self.opts.progress_obj.end(self._amount_read) self.fo.close() @@ -1285,7 +1323,7 @@ index e090e90..6ce9861 100644 ##################################################################### # DEPRECATED FUNCTIONS -@@ -1621,6 +1941,478 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, +@@ -1621,6 +1908,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, ##################################################################### @@ -1727,6 +1765,8 @@ index e090e90..6ce9861 100644 + if ug_err 
+        if ug_err is None:
+            # defer first update if the file was small. BZ 851178.
+            if not ts and dl_size < 1e6: return
++           # clamp timestamps from the future. BZ 894630.
++           if ts > now: ts = now
+
+            # k1: the older, the less useful
+            # k2: <500ms readings are less reliable
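
The section cuts off mid-hunk, but the two `++` lines above are the entire fix for BZ 894630: before `_TH.update()` folds a new speed reading into its per-host estimate, it clamps a stored timestamp that lies in the future (e.g. after a system clock step) back to `now`. The sketch below illustrates why that matters in this kind of age-weighted smoothing; it is a minimal standalone reconstruction, not the patch's code — the names `hosts` and `HALF_LIFE`, the `(speed, fail, ts)` triple, and the exact smoothing formula are assumptions, while the small-file deferral, the clamp, and the k1/k2 weighting comments come straight from the hunk:

    import time

    # Hypothetical stand-in for urlgrabber's _TH.hosts bookkeeping:
    # per-host (speed, fail_count, timestamp) triples.
    hosts = {}
    HALF_LIFE = 30.0 * 24 * 3600  # assumed default of a 'half_life' option

    def update(host, dl_size, dl_time, ug_err, now=None):
        speed, fail, ts = hosts.get(host, (0.0, 0, 0))
        if now is None:
            now = time.time()
        if dl_time <= 0:
            return

        if ug_err is None:
            # defer the first update if the file was small (BZ 851178):
            # one tiny download says little about the host's real speed
            if not ts and dl_size < 1e6:
                return
            # clamp timestamps from the future (BZ 894630) so the age
            # weight k1 below stays <= 1
            if ts > now:
                ts = now
            # k1: the older the previous estimate, the less useful it is
            k1 = 2 ** ((ts - now) / HALF_LIFE)
            # k2: sub-500ms readings are less reliable
            k2 = min(dl_time / 0.5, 1.0) / 10
            if k1 + k2 > 0:
                speed = (k1 * speed + k2 * float(dl_size) / dl_time) / (k1 + k2)
            fail = 0
        else:
            fail += 1
        hosts[host] = speed, fail, now

Without the clamp, a future `ts` makes `k1 = 2 ** ((ts - now) / HALF_LIFE)` exceed 1, so a single reading taken under a bad clock would outweigh all later measurements indefinitely; clamping caps its weight at that of a reading taken just now.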