diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec
index 43666d9..fa7dc7d 100644
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,7 +3,7 @@
 Summary: A high-level cross-protocol url-grabber
 Name: python-urlgrabber
 Version: 3.9.1
-Release: 25%{?dist}
+Release: 26%{?dist}
 Source0: urlgrabber-%{version}.tar.gz
 Patch1: urlgrabber-HEAD.patch
 
@@ -44,6 +44,11 @@ rm -rf $RPM_BUILD_ROOT
 %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down
 
 %changelog
+* Wed Mar 27 2013 Zdenek Pavlas - 3.9.1-26
+- Update to latest HEAD.
+- Handle HTTP 200 response to range requests correctly. BZ 919076
+- Reset curl_obj to clear CURLOPT_RANGE from previous requests. BZ 923951
+
 * Thu Mar 7 2013 Zdeněk Pavlas - 3.9.1-25
 - Update to latest HEAD.
 - fix some test cases that were failing. BZ 918658
diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch
index 4633455..8947982 100644
--- a/urlgrabber-HEAD.patch
+++ b/urlgrabber-HEAD.patch
@@ -275,7 +275,7 @@ index 3e5f3b7..8eeaeda 100644
      return (fb,lb)
  
  diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..1afb2c5 100644
+index e090e90..9526dc1 100644
 --- a/urlgrabber/grabber.py
 +++ b/urlgrabber/grabber.py
 @@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
@@ -874,7 +874,7 @@ index e090e90..1afb2c5 100644
 +            if not self._prog_running:
 +                if self.opts.progress_obj:
 +                    size = self.size + self._reget_length
-@@ -1079,15 +1267,24 @@ class PyCurlFileObject():
+@@ -1079,23 +1267,40 @@ class PyCurlFileObject():
              self.opts.progress_obj.update(self._amount_read)
          self._amount_read += len(buf)
  
 -            return -1
 +            return -1
          try:
-             self._hdr_dump += buf
              # we have to get the size before we do the progress obj start
              # but we can't do that w/o making it do 2 connects, which sucks
              # so we cheat and stuff it in here in the hdr_retrieve
-             if self.scheme in ['http','https'] and buf.lower().find('content-length') != -1:
-                 length = buf.split(':')[1]
-                 self.size = int(length)
++            if self.scheme in ['http','https']:
++                if buf.lower().find('content-length') != -1:
++                    length = buf.split(':')[1]
++                    self.size = int(length)
++                elif self.append and self._hdr_dump == '' and ' 200 ' in buf:
++                    # reget was attempted but server sends it all
++                    # undo what we did in _build_range()
++                    self.append = False
++                    self.reget_time = None
++                    self._amount_read = 0
++                    self._reget_length = 0
++                    self.fo.truncate(0)
+             elif self.scheme in ['ftp']:
+                 s = None
+                 if buf.startswith('213 '):
-@@ -1104,7 +1301,17 @@ class PyCurlFileObject():
+@@ -1104,7 +1309,18 @@ class PyCurlFileObject():
                  s = parse150(buf)
              if s:
                  self.size = int(s)
 +            if buf.lower().find('location') != -1:
 +                location = ':'.join(buf.split(':')[1:]).strip()
 +                self.scheme = urlparse.urlsplit(location)[0]
 +                self.url = location
 +
++            self._hdr_dump += buf
 +            if len(self._hdr_dump) != 0 and buf == '\r\n':
 +                self._hdr_ended = True
 +                if DEBUG: DEBUG.debug('header ended:')
 +
          return len(buf)
      except KeyboardInterrupt:
          return pycurl.READFUNC_ABORT
-@@ -1113,8 +1320,10 @@ class PyCurlFileObject():
+@@ -1113,8 +1329,10 @@ class PyCurlFileObject():
          if self._parsed_hdr:
              return self._parsed_hdr
          statusend = self._hdr_dump.find('\n')
 +        statusend += 1 # include the \n
          hdrfp = StringIO()
 -        hdrfp.write(self._hdr_dump[statusend:])
 +        hdrfp.write(self._hdr_dump[statusend:])
          self._parsed_hdr = mimetools.Message(hdrfp)
          return self._parsed_hdr
-@@ -1127,6 +1336,9 @@ class PyCurlFileObject():
+@@ -1127,6 +1345,9 @@ class PyCurlFileObject():
          if not opts:
              opts = self.opts
  
 +        if not opts.keepalive:
 +            self.curl_obj.setopt(pycurl.FORBID_REUSE, 1)
          # defaults we're always going to set
          self.curl_obj.setopt(pycurl.NOPROGRESS, False)
-@@ -1136,11 +1348,21 @@ class PyCurlFileObject():
+@@ -1136,11 +1357,21 @@ class PyCurlFileObject():
          self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
          self.curl_obj.setopt(pycurl.FAILONERROR, True)
          self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
  
          # maybe to be options later
          self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
-@@ -1148,9 +1370,11 @@ class PyCurlFileObject():
+@@ -1148,9 +1379,11 @@ class PyCurlFileObject():
          # timeouts
          timeout = 300
 +        if hasattr(opts, 'timeout'):
 +            timeout = int(opts.timeout or 0)
  
          # ssl options
          if self.scheme == 'https':
-@@ -1158,13 +1382,16 @@ class PyCurlFileObject():
+@@ -1158,13 +1391,16 @@ class PyCurlFileObject():
              self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
              self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
              self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
 +            if opts.ssl_verify_host: # 1 is meaningless to curl
 +                self.curl_obj.setopt(pycurl.SSL_VERIFYHOST, 2)
              if opts.ssl_cert_type:
                  self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
              if opts.ssl_key_pass:
-@@ -1187,28 +1414,28 @@ class PyCurlFileObject():
+@@ -1187,28 +1423,28 @@ class PyCurlFileObject():
          if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
              self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
  
          # our url
          self.curl_obj.setopt(pycurl.URL, self.url)
-@@ -1228,39 +1455,26 @@ class PyCurlFileObject():
+@@ -1228,39 +1464,26 @@ class PyCurlFileObject():
              code = self.http_code
              errcode = e.args[0]
  
          # this is probably wrong but ultimately this is what happens
          # we have a legit http code and a pycurl 'writer failed' code
          # which almost always means something aborted it from outside
-@@ -1269,40 +1483,76 @@ class PyCurlFileObject():
+@@ -1269,36 +1492,70 @@ class PyCurlFileObject():
          # figure out what aborted the pycurl process FIXME
          raise KeyboardInterrupt
  
      def _do_open(self):
          self.curl_obj = _curl_cache
--        self.curl_obj.reset() # reset all old settings away, just in case
-+        # reset() clears PYCURL_ERRORBUFFER, and there's no way
-+        # to reinitialize it, so better don't do that. BZ 896025
-+        #self.curl_obj.reset() # reset all old settings away, just in case
          # setup any ranges
          self._set_opts()
          self._do_grab()
-@@ -1333,7 +1583,11 @@ class PyCurlFileObject():
+@@ -1333,7 +1590,11 @@ class PyCurlFileObject():
          if self.opts.range:
              rt = self.opts.range
 +            if rt[0]: rt = (rt[0] + reget_length, rt[1])
  
          if rt:
              header = range_tuple_to_header(rt)
-@@ -1434,21 +1688,46 @@ class PyCurlFileObject():
+@@ -1434,21 +1695,46 @@ class PyCurlFileObject():
              #fh, self._temp_name = mkstemp()
              #self.fo = open(self._temp_name, 'wb')
  
          else:
              #self.fo = open(self._temp_name, 'r')
              self.fo.seek(0)
-@@ -1526,17 +1805,20 @@ class PyCurlFileObject():
+@@ -1526,17 +1812,20 @@ class PyCurlFileObject():
          if self._prog_running:
              downloaded += self._reget_length
              self.opts.progress_obj.update(downloaded)
  
              msg = _("Downloaded more than max size for %s: %s > %s") \
                  % (self.url, cur, max_size)
-@@ -1544,13 +1826,6 @@ class PyCurlFileObject():
+@@ -1544,13 +1833,6 @@ class PyCurlFileObject():
              return True
          return False
  
 -    def _over_max_size(self, amount, max_size=None):
 -        if self.opts.size: # if we set an opts size use that, no matter what
 -            max_size = self.opts.size
 -        if not max_size: return False # if we have None for all of the Max then this is dumb
 -        if amount > max_size + max_size*.10:
 -
 -            return True
      def read(self, amt=None):
          self._fill_buffer(amt)
          if amt is None:
-@@ -1582,9 +1857,21 @@ class PyCurlFileObject():
+@@ -1582,9 +1864,21 @@ class PyCurlFileObject():
              self.opts.progress_obj.end(self._amount_read)
          self.fo.close()
  
  #####################################################################
  # DEPRECATED FUNCTIONS
-@@ -1621,6 +1908,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
+@@ -1621,6 +1915,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
  #####################################################################
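---

The core of the BZ 919076 fix is visible in the _hdr_retrieve() hunk above: a reget (resume) sends a byte-range request, but a server that ignores the Range header answers "200 OK" with the full body instead of "206 Partial Content", so the downloader must abandon append mode and restart the local file from offset zero. Below is a minimal, self-contained sketch of that logic; the class and attribute names mirror the patch but are illustrative only, this is not urlgrabber's actual API, and it is written for Python 3 rather than the patch's Python 2:

    import io

    class RegetHandler(object):
        """Sketch of the BZ 919076 idea: undo resume bookkeeping when the
        server replies 200 instead of 206 to a range request."""

        def __init__(self, fo, reget_length):
            self.fo = fo                        # local file, opened for append
            self.append = reget_length > 0      # True while we think we are resuming
            self._amount_read = 0
            self._reget_length = reget_length   # bytes already on disk

            self._hdr_dump = ''                 # raw header text seen so far

        def header_callback(self, buf):
            # The HTTP status line is the first header chunk, so _hdr_dump is
            # still empty when it arrives.  ' 200 ' in it means the server sent
            # the whole body: reset the counters and truncate the local file,
            # mirroring what the patch does in _hdr_retrieve().
            if self.append and self._hdr_dump == '' and ' 200 ' in buf:
                self.append = False
                self._amount_read = 0
                self._reget_length = 0
                self.fo.seek(0)       # truncate() does not move the position
                self.fo.truncate(0)
            self._hdr_dump += buf
            return len(buf)

    # Tiny demonstration with an in-memory "file":
    fo = io.BytesIO(b'first bytes already downloaded')
    handler = RegetHandler(fo, reget_length=fo.seek(0, 2))
    handler.header_callback('HTTP/1.1 200 OK\r\n')
    assert not handler.append and fo.getvalue() == b''

The companion fix (BZ 923951) guards against the reverse hazard: a pooled curl handle keeps CURLOPT_RANGE set from an earlier request, so a later plain GET would silently turn into a range request unless the handle is reset between downloads, which is why the commit also reverts the earlier BZ 896025 workaround that had disabled curl_obj.reset().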