Package urlgrabber-ssl-byterange-keepalive.patch as Patch1 (release 2.9.9-2).

The added patch lets urlgrabber use byte ranges and keepalive connections
over HTTPS:
  byterange.py  - new HTTPSRangeHandler whose https_error_206/416 methods
                  delegate to the http_error_206/416 methods it inherits.
  grabber.py    - keepalive_handler (single object or None) becomes the
                  tuple keepalive_handlers, so both the HTTP and the HTTPS
                  keepalive handlers are installed and closed.
  keepalive.py  - shared logic moves into KeepAliveHandler; HTTPHandler and
                  HTTPSHandler only select the connection class.

Correction in this revision: https_error_416 used to call itself, which
recursed without bound on a 416 response; it now calls http_error_416.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy.  Confirm the context and removed lines of the
embedded patch against pristine urlgrabber 2.9.9 before building.  The blob
id on the new file's index line predates the edits to the embedded patch.

diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec
index b1fbe51..1f6f8e5 100644
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,9 +3,10 @@
 Summary: A high-level cross-protocol url-grabber
 Name: python-urlgrabber
 Version: 2.9.9
-Release: 1.1
+Release: 2
 Source0: urlgrabber-%{version}.tar.gz
 Patch0: urlgrabber-read-error.patch
+Patch1: urlgrabber-ssl-byterange-keepalive.patch
 License: LGPL
 Group: Development/Libraries
 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot
@@ -22,6 +23,7 @@ authentication, proxies and more.
 %prep
 %setup -n urlgrabber-%{version}
 %patch0 -p0
+%patch1 -p0
 
 %build
 python setup.py build
@@ -41,6 +43,9 @@ rm -rf $RPM_BUILD_ROOT
 %{_bindir}/urlgrabber
 
 %changelog
+* Mon Jul 17 2006 James Bowes - 2.9.9-2
+- Add support for byte ranges and keepalive over HTTPS
+
 * Wed Jul 12 2006 Jesse Keating - 2.9.9-1.1
 - rebuild
 
diff --git a/urlgrabber-ssl-byterange-keepalive.patch b/urlgrabber-ssl-byterange-keepalive.patch
new file mode 100644
index 0000000..70f5e5c
--- /dev/null
+++ b/urlgrabber-ssl-byterange-keepalive.patch
@@ -0,0 +1,161 @@
+Index: urlgrabber/byterange.py
+===================================================================
+RCS file: /cvsroot/urlgrabber/cvs-root/urlgrabber/urlgrabber/byterange.py,v
+retrieving revision 1.11
+diff -u -r1.11 byterange.py
+--- urlgrabber/byterange.py	22 Oct 2005 21:57:28 -0000	1.11
++++ urlgrabber/byterange.py	13 Jul 2006 19:48:26 -0000
+@@ -71,6 +71,16 @@
+         # HTTP's Range Not Satisfiable error
+         raise RangeError('Requested Range Not Satisfiable')
+ 
++class HTTPSRangeHandler(HTTPRangeHandler):
++    """ Range Header support for HTTPS. """
++
++    def https_error_206(self, req, fp, code, msg, hdrs):
++        return self.http_error_206(req, fp, code, msg, hdrs)
++
++    def https_error_416(self, req, fp, code, msg, hdrs):
++        # reuse the plain-HTTP 416 handling inherited from HTTPRangeHandler
++        return self.http_error_416(req, fp, code, msg, hdrs)
++
+ class RangeableFileObject:
+     """File object wrapper to enable raw range handling.
+     This was implemented primarilary for handling range
+Index: urlgrabber/grabber.py
+===================================================================
+RCS file: /cvsroot/urlgrabber/cvs-root/urlgrabber/urlgrabber/grabber.py,v
+retrieving revision 1.46
+diff -u -r1.46 grabber.py
+--- urlgrabber/grabber.py	22 Mar 2006 20:09:33 -0000	1.46
++++ urlgrabber/grabber.py	13 Jul 2006 19:48:26 -0000
+@@ -402,24 +402,25 @@
+     # This is a convenient way to make keepalive optional.
+     # Just rename the module so it can't be imported.
+     import keepalive
+-    from keepalive import HTTPHandler
++    from keepalive import HTTPHandler, HTTPSHandler
+ except ImportError, msg:
+-    keepalive_handler = None
++    keepalive_handlers = ()
+ else:
+-    keepalive_handler = HTTPHandler()
++    keepalive_handlers = (HTTPHandler(), HTTPSHandler())
+ 
+ try:
+     # add in range support conditionally too
+     import byterange
+-    from byterange import HTTPRangeHandler, FileRangeHandler, \
+-         FTPRangeHandler, range_tuple_normalize, range_tuple_to_header, \
+-         RangeError
++    from byterange import HTTPRangeHandler, HTTPSRangeHandler, \
++         FileRangeHandler, FTPRangeHandler, range_tuple_normalize, \
++         range_tuple_to_header, RangeError
+ except ImportError, msg:
+     range_handlers = ()
+     RangeError = None
+     have_range = 0
+ else:
+-    range_handlers = (HTTPRangeHandler(), FileRangeHandler(), FTPRangeHandler())
++    range_handlers = (HTTPRangeHandler(), HTTPSRangeHandler(),
++            FileRangeHandler(), FTPRangeHandler())
+     have_range = 1
+ 
+ 
+@@ -454,7 +455,7 @@
+ 
+     global DEBUG
+     DEBUG = DBOBJ
+-    if keepalive_handler and keepalive.DEBUG is None:
++    if keepalive_handlers and keepalive.DEBUG is None:
+         keepalive.DEBUG = DBOBJ
+     if have_range and byterange.DEBUG is None:
+         byterange.DEBUG = DBOBJ
+@@ -582,7 +583,8 @@
+ 
+ def close_all():
+     """close any open keepalive connections"""
+-    if keepalive_handler: keepalive_handler.close_all()
++    for handler in keepalive_handlers:
++        handler.close_all()
+ 
+ def urlgrab(url, filename=None, **kwargs):
+     """grab the file at and make a local copy at
+@@ -1012,7 +1014,7 @@
+             return self.opts.opener
+         elif self._opener is None:
+             handlers = []
+-            need_keepalive_handler = (keepalive_handler and self.opts.keepalive)
++            need_keepalive_handler = (keepalive_handlers and self.opts.keepalive)
+             need_range_handler = (range_handlers and \
+                                   (self.opts.range or self.opts.reget))
+             # if you specify a ProxyHandler when creating the opener
+@@ -1043,7 +1045,7 @@
+                 # -------------------------------------------------------
+ 
+             if need_keepalive_handler:
+-                handlers.append( keepalive_handler )
++                handlers.extend( keepalive_handlers )
+             if need_range_handler:
+                 handlers.extend( range_handlers )
+             handlers.append( auth_handler )
+Index: urlgrabber/keepalive.py
+===================================================================
+RCS file: /cvsroot/urlgrabber/cvs-root/urlgrabber/urlgrabber/keepalive.py,v
+retrieving revision 1.14
+diff -u -r1.14 keepalive.py
+--- urlgrabber/keepalive.py	4 Apr 2006 21:00:32 -0000	1.14
++++ urlgrabber/keepalive.py	13 Jul 2006 19:48:26 -0000
+@@ -172,7 +172,7 @@
+         else:
+             return dict(self._hostmap)
+ 
+-class HTTPHandler(urllib2.HTTPHandler):
++class KeepAliveHandler:
+     def __init__(self):
+         self._cm = ConnectionManager()
+ 
+@@ -207,9 +207,6 @@
+         self._cm.remove(connection)
+ 
+     #### Transaction Execution
+-    def http_open(self, req):
+-        return self.do_open(HTTPConnection, req)
+-
+     def do_open(self, http_class, req):
+         host = req.get_host()
+         if not host:
+@@ -324,6 +321,22 @@
+         if req.has_data():
+             h.send(data)
+ 
++class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
++    """Open http requests through KeepAliveHandler.do_open."""
++    def __init__(self):
++        KeepAliveHandler.__init__(self)
++
++    def http_open(self, req):
++        return self.do_open(HTTPConnection, req)
++
++class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler):
++    """Open https requests through KeepAliveHandler.do_open."""
++    def __init__(self):
++        KeepAliveHandler.__init__(self)
++
++    def https_open(self, req):
++        return self.do_open(HTTPSConnection, req)
++
+ class HTTPResponse(httplib.HTTPResponse):
+     # we need to subclass HTTPResponse in order to
+     # 1) add readline() and readlines() methods
+@@ -425,6 +438,10 @@
+ class HTTPConnection(httplib.HTTPConnection):
+     # use the modified response class
+     response_class = HTTPResponse
++
++class HTTPSConnection(httplib.HTTPSConnection):
++    # use the modified response class
++    response_class = HTTPResponse
+ 
+ #########################################################################
+ ##### TEST FUNCTIONS