diff --git a/.gitignore b/.gitignore index c6fb12b..5f2f61a 100644 --- a/.gitignore +++ b/.gitignore @@ -285,3 +285,4 @@ calibre-0.7.14-nofonts.tar.xz /calibre-3.29.0-nofonts.tar.xz /calibre-3.34.0-nofonts.tar.xz /calibre-3.36.0-nofonts.tar.xz +/calibre-3.46.0-nofonts.tar.xz diff --git a/0001-py3-fix-invalid-escapes.patch b/0001-py3-fix-invalid-escapes.patch new file mode 100644 index 0000000..3871503 --- /dev/null +++ b/0001-py3-fix-invalid-escapes.patch @@ -0,0 +1,102 @@ +From 0156785a14ab3d120dcf7c3650a79c4aba6aa9c2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= +Date: Fri, 19 Jul 2019 16:22:21 +0200 +Subject: [PATCH 01/71] py3: fix invalid escapes + +python3.8 warns about strings which try to escape characters which do +not need that. To avoid the warning, the backslash should be escaped. +(Behaviour is functionally the same, so e.g. '\$' is still the same as +r'\$', except for the warning.) +--- + .../ebooks/unihandecode/unicodepoints.py | 26 +++++++++---------- + 1 file changed, 13 insertions(+), 13 deletions(-) + +diff --git a/src/calibre/ebooks/unihandecode/unicodepoints.py b/src/calibre/ebooks/unihandecode/unicodepoints.py +index fe1495de73..d9130123fb 100644 +--- a/src/calibre/ebooks/unihandecode/unicodepoints.py ++++ b/src/calibre/ebooks/unihandecode/unicodepoints.py +@@ -194,7 +194,7 @@ CODEPOINTS = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'Sh', 'D', 'Gh', '&', '+m', + ], + 'x07': [ +- '//', '/', ',', '!', '!', '-', ',', ',', ';', '?', '~', '\{', '\}', '*', '[?]', '', ++ '//', '/', ',', '!', '!', '-', ',', ',', ';', '?', '~', '\\{', '\\}', '*', '[?]', '', + '\'', '', 'b', 'g', 'g', 'd', 'd', 'h', 'w', 'z', 'H', 't', 't', 'y', 'yh', 'k', + 'l', 'm', 'n', 's', 's', '`', 'p', 'p', 'S', 'q', 'r', 'sh', 't', '[?]', '[?]', '[?]', + 'a', 'a', 'a', 'A', 'A', 'A', 'e', 'e', 'e', 'E', 'i', 'i', 'u', 'u', 'u', 'o', +@@ -844,7 +844,7 @@ CODEPOINTS = { + 'x28': [ + ' ', 'a', '1', 'b', '\'', 'k', '2', 'l', '@', 'c', 'i', 'f', '/', 'm', 's', 'p', + '"', 'e', '3', 'h', '9', 'o', '6', 'r', '^', 'd', 'j', 'g', '>', 'n', 't', 'q', +- ',', '*', '5', '<', '-', 'u', '8', 'v', '.', '%', '[', '\$', '+', 'x', '!', '&', ++ ',', '*', '5', '<', '-', 'u', '8', 'v', '.', '%', '[', '\\$', '+', 'x', '!', '&', + ';', ':', '4', '\\', '0', 'z', '7', '(', '_', '?', 'w', ']', '#', 'y', ')', '=', + '[d7]', '[d17]', '[d27]', '[d127]', '[d37]', '[d137]', '[d237]', '[d1237]', '[d47]', '[d147]', '[d247]', '[d1247]', '[d347]', '[d1347]', '[d2347]', '[d12347]', + '[d57]', '[d157]', '[d257]', '[d1257]', '[d357]', '[d1357]', '[d2357]', '[d12357]', '[d457]', '[d1457]', '[d2457]', '[d12457]', '[d3457]', '[d13457]', '[d23457]', '[d123457]', +@@ -916,15 +916,15 @@ CODEPOINTS = { + 'x00': [ + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', +- ' ', '!', '"', '#', '\$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ++ ' ', '!', '"', '#', '\\$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', + '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ']', '\\', ']', '^', '_', + '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +- 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\{', '|', '\}', '~', '\x7f', ++ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\\{', '|', '\\}', '~', '\x7f', + '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', + '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', +- ' ', '!', 'C/', 'PS', '\$?', 'Y=', '|', 'SS', '"', '(c)', 'a', '<<', '!', '', '(r)', '-', ++ ' ', '!', 'C/', 'PS', '\\$?', 'Y=', '|', 'SS', '"', '(c)', 'a', '<<', '!', '', '(r)', '-', + 'deg', '+-', '2', '3', '\'', 'u', 'P', '*', ',', '1', 'o', '>>', '1/4', '1/2', '3/4', '?', + 'A', 'A', 'A', 'A', 'A', 'A', 'AE', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I', + 'D', 'N', 'O', 'O', 'O', 'O', 'O', 'x', 'O', 'U', 'U', 'U', 'U', 'U', 'Th', 'ss', +@@ -935,10 +935,10 @@ CODEPOINTS = { + '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', + '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', + '', '', '', '~', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', +- '..', '--', '-', '_', '_', '(', ') ', '\{', '\} ', '[', '] ', '[(', ')] ', '<<', '>> ', '<', +- '> ', '[', '] ', '\{', '\}', '[?]', '[?]', '[?]', '[?]', '', '', '', '', '', '', '', +- ',', ',', '.', '', ';', ':', '?', '!', '-', '(', ')', '\{', '\}', '\{', '\}', '#', +- '&', '*', '+', '-', '<', '>', '=', '', '\\', '\$', '%', '@', '[?]', '[?]', '[?]', '[?]', ++ '..', '--', '-', '_', '_', '(', ') ', '\\{', '\\} ', '[', '] ', '[(', ')] ', '<<', '>> ', '<', ++ '> ', '[', '] ', '\\{', '\\}', '[?]', '[?]', '[?]', '[?]', '', '', '', '', '', '', '', ++ ',', ',', '.', '', ';', ':', '?', '!', '-', '(', ')', '\\{', '\\}', '\\{', '\\}', '#', ++ '&', '*', '+', '-', '<', '>', '=', '', '\\', '\\$', '%', '@', '[?]', '[?]', '[?]', '[?]', + '', '', '', '[?]', '', '[?]', '', '', '', '', '', '', '', '', '', '', + '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', + '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', +@@ -1256,12 +1256,12 @@ CODEPOINTS = { + 'maels', 'maelt', 'maelp', 'maelh', 'maem', 'maeb', 'maebs', 'maes', 'maess', 'maeng', 'maej', 'maec', 'maek', 'maet', 'maep', 'maeh', + ], + 'xff': [ +- '[?]', '!', '"', '#', '\$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ++ '[?]', '!', '"', '#', '\\$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', + '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_', + '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +- 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\{', '|', '\}', '~', '[?]', ++ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\\{', '|', '\\}', '~', '[?]', + '[?]', '.', '[', ']', ',', '*', 'wo', 'a', 'i', 'u', 'e', 'o', 'ya', 'yu', 'yo', 'tu', + '+', 'a', 'i', 'u', 'e', 'o', 'ka', 'ki', 'ku', 'ke', 'ko', 'sa', 'si', 'su', 'se', 'so', + 'ta', 'ti', 'tu', 'te', 'to', 'na', 'ni', 'nu', 'ne', 'no', 'ha', 'hi', 'hu', 'he', 'ho', 'ma', +@@ -1271,7 +1271,7 @@ CODEPOINTS = { + '[?]', '[?]', 'a', 'ae', 'ya', 'yae', 'eo', 'e', '[?]', '[?]', 'yeo', 'ye', 'o', 'wa', 'wae', 'oe', + '[?]', '[?]', 'yo', 'u', 'weo', 'we', 'wi', 'yu', '[?]', '[?]', 'eu', 'yi', 'i', '[?]', '[?]', '[?]', + '/C', 'PS', '!', '-', '|', 'Y=', 'W=', '[?]', '|', '-', '|', '-', '|', '#', 'O', '[?]', +- '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '\{', '|', '\}', '', '', '', '', ++ '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '\\{', '|', '\\}', '', '', '', '', + ], + 'xc3': [ + 'ssal', 'ssalg', 'ssalm', 'ssalb', 'ssals', 'ssalt', 'ssalp', 'ssalh', 'ssam', 'ssab', 'ssabs', 'ssas', 'ssass', 'ssang', 'ssaj', 'ssac', +@@ -1652,7 +1652,7 @@ CODEPOINTS = { + 'cwil', 'cwilg', 'cwilm', 'cwilb', 'cwils', 'cwilt', 'cwilp', 'cwilh', 'cwim', 'cwib', 'cwibs', 'cwis', 'cwiss', 'cwing', 'cwij', 'cwic', + ], + 'x30': [ +- ' ', ', ', '. ', '"', '[JIS]', '"', '/', '0', '<', '> ', '<<', '>> ', '[', '] ', '\{', '\} ', ++ ' ', ', ', '. ', '"', '[JIS]', '"', '/', '0', '<', '> ', '<<', '>> ', '[', '] ', '\\{', '\\} ', + '[(', ')] ', '@', 'X ', '[', '] ', '[[', ']] ', '((', ')) ', '[[', ']] ', '~ ', '``', '\'\'', ',,', + '@', '1', '2', '3', '4', '5', '6', '7', '8', '9', '', '', '', '', '', '', + '~', '+', '+', '+', '+', '', '@', ' // ', '+10+', '+20+', '+30+', '[?]', '[?]', '[?]', '', '', diff --git a/0002-py3-another-warning-about-invalid-escape.patch b/0002-py3-another-warning-about-invalid-escape.patch new file mode 100644 index 0000000..027ae9c --- /dev/null +++ b/0002-py3-another-warning-about-invalid-escape.patch @@ -0,0 +1,22 @@ +From 9480360bc8f522f0bb83cf3553b407b9d649e9f2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= +Date: Fri, 19 Jul 2019 16:26:02 +0200 +Subject: [PATCH 02/71] py3: another warning about invalid escape + +--- + src/tinycss/token_data.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/tinycss/token_data.py b/src/tinycss/token_data.py +index 40bff86e77..dfbafc1781 100644 +--- a/src/tinycss/token_data.py ++++ b/src/tinycss/token_data.py +@@ -214,7 +214,7 @@ FIND_NEWLINES = lambda x : list(re.compile(COMPILED_MACROS['nl']).finditer(x)) + + + class Token(object): +- """A single atomic token. ++ r"""A single atomic token. + + .. attribute:: is_container + diff --git a/0003-Update-WSJ.patch b/0003-Update-WSJ.patch new file mode 100644 index 0000000..30950ac --- /dev/null +++ b/0003-Update-WSJ.patch @@ -0,0 +1,285 @@ +From 39c86f23d401f9d7329d94fcbf32b51cbc003b8c Mon Sep 17 00:00:00 2001 +From: Kovid Goyal +Date: Sat, 20 Jul 2019 12:40:26 +0530 +Subject: [PATCH 03/71] Update WSJ + +Fixes #1837213 [Private bug](https://bugs.launchpad.net/calibre/+bug/1837213) +--- + recipes/wsj.recipe | 98 +++++++++++++++++++++++------------------ + recipes/wsj_free.recipe | 98 +++++++++++++++++++++++------------------ + 2 files changed, 110 insertions(+), 86 deletions(-) + +diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe +index da28f081b3..f40f3fedfe 100644 +--- a/recipes/wsj.recipe ++++ b/recipes/wsj.recipe +@@ -5,10 +5,7 @@ + from __future__ import absolute_import, division, print_function, unicode_literals + + import json +-try: +- from urllib.parse import quote +-except ImportError: +- from urllib import quote ++from base64 import standard_b64encode + + from mechanize import Request + +@@ -16,6 +13,16 @@ + from calibre.web.feeds.news import BasicNewsRecipe + from css_selectors import Select + ++try: ++ import urllib.parse as urlparse ++except ImportError: ++ import urlparse ++try: ++ from urllib.parse import quote ++except ImportError: ++ from urllib import quote ++ ++ + needs_subscription = True + + +@@ -40,7 +47,7 @@ class WSJ(BasicNewsRecipe): + ignore_duplicate_articles = {'url'} + remove_attributes = ['style', 'data-scrim'] + needs_subscription = needs_subscription +- WSJ_ITP = 'https://online.wsj.com/itp/today' ++ WSJ_ITP = 'https://www.wsj.com/print-edition/today' + + keep_only_tags = [ + dict(classes('wsj-article-headline-wrap article_header bigTop__hed bigTop__dek bigTop__captioncredit')), +@@ -87,51 +94,56 @@ def get_cover_url(self): + # login {{{ + if needs_subscription: + def get_browser(self, *a, **kw): +- # To understand the signin logic read signin.js from +- # https://id.wsj.com/access/pages/wsj/us/signin.html +- # This is the same login servie as used by Barrons ++ # To understand the login logic read app-min.js from ++ # https://sso.accounts.dowjones.com/login ++ itp = quote(self.WSJ_ITP, safe='') ++ start_url = 'https://accounts.wsj.com/login?target=' + itp + kw['user_agent'] = random_user_agent(allow_ie=False) + br = BasicNewsRecipe.get_browser(self, *a, **kw) +- # self.wsj_itp_page = open('/t/raw.html').read() +- # return br +- url = 'https://id.wsj.com/access/pages/wsj/us/signin.html?mg=com-wsj&mg=id-wsj' +- # br.set_debug_http(True) +- br.open(url).read() +- rurl = 'https://id.wsj.com/auth/submitlogin.json' +- rq = Request(rurl, headers={ +- 'Accept': 'application/json, text/javascript, */*; q=0.01', ++ self.log('Starting login process...') ++ res = br.open(start_url) ++ sso_url = res.geturl() ++ query = urlparse.parse_qs(urlparse.urlparse(sso_url).query) ++ query = {k:v[0] for k, v in query.items()} ++ request_query = { ++ 'username': self.username, ++ 'password': self.password, ++ 'client_id': query['client'], ++ 'sso': 'true', ++ 'tenant': 'sso', ++ '_intstate': 'deprecated', ++ } ++ for k in 'scope connection nonce state ui_locales ns protocol redirect_uri'.split(): ++ request_query[k] = query[k] ++ login_url = 'https://sso.accounts.dowjones.com/usernamepassword/login' ++ # you can get the version below from lib-min.js ++ # search for: str: "x.x.x" ++ # This might need to be updated in the future ++ auth0_client = json.dumps({"name": "auth0.js", "version": "7.0.3"}) ++ if not isinstance(auth0_client, bytes): ++ auth0_client = auth0_client.encode('utf-8') ++ auth0_client = standard_b64encode(auth0_client) ++ if isinstance(auth0_client, bytes): ++ auth0_client = auth0_client.decode('ascii') ++ rq = Request(login_url, headers={ ++ 'Accept': 'text/html', + 'Accept-Language': 'en-US,en;q=0.8', +- 'Content-Type': 'application/json', +- 'Referer': url, ++ 'Auth0-Client': auth0_client.rstrip('='), + 'X-HTTP-Method-Override': 'POST', + 'X-Requested-With': 'XMLHttpRequest', +- }, data=json.dumps({ +- 'username': self.username, +- 'password': self.password, +- 'realm': 'default', +- 'savelogin': 'true', +- 'template': 'default', +- 'url': quote(self.WSJ_ITP), +- })) +- r = br.open(rq) +- if r.code != 200: +- raise ValueError('Failed to login, check username and password') +- data = json.loads(r.read()) +- # print(data) +- if data.get('result') != 'success': +- raise ValueError( +- 'Failed to login (XHR failed), check username and password') +- br.set_cookie('m', data['username'], '.wsj.com') +- try: +- r = br.open(data['url']) +- except Exception: +- self.log.error('Failed to open login url: {}'.format(data['url'])) +- raise +- self.wsj_itp_page = raw = r.read() ++ 'X-Remote-User': self.username ++ }, data=request_query) ++ self.log('Sending login request...') ++ res = br.open(rq) ++ if res.code != 200: ++ raise ValueError('Failed to login, check your username and password') ++ br.select_form(nr=0) ++ self.log('Performing login callback...') ++ res = br.submit() ++ self.wsj_itp_page = raw = res.read() + if b'>Sign Out<' not in raw: + raise ValueError( +- 'Failed to login (auth URL failed), check username and password') +- # open('/t/raw.html', 'w').write(raw) ++ 'Failed to login (callback URL failed), check username and password') + return br + else: + def get_browser(self, *a, **kw): +diff --git a/recipes/wsj_free.recipe b/recipes/wsj_free.recipe +index e04e210114..25726c0ca3 100644 +--- a/recipes/wsj_free.recipe ++++ b/recipes/wsj_free.recipe +@@ -5,10 +5,7 @@ + from __future__ import absolute_import, division, print_function, unicode_literals + + import json +-try: +- from urllib.parse import quote +-except ImportError: +- from urllib import quote ++from base64 import standard_b64encode + + from mechanize import Request + +@@ -16,6 +13,16 @@ + from calibre.web.feeds.news import BasicNewsRecipe + from css_selectors import Select + ++try: ++ import urllib.parse as urlparse ++except ImportError: ++ import urlparse ++try: ++ from urllib.parse import quote ++except ImportError: ++ from urllib import quote ++ ++ + needs_subscription = False + + +@@ -40,7 +47,7 @@ class WSJ(BasicNewsRecipe): + ignore_duplicate_articles = {'url'} + remove_attributes = ['style', 'data-scrim'] + needs_subscription = needs_subscription +- WSJ_ITP = 'https://online.wsj.com/itp/today' ++ WSJ_ITP = 'https://www.wsj.com/print-edition/today' + + keep_only_tags = [ + dict(classes('wsj-article-headline-wrap article_header bigTop__hed bigTop__dek bigTop__captioncredit')), +@@ -87,51 +94,56 @@ def get_cover_url(self): + # login {{{ + if needs_subscription: + def get_browser(self, *a, **kw): +- # To understand the signin logic read signin.js from +- # https://id.wsj.com/access/pages/wsj/us/signin.html +- # This is the same login servie as used by Barrons ++ # To understand the login logic read app-min.js from ++ # https://sso.accounts.dowjones.com/login ++ itp = quote(self.WSJ_ITP, safe='') ++ start_url = 'https://accounts.wsj.com/login?target=' + itp + kw['user_agent'] = random_user_agent(allow_ie=False) + br = BasicNewsRecipe.get_browser(self, *a, **kw) +- # self.wsj_itp_page = open('/t/raw.html').read() +- # return br +- url = 'https://id.wsj.com/access/pages/wsj/us/signin.html?mg=com-wsj&mg=id-wsj' +- # br.set_debug_http(True) +- br.open(url).read() +- rurl = 'https://id.wsj.com/auth/submitlogin.json' +- rq = Request(rurl, headers={ +- 'Accept': 'application/json, text/javascript, */*; q=0.01', ++ self.log('Starting login process...') ++ res = br.open(start_url) ++ sso_url = res.geturl() ++ query = urlparse.parse_qs(urlparse.urlparse(sso_url).query) ++ query = {k:v[0] for k, v in query.items()} ++ request_query = { ++ 'username': self.username, ++ 'password': self.password, ++ 'client_id': query['client'], ++ 'sso': 'true', ++ 'tenant': 'sso', ++ '_intstate': 'deprecated', ++ } ++ for k in 'scope connection nonce state ui_locales ns protocol redirect_uri'.split(): ++ request_query[k] = query[k] ++ login_url = 'https://sso.accounts.dowjones.com/usernamepassword/login' ++ # you can get the version below from lib-min.js ++ # search for: str: "x.x.x" ++ # This might need to be updated in the future ++ auth0_client = json.dumps({"name": "auth0.js", "version": "7.0.3"}) ++ if not isinstance(auth0_client, bytes): ++ auth0_client = auth0_client.encode('utf-8') ++ auth0_client = standard_b64encode(auth0_client) ++ if isinstance(auth0_client, bytes): ++ auth0_client = auth0_client.decode('ascii') ++ rq = Request(login_url, headers={ ++ 'Accept': 'text/html', + 'Accept-Language': 'en-US,en;q=0.8', +- 'Content-Type': 'application/json', +- 'Referer': url, ++ 'Auth0-Client': auth0_client.rstrip('='), + 'X-HTTP-Method-Override': 'POST', + 'X-Requested-With': 'XMLHttpRequest', +- }, data=json.dumps({ +- 'username': self.username, +- 'password': self.password, +- 'realm': 'default', +- 'savelogin': 'true', +- 'template': 'default', +- 'url': quote(self.WSJ_ITP), +- })) +- r = br.open(rq) +- if r.code != 200: +- raise ValueError('Failed to login, check username and password') +- data = json.loads(r.read()) +- # print(data) +- if data.get('result') != 'success': +- raise ValueError( +- 'Failed to login (XHR failed), check username and password') +- br.set_cookie('m', data['username'], '.wsj.com') +- try: +- r = br.open(data['url']) +- except Exception: +- self.log.error('Failed to open login url: {}'.format(data['url'])) +- raise +- self.wsj_itp_page = raw = r.read() ++ 'X-Remote-User': self.username ++ }, data=request_query) ++ self.log('Sending login request...') ++ res = br.open(rq) ++ if res.code != 200: ++ raise ValueError('Failed to login, check your username and password') ++ br.select_form(nr=0) ++ self.log('Performing login callback...') ++ res = br.submit() ++ self.wsj_itp_page = raw = res.read() + if b'>Sign Out<' not in raw: + raise ValueError( +- 'Failed to login (auth URL failed), check username and password') +- # open('/t/raw.html', 'w').write(raw) ++ 'Failed to login (callback URL failed), check username and password') + return br + else: + def get_browser(self, *a, **kw): diff --git a/0004-Preferences-Ignored-devices-Add-a-button-to-reset-th.patch b/0004-Preferences-Ignored-devices-Add-a-button-to-reset-th.patch new file mode 100644 index 0000000..3e78d67 --- /dev/null +++ b/0004-Preferences-Ignored-devices-Add-a-button-to-reset-th.patch @@ -0,0 +1,85 @@ +From 6bad6948033823b82ea37fb889188e4904a6508c Mon Sep 17 00:00:00 2001 +From: Kovid Goyal +Date: Sat, 20 Jul 2019 14:59:54 +0530 +Subject: [PATCH 04/71] Preferences->Ignored devices: Add a button to reset the + list of devices that calibre is allowed to manage + +--- + .../gui2/preferences/ignored_devices.py | 31 ++++++++++++++----- + 1 file changed, 24 insertions(+), 7 deletions(-) + +diff --git a/src/calibre/gui2/preferences/ignored_devices.py b/src/calibre/gui2/preferences/ignored_devices.py +index 8cfb943cf3..cef9ac0dc2 100644 +--- a/src/calibre/gui2/preferences/ignored_devices.py ++++ b/src/calibre/gui2/preferences/ignored_devices.py +@@ -1,15 +1,16 @@ + #!/usr/bin/env python2 + # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai ++# License: GPLv3 Copyright: 2012, Kovid Goyal + from __future__ import absolute_import, division, print_function, unicode_literals + +-__license__ = 'GPL v3' +-__copyright__ = '2012, Kovid Goyal ' +-__docformat__ = 'restructuredtext en' ++import textwrap + +-from PyQt5.Qt import (QLabel, QVBoxLayout, QListWidget, QListWidgetItem, Qt, +- QIcon) ++from PyQt5.Qt import ( ++ QIcon, QLabel, QListWidget, QListWidgetItem, QPushButton, Qt, QVBoxLayout ++) + + from calibre.customize.ui import enable_plugin ++from calibre.gui2 import gprefs + from calibre.gui2.preferences import ConfigWidgetBase, test_widget + from polyglot.builtins import iteritems, range + +@@ -22,6 +23,7 @@ class ConfigWidget(ConfigWidgetBase): + self.gui = gui + self.l = l = QVBoxLayout() + self.setLayout(l) ++ self.confirms_reset = False + + self.la = la = QLabel(_( + 'The list of devices that you have asked calibre to ignore. ' +@@ -46,11 +48,24 @@ class ConfigWidget(ConfigWidgetBase): + f.itemChanged.connect(self.changed_signal) + f.itemDoubleClicked.connect(self.toggle_item) + ++ self.reset_confirmations_button = b = QPushButton(_('Reset allowed devices')) ++ b.setToolTip(textwrap.fill(_( ++ 'This will erase the list of devices that calibre knows about' ++ ' causing it to ask you for permission to manage them again,' ++ ' the next time they connect'))) ++ b.clicked.connect(self.reset_confirmations) ++ l.addWidget(b) ++ ++ def reset_confirmations(self): ++ self.confirms_reset = True ++ self.changed_signal.emit() ++ + def toggle_item(self, item): + item.setCheckState(Qt.Checked if item.checkState() == Qt.Unchecked else + Qt.Unchecked) + + def initialize(self): ++ self.confirms_reset = False + self.devices.blockSignals(True) + self.devices.clear() + for dev in self.gui.device_manager.devices: +@@ -94,11 +109,13 @@ class ConfigWidget(ConfigWidgetBase): + dev = e.data(Qt.UserRole) + if e.checkState() == Qt.Unchecked: + enable_plugin(dev) ++ if self.confirms_reset: ++ gprefs['ask_to_manage_device'] = [] + + return True # Restart required + + + if __name__ == '__main__': +- from PyQt5.Qt import QApplication +- app = QApplication([]) ++ from calibre.gui2 import Application ++ app = Application([]) + test_widget('Sharing', 'Ignored Devices') diff --git a/0005-Open-With-don-t-raise-KeyError-if-cache-exists-and-t.patch b/0005-Open-With-don-t-raise-KeyError-if-cache-exists-and-t.patch new file mode 100644 index 0000000..6590b63 --- /dev/null +++ b/0005-Open-With-don-t-raise-KeyError-if-cache-exists-and-t.patch @@ -0,0 +1,35 @@ +From 998c659c9e4afa3766c0c3056e9d359690e061a8 Mon Sep 17 00:00:00 2001 +From: Eli Schwartz +Date: Tue, 23 Jul 2019 01:11:19 -0400 +Subject: [PATCH 05/71] Open With: don't raise KeyError if cache exists and + there are new dirs + +If the cache failed to load, it is initialized as a defaultdict and all +mtimes compare as 0. If the cache did load, however, then an ordinary +dict was used, and if new icon directories appeared on the system since +the cache creation, they would raise a KeyError and Open With would not +load data. + +Fix by using a defaultdict in all cases, but initializing with the +contents of the cache if possible. + +Discovered when crazy applications added crazy subdirectories in +/usr/share/pixmaps (???) and suddenly calibre failed to do the right +thing, but the same should apply if the system adds a new icon theme. +--- + src/calibre/utils/open_with/linux.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/calibre/utils/open_with/linux.py b/src/calibre/utils/open_with/linux.py +index f4b3269c06..f397c68659 100644 +--- a/src/calibre/utils/open_with/linux.py ++++ b/src/calibre/utils/open_with/linux.py +@@ -116,7 +116,7 @@ def find_icons(): + with open(cache_file, 'rb') as f: + cache = f.read() + cache = msgpack_loads(cache) +- mtimes, cache = cache['mtimes'], cache['data'] ++ mtimes, cache = defaultdict(int, cache['mtimes']), defaultdict(dict, cache['data']) + except Exception: + mtimes, cache = defaultdict(int), defaultdict(dict) + diff --git a/0006-LIT-Output-Fix-regression-in-3.41-caused-by-py3-port.patch b/0006-LIT-Output-Fix-regression-in-3.41-caused-by-py3-port.patch new file mode 100644 index 0000000..0c22cb4 --- /dev/null +++ b/0006-LIT-Output-Fix-regression-in-3.41-caused-by-py3-port.patch @@ -0,0 +1,25 @@ +From e51136af1d1df5dd8e492ef71c87724708f538ad Mon Sep 17 00:00:00 2001 +From: Kovid Goyal +Date: Tue, 23 Jul 2019 20:08:12 +0530 +Subject: [PATCH 06/71] LIT Output: Fix regression in 3.41 caused by py3 + porting that broke conversion of some files to LIT. Fixes #1837561 [Lit + conversion error (Conversion options changed from + default)](https://bugs.launchpad.net/calibre/+bug/1837561) + +--- + src/calibre/ebooks/lit/writer.py | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py +index f1191b781e..534570fd19 100644 +--- a/src/calibre/ebooks/lit/writer.py ++++ b/src/calibre/ebooks/lit/writer.py +@@ -721,6 +721,8 @@ class LitWriter(object): + dchunks.append(dchunk.getvalue()) + dcounts.append(dcount) + if ichunk: ++ if not isinstance(name, bytes): ++ name = name.encode('utf-8') + ichunk.write(decint(len(name))) + ichunk.write(name) + ichunk.write(decint(cid)) diff --git a/0007-use-raw-strings-where-possible-to-avoid-escaping-iss.patch b/0007-use-raw-strings-where-possible-to-avoid-escaping-iss.patch new file mode 100644 index 0000000..044bbad --- /dev/null +++ b/0007-use-raw-strings-where-possible-to-avoid-escaping-iss.patch @@ -0,0 +1,193 @@ +From 504d7c417593f7402198886f68cd6b4363844035 Mon Sep 17 00:00:00 2001 +From: Eli Schwartz +Date: Thu, 18 Jul 2019 12:26:48 -0400 +Subject: [PATCH 07/71] use raw strings where possible to avoid escaping issues + +--- + src/calibre/ebooks/oeb/transforms/split.py | 2 +- + src/calibre/ebooks/pml/pmlml.py | 24 +++++++++++----------- + src/calibre/ebooks/readability/cleaners.py | 2 +- + src/calibre/ebooks/rtf/rtfml.py | 12 +++++------ + 4 files changed, 20 insertions(+), 20 deletions(-) + +diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py +index d0e1a334ec..a42bae67a5 100644 +--- a/src/calibre/ebooks/oeb/transforms/split.py ++++ b/src/calibre/ebooks/oeb/transforms/split.py +@@ -294,7 +294,7 @@ class FlowSplitter(object): + body = self.get_body(root) + if body is None: + return False +- txt = re.sub(u'\\s+|\\xa0', '', ++ txt = re.sub(ur'\s+|\xa0', '', + etree.tostring(body, method='text', encoding='unicode')) + if len(txt) > 1: + return False +diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py +index 772117321c..e99ec66226 100644 +--- a/src/calibre/ebooks/pml/pmlml.py ++++ b/src/calibre/ebooks/pml/pmlml.py +@@ -155,7 +155,7 @@ class PMLMLizer(object): + + def get_anchor(self, page, aid): + aid = self.get_anchor_id(page.href, aid) +- return u'\\Q="%s"' % aid ++ return ur'\Q="%s"' % aid + + def remove_newlines(self, text): + text = text.replace('\r\n', ' ') +@@ -186,10 +186,10 @@ class PMLMLizer(object): + anchors = set(re.findall(r'(?<=\\Q=").+?(?=")', text)) + links = set(re.findall(r'(?<=\\q="#).+?(?=")', text)) + for unused in anchors.difference(links): +- text = text.replace('\\Q="%s"' % unused, '') ++ text = text.replace(r'\Q="%s"' % unused, '') + + # Remove \Cn tags that are within \x and \Xn tags +- text = re.sub(unicode_type(r'(?msu)(?P\\(x|X[0-4]))(?P.*?)(?P\\C[0-4]\s*=\s*"[^"]*")(?P.*?)(?P=t)'), '\\g\\g\\g\\g', text) ++ text = re.sub(unicode_type(r'(?msu)(?P\\(x|X[0-4]))(?P.*?)(?P\\C[0-4]\s*=\s*"[^"]*")(?P.*?)(?P=t)'), r'\g\g\g\g', text) + + # Replace bad characters. + text = text.replace(u'\xc2', '') +@@ -259,7 +259,7 @@ class PMLMLizer(object): + '%s.png' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00') + text.append('\\m="%s"' % self.image_hrefs[page.abshref(elem.attrib['src'])]) + elif tag == 'hr': +- w = '\\w' ++ w = r'\w' + width = elem.get('width') + if width: + if not width.endswith('%'): +@@ -286,17 +286,17 @@ class PMLMLizer(object): + toc_title, toc_depth = self.toc[toc_page].get(toc_x, (None, 0)) + if toc_title: + toc_depth = max(min(toc_depth, 4), 0) +- text.append('\\C%s="%s"' % (toc_depth, toc_title)) ++ text.append(r'\C%s="%s"' % (toc_depth, toc_title)) + + # Process style information that needs holds a single tag. + # Commented out because every page in an OEB book starts with this style. + if style['page-break-before'] == 'always': +- text.append('\\p') ++ text.append(r'\p') + + # Process basic PML tags. + pml_tag = TAG_MAP.get(tag, None) + if pml_tag and pml_tag not in tag_stack+tags: +- text.append('\\%s' % pml_tag) ++ text.append(r'\%s' % pml_tag) + tags.append(pml_tag) + + # Special processing of tags that require an argument. +@@ -311,7 +311,7 @@ class PMLMLizer(object): + if href not in self.link_hrefs.keys(): + self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys()) + href = '#%s' % self.link_hrefs[href] +- text.append('\\q="%s"' % href) ++ text.append(r'\q="%s"' % href) + tags.append('q') + + # Anchor ids +@@ -325,14 +325,14 @@ class PMLMLizer(object): + for s in STYLES: + style_tag = s[1].get(style[s[0]], None) + if style_tag and style_tag not in tag_stack+tags: +- text.append('\\%s' % style_tag) ++ text.append('r\%s' % style_tag) + tags.append(style_tag) + + # margin left + try: + mms = int(float(style['margin-left']) * 100 / style.height) + if mms: +- text.append('\\T="%s%%"' % mms) ++ text.append(r'\T="%s%%"' % mms) + except: + pass + +@@ -360,7 +360,7 @@ class PMLMLizer(object): + # text.append('\n\n') + + if style['page-break-after'] == 'always': +- text.append('\\p') ++ text.append(r'\p') + + # Process text after this tag but not within another. + if hasattr(elem, 'tail') and elem.tail: +@@ -382,5 +382,5 @@ class PMLMLizer(object): + if tag in ('c', 'r'): + text.append('\n\\%s' % tag) + else: +- text.append('\\%s' % tag) ++ text.append(r'\%s' % tag) + return text +diff --git a/src/calibre/ebooks/readability/cleaners.py b/src/calibre/ebooks/readability/cleaners.py +index 057fcf17b3..d30216c4d8 100644 +--- a/src/calibre/ebooks/readability/cleaners.py ++++ b/src/calibre/ebooks/readability/cleaners.py +@@ -17,7 +17,7 @@ htmlstrip = re.compile("<" # open + + def clean_attributes(html): + while htmlstrip.search(html): +- html = htmlstrip.sub('<\\1\\2>', html) ++ html = htmlstrip.sub(r'<\1\2>', html) + return html + + +diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py +index d4b339c53c..7f6cc91c50 100644 +--- a/src/calibre/ebooks/rtf/rtfml.py ++++ b/src/calibre/ebooks/rtf/rtfml.py +@@ -83,7 +83,7 @@ def txt2rtf(text): + for x in text: + val = ord(x) + if val == 160: +- buf.write(u'\\~') ++ buf.write(ur'\~') + elif val <= 127: + buf.write(unicode_type(x)) + else: +@@ -115,7 +115,7 @@ class RTFMLizer(object): + self.opts, self.opts.output_profile) + self.currently_dumping_item = item + output += self.dump_text(item.data.find(XHTML('body')), stylizer) +- output += '{\\page }' ++ output += r'{\page }' + for item in self.oeb_book.spine: + self.log.debug('Converting %s to RTF markup...' % item.href) + # Removing comments is needed as comments with -- inside them can +@@ -127,7 +127,7 @@ class RTFMLizer(object): + stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile) + self.currently_dumping_item = item + output += self.dump_text(content.find(XHTML('body')), stylizer) +- output += '{\\page }' ++ output += r'{\page }' + output += self.footer() + output = self.insert_images(output) + output = self.clean_text(output) +@@ -259,7 +259,7 @@ class RTFMLizer(object): + block_start = '' + block_end = '' + if 'block' not in tag_stack: +- block_start = '{\\par\\pard\\hyphpar ' ++ block_start = r'{\par\pard\hyphpar ' + block_end = '}' + text += '%s SPECIAL_IMAGE-%s-REPLACE_ME %s' % (block_start, src, block_end) + +@@ -292,7 +292,7 @@ class RTFMLizer(object): + end_tag = tag_stack.pop() + if end_tag != 'block': + if tag in BLOCK_TAGS: +- text += u'\\par\\pard\\plain\\hyphpar}' ++ text += ur'\par\pard\plain\hyphpar}' + else: + text += u'}' + +@@ -300,6 +300,6 @@ class RTFMLizer(object): + if 'block' in tag_stack: + text += '%s' % txt2rtf(elem.tail) + else: +- text += '{\\par\\pard\\hyphpar %s}' % txt2rtf(elem.tail) ++ text += r'{\par\pard\hyphpar %s}' % txt2rtf(elem.tail) + + return text diff --git a/0008-fix-imports-from-the-wrong-module.patch b/0008-fix-imports-from-the-wrong-module.patch new file mode 100644 index 0000000..cce0f3f --- /dev/null +++ b/0008-fix-imports-from-the-wrong-module.patch @@ -0,0 +1,36 @@ +From 018f73f89cac3db1bd102a3eac3a4a916640ad41 Mon Sep 17 00:00:00 2001 +From: Eli Schwartz +Date: Thu, 18 Jul 2019 12:28:02 -0400 +Subject: [PATCH 08/71] fix imports from the wrong module + +when module_a.submodule_a imports module_b, then module_c should not use +"from module_a.submodule_a import module_b" +--- + src/calibre/ebooks/oeb/transforms/split.py | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py +index a42bae67a5..93fd1226c5 100644 +--- a/src/calibre/ebooks/oeb/transforms/split.py ++++ b/src/calibre/ebooks/oeb/transforms/split.py +@@ -18,9 +18,10 @@ from lxml import etree + from calibre import as_unicode, force_unicode + from calibre.ebooks.epub import rules + from calibre.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES, +- urldefrag, rewrite_links, urlunquote, XHTML, urlnormalize) ++ urldefrag, rewrite_links, XHTML, urlnormalize) + from calibre.ebooks.oeb.polish.split import do_split + from polyglot.builtins import iteritems, range, map ++from polyglot.urllib import unquote + from css_selectors import Select, SelectorError + + XPath = functools.partial(_XPath, namespaces=NAMESPACES) +@@ -179,7 +180,7 @@ class Split(object): + nhref = anchor_map[frag if frag else None] + nhref = self.current_item.relhref(nhref) + if frag: +- nhref = '#'.join((urlunquote(nhref), frag)) ++ nhref = '#'.join((unquote(nhref), frag)) + + return nhref + return url diff --git a/0009-unicode_check-do-not-try-to-check-pyuic-generated-fi.patch b/0009-unicode_check-do-not-try-to-check-pyuic-generated-fi.patch new file mode 100644 index 0000000..8d323c1 --- /dev/null +++ b/0009-unicode_check-do-not-try-to-check-pyuic-generated-fi.patch @@ -0,0 +1,24 @@ +From f39eff0117e1b789caad121e79b193b38fed8cf7 Mon Sep 17 00:00:00 2001 +From: Eli Schwartz +Date: Mon, 22 Jul 2019 11:20:50 -0400 +Subject: [PATCH 09/71] unicode_check: do not try to check pyuic-generated + files + +--- + setup/port.py | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/setup/port.py b/setup/port.py +index ecd09e2e9e..401764fcb7 100644 +--- a/setup/port.py ++++ b/setup/port.py +@@ -72,7 +72,8 @@ class Base(Command): + def get_files(self): + from calibre import walk + for path in walk(os.path.join(self.SRC, 'calibre')): +- if path.endswith('.py') and not os.path.basename(path) in self.EXCLUDED_BASENAMES: ++ if (path.endswith('.py') and not path.endswith('_ui.py') and not ++ os.path.basename(path) in self.EXCLUDED_BASENAMES): + yield path + + def file_hash(self, f): diff --git a/0010-py3-more-work-towards-universal-__future__s.patch b/0010-py3-more-work-towards-universal-__future__s.patch new file mode 100644 index 0000000..c058cc2 --- /dev/null +++ b/0010-py3-more-work-towards-universal-__future__s.patch @@ -0,0 +1,2388 @@ +From 2c909f199e721e27c7639acc575a917215a7c3da Mon Sep 17 00:00:00 2001 +From: Eli Schwartz +Date: Thu, 18 Jul 2019 12:29:39 -0400 +Subject: [PATCH 10/71] py3: more work towards universal __future__s + +--- + src/calibre/ebooks/oeb/transforms/flatcss.py | 42 +-- + src/calibre/ebooks/oeb/transforms/jacket.py | 7 +- + src/calibre/ebooks/oeb/transforms/metadata.py | 2 +- + src/calibre/ebooks/oeb/transforms/split.py | 12 +- + .../ebooks/oeb/transforms/structure.py | 12 +- + .../ebooks/oeb/transforms/trimmanifest.py | 2 +- + src/calibre/ebooks/pml/__init__.py | 1 + + src/calibre/ebooks/pml/pmlconverter.py | 37 +-- + src/calibre/ebooks/pml/pmlml.py | 13 +- + src/calibre/ebooks/rb/__init__.py | 5 +- + src/calibre/ebooks/rb/writer.py | 1 + + src/calibre/ebooks/readability/cleaners.py | 2 + + src/calibre/ebooks/readability/debug.py | 2 + + src/calibre/ebooks/readability/htmls.py | 18 +- + src/calibre/ebooks/readability/readability.py | 18 +- + src/calibre/ebooks/rtf/input.py | 5 +- + src/calibre/ebooks/rtf/preprocess.py | 3 +- + src/calibre/ebooks/rtf/rtfml.py | 22 +- + src/calibre/ebooks/rtf2xml/ParseRtf.py | 6 +- + src/calibre/ebooks/rtf2xml/check_encoding.py | 6 +- + src/calibre/ebooks/rtf2xml/footnote.py | 8 +- + src/calibre/ebooks/rtf2xml/hex_2_utf8.py | 6 +- + src/calibre/ebooks/rtf2xml/list_table.py | 15 +- + src/calibre/ebooks/rtf2xml/make_lists.py | 5 +- + src/calibre/ebooks/rtf2xml/old_rtf.py | 7 +- + src/calibre/ebooks/rtf2xml/paragraph_def.py | 7 +- + src/calibre/ebooks/rtf2xml/pict.py | 6 +- + src/calibre/ebooks/rtf2xml/process_tokens.py | 8 +- + src/calibre/ebooks/rtf2xml/sections.py | 16 +- + src/calibre/ebooks/rtf2xml/table.py | 11 +- + src/calibre/ebooks/snb/__init__.py | 3 +- + src/calibre/ebooks/snb/snbml.py | 41 +-- + src/calibre/ebooks/unihandecode/__init__.py | 7 +- + .../ebooks/unihandecode/jacodepoints.py | 3 +- + src/calibre/ebooks/unihandecode/jadecoder.py | 3 +- + .../ebooks/unihandecode/krcodepoints.py | 3 +- + src/calibre/ebooks/unihandecode/krdecoder.py | 2 +- + .../ebooks/unihandecode/pykakasi/__init__.py | 3 +- + .../ebooks/unihandecode/pykakasi/h2a.py | 239 +++++++++--------- + .../ebooks/unihandecode/pykakasi/j2h.py | 3 +- + .../ebooks/unihandecode/pykakasi/jisyo.py | 2 + + .../ebooks/unihandecode/pykakasi/k2a.py | 1 + + .../ebooks/unihandecode/pykakasi/kakasi.py | 2 +- + .../ebooks/unihandecode/unicodepoints.py | 1 + + src/calibre/ebooks/unihandecode/unidecoder.py | 1 + + .../ebooks/unihandecode/vncodepoints.py | 3 +- + src/calibre/ebooks/unihandecode/vndecoder.py | 2 +- + .../ebooks/unihandecode/zhcodepoints.py | 3 +- + src/calibre/gui2/book_details.py | 27 +- + src/calibre/gui2/cover_flow.py | 9 +- + src/calibre/gui2/custom_column_widgets.py | 3 +- + src/calibre/gui2/email.py | 2 +- + src/calibre/gui2/init.py | 5 +- + src/calibre/gui2/jobs.py | 14 +- + src/calibre/gui2/layout.py | 3 +- + src/calibre/gui2/linux_file_dialogs.py | 6 +- + src/calibre/gui2/shortcuts.py | 4 +- + src/calibre/gui2/ui.py | 6 +- + src/calibre/gui2/update.py | 16 +- + 59 files changed, 393 insertions(+), 329 deletions(-) + +diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py +index 4b777dbc04..c02dfe9e34 100644 +--- a/src/calibre/ebooks/oeb/transforms/flatcss.py ++++ b/src/calibre/ebooks/oeb/transforms/flatcss.py +@@ -1,7 +1,7 @@ + ''' + CSS flattening transform. + ''' +-from __future__ import with_statement ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL v3' + __copyright__ = '2008, Marshall T. Vandegrift ' +@@ -122,9 +122,9 @@ class EmbedFontsCSSRules(object): + if not self.body_font_family: + return None + if not self.href: +- iid, href = oeb.manifest.generate(u'page_styles', u'page_styles.css') ++ iid, href = oeb.manifest.generate('page_styles', 'page_styles.css') + rules = [css_text(x) for x in self.rules] +- rules = u'\n\n'.join(rules) ++ rules = '\n\n'.join(rules) + sheet = css_parser.parseString(rules, validate=False) + self.href = oeb.manifest.add(iid, href, guess_type(href)[0], + data=sheet).href +@@ -228,13 +228,13 @@ class CSSFlattener(object): + try: + faces = font_scanner.fonts_for_family(family) + except NoFonts: +- msg = (u'No embeddable fonts found for family: %r'%family) ++ msg = ('No embeddable fonts found for family: %r'%family) + if failure_critical: + raise ValueError(msg) + self.oeb.log.warn(msg) + return body_font_family, efi + if not faces: +- msg = (u'No embeddable fonts found for family: %r'%family) ++ msg = ('No embeddable fonts found for family: %r'%family) + if failure_critical: + raise ValueError(msg) + self.oeb.log.warn(msg) +@@ -243,26 +243,26 @@ class CSSFlattener(object): + for i, font in enumerate(faces): + ext = 'otf' if font['is_otf'] else 'ttf' + fid, href = self.oeb.manifest.generate(id=u'font', +- href=u'fonts/%s.%s'%(ascii_filename(font['full_name']).replace(u' ', u'-'), ext)) ++ href='fonts/%s.%s'%(ascii_filename(font['full_name']).replace(' ', '-'), ext)) + item = self.oeb.manifest.add(fid, href, + guess_type('dummy.'+ext)[0], + data=font_scanner.get_font_data(font)) + item.unload_data_from_memory() + + cfont = { +- u'font-family':u'"%s"'%font['font-family'], +- u'panose-1': u' '.join(map(unicode_type, font['panose'])), +- u'src': u'url(%s)'%item.href, ++ 'font-family': '"%s"'%font['font-family'], ++ 'panose-1': ' '.join(map(unicode_type, font['panose'])), ++ 'src': 'url(%s)'%item.href, + } + + if i == 0: + generic_family = panose_to_css_generic_family(font['panose']) +- body_font_family = u"'%s',%s"%(font['font-family'], generic_family) +- self.oeb.log(u'Embedding font: %s'%font['font-family']) +- for k in (u'font-weight', u'font-style', u'font-stretch'): +- if font[k] != u'normal': ++ body_font_family = "'%s',%s"%(font['font-family'], generic_family) ++ self.oeb.log('Embedding font: %s'%font['font-family']) ++ for k in ('font-weight', 'font-style', 'font-stretch'): ++ if font[k] != 'normal': + cfont[k] = font[k] +- rule = '@font-face { %s }'%('; '.join(u'%s:%s'%(k, v) for k, v in ++ rule = '@font-face { %s }'%('; '.join('%s:%s'%(k, v) for k, v in + iteritems(cfont))) + rule = css_parser.parseString(rule) + efi.append(rule) +@@ -295,7 +295,7 @@ class CSSFlattener(object): + if self.context.change_justification != 'original': + bs.append('text-align: '+ self.context.change_justification) + if self.body_font_family: +- bs.append(u'font-family: '+self.body_font_family) ++ bs.append('font-family: '+self.body_font_family) + body.set('style', '; '.join(bs)) + stylizer = Stylizer(html, item.href, self.oeb, self.context, profile, + user_css=self.context.extra_css, +@@ -458,7 +458,7 @@ class CSSFlattener(object): + dyn_rescale = node.attrib.pop('data-calibre-rescale', None) + if dyn_rescale is not None: + try: +- dyn_rescale = float(dyn_rescale) / 100.0 ++ dyn_rescale = float(dyn_rescale) / 100 + except Exception: + dyn_rescale = 1 + fsize = self.fmap[_sbase] +@@ -476,7 +476,7 @@ class CSSFlattener(object): + try: + minlh = self.context.minimum_line_height / 100. + if not is_drop_cap and style['line-height'] < minlh * fsize: +- cssdict['line-height'] = str(minlh) ++ cssdict['line-height'] = unicode_type(minlh) + except: + self.oeb.logger.exception('Failed to set minimum line-height') + +@@ -528,7 +528,7 @@ class CSSFlattener(object): + + if cssdict: + items = sorted(iteritems(cssdict)) +- css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items) ++ css = ';\n'.join(u'%s: %s' % (key, val) for key, val in items) + classes = node.get('class', '').strip() or 'calibre' + classes_list = classes.split() + # lower() because otherwise if the document uses the same class +@@ -538,7 +538,7 @@ class CSSFlattener(object): + if css in styles: + match = styles[css] + else: +- match = klass + str(names[klass] or '') ++ match = klass + unicode_type(names[klass] or '') + styles[css] = match + names[klass] += 1 + node.attrib['class'] = match +@@ -546,7 +546,7 @@ class CSSFlattener(object): + + for psel, cssdict in iteritems(pseudo_classes): + items = sorted(iteritems(cssdict)) +- css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items) ++ css = ';\n'.join('%s: %s' % (key, val) for key, val in items) + pstyles = pseudo_styles[psel] + if css in pstyles: + match = pstyles[css] +@@ -558,7 +558,7 @@ class CSSFlattener(object): + # then the class attribute for a.x tags will contain both + # that class and the class for a.x:hover, which is wrong. + klass = 'pcalibre' +- match = klass + str(names[klass] or '') ++ match = klass + unicode_type(names[klass] or '') + pstyles[css] = match + names[klass] += 1 + keep_classes.add(match) +diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py +index 68ae89b8ba..35ead6a29a 100644 +--- a/src/calibre/ebooks/oeb/transforms/jacket.py ++++ b/src/calibre/ebooks/oeb/transforms/jacket.py +@@ -1,7 +1,6 @@ + #!/usr/bin/env python2 + # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +-from __future__ import with_statement +-from __future__ import print_function ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL v3' + __copyright__ = '2009, Kovid Goyal ' +@@ -188,7 +187,7 @@ class Series(unicode_type): + combined = roman = escape(series or u'') + s = unicode_type.__new__(self, combined) + s.roman = roman +- s.name = escape(series or u'') ++ s.name = escape(series or '') + s.number = escape(fmt_sidx(series_index or 1.0, use_roman=False)) + s.roman_number = escape(fmt_sidx(series_index or 1.0, use_roman=True)) + return s +@@ -260,7 +259,7 @@ def render_jacket(mi, output_profile, + pubdate = '' + else: + dt = as_local_time(mi.pubdate) +- pubdate = strftime(u'%Y', dt.timetuple()) ++ pubdate = strftime('%Y', dt.timetuple()) + except: + pubdate = '' + +diff --git a/src/calibre/ebooks/oeb/transforms/metadata.py b/src/calibre/ebooks/oeb/transforms/metadata.py +index 7f5342d6f4..ed829dc92d 100644 +--- a/src/calibre/ebooks/oeb/transforms/metadata.py ++++ b/src/calibre/ebooks/oeb/transforms/metadata.py +@@ -1,6 +1,6 @@ + #!/usr/bin/env python2 + # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +-from __future__ import with_statement ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL v3' + __copyright__ = '2009, Kovid Goyal ' +diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py +index 93fd1226c5..a54121e48c 100644 +--- a/src/calibre/ebooks/oeb/transforms/split.py ++++ b/src/calibre/ebooks/oeb/transforms/split.py +@@ -1,4 +1,4 @@ +-from __future__ import with_statement ++from __future__ import absolute_import, division, print_function, unicode_literals + __license__ = 'GPL v3' + __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' + __docformat__ = 'restructuredtext en' +@@ -20,7 +20,7 @@ from calibre.ebooks.epub import rules + from calibre.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES, + urldefrag, rewrite_links, XHTML, urlnormalize) + from calibre.ebooks.oeb.polish.split import do_split +-from polyglot.builtins import iteritems, range, map ++from polyglot.builtins import iteritems, range, map, unicode_type + from polyglot.urllib import unquote + from css_selectors import Select, SelectorError + +@@ -123,7 +123,7 @@ class Split(object): + + for i, elem in enumerate(item.data.iter('*')): + try: +- elem.set('pb_order', str(i)) ++ elem.set('pb_order', unicode_type(i)) + except TypeError: # Cant set attributes on comment nodes etc. + continue + +@@ -202,7 +202,7 @@ class FlowSplitter(object): + self.csp_counter = 0 + + base, ext = os.path.splitext(self.base) +- self.base = base.replace('%', '%%')+u'_split_%.3d'+ext ++ self.base = base.replace('%', '%%')+'_split_%.3d'+ext + + self.trees = [self.item.data.getroottree()] + self.splitting_on_page_breaks = True +@@ -295,7 +295,7 @@ class FlowSplitter(object): + body = self.get_body(root) + if body is None: + return False +- txt = re.sub(ur'\s+|\xa0', '', ++ txt = re.sub(r'\s+|\xa0', '', + etree.tostring(body, method='text', encoding='unicode')) + if len(txt) > 1: + return False +@@ -338,7 +338,7 @@ class FlowSplitter(object): + for frag in frags: + pre2 = copy.copy(pre) + pre2.text = frag +- pre2.tail = u'' ++ pre2.tail = '' + new_pres.append(pre2) + new_pres[-1].tail = pre.tail + p = pre.getparent() +diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py +index 31c95c8aa1..cd5211c156 100644 +--- a/src/calibre/ebooks/oeb/transforms/structure.py ++++ b/src/calibre/ebooks/oeb/transforms/structure.py +@@ -1,6 +1,6 @@ + #!/usr/bin/env python2 + # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +-from __future__ import with_statement ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL v3' + __copyright__ = '2009, Kovid Goyal ' +@@ -26,7 +26,7 @@ def XPath(x): + + + def isspace(x): +- return not x or x.replace(u'\xa0', u'').isspace() ++ return not x or x.replace('\xa0', '').isspace() + + + def at_start(elem): +@@ -124,11 +124,11 @@ class DetectStructure(object): + elem = matches[0] + eid = elem.get('id', None) + if not eid: +- eid = u'start_reading_at_'+unicode_type(uuid.uuid4()).replace(u'-', u'') ++ eid = 'start_reading_at_'+unicode_type(uuid.uuid4()).replace('-', '') + elem.set('id', eid) +- if u'text' in self.oeb.guide: +- self.oeb.guide.remove(u'text') +- self.oeb.guide.add(u'text', u'Start', item.href+u'#'+eid) ++ if 'text' in self.oeb.guide: ++ self.oeb.guide.remove('text') ++ self.oeb.guide.add('text', 'Start', item.href+'#'+eid) + self.log('Setting start reading at position to %s in %s'%( + self.opts.start_reading_at, item.href)) + return +diff --git a/src/calibre/ebooks/oeb/transforms/trimmanifest.py b/src/calibre/ebooks/oeb/transforms/trimmanifest.py +index c9d9b04b6e..d67f0e471c 100644 +--- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py ++++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py +@@ -1,7 +1,7 @@ + ''' + OPF manifest trimming transform. + ''' +-from __future__ import with_statement ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL v3' + __copyright__ = '2008, Marshall T. Vandegrift ' +diff --git a/src/calibre/ebooks/pml/__init__.py b/src/calibre/ebooks/pml/__init__.py +index c01caf569d..2260f2fe7c 100644 +--- a/src/calibre/ebooks/pml/__init__.py ++++ b/src/calibre/ebooks/pml/__init__.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL v3' + __copyright__ = '2009, John Schember ' +diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py +index da748469c2..4bef9a306b 100644 +--- a/src/calibre/ebooks/pml/pmlconverter.py ++++ b/src/calibre/ebooks/pml/pmlconverter.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + ''' + Convert pml markup to and from html +@@ -229,7 +230,7 @@ class PML_HTMLizer(object): + return html + + def start_line(self): +- start = u'' ++ start = '' + + state = deepcopy(self.state) + div = [] +@@ -258,10 +259,10 @@ class PML_HTMLizer(object): + else: + start += self.STATES_TAGS[key][0] + +- return u'

%s' % start ++ return '

%s' % start + + def end_line(self): +- end = u'' ++ end = '' + + div = [] + span = [] +@@ -281,10 +282,10 @@ class PML_HTMLizer(object): + else: + end += self.STATES_TAGS[key][1] + +- return u'%s

' % end ++ return '%s

' % end + + def process_code(self, code, stream, pre=''): +- text = u'' ++ text = '' + + code = self.CODE_STATES.get(code, None) + if not code: +@@ -309,7 +310,7 @@ class PML_HTMLizer(object): + return text + + def process_code_simple(self, code, stream): +- text = u'' ++ text = '' + + if self.state[code][0]: + if code in self.STATES_CLOSE_VALUE_REQ: +@@ -330,7 +331,7 @@ class PML_HTMLizer(object): + return text + + def process_code_div(self, code, stream): +- text = u'' ++ text = '' + + # Close code. + if self.state[code][0]: +@@ -384,7 +385,7 @@ class PML_HTMLizer(object): + return text + + def process_code_span(self, code, stream): +- text = u'' ++ text = '' + + # Close code. + if self.state[code][0]: +@@ -422,7 +423,7 @@ class PML_HTMLizer(object): + return text + + def process_code_block(self, code, stream, pre=''): +- text = u'' ++ text = '' + + # Close all spans + for c in self.SPAN_STATES: +@@ -467,7 +468,7 @@ class PML_HTMLizer(object): + return text + + def code_value(self, stream): +- value = u'' ++ value = '' + # state 0 is before = + # state 1 is before the first " + # state 2 is before the second " +@@ -506,7 +507,7 @@ class PML_HTMLizer(object): + # Unable to complete the sequence to reterieve the value. Reset + # the stream to the location it started. + stream.seek(loc) +- value = u'' ++ value = '' + + return value.strip() + +@@ -565,7 +566,7 @@ class PML_HTMLizer(object): + + c = line.read(1) + while c != '': +- text = u'' ++ text = '' + + if c == '\\': + c = line.read(1) +@@ -673,10 +674,10 @@ class PML_HTMLizer(object): + indent_state['T'] = False + adv_indent_val = '' + +- output.append(u''.join(parsed)) ++ output.append(''.join(parsed)) + line.close() + +- output = self.cleanup_html(u'\n'.join(output)) ++ output = self.cleanup_html('\n'.join(output)) + + return output + +@@ -700,18 +701,18 @@ class PML_HTMLizer(object): + t_l3 = None + + for level, (href, id, text) in self.toc: +- if level == u'0': ++ if level == '0': + t_l0 = n_toc.add_item(href, id, text) + t_l1 = None + t_l2 = None + t_l3 = None +- elif level == u'1': ++ elif level == '1': + if t_l0 is None: + t_l0 = n_toc + t_l1 = t_l0.add_item(href, id, text) + t_l2 = None + t_l3 = None +- elif level == u'2': ++ elif level == '2': + if t_l1 is None: + if t_l0 is None: + t_l1 = n_toc +@@ -719,7 +720,7 @@ class PML_HTMLizer(object): + t_l1 = t_l0 + t_l2 = t_l1.add_item(href, id, text) + t_l3 = None +- elif level == u'3': ++ elif level == '3': + if t_l2 is None: + if t_l1 is None: + if t_l0 is None: +diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py +index e99ec66226..7685036733 100644 +--- a/src/calibre/ebooks/pml/pmlml.py ++++ b/src/calibre/ebooks/pml/pmlml.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2009, John Schember ' +@@ -103,7 +104,7 @@ class PMLMLizer(object): + def pmlmlize_spine(self): + self.image_hrefs = {} + self.link_hrefs = {} +- output = [u''] ++ output = [''] + output.append(self.get_cover_page()) + output.append(self.get_text()) + output = ''.join(output) +@@ -114,7 +115,7 @@ class PMLMLizer(object): + from calibre.ebooks.oeb.stylizer import Stylizer + from calibre.ebooks.oeb.base import XHTML + +- output = u'' ++ output = '' + if 'cover' in self.oeb_book.guide: + output += '\\m="cover.png"\n' + self.image_hrefs[self.oeb_book.guide['cover'].href] = 'cover.png' +@@ -132,7 +133,7 @@ class PMLMLizer(object): + from calibre.ebooks.oeb.stylizer import Stylizer + from calibre.ebooks.oeb.base import XHTML + +- text = [u''] ++ text = [''] + for item in self.oeb_book.spine: + self.log.debug('Converting %s to PML markup...' % item.href) + content = etree.tostring(item.data, encoding='unicode') +@@ -155,7 +156,7 @@ class PMLMLizer(object): + + def get_anchor(self, page, aid): + aid = self.get_anchor_id(page.href, aid) +- return ur'\Q="%s"' % aid ++ return r'\Q="%s"' % aid + + def remove_newlines(self, text): + text = text.replace('\r\n', ' ') +@@ -192,8 +193,8 @@ class PMLMLizer(object): + text = re.sub(unicode_type(r'(?msu)(?P\\(x|X[0-4]))(?P
.*?)(?P\\C[0-4]\s*=\s*"[^"]*")(?P.*?)(?P=t)'), r'\g\g\g\g', text) + + # Replace bad characters. +- text = text.replace(u'\xc2', '') +- text = text.replace(u'\xa0', ' ') ++ text = text.replace('\xc2', '') ++ text = text.replace('\xa0', ' ') + + # Turn all characters that cannot be represented by themself into their + # PML code equivelent +diff --git a/src/calibre/ebooks/rb/__init__.py b/src/calibre/ebooks/rb/__init__.py +index f45b966dad..f942dc3eb2 100644 +--- a/src/calibre/ebooks/rb/__init__.py ++++ b/src/calibre/ebooks/rb/__init__.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2009, John Schember ' +@@ -6,6 +7,8 @@ __docformat__ = 'restructuredtext en' + + import os + ++from polyglot.builtins import unicode_type ++ + HEADER = b'\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00' + + +@@ -21,7 +24,7 @@ def unique_name(name, used_names): + ext = os.path.splitext(name)[1][:3] + base_name = name[:22] + for i in range(0, 9999): +- name = '%s-%s.%s' % (str(i).rjust('0', 4)[:4], base_name, ext) ++ name = '%s-%s.%s' % (unicode_type(i).rjust('0', 4)[:4], base_name, ext) + if name not in used_names: + break + return name +diff --git a/src/calibre/ebooks/rb/writer.py b/src/calibre/ebooks/rb/writer.py +index 9f4818725f..af671295bc 100644 +--- a/src/calibre/ebooks/rb/writer.py ++++ b/src/calibre/ebooks/rb/writer.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2009, John Schember ' +diff --git a/src/calibre/ebooks/readability/cleaners.py b/src/calibre/ebooks/readability/cleaners.py +index d30216c4d8..ee160f594b 100644 +--- a/src/calibre/ebooks/readability/cleaners.py ++++ b/src/calibre/ebooks/readability/cleaners.py +@@ -1,3 +1,5 @@ ++from __future__ import absolute_import, division, print_function, unicode_literals ++ + # strip out a set of nuisance html attributes that can mess up rendering in RSS feeds + import re + from lxml.html.clean import Cleaner +diff --git a/src/calibre/ebooks/readability/debug.py b/src/calibre/ebooks/readability/debug.py +index 103bb5f9f0..c8acf510cf 100644 +--- a/src/calibre/ebooks/readability/debug.py ++++ b/src/calibre/ebooks/readability/debug.py +@@ -1,3 +1,5 @@ ++from __future__ import absolute_import, division, print_function, unicode_literals ++ + def save_to_file(text, filename): + f = open(filename, 'wt') + f.write('') +diff --git a/src/calibre/ebooks/readability/htmls.py b/src/calibre/ebooks/readability/htmls.py +index 692f26c2ca..56aa159508 100644 +--- a/src/calibre/ebooks/readability/htmls.py ++++ b/src/calibre/ebooks/readability/htmls.py +@@ -1,3 +1,5 @@ ++from __future__ import absolute_import, division, print_function, unicode_literals ++ + import re + + from lxml.html import tostring +@@ -20,14 +22,14 @@ def js_re(src, pattern, flags, repl): + + def normalize_entities(cur_title): + entities = { +- u'\u2014':'-', +- u'\u2013':'-', +- u'—': '-', +- u'–': '-', +- u'\u00A0': ' ', +- u'\u00AB': '"', +- u'\u00BB': '"', +- u'"': '"', ++ '\u2014':'-', ++ '\u2013':'-', ++ '—': '-', ++ '–': '-', ++ '\u00A0': ' ', ++ '\u00AB': '"', ++ '\u00BB': '"', ++ '"': '"', + } + for c, r in iteritems(entities): + if c in cur_title: +diff --git a/src/calibre/ebooks/readability/readability.py b/src/calibre/ebooks/readability/readability.py +index bef8caf307..e8cb581b10 100644 +--- a/src/calibre/ebooks/readability/readability.py ++++ b/src/calibre/ebooks/readability/readability.py +@@ -156,7 +156,7 @@ class Document: + return cleaned_article + except Exception as e: + self.log.exception('error getting summary: ') +- reraise(Unparseable, Unparseable(str(e)), sys.exc_info()[2]) ++ reraise(Unparseable, Unparseable(unicode_type(e)), sys.exc_info()[2]) + + def get_article(self, candidates, best_candidate): + # Now that we have the top candidate, look through its siblings for content that might also be related. +@@ -216,7 +216,7 @@ class Document: + def score_paragraphs(self, ): + MIN_LEN = self.options.get('min_text_length', self.TEXT_LENGTH_THRESHOLD) + candidates = {} +- # self.debug(str([describe(node) for node in self.tags(self.html, "div")])) ++ # self.debug(unicode_type([describe(node) for node in self.tags(self.html, "div")])) + + ordered = [] + for elem in self.tags(self.html, "p", "pre", "td"): +@@ -316,7 +316,7 @@ class Document: + if not REGEXES['divToPElementsRe'].search(unicode_type(''.join(map(tounicode, list(elem))))): + # self.debug("Altering %s to p" % (describe(elem))) + elem.tag = "p" +- # print "Fixed element "+describe(elem) ++ # print("Fixed element "+describe(elem)) + + for elem in self.tags(self.html, 'div'): + if elem.text and elem.text.strip(): +@@ -324,7 +324,7 @@ class Document: + p.text = elem.text + elem.text = None + elem.insert(0, p) +- # print "Appended "+tounicode(p)+" to "+describe(elem) ++ # print("Appended "+tounicode(p)+" to "+describe(elem)) + + for pos, child in reversed(list(enumerate(elem))): + if child.tail and child.tail.strip(): +@@ -332,9 +332,9 @@ class Document: + p.text = child.tail + child.tail = None + elem.insert(pos + 1, p) +- # print "Inserted "+tounicode(p)+" to "+describe(elem) ++ # print("Inserted "+tounicode(p)+" to "+describe(elem)) + if child.tag == 'br': +- # print 'Dropped
at '+describe(elem) ++ # print('Dropped
at '+describe(elem)) + child.drop_tree() + + def tags(self, node, *tag_names): +@@ -363,7 +363,7 @@ class Document: + weight = self.class_weight(el) + if el in candidates: + content_score = candidates[el]['content_score'] +- # print '!',el, '-> %6.3f' % content_score ++ # print('!',el, '-> %6.3f' % content_score) + else: + content_score = 0 + tag = el.tag +@@ -457,7 +457,7 @@ class Document: + siblings.append(sib_content_length) + if j == x: + break +- # self.debug(str(siblings)) ++ # self.debug(unicode_type(siblings)) + if siblings and sum(siblings) > 1000 : + to_remove = False + self.debug("Allowing %s" % describe(el)) +@@ -467,7 +467,7 @@ class Document: + if to_remove: + self.debug("Cleaned %6.3f %s with weight %s cause it has %s." % + (content_score, describe(el), weight, reason)) +- # print tounicode(el) ++ # print(tounicode(el)) + # self.debug("pname %s pweight %.3f" %(pname, pweight)) + el.drop_tree() + +diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py +index 8ff793b5d4..075eaaa36b 100644 +--- a/src/calibre/ebooks/rtf/input.py ++++ b/src/calibre/ebooks/rtf/input.py +@@ -1,4 +1,5 @@ +-from __future__ import with_statement ++from __future__ import absolute_import, division, print_function, unicode_literals ++ + __license__ = 'GPL v3' + __copyright__ = '2008, Kovid Goyal ' + +@@ -36,5 +37,3 @@ class InlineClass(etree.XSLTExtension): + classes.append('col%d'%self.colors.index(fc)) + + output_parent.text = ' '.join(classes) +- +- +diff --git a/src/calibre/ebooks/rtf/preprocess.py b/src/calibre/ebooks/rtf/preprocess.py +index aae0a1502d..04d3c1124b 100644 +--- a/src/calibre/ebooks/rtf/preprocess.py ++++ b/src/calibre/ebooks/rtf/preprocess.py +@@ -1,7 +1,6 @@ + #!/usr/bin/env python2 + # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +-from __future__ import with_statement +-from __future__ import print_function ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL v3' + __copyright__ = '2010, Gerendi Sandor Attila' +diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py +index 7f6cc91c50..5944c4f228 100644 +--- a/src/calibre/ebooks/rtf/rtfml.py ++++ b/src/calibre/ebooks/rtf/rtfml.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2009, John Schember ' +@@ -83,11 +84,12 @@ def txt2rtf(text): + for x in text: + val = ord(x) + if val == 160: +- buf.write(ur'\~') ++ buf.write(r'\~') + elif val <= 127: + buf.write(unicode_type(x)) + else: +- c = unicode_type(r'\u{0:d}?'.format(val)) ++ # python2 and ur'\u' does not work ++ c = unicode_type('\\u{0:d}?'.format(val)) + buf.write(c) + return buf.getvalue() + +@@ -120,7 +122,7 @@ class RTFMLizer(object): + self.log.debug('Converting %s to RTF markup...' % item.href) + # Removing comments is needed as comments with -- inside them can + # cause fromstring() to fail +- content = re.sub(u'', u'', etree.tostring(item.data, encoding='unicode'), flags=re.DOTALL) ++ content = re.sub('', '', etree.tostring(item.data, encoding='unicode'), flags=re.DOTALL) + content = self.remove_newlines(content) + content = self.remove_tabs(content) + content = etree.fromstring(content) +@@ -149,7 +151,7 @@ class RTFMLizer(object): + return text + + def header(self): +- header = u'{\\rtf1{\\info{\\title %s}{\\author %s}}\\ansi\\ansicpg1252\\deff0\\deflang1033\n' % ( ++ header = '{\\rtf1{\\info{\\title %s}{\\author %s}}\\ansi\\ansicpg1252\\deff0\\deflang1033\n' % ( + self.oeb_book.metadata.title[0].value, authors_to_string([x.value for x in self.oeb_book.metadata.creator])) + return header + ( + '{\\fonttbl{\\f0\\froman\\fprq2\\fcharset128 Times New Roman;}{\\f1\\froman\\fprq2\\fcharset128 Times New Roman;}{\\f2\\fswiss\\fprq2\\fcharset128 Arial;}{\\f3\\fnil\\fprq2\\fcharset128 Arial;}{\\f4\\fnil\\fprq2\\fcharset128 MS Mincho;}{\\f5\\fnil\\fprq2\\fcharset128 Tahoma;}{\\f6\\fnil\\fprq0\\fcharset128 Tahoma;}}\n' # noqa +@@ -215,7 +217,7 @@ class RTFMLizer(object): + text = re.sub(r'(\{\\line \}\s*){3,}', r'{\\line }{\\line }', text) + + # Remove non-breaking spaces +- text = text.replace(u'\xa0', ' ') ++ text = text.replace('\xa0', ' ') + text = text.replace('\n\r', '\n') + + return text +@@ -230,16 +232,16 @@ class RTFMLizer(object): + if p is not None and isinstance(p.tag, string_or_bytes) and namespace(p.tag) == XHTML_NS \ + and elem.tail: + return elem.tail +- return u'' ++ return '' + +- text = u'' ++ text = '' + style = stylizer.style(elem) + + if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \ + or style['visibility'] == 'hidden': + if hasattr(elem, 'tail') and elem.tail: + return elem.tail +- return u'' ++ return '' + + tag = barename(elem.tag) + tag_count = 0 +@@ -292,9 +294,9 @@ class RTFMLizer(object): + end_tag = tag_stack.pop() + if end_tag != 'block': + if tag in BLOCK_TAGS: +- text += ur'\par\pard\plain\hyphpar}' ++ text += r'\par\pard\plain\hyphpar}' + else: +- text += u'}' ++ text += '}' + + if hasattr(elem, 'tail') and elem.tail: + if 'block' in tag_stack: +diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py +index 01a26ba85c..164e706a03 100644 +--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py ++++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py +@@ -1,4 +1,4 @@ +-from __future__ import unicode_literals, absolute_import, print_function, division ++from __future__ import absolute_import, division, print_function, unicode_literals + ######################################################################### + # # + # # +@@ -26,6 +26,8 @@ from calibre.ebooks.rtf2xml import headings_to_sections, \ + body_styles, preamble_rest, group_styles, \ + inline + from calibre.ebooks.rtf2xml.old_rtf import OldRtf ++from polyglot.builtins import unicode_type ++ + from . import open_for_read, open_for_write + + """ +@@ -248,7 +250,7 @@ class ParseRtf: + enc = encode_obj.get_codepage() + # TODO: to check if cp is a good idea or if I should use a dict to convert + enc = 'cp' + enc +- msg = '%s\nException in token processing' % str(msg) ++ msg = '%s\nException in token processing' % unicode_type(msg) + if check_encoding_obj.check_encoding(self.__file, enc): + file_name = self.__file if isinstance(self.__file, bytes) \ + else self.__file.encode('utf-8') +diff --git a/src/calibre/ebooks/rtf2xml/check_encoding.py b/src/calibre/ebooks/rtf2xml/check_encoding.py +index 37ac305e1f..4f8a04ceda 100644 +--- a/src/calibre/ebooks/rtf2xml/check_encoding.py ++++ b/src/calibre/ebooks/rtf2xml/check_encoding.py +@@ -1,7 +1,9 @@ + #!/usr/bin/env python2 +-from __future__ import unicode_literals, absolute_import, print_function, division ++from __future__ import absolute_import, division, print_function, unicode_literals ++ + import sys + ++from polyglot.builtins import unicode_type + + class CheckEncoding: + +@@ -15,7 +17,7 @@ class CheckEncoding: + try: + char.decode(encoding) + except ValueError as msg: +- sys.stderr.write('line: %s char: %s\n%s\n' % (line_num, char_position, str(msg))) ++ sys.stderr.write('line: %s char: %s\n%s\n' % (line_num, char_position, unicode_type(msg))) + + def check_encoding(self, path, encoding='us-ascii', verbose=True): + line_num = 0 +diff --git a/src/calibre/ebooks/rtf2xml/footnote.py b/src/calibre/ebooks/rtf2xml/footnote.py +index 007febb775..36e7624134 100644 +--- a/src/calibre/ebooks/rtf2xml/footnote.py ++++ b/src/calibre/ebooks/rtf2xml/footnote.py +@@ -1,4 +1,4 @@ +-from __future__ import unicode_literals, absolute_import, print_function, division ++from __future__ import absolute_import, division, print_function, unicode_literals + ######################################################################### + # # + # # +@@ -15,6 +15,8 @@ import os + + from calibre.ebooks.rtf2xml import copy + from calibre.ptempfile import better_mktemp ++from polyglot.builtins import unicode_type ++ + from . import open_for_read, open_for_write + + +@@ -56,7 +58,7 @@ class Footnote: + if self.__first_line: + self.__first_line_func(line) + if self.__token_info == 'cw 3: + sys.stderr.write( + 'Old rtf construction %s (bracket %s, line %s)\n' % ( +- self.__inline_info, str(self.__ob_group), line_num) ++ self.__inline_info, unicode_type(self.__ob_group), line_num) + ) + return True + self.__previous_token = line[6:16] +diff --git a/src/calibre/ebooks/rtf2xml/paragraph_def.py b/src/calibre/ebooks/rtf2xml/paragraph_def.py +index 45c2f136ec..f5aab5df05 100644 +--- a/src/calibre/ebooks/rtf2xml/paragraph_def.py ++++ b/src/calibre/ebooks/rtf2xml/paragraph_def.py +@@ -1,4 +1,4 @@ +-from __future__ import unicode_literals, absolute_import, print_function, division ++from __future__ import absolute_import, division, print_function, unicode_literals + ######################################################################### + # # + # # +@@ -12,8 +12,11 @@ from __future__ import unicode_literals, absolute_import, print_function, divisi + # # + ######################################################################### + import sys, os ++ + from calibre.ebooks.rtf2xml import copy, border_parse + from calibre.ptempfile import better_mktemp ++from polyglot.builtins import unicode_type ++ + from . import open_for_read, open_for_write + + +@@ -621,7 +624,7 @@ if another paragraph_def is found, the state changes to collect_tokens. + num = len(self.__style_num_strings) + new_style = 1 + num = '%04d' % num +- self.__att_val_dict['style-num'] = 's' + str(num) ++ self.__att_val_dict['style-num'] = 's' + unicode_type(num) + if new_style: + self.__write_body_styles() + +diff --git a/src/calibre/ebooks/rtf2xml/pict.py b/src/calibre/ebooks/rtf2xml/pict.py +index 62fa905db5..f3b1f74f18 100644 +--- a/src/calibre/ebooks/rtf2xml/pict.py ++++ b/src/calibre/ebooks/rtf2xml/pict.py +@@ -1,4 +1,4 @@ +-from __future__ import unicode_literals, absolute_import, print_function, division ++from __future__ import absolute_import, division, print_function, unicode_literals + ######################################################################### + # # + # # +@@ -15,6 +15,8 @@ import sys, os + + from calibre.ebooks.rtf2xml import copy + from calibre.ptempfile import better_mktemp ++from polyglot.builtins import unicode_type ++ + from . import open_for_read, open_for_write + + +@@ -76,7 +78,7 @@ class Pict: + try: + os.mkdir(self.__dir_name) + except OSError as msg: +- msg = "%sCouldn't make directory '%s':\n" % (str(msg), self.__dir_name) ++ msg = "%sCouldn't make directory '%s':\n" % (unicode_type(msg), self.__dir_name) + raise self.__bug_handler + else: + if self.__run_level > 1: +diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py +index c7a720f152..e64fba4d8c 100644 +--- a/src/calibre/ebooks/rtf2xml/process_tokens.py ++++ b/src/calibre/ebooks/rtf2xml/process_tokens.py +@@ -1,4 +1,4 @@ +-from __future__ import unicode_literals, absolute_import, print_function, division ++from __future__ import absolute_import, division, print_function, unicode_literals + ######################################################################### + # # + # # +@@ -15,6 +15,8 @@ import os, re + + from calibre.ebooks.rtf2xml import copy, check_brackets + from calibre.ptempfile import better_mktemp ++from polyglot.builtins import unicode_type ++ + from . import open_for_read, open_for_write + + +@@ -693,7 +695,7 @@ class ProcessTokens: + if num[-1] == ';': + num = num[:-1] + third_field = 'en' +- num = str('%X' % int(num)) ++ num = unicode_type('%X' % int(num)) + if len(num) != 2: + num = "0" + num + return 'cw<%s<%s<%s<%s\n' % (pre, token, third_field, num) +@@ -730,7 +732,7 @@ class ProcessTokens: + return 0 + num = '%0.2f' % round(numerator/denominator, 2) + return num +- string_num = str(num) ++ string_num = unicode_type(num) + if string_num[-2:] == ".0": + string_num = string_num[:-2] + return string_num +diff --git a/src/calibre/ebooks/rtf2xml/sections.py b/src/calibre/ebooks/rtf2xml/sections.py +index 3fbd7b3187..fdc0ed4eb6 100644 +--- a/src/calibre/ebooks/rtf2xml/sections.py ++++ b/src/calibre/ebooks/rtf2xml/sections.py +@@ -1,4 +1,4 @@ +-from __future__ import unicode_literals, absolute_import, print_function, division ++from __future__ import absolute_import, division, print_function, unicode_literals + ######################################################################### + # # + # # +@@ -15,6 +15,8 @@ import sys, os + + from calibre.ebooks.rtf2xml import copy + from calibre.ptempfile import better_mktemp ++from polyglot.builtins import unicode_type ++ + from . import open_for_read, open_for_write + + +@@ -274,8 +276,8 @@ class Sections: + my_string += 'mi%s' + 'rtf-native' + '0\n' +- % (str(self.__section_num), str(self.__section_num)) ++ % (unicode_type(self.__section_num), unicode_type(self.__section_num)) + ) + self.__found_first_sec = 1 + elif self.__token_info == 'tx%s' + 'rtf-native' + '0\n' +- % (str(self.__section_num), str(self.__section_num)) ++ % (unicode_type(self.__section_num), unicode_type(self.__section_num)) + ) + self.__write_obj.write( + 'cw%s' % str(num) ++ 'mi%s' % unicode_type(num) + ) + if self.__list_of_sec_values: + keys = self.__list_of_sec_values[0].keys() +@@ -471,7 +473,7 @@ class Sections: + self.__list_of_sec_values = self.__list_of_sec_values[1:] + self.__write_obj.write('0') + self.__write_obj.write('rtf-native') +- self.__write_obj.write('%s' % str(self.__section_num)) ++ self.__write_obj.write('%s' % unicode_type(self.__section_num)) + self.__write_obj.write('\n') + # Look here + +diff --git a/src/calibre/ebooks/rtf2xml/table.py b/src/calibre/ebooks/rtf2xml/table.py +index 4424a4978a..139dea4971 100644 +--- a/src/calibre/ebooks/rtf2xml/table.py ++++ b/src/calibre/ebooks/rtf2xml/table.py +@@ -1,4 +1,4 @@ +-from __future__ import unicode_literals, absolute_import, print_function, division ++from __future__ import absolute_import, division, print_function, unicode_literals + ######################################################################### + # # + # # +@@ -12,8 +12,11 @@ from __future__ import unicode_literals, absolute_import, print_function, divisi + # # + ######################################################################### + import sys, os ++ + from calibre.ebooks.rtf2xml import copy, border_parse + from calibre.ptempfile import better_mktemp ++from polyglot.builtins import unicode_type ++ + from . import open_for_read, open_for_write + + """ +@@ -397,13 +400,13 @@ class Table: + left_position = float(left_position) + width = new_cell_position - self.__last_cell_position - left_position + # width = round(width, 2) +- width = str('%.2f' % width) ++ width = unicode_type('%.2f' % width) + self.__last_cell_position = new_cell_position + widths_exists = self.__row_dict.get('widths') + if widths_exists: +- self.__row_dict['widths'] += ', %s' % str(width) ++ self.__row_dict['widths'] += ', %s' % unicode_type(width) + else: +- self.__row_dict['widths'] = str(width) ++ self.__row_dict['widths'] = unicode_type(width) + self.__cell_list[-1]['width'] = width + self.__cell_list.append({}) + self.__cell_widths.append(width) +diff --git a/src/calibre/ebooks/snb/__init__.py b/src/calibre/ebooks/snb/__init__.py +index 3d97812b9b..25d2014767 100644 +--- a/src/calibre/ebooks/snb/__init__.py ++++ b/src/calibre/ebooks/snb/__init__.py +@@ -1,4 +1,6 @@ + #!/usr/bin/env python2 ++from __future__ import absolute_import, division, print_function, unicode_literals ++ + __license__ = 'GPL v3' + __copyright__ = '2010, Li Fanxi ' + __docformat__ = 'restructuredtext en' +@@ -6,4 +8,3 @@ __docformat__ = 'restructuredtext en' + ''' + Used for snb output + ''' +- +diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py +index 659959e6f0..c93d3d3701 100644 +--- a/src/calibre/ebooks/snb/snbml.py ++++ b/src/calibre/ebooks/snb/snbml.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2010, Li Fanxi ' +@@ -93,19 +94,19 @@ class SNBMLizer(object): + snbcHead = etree.SubElement(snbcTree, "head") + etree.SubElement(snbcHead, "title").text = subtitle + if self.opts and self.opts.snb_hide_chapter_name: +- etree.SubElement(snbcHead, "hidetitle").text = u"true" ++ etree.SubElement(snbcHead, "hidetitle").text = "true" + etree.SubElement(snbcTree, "body") + trees[subitem] = snbcTree +- output.append(u'%s%s\n\n' % (CALIBRE_SNB_BM_TAG, "")) ++ output.append('%s%s\n\n' % (CALIBRE_SNB_BM_TAG, "")) + output += self.dump_text(self.subitems, etree.fromstring(content), stylizer)[0] +- output = self.cleanup_text(u''.join(output)) ++ output = self.cleanup_text(''.join(output)) + + subitem = '' + bodyTree = trees[subitem].find(".//body") + for line in output.splitlines(): + pos = line.find(CALIBRE_SNB_PRE_TAG) + if pos == -1: +- line = line.strip(u' \t\n\r\u3000') ++ line = line.strip(' \t\n\r\u3000') + else: + etree.SubElement(bodyTree, "text").text = \ + etree.CDATA(line[pos+len(CALIBRE_SNB_PRE_TAG):]) +@@ -124,14 +125,14 @@ class SNBMLizer(object): + bodyTree = trees[subitem].find(".//body") + else: + if self.opts and not self.opts.snb_dont_indent_first_line: +- prefix = u'\u3000\u3000' ++ prefix = '\u3000\u3000' + else: +- prefix = u'' ++ prefix = '' + etree.SubElement(bodyTree, "text").text = \ + etree.CDATA(unicode_type(prefix + line)) + if self.opts and self.opts.snb_insert_empty_line: + etree.SubElement(bodyTree, "text").text = \ +- etree.CDATA(u'') ++ etree.CDATA('') + + return trees + +@@ -146,9 +147,9 @@ class SNBMLizer(object): + def cleanup_text(self, text): + self.log.debug('\tClean up text...') + # Replace bad characters. +- text = text.replace(u'\xc2', '') +- text = text.replace(u'\xa0', ' ') +- text = text.replace(u'\xa9', '(C)') ++ text = text.replace('\xc2', '') ++ text = text.replace('\xa0', ' ') ++ text = text.replace('\xa9', '(C)') + + # Replace tabs, vertical tags and form feeds with single space. + text = text.replace('\t+', ' ') +@@ -226,7 +227,7 @@ class SNBMLizer(object): + if elem.attrib.get('id') is not None and elem.attrib['id'] in [href for href, title in subitems]: + if self.curSubItem is not None and self.curSubItem != elem.attrib['id']: + self.curSubItem = elem.attrib['id'] +- text.append(u'\n\n%s%s\n\n' % (CALIBRE_SNB_BM_TAG, self.curSubItem)) ++ text.append('\n\n%s%s\n\n' % (CALIBRE_SNB_BM_TAG, self.curSubItem)) + + if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \ + or style['visibility'] == 'hidden': +@@ -240,18 +241,18 @@ class SNBMLizer(object): + # Are we in a paragraph block? + if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES: + in_block = True +- if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text: +- text.append(u'\n\n') ++ if not end.endswith('\n\n') and hasattr(elem, 'text') and elem.text: ++ text.append('\n\n') + + if tag in SPACE_TAGS: + if not end.endswith('u ') and hasattr(elem, 'text') and elem.text: +- text.append(u' ') ++ text.append(' ') + + if tag == 'img': +- text.append(u'\n\n%s%s\n\n' % (CALIBRE_SNB_IMG_TAG, ProcessFileName(elem.attrib['src']))) ++ text.append('\n\n%s%s\n\n' % (CALIBRE_SNB_IMG_TAG, ProcessFileName(elem.attrib['src']))) + + if tag == 'br': +- text.append(u'\n\n') ++ text.append('\n\n') + + if tag == 'li': + li = '- ' +@@ -260,24 +261,24 @@ class SNBMLizer(object): + # Process tags that contain text. + if hasattr(elem, 'text') and elem.text: + if pre: +- text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG).join((li + elem.text).splitlines())) ++ text.append(('\n\n%s' % CALIBRE_SNB_PRE_TAG).join((li + elem.text).splitlines())) + else: + text.append(li + elem.text) + li = '' + + for item in elem: +- en = u'' ++ en = '' + if len(text) >= 2: + en = text[-1][-2:] + t = self.dump_text(subitems, item, stylizer, en, pre, li)[0] + text += t + + if in_block: +- text.append(u'\n\n') ++ text.append('\n\n') + + if hasattr(elem, 'tail') and elem.tail: + if pre: +- text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG).join(elem.tail.splitlines())) ++ text.append(('\n\n%s' % CALIBRE_SNB_PRE_TAG).join(elem.tail.splitlines())) + else: + text.append(li + elem.tail) + li = '' +diff --git a/src/calibre/ebooks/unihandecode/__init__.py b/src/calibre/ebooks/unihandecode/__init__.py +index 153d8221fe..91238eddaa 100644 +--- a/src/calibre/ebooks/unihandecode/__init__.py ++++ b/src/calibre/ebooks/unihandecode/__init__.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2010, Hiroshi Miura ' +@@ -27,13 +28,13 @@ class Unihandecoder(object): + def __init__(self, lang="zh", encoding='utf-8'): + self.preferred_encoding = encoding + lang = lang.lower() +- if lang[:2] == u'ja': ++ if lang[:2] == 'ja': + from calibre.ebooks.unihandecode.jadecoder import Jadecoder + self.decoder = Jadecoder() +- elif lang[:2] == u'kr' or lang == u'korean': ++ elif lang[:2] == 'kr' or lang == 'korean': + from calibre.ebooks.unihandecode.krdecoder import Krdecoder + self.decoder = Krdecoder() +- elif lang[:2] == u'vn' or lang == u'vietnum': ++ elif lang[:2] == 'vn' or lang == 'vietnum': + from calibre.ebooks.unihandecode.vndecoder import Vndecoder + self.decoder = Vndecoder() + else: # zh and others +diff --git a/src/calibre/ebooks/unihandecode/jacodepoints.py b/src/calibre/ebooks/unihandecode/jacodepoints.py +index 5d8e3db279..89d83d94b6 100644 +--- a/src/calibre/ebooks/unihandecode/jacodepoints.py ++++ b/src/calibre/ebooks/unihandecode/jacodepoints.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2010 Hiroshi Miura ' +@@ -9,7 +10,7 @@ Unicode code point dictionary. + Based on Unicode.org Unihan database. + ''' + +-CODEPOINTS = { ++CODEPOINTS = { + 'x34':[ + 'Qiu ','Tian ','','','Kua ','Wu ','Yin ','','','','','','Si ','','','', + '','','','','','','Ye ','','','','','','Chou ','','','', +diff --git a/src/calibre/ebooks/unihandecode/jadecoder.py b/src/calibre/ebooks/unihandecode/jadecoder.py +index b49c1144c0..d6318c270d 100644 +--- a/src/calibre/ebooks/unihandecode/jadecoder.py ++++ b/src/calibre/ebooks/unihandecode/jadecoder.py +@@ -1,4 +1,6 @@ + # coding:utf-8 ++from __future__ import absolute_import, division, print_function, unicode_literals ++ + __license__ = 'GPL 3' + __copyright__ = '2010, Hiroshi Miura ' + __docformat__ = 'restructuredtext en' +@@ -39,4 +41,3 @@ class Jadecoder(Unidecoder): + return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),result) + except: + return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),text) +- +diff --git a/src/calibre/ebooks/unihandecode/krcodepoints.py b/src/calibre/ebooks/unihandecode/krcodepoints.py +index cdb711afdd..bca8e3f4b4 100644 +--- a/src/calibre/ebooks/unihandecode/krcodepoints.py ++++ b/src/calibre/ebooks/unihandecode/krcodepoints.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2010 Hiroshi Miura ' +@@ -9,7 +10,7 @@ Unicode code point dictionary. + Based on Unicode.org Unihan database. + ''' + +-CODEPOINTS = { ++CODEPOINTS = { + 'x34':[ + 'Qiu ','Tian ','','','Kua ','Wu ','Yin ','','','','','','Si ','','','', + '','','','','','','Ye ','','','','','','Chou ','','','', +diff --git a/src/calibre/ebooks/unihandecode/krdecoder.py b/src/calibre/ebooks/unihandecode/krdecoder.py +index af5b3b39e8..914f6f41c6 100644 +--- a/src/calibre/ebooks/unihandecode/krdecoder.py ++++ b/src/calibre/ebooks/unihandecode/krdecoder.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2010, Hiroshi Miura ' +@@ -22,4 +23,3 @@ class Krdecoder(Unidecoder): + def __init__(self): + self.codepoints = CODEPOINTS + self.codepoints.update(HANCODES) +- +diff --git a/src/calibre/ebooks/unihandecode/pykakasi/__init__.py b/src/calibre/ebooks/unihandecode/pykakasi/__init__.py +index c92e541a82..4995491752 100644 +--- a/src/calibre/ebooks/unihandecode/pykakasi/__init__.py ++++ b/src/calibre/ebooks/unihandecode/pykakasi/__init__.py +@@ -1,5 +1,6 @@ ++from __future__ import absolute_import, division, print_function, unicode_literals ++ + from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi + kakasi + + __all__ = ["pykakasi"] +- +diff --git a/src/calibre/ebooks/unihandecode/pykakasi/h2a.py b/src/calibre/ebooks/unihandecode/pykakasi/h2a.py +index 409a110093..ebbf06fdf3 100644 +--- a/src/calibre/ebooks/unihandecode/pykakasi/h2a.py ++++ b/src/calibre/ebooks/unihandecode/pykakasi/h2a.py +@@ -20,6 +20,7 @@ + # * GNU General Public License for more details. + # * + # */ ++from __future__ import absolute_import, division, print_function, unicode_literals + + from polyglot.builtins import range + +@@ -27,137 +28,137 @@ from polyglot.builtins import range + class H2a (object): + + H2a_table = { +- u"\u3041":"a", u"\u3042":"a", +- u"\u3043":"i", u"\u3044":"i", +- u"\u3045":"u", u"\u3046":"u", +- u"\u3046\u309b":"vu", u"\u3046\u309b\u3041":"va", +- u"\u3046\u309b\u3043":"vi", u"\u3046\u309b\u3047":"ve", +- u"\u3046\u309b\u3049":"vo", +- u"\u3047":"e", u"\u3048":"e", +- u"\u3049":"o", u"\u304a":"o", ++ "\u3041":"a", "\u3042":"a", ++ "\u3043":"i", "\u3044":"i", ++ "\u3045":"u", "\u3046":"u", ++ "\u3046\u309b":"vu", "\u3046\u309b\u3041":"va", ++ "\u3046\u309b\u3043":"vi", "\u3046\u309b\u3047":"ve", ++ "\u3046\u309b\u3049":"vo", ++ "\u3047":"e", "\u3048":"e", ++ "\u3049":"o", "\u304a":"o", + +- u"\u304b":"ka", u"\u304c":"ga", +- u"\u304d":"ki", u"\u304d\u3041":"kya", +- u"\u304d\u3045":"kyu", u"\u304d\u3049":"kyo", +- u"\u304e":"gi", u"\u3050\u3083":"gya", +- u"\u304e\u3045":"gyu", u"\u304e\u3087":"gyo", +- u"\u304f":"ku", u"\u3050":"gu", +- u"\u3051":"ke", u"\u3052":"ge", +- u"\u3053":"ko", u"\u3054":"go", ++ "\u304b":"ka", "\u304c":"ga", ++ "\u304d":"ki", "\u304d\u3041":"kya", ++ "\u304d\u3045":"kyu", "\u304d\u3049":"kyo", ++ "\u304e":"gi", "\u3050\u3083":"gya", ++ "\u304e\u3045":"gyu", "\u304e\u3087":"gyo", ++ "\u304f":"ku", "\u3050":"gu", ++ "\u3051":"ke", "\u3052":"ge", ++ "\u3053":"ko", "\u3054":"go", + +- u"\u3055":"sa", u"\u3056":"za", +- u"\u3057":"shi", u"\u3057\u3083":"sha", +- u"\u3057\u3085":"shu", u"\u3057\u3087":"sho", +- u"\u3058":"ji", u"\u3058\u3083":"ja", +- u"\u3058\u3085":"ju", u"\u3058\u3087":"jo", +- u"\u3059":"su", u"\u305a":"zu", +- u"\u305b":"se", u"\u305c":"ze", +- u"\u305d":"so", u"\u305e":"zo", ++ "\u3055":"sa", "\u3056":"za", ++ "\u3057":"shi", "\u3057\u3083":"sha", ++ "\u3057\u3085":"shu", "\u3057\u3087":"sho", ++ "\u3058":"ji", "\u3058\u3083":"ja", ++ "\u3058\u3085":"ju", "\u3058\u3087":"jo", ++ "\u3059":"su", "\u305a":"zu", ++ "\u305b":"se", "\u305c":"ze", ++ "\u305d":"so", "\u305e":"zo", + +- u"\u305f":"ta", u"\u3060":"da", +- u"\u3061":"chi", u"\u3061\u3047":"che", u"\u3061\u3083":"cha", +- u"\u3061\u3085":"chu", u"\u3061\u3087":"cho", +- u"\u3062":"ji", u"\u3062\u3083":"ja", +- u"\u3062\u3085":"ju", u"\u3062\u3087":"jo", ++ "\u305f":"ta", "\u3060":"da", ++ "\u3061":"chi", "\u3061\u3047":"che", "\u3061\u3083":"cha", ++ "\u3061\u3085":"chu", "\u3061\u3087":"cho", ++ "\u3062":"ji", "\u3062\u3083":"ja", ++ "\u3062\u3085":"ju", "\u3062\u3087":"jo", + +- u"\u3063":"tsu", +- u"\u3063\u3046\u309b":"vvu", +- u"\u3063\u3046\u309b\u3041":"vva", +- u"\u3063\u3046\u309b\u3043":"vvi", +- u"\u3063\u3046\u309b\u3047":"vve", +- u"\u3063\u3046\u309b\u3049":"vvo", +- u"\u3063\u304b":"kka", u"\u3063\u304c":"gga", +- u"\u3063\u304d":"kki", u"\u3063\u304d\u3083":"kkya", +- u"\u3063\u304d\u3085":"kkyu", u"\u3063\u304d\u3087":"kkyo", +- u"\u3063\u304e":"ggi", u"\u3063\u304e\u3083":"ggya", +- u"\u3063\u304e\u3085":"ggyu", u"\u3063\u304e\u3087":"ggyo", +- u"\u3063\u304f":"kku", u"\u3063\u3050":"ggu", +- u"\u3063\u3051":"kke", u"\u3063\u3052":"gge", +- u"\u3063\u3053":"kko", u"\u3063\u3054":"ggo", +- u"\u3063\u3055":"ssa", u"\u3063\u3056":"zza", +- u"\u3063\u3057":"sshi", u"\u3063\u3057\u3083":"ssha", +- u"\u3063\u3057\u3085":"sshu", u"\u3063\u3057\u3087":"ssho", +- u"\u3063\u3058":"jji", u"\u3063\u3058\u3083":"jja", +- u"\u3063\u3058\u3085":"jju", u"\u3063\u3058\u3087":"jjo", +- u"\u3063\u3059":"ssu", u"\u3063\u305a":"zzu", +- u"\u3063\u305b":"sse", u"\u3063\u305e":"zze", +- u"\u3063\u305d":"sso", u"\u3063\u305c":"zzo", +- u"\u3063\u305f":"tta", u"\u3063\u3060":"dda", +- u"\u3063\u3061":"tchi", u"\u3063\u3061\u3083":"tcha", +- u"\u3063\u3061\u3085":"tchu", u"\u3063\u3061\u3087":"tcho", +- u"\u3063\u3062":"jji", u"\u3063\u3062\u3083":"jjya", +- u"\u3063\u3062\u3085":"jjyu", u"\u3063\u3062\u3087":"jjyo", +- u"\u3063\u3064":"ttsu", u"\u3063\u3065":"zzu", +- u"\u3063\u3066":"tte", u"\u3063\u3067":"dde", +- u"\u3063\u3068":"tto", u"\u3063\u3069":"ddo", +- u"\u3063\u306f":"hha", u"\u3063\u3070":"bba", +- u"\u3063\u3071":"ppa", +- u"\u3063\u3072":"hhi", u"\u3063\u3072\u3083":"hhya", +- u"\u3063\u3072\u3085":"hhyu", u"\u3063\u3072\u3087":"hhyo", +- u"\u3063\u3073":"bbi", u"\u3063\u3073\u3083":"bbya", +- u"\u3063\u3073\u3085":"bbyu", u"\u3063\u3073\u3087":"bbyo", +- u"\u3063\u3074":"ppi", u"\u3063\u3074\u3083":"ppya", +- u"\u3063\u3074\u3085":"ppyu", u"\u3063\u3074\u3087":"ppyo", +- u"\u3063\u3075":"ffu", u"\u3063\u3075\u3041":"ffa", +- u"\u3063\u3075\u3043":"ffi", u"\u3063\u3075\u3047":"ffe", +- u"\u3063\u3075\u3049":"ffo", +- u"\u3063\u3076":"bbu", u"\u3063\u3077":"ppu", +- u"\u3063\u3078":"hhe", u"\u3063\u3079":"bbe", +- u"\u3063\u307a":"ppe", +- u"\u3063\u307b":"hho", u"\u3063\u307c":"bbo", +- u"\u3063\u307d":"ppo", +- u"\u3063\u3084":"yya", u"\u3063\u3086":"yyu", +- u"\u3063\u3088":"yyo", +- u"\u3063\u3089":"rra", u"\u3063\u308a":"rri", +- u"\u3063\u308a\u3083":"rrya", u"\u3063\u308a\u3085":"rryu", +- u"\u3063\u308a\u3087":"rryo", +- u"\u3063\u308b":"rru", u"\u3063\u308c":"rre", +- u"\u3063\u308d":"rro", ++ "\u3063":"tsu", ++ "\u3063\u3046\u309b":"vvu", ++ "\u3063\u3046\u309b\u3041":"vva", ++ "\u3063\u3046\u309b\u3043":"vvi", ++ "\u3063\u3046\u309b\u3047":"vve", ++ "\u3063\u3046\u309b\u3049":"vvo", ++ "\u3063\u304b":"kka", "\u3063\u304c":"gga", ++ "\u3063\u304d":"kki", "\u3063\u304d\u3083":"kkya", ++ "\u3063\u304d\u3085":"kkyu", "\u3063\u304d\u3087":"kkyo", ++ "\u3063\u304e":"ggi", "\u3063\u304e\u3083":"ggya", ++ "\u3063\u304e\u3085":"ggyu", "\u3063\u304e\u3087":"ggyo", ++ "\u3063\u304f":"kku", "\u3063\u3050":"ggu", ++ "\u3063\u3051":"kke", "\u3063\u3052":"gge", ++ "\u3063\u3053":"kko", "\u3063\u3054":"ggo", ++ "\u3063\u3055":"ssa", "\u3063\u3056":"zza", ++ "\u3063\u3057":"sshi", "\u3063\u3057\u3083":"ssha", ++ "\u3063\u3057\u3085":"sshu", "\u3063\u3057\u3087":"ssho", ++ "\u3063\u3058":"jji", "\u3063\u3058\u3083":"jja", ++ "\u3063\u3058\u3085":"jju", "\u3063\u3058\u3087":"jjo", ++ "\u3063\u3059":"ssu", "\u3063\u305a":"zzu", ++ "\u3063\u305b":"sse", "\u3063\u305e":"zze", ++ "\u3063\u305d":"sso", "\u3063\u305c":"zzo", ++ "\u3063\u305f":"tta", "\u3063\u3060":"dda", ++ "\u3063\u3061":"tchi", "\u3063\u3061\u3083":"tcha", ++ "\u3063\u3061\u3085":"tchu", "\u3063\u3061\u3087":"tcho", ++ "\u3063\u3062":"jji", "\u3063\u3062\u3083":"jjya", ++ "\u3063\u3062\u3085":"jjyu", "\u3063\u3062\u3087":"jjyo", ++ "\u3063\u3064":"ttsu", "\u3063\u3065":"zzu", ++ "\u3063\u3066":"tte", "\u3063\u3067":"dde", ++ "\u3063\u3068":"tto", "\u3063\u3069":"ddo", ++ "\u3063\u306f":"hha", "\u3063\u3070":"bba", ++ "\u3063\u3071":"ppa", ++ "\u3063\u3072":"hhi", "\u3063\u3072\u3083":"hhya", ++ "\u3063\u3072\u3085":"hhyu", "\u3063\u3072\u3087":"hhyo", ++ "\u3063\u3073":"bbi", "\u3063\u3073\u3083":"bbya", ++ "\u3063\u3073\u3085":"bbyu", "\u3063\u3073\u3087":"bbyo", ++ "\u3063\u3074":"ppi", "\u3063\u3074\u3083":"ppya", ++ "\u3063\u3074\u3085":"ppyu", "\u3063\u3074\u3087":"ppyo", ++ "\u3063\u3075":"ffu", "\u3063\u3075\u3041":"ffa", ++ "\u3063\u3075\u3043":"ffi", "\u3063\u3075\u3047":"ffe", ++ "\u3063\u3075\u3049":"ffo", ++ "\u3063\u3076":"bbu", "\u3063\u3077":"ppu", ++ "\u3063\u3078":"hhe", "\u3063\u3079":"bbe", ++ "\u3063\u307a":"ppe", ++ "\u3063\u307b":"hho", "\u3063\u307c":"bbo", ++ "\u3063\u307d":"ppo", ++ "\u3063\u3084":"yya", "\u3063\u3086":"yyu", ++ "\u3063\u3088":"yyo", ++ "\u3063\u3089":"rra", "\u3063\u308a":"rri", ++ "\u3063\u308a\u3083":"rrya", "\u3063\u308a\u3085":"rryu", ++ "\u3063\u308a\u3087":"rryo", ++ "\u3063\u308b":"rru", "\u3063\u308c":"rre", ++ "\u3063\u308d":"rro", + +- u"\u3064":"tsu", u"\u3065":"zu", +- u"\u3066":"te", u"\u3067":"de", u"\u3067\u3043":"di", +- u"\u3068":"to", u"\u3069":"do", ++ "\u3064":"tsu", "\u3065":"zu", ++ "\u3066":"te", "\u3067":"de", "\u3067\u3043":"di", ++ "\u3068":"to", "\u3069":"do", + +- u"\u306a":"na", +- u"\u306b":"ni", u"\u306b\u3083":"nya", +- u"\u306b\u3085":"nyu", u"\u306b\u3087":"nyo", +- u"\u306c":"nu", u"\u306d":"ne", u"\u306e":"no", ++ "\u306a":"na", ++ "\u306b":"ni", "\u306b\u3083":"nya", ++ "\u306b\u3085":"nyu", "\u306b\u3087":"nyo", ++ "\u306c":"nu", "\u306d":"ne", "\u306e":"no", + +- u"\u306f":"ha", u"\u3070":"ba", u"\u3071":"pa", +- u"\u3072":"hi", u"\u3072\u3083":"hya", +- u"\u3072\u3085":"hyu", u"\u3072\u3087":"hyo", +- u"\u3073":"bi", u"\u3073\u3083":"bya", +- u"\u3073\u3085":"byu", u"\u3073\u3087":"byo", +- u"\u3074":"pi", u"\u3074\u3083":"pya", +- u"\u3074\u3085":"pyu", u"\u3074\u3087":"pyo", +- u"\u3075":"fu", u"\u3075\u3041":"fa", +- u"\u3075\u3043":"fi", u"\u3075\u3047":"fe", +- u"\u3075\u3049":"fo", +- u"\u3076":"bu", u"\u3077":"pu", +- u"\u3078":"he", u"\u3079":"be", u"\u307a":"pe", +- u"\u307b":"ho", u"\u307c":"bo", u"\u307d":"po", ++ "\u306f":"ha", "\u3070":"ba", "\u3071":"pa", ++ "\u3072":"hi", "\u3072\u3083":"hya", ++ "\u3072\u3085":"hyu", "\u3072\u3087":"hyo", ++ "\u3073":"bi", "\u3073\u3083":"bya", ++ "\u3073\u3085":"byu", "\u3073\u3087":"byo", ++ "\u3074":"pi", "\u3074\u3083":"pya", ++ "\u3074\u3085":"pyu", "\u3074\u3087":"pyo", ++ "\u3075":"fu", "\u3075\u3041":"fa", ++ "\u3075\u3043":"fi", "\u3075\u3047":"fe", ++ "\u3075\u3049":"fo", ++ "\u3076":"bu", "\u3077":"pu", ++ "\u3078":"he", "\u3079":"be", "\u307a":"pe", ++ "\u307b":"ho", "\u307c":"bo", "\u307d":"po", + +- u"\u307e":"ma", +- u"\u307f":"mi", u"\u307f\u3083":"mya", +- u"\u307f\u3085":"myu", u"\u307f\u3087":"myo", +- u"\u3080":"mu", u"\u3081":"me", u"\u3082":"mo", ++ "\u307e":"ma", ++ "\u307f":"mi", "\u307f\u3083":"mya", ++ "\u307f\u3085":"myu", "\u307f\u3087":"myo", ++ "\u3080":"mu", "\u3081":"me", "\u3082":"mo", + +- u"\u3083":"ya", u"\u3084":"ya", +- u"\u3085":"yu", u"\u3086":"yu", +- u"\u3087":"yo", u"\u3088":"yo", ++ "\u3083":"ya", "\u3084":"ya", ++ "\u3085":"yu", "\u3086":"yu", ++ "\u3087":"yo", "\u3088":"yo", + +- u"\u3089":"ra", +- u"\u308a":"ri", u"\u308a\u3083":"rya", +- u"\u308a\u3085":"ryu", u"\u308a\u3087":"ryo", +- u"\u308b":"ru", u"\u308c":"re", u"\u308d":"ro", ++ "\u3089":"ra", ++ "\u308a":"ri", "\u308a\u3083":"rya", ++ "\u308a\u3085":"ryu", "\u308a\u3087":"ryo", ++ "\u308b":"ru", "\u308c":"re", "\u308d":"ro", + +- u"\u308e":"wa", u"\u308f":"wa", +- u"\u3090":"i", u"\u3091":"e", +- u"\u3092":"wo", u"\u3093":"n", ++ "\u308e":"wa", "\u308f":"wa", ++ "\u3090":"i", "\u3091":"e", ++ "\u3092":"wo", "\u3093":"n", + +- u"\u3093\u3042":"n'a", u"\u3093\u3044":"n'i", +- u"\u3093\u3046":"n'u", u"\u3093\u3048":"n'e", +- u"\u3093\u304a":"n'o", ++ "\u3093\u3042":"n'a", "\u3093\u3044":"n'i", ++ "\u3093\u3046":"n'u", "\u3093\u3048":"n'e", ++ "\u3093\u304a":"n'o", + } + + # this class is Borg +diff --git a/src/calibre/ebooks/unihandecode/pykakasi/j2h.py b/src/calibre/ebooks/unihandecode/pykakasi/j2h.py +index 79cd2c29ac..3a9d836df2 100644 +--- a/src/calibre/ebooks/unihandecode/pykakasi/j2h.py ++++ b/src/calibre/ebooks/unihandecode/pykakasi/j2h.py +@@ -20,6 +20,7 @@ + # * GNU General Public License for more details. + # * + # */ ++from __future__ import absolute_import, division, print_function, unicode_literals + + import re + +@@ -48,7 +49,7 @@ class J2H (object): + return (0x3400 <= ord(c) and ord(c) < 0xfa2e) + + def isCletter(self, l, c): +- if (ord(u"ぁ") <= ord(c) and ord(c) <= 0x309f) and (l in self.cl_table[ord(c) - ord(u"ぁ")-1]): ++ if (ord("ぁ") <= ord(c) and ord(c) <= 0x309f) and (l in self.cl_table[ord(c) - ord("ぁ")-1]): + return True + return False + +diff --git a/src/calibre/ebooks/unihandecode/pykakasi/jisyo.py b/src/calibre/ebooks/unihandecode/pykakasi/jisyo.py +index 6ff55ad90d..2c1c95f32a 100644 +--- a/src/calibre/ebooks/unihandecode/pykakasi/jisyo.py ++++ b/src/calibre/ebooks/unihandecode/pykakasi/jisyo.py +@@ -2,6 +2,8 @@ + # jisyo.py + # + # Copyright 2011 Hiroshi Miura ++from __future__ import absolute_import, division, print_function, unicode_literals ++ + from zlib import decompress + + from polyglot.builtins import unicode_type +diff --git a/src/calibre/ebooks/unihandecode/pykakasi/k2a.py b/src/calibre/ebooks/unihandecode/pykakasi/k2a.py +index 7650199128..fb5b5da1ac 100644 +--- a/src/calibre/ebooks/unihandecode/pykakasi/k2a.py ++++ b/src/calibre/ebooks/unihandecode/pykakasi/k2a.py +@@ -20,6 +20,7 @@ + # * GNU General Public License for more details. + # * + # */ ++from __future__ import absolute_import, division, print_function, unicode_literals + + from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo + from polyglot.builtins import range +diff --git a/src/calibre/ebooks/unihandecode/pykakasi/kakasi.py b/src/calibre/ebooks/unihandecode/pykakasi/kakasi.py +index 39b84b545a..47a8c026f8 100644 +--- a/src/calibre/ebooks/unihandecode/pykakasi/kakasi.py ++++ b/src/calibre/ebooks/unihandecode/pykakasi/kakasi.py +@@ -20,6 +20,7 @@ + # * GNU General Public License for more details. + # * + # */ ++from __future__ import absolute_import, division, print_function, unicode_literals + + from calibre.ebooks.unihandecode.pykakasi.j2h import J2H + from calibre.ebooks.unihandecode.pykakasi.h2a import H2a +@@ -94,4 +95,3 @@ class kakasi(object): + i += 1 + + return otext +- +diff --git a/src/calibre/ebooks/unihandecode/unicodepoints.py b/src/calibre/ebooks/unihandecode/unicodepoints.py +index d9130123fb..13eeed2e03 100644 +--- a/src/calibre/ebooks/unihandecode/unicodepoints.py ++++ b/src/calibre/ebooks/unihandecode/unicodepoints.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2009, John Schember ' +diff --git a/src/calibre/ebooks/unihandecode/unidecoder.py b/src/calibre/ebooks/unihandecode/unidecoder.py +index 748fcb64f1..e66a11da2f 100644 +--- a/src/calibre/ebooks/unihandecode/unidecoder.py ++++ b/src/calibre/ebooks/unihandecode/unidecoder.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2010, Hiroshi Miura ' +diff --git a/src/calibre/ebooks/unihandecode/vncodepoints.py b/src/calibre/ebooks/unihandecode/vncodepoints.py +index 7e10fde1c8..2a74bb157b 100644 +--- a/src/calibre/ebooks/unihandecode/vncodepoints.py ++++ b/src/calibre/ebooks/unihandecode/vncodepoints.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2010 Hiroshi Miura ' +@@ -9,7 +10,7 @@ Unicode code point dictionary. + Based on Unicode.org Unihan database. + ''' + +-CODEPOINTS = { ++CODEPOINTS = { + 'x34':[ + 'Qiu ','Tian ','','','Kua ','Wu ','Yin ','','','','','','Si ','','','', + '','','','','','','Ye ','','','','','','Chou ','','','', +diff --git a/src/calibre/ebooks/unihandecode/vndecoder.py b/src/calibre/ebooks/unihandecode/vndecoder.py +index 76d926d7b7..60ee7fbdf9 100644 +--- a/src/calibre/ebooks/unihandecode/vndecoder.py ++++ b/src/calibre/ebooks/unihandecode/vndecoder.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2010, Hiroshi Miura ' +@@ -21,4 +22,3 @@ class Vndecoder(Unidecoder): + def __init__(self): + self.codepoints = CODEPOINTS + self.codepoints.update(HANCODES) +- +diff --git a/src/calibre/ebooks/unihandecode/zhcodepoints.py b/src/calibre/ebooks/unihandecode/zhcodepoints.py +index 1715ecb46e..57e4de3326 100644 +--- a/src/calibre/ebooks/unihandecode/zhcodepoints.py ++++ b/src/calibre/ebooks/unihandecode/zhcodepoints.py +@@ -1,4 +1,5 @@ + # -*- coding: utf-8 -*- ++from __future__ import absolute_import, division, print_function, unicode_literals + + __license__ = 'GPL 3' + __copyright__ = '2010 Hiroshi Miura ' +@@ -9,7 +10,7 @@ Unicode code point dictionary. + Based on Unicode.org Unihan database. + ''' + +-CODEPOINTS = { ++CODEPOINTS = { + 'x34':[ + 'Qiu ','Tian ','','','Kua ','Wu ','Yin ','','','','','','Si ','','','', + '','','','','','','Ye ','','','','','','Chou ','','','', +diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py +index 031cbac7b4..357ea55c65 100644 +--- a/src/calibre/gui2/book_details.py ++++ b/src/calibre/gui2/book_details.py +@@ -1,6 +1,7 @@ + #!/usr/bin/env python2 + # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + # License: GPLv3 Copyright: 2010, Kovid Goyal ++from __future__ import absolute_import, division, print_function, unicode_literals + + import os + import re +@@ -59,7 +60,7 @@ def css(): + val = P('templates/book_details.css', data=True).decode('utf-8') + col = QApplication.instance().palette().color(QPalette.Link).name() + val = val.replace('LINK_COLOR', col) +- _css = re.sub(unicode_type(r'/\*.*?\*/'), u'', val, flags=re.DOTALL) ++ _css = re.sub(unicode_type(r'/\*.*?\*/'), '', val, flags=re.DOTALL) + return _css + + +@@ -127,7 +128,7 @@ def render_html(mi, css, vertical, widget, all_fields=False, render_data_func=No + + c = color_to_string(QApplication.palette().color(QPalette.Normal, + QPalette.WindowText)) +- templ = u'''\ ++ templ = '''\ + + +