sharkcz / rpms / calibre

Forked from rpms/calibre 4 years ago
Clone
Blob Blame History Raw
From 99a673a711234dfd098316c85d5ec384f35ed7c0 Mon Sep 17 00:00:00 2001
From: a10kiloham <github@robk.com>
Date: Tue, 6 Aug 2019 15:51:38 +0100
Subject: [PATCH 36/71] Update login mechanism for Times Online

Fixes #1025 (Update login mechanism)
Fixes #1026 (Fix login mechanism)
---
 recipes/sunday_times_magazine.recipe | 47 ++++++++++++++++++----------
 recipes/times_online.recipe          | 43 ++++++++++++++++---------
 2 files changed, 59 insertions(+), 31 deletions(-)

diff --git a/recipes/sunday_times_magazine.recipe b/recipes/sunday_times_magazine.recipe
index b7bebff615..f59dd15422 100644
--- a/recipes/sunday_times_magazine.recipe
+++ b/recipes/sunday_times_magazine.recipe
@@ -1,13 +1,13 @@
 __license__ = 'GPL v3'
-__copyright__ = '2010-2013, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2019'
 '''
 www.thetimes.co.uk/magazine/the-sunday-times-magazine/
 '''
+
+from mechanize import Request
+
+from calibre import random_user_agent
 from calibre.web.feeds.news import BasicNewsRecipe
-try:
-    from urllib.parse import urlencode
-except ImportError:
-    from urllib import urlencode
 
 
 def classes(classes):
@@ -31,8 +31,9 @@ class TimesOnline(BasicNewsRecipe):
     delay = 1
     needs_subscription = True
     publication_type = 'newspaper'
-    INDEX = 'http://www.thetimes.co.uk/'
-    PREFIX = u'http://www.thetimes.co.uk/'
+    INDEX = 'https://www.thetimes.co.uk'
+    LOGIN = 'https://login.thetimes.co.uk/'
+    PREFIX = u'https://www.thetimes.co.uk'
     extra_css = """
         .author-name,.authorName{font-style: italic}
         .published-date,.multi-position-photo-text{
@@ -48,16 +49,30 @@ class TimesOnline(BasicNewsRecipe):
         'publisher': publisher,
         'language': language}
 
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-        br.open('http://www.thetimes.co.uk/')
-        if self.username is not None and self.password is not None:
-            data = urlencode({
-                'gotoUrl': self.INDEX,
-                'username': self.username,
-                'password': self.password})
-            br.open('https://login.thetimes.co.uk/', data)
+    def get_browser(self, *a, **kw):
+        start_url = self.INDEX
+        kw['user_agent'] = random_user_agent(allow_ie=False)
+        br = BasicNewsRecipe.get_browser(self, *a, **kw)
+        self.log('Starting login process...')
+        res = br.open(start_url)
+        sso_url = res.geturl()
+        self.log(sso_url)
+        request_query = {
+            'username': self.username,
+            'password': self.password,
+            's': 1,
+            'gotoUrl': self.INDEX,
+        }
+        rq = Request(self.LOGIN, headers={
+            'Accept': 'text/html',
+            'Accept-Language': 'en-US,en;q=0.8',
+            'X-HTTP-Method-Override': 'POST',
+            'X-Requested-With': 'XMLHttpRequest',
+        }, data=request_query)
+        self.log('Sending login request...')
+        res = br.open(rq)
         return br
+    # }}}
 
     def get_cover_url(self):
         from datetime import date
diff --git a/recipes/times_online.recipe b/recipes/times_online.recipe
index 76bf09d467..ad3a0ce576 100644
--- a/recipes/times_online.recipe
+++ b/recipes/times_online.recipe
@@ -1,15 +1,14 @@
 __license__ = 'GPL v3'
-__copyright__ = '2010-2017, Bobby Steel <bob at xdca.com>, Darko Miletic'
+__copyright__ = '2010-2019, Bobby Steel <bob at xdca.com>, Darko Miletic'
 '''
 www.thetimes.co.uk
 '''
+from mechanize import Request
+from calibre import random_user_agent
+from calibre.web.feeds.news import BasicNewsRecipe
+
 import html5lib
-try:
-    from urllib.parse import urlencode
-except ImportError:
-    from urllib import urlencode
 from lxml import html
-from calibre.web.feeds.news import BasicNewsRecipe
 
 
 def classes(classes):
@@ -35,6 +34,7 @@ class TimesOnline(BasicNewsRecipe):
     needs_subscription = True
     publication_type = 'newspaper'
     INDEX = 'http://www.thetimes.co.uk/'
+    LOGIN = 'https://login.thetimes.co.uk/'
     PREFIX = u'http://www.thetimes.co.uk'
     extra_css = """
         .author-name,.authorName{font-style: italic}
@@ -78,15 +78,28 @@ def get_cover_url(self):
             br.open(cover)
         return cover
 
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-        br.open('http://www.thetimes.co.uk/')
-        if self.username is not None and self.password is not None:
-            data = urlencode({
-                'gotoUrl': self.INDEX,
-                'username': self.username,
-                'password': self.password})
-            br.open('https://login.thetimes.co.uk/', data)
+    def get_browser(self, *a, **kw):
+        start_url = self.INDEX
+        kw['user_agent'] = random_user_agent(allow_ie=False)
+        br = BasicNewsRecipe.get_browser(self, *a, **kw)
+        self.log('Starting login process...')
+        res = br.open(start_url)
+        sso_url = res.geturl()
+        self.log(sso_url)
+        request_query = {
+            'username': self.username,
+            'password': self.password,
+            's': 1,
+            'gotoUrl': self.INDEX,
+        }
+        rq = Request(self.LOGIN, headers={
+            'Accept': 'text/html',
+            'Accept-Language': 'en-US,en;q=0.8',
+            'X-HTTP-Method-Override': 'POST',
+            'X-Requested-With': 'XMLHttpRequest',
+        }, data=request_query)
+        self.log('Sending login request...')
+        res = br.open(rq)
         return br
 
     remove_tags = [