[readcomiconline] detect 'AreYouHuman' redirects (#279)

5 years ago · 48233f00c0
parent 1cde38110d
commit 48233f00c0
3 changed files with 16 additions and 8 deletions
--- a/gallery_dl/extractor/kissmanga.py
+++ b/gallery_dl/extractor/kissmanga.py
@@ -24,10 +24,10 @@ class KissmangaBase():

    def request(self, url):
        response = super().request(url)
-        if response.history and "/Message/AreYouHuman?" in response.url:
-            self.log.error("Requesting too many pages caused a redirect to %s."
-                           " Try visiting this URL in your browser and solve"
-                           " the CAPTCHA to continue.", response.url)
+        if response.history and "/AreYouHuman" in response.url:
+            self.log.error("Redirect to \n%s\n"
+                           "Visit this URL in your browser and solve "
+                           "the CAPTCHA to continue.", response.url)
            raise exception.StopExtraction()
        return response

--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -9,7 +9,7 @@
 """Extract comic-issues and entire comics from https://readcomiconline.to/"""

 from .common import ChapterExtractor, MangaExtractor
-from .. import text
+from .. import text, exception
 import re


@@ -21,6 +21,15 @@ class ReadcomiconlineBase():
    archive_fmt = "{issue_id}_{page}"
    root = "https://readcomiconline.to"

+    def request(self, url):
+        response = super().request(url)
+        if response.history and "/AreYouHuman" in response.url:
+            self.log.error("Redirect to \n%s\n"
+                           "Visit this URL in your browser and solve "
+                           "the CAPTCHA to continue.", response.url)
+            raise exception.StopExtraction()
+        return response
+

 class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
    """Extractor for comic-issues from readcomiconline.to"""
@@ -49,7 +58,6 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
        }

    def images(self, page):
-        self.session.headers["Referer"] = None
        return [
            (url, None)
            for url in text.extract_iter(
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -111,13 +111,13 @@ class SmugmugImageExtractor(SmugmugExtractor):
    test = (
        ("https://acapella.smugmug.com/Micro-Macro/Drops/i-g2Dmf9z", {
            "url": "78f0bf3516b6d670b7319216bdeccb35942ca4cf",
-            "keyword": "008a29d6e90729ef7639617db6c049ecb1d0ab54",
+            "keyword": "b298ef7ed2b1918263b6a7dc6f56e54401584381",
            "content": "64a8f69a1d824921eebbdf2420087937adfa45cd",
        }),
        # video
        ("https://tstravels.smugmug.com/Dailies/Daily-Dose-2015/i-39JFNzB", {
            "url": "04d0ab1ff829ca7d78f5acb5548953df08e9a5ee",
-            "keyword": "cafec30861ac7569b12a2a6b671b4b5ce273b370",
+            "keyword": "c708c4b9527a2fb29396c19f7628f9cf4b0b3a39",
        }),
    )