[kissmanga][readcomiconline] add 'captcha' option (#279)

to configure how to handle CAPTCHA page redirects:
- either interactively wait for the user to solve the CAPTCHA
- or raise StopExtraction like before
pull/359/head
Mike Fährmann 5 years ago
parent e30ada162d
commit 4465a3ea68
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -593,6 +593,18 @@ Description Controls whether to choose the GIF or MP4 version of an animation.
=========== ===== =========== =====
extractor.kissmanga.captcha
---------------------------
=========== =====
Type ``string``
Default ``"stop"``
Description Controls how to handle redirects to CAPTCHA pages.
* ``"stop"``: Stop the current extractor run.
* ``"wait"``: Ask the user to solve the CAPTCHA and wait.
=========== =====
extractor.oauth.browser extractor.oauth.browser
----------------------- -----------------------
=========== ===== =========== =====
@ -646,6 +658,18 @@ Description Minimum and maximum wait time in seconds between HTTP requests
=========== ===== =========== =====
extractor.readcomiconline.captcha
---------------------------------
=========== =====
Type ``string``
Default ``"stop"``
Description Controls how to handle redirects to CAPTCHA pages.
* ``"stop"``: Stop the current extractor run.
* ``"wait"``: Ask the user to solve the CAPTCHA and wait.
=========== =====
extractor.recursive.blacklist extractor.recursive.blacklist
----------------------------- -----------------------------
=========== ===== =========== =====

@ -62,6 +62,10 @@
{ {
"mp4": true "mp4": true
}, },
"kissmanga":
{
"captcha": "stop"
},
"nijie": "nijie":
{ {
"username": null, "username": null,
@ -82,6 +86,10 @@
"wait-min": 3.0, "wait-min": 3.0,
"wait-max": 6.0 "wait-max": 6.0
}, },
"readcomiconline":
{
"captcha": "stop"
},
"recursive": "recursive":
{ {
"blacklist": ["directlink", "oauth", "recursive", "test"] "blacklist": ["directlink", "oauth", "recursive", "test"]

@ -8,7 +8,7 @@
"""Extract manga-chapters and entire manga from https://kissmanga.com/""" """Extract manga-chapters and entire manga from https://kissmanga.com/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor, Extractor
from .. import text, aes, exception from .. import text, aes, exception
from ..cache import cache from ..cache import cache
import hashlib import hashlib
@ -16,21 +16,35 @@ import ast
import re import re
class KissmangaBase(): class RedirectMixin():
"""Detect and handle redirects to CAPTCHA pages"""
def request(self, url):
    """Request 'url' and handle redirects to a CAPTCHA page.

    The response is returned as soon as it either has an empty
    'history' or its final URL does not contain "/AreYouHuman".

    Otherwise the "captcha" option (default "stop") decides:
    - "wait": log a warning, block until the user presses ENTER,
      then request 'url' again.
    - anything else: log an error and raise StopExtraction.

    Raises exception.StopExtraction when a CAPTCHA redirect is hit and
    the option is not "wait", or when it is "wait" but standard input
    cannot be read.
    """
    while True:
        response = Extractor.request(self, url)
        if not response.history or "/AreYouHuman" not in response.url:
            return response

        if self.config("captcha", "stop") != "wait":
            self.log.error(
                "Redirect to \n%s\nVisit this URL in your browser and "
                "solve the CAPTCHA to continue", response.url)
            raise exception.StopExtraction()

        self.log.warning(
            "Redirect to \n%s\nVisit this URL in your browser, solve "
            "the CAPTCHA, and press ENTER to continue", response.url)
        try:
            input()
        except (EOFError, OSError):
            # Without a readable stdin there is nothing to wait on;
            # retrying right away would be redirected again and this
            # loop would never terminate, so stop like "stop" does.
            self.log.error(
                "Unable to read from standard input; "
                "cannot wait for the CAPTCHA to be solved")
            raise exception.StopExtraction()
class KissmangaBase(RedirectMixin):
"""Base class for kissmanga extractors""" """Base class for kissmanga extractors"""
category = "kissmanga" category = "kissmanga"
archive_fmt = "{chapter_id}_{page}" archive_fmt = "{chapter_id}_{page}"
root = "https://kissmanga.com" root = "https://kissmanga.com"
def request(self, url):
response = super().request(url)
if response.history and "/AreYouHuman" in response.url:
self.log.error("Redirect to \n%s\n"
"Visit this URL in your browser and solve "
"the CAPTCHA to continue.", response.url)
raise exception.StopExtraction()
return response
@staticmethod @staticmethod
def parse_chapter_string(data): def parse_chapter_string(data):
"""Parse 'chapter_string' value contained in 'data'""" """Parse 'chapter_string' value contained in 'data'"""

@ -9,11 +9,12 @@
"""Extract comic-issues and entire comics from https://readcomiconline.to/""" """Extract comic-issues and entire comics from https://readcomiconline.to/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, exception from .kissmanga import RedirectMixin
from .. import text
import re import re
class ReadcomiconlineBase(): class ReadcomiconlineBase(RedirectMixin):
"""Base class for readcomiconline extractors""" """Base class for readcomiconline extractors"""
category = "readcomiconline" category = "readcomiconline"
directory_fmt = ("{category}", "{comic}", "{issue:>03}") directory_fmt = ("{category}", "{comic}", "{issue:>03}")
@ -21,15 +22,6 @@ class ReadcomiconlineBase():
archive_fmt = "{issue_id}_{page}" archive_fmt = "{issue_id}_{page}"
root = "https://readcomiconline.to" root = "https://readcomiconline.to"
def request(self, url):
response = super().request(url)
if response.history and "/AreYouHuman" in response.url:
self.log.error("Redirect to \n%s\n"
"Visit this URL in your browser and solve "
"the CAPTCHA to continue.", response.url)
raise exception.StopExtraction()
return response
class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
"""Extractor for comic-issues from readcomiconline.to""" """Extractor for comic-issues from readcomiconline.to"""

Loading…
Cancel
Save