From 8e01cf0ef85845a9e60f6aab3be6b28ffdc5365a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Mon, 7 Jan 2019 16:59:26 +0100
Subject: [PATCH] [reactor] generalize extractors (#148)

- support *.reactor.cc domains
- combine joyreactor and pornreactor modules
---
 gallery_dl/extractor/__init__.py              |   3 +-
 gallery_dl/extractor/pornreactor.py           |  75 -------
 .../extractor/{joyreactor.py => reactor.py}   | 199 +++++++++++++-----
 gallery_dl/extractor/simplyhentai.py          |   4 +-
 4 files changed, 154 insertions(+), 127 deletions(-)
 delete mode 100644 gallery_dl/extractor/pornreactor.py
 rename gallery_dl/extractor/{joyreactor.py => reactor.py} (62%)

diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index eca704b9..48c4351d 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -47,7 +47,6 @@ modules = [
     "imgur",
     "instagram",
     "jaiminisbox",
-    "joyreactor",
     "khinsider",
     "kireicake",
     "kissmanga",
@@ -72,8 +71,8 @@ modules = [
     "piczel",
     "pinterest",
     "pixiv",
-    "pornreactor",
     "powermanga",
+    "reactor",
     "readcomiconline",
     "rebeccablacktech",
     "reddit",
diff --git a/gallery_dl/extractor/pornreactor.py b/gallery_dl/extractor/pornreactor.py
deleted file mode 100644
index bbf0b8e6..00000000
--- a/gallery_dl/extractor/pornreactor.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2018-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for http://pornreactor.cc/"""
-
-from .joyreactor import (
-    JoyreactorTagExtractor,
-    JoyreactorSearchExtractor,
-    JoyreactorUserExtractor,
-    JoyreactorPostExtractor,
-)
-
-
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?(pornreactor\.cc|fapreactor.com)"
-
-
-class PornreactorTagExtractor(JoyreactorTagExtractor):
-    """Extractor for tag searches on pornreactor.cc"""
-    category = "pornreactor"
-    pattern = [BASE_PATTERN + r"/tag/([^/?&#]+)"]
-    test = [
-        ("http://pornreactor.cc/tag/RiceGnat", {
-            "count": ">= 120",
-        }),
-        ("http://fapreactor.com/tag/RiceGnat", None),
-    ]
-
-
-class PornreactorSearchExtractor(JoyreactorSearchExtractor):
-    """Extractor for search results on pornreactor.cc"""
-    category = "pornreactor"
-    pattern = [BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
-    test = [
-        ("http://pornreactor.cc/search?q=ecchi+hentai", {
-            "range": "1-25",
-            "count": ">= 20",
-        }),
-        ("http://fapreactor.com/search/ecchi+hentai", None),
-    ]
-
-
-class PornreactorUserExtractor(JoyreactorUserExtractor):
-    """Extractor for all posts of a user on pornreactor.cc"""
-    category = "pornreactor"
-    pattern = [BASE_PATTERN + r"/user/([^/?&#]+)"]
-    test = [
-        ("http://pornreactor.cc/user/Disillusion", {
-            "url": "7e06f87f8dcce3fc7851b6d13aa55712ab45fb04",
-            "keyword": "edfefb54ea4863e3731c508ae6caeb4140be0d31",
-        }),
-        ("http://fapreactor.com/user/Disillusion", None),
-    ]
-
-
-class PornreactorPostExtractor(JoyreactorPostExtractor):
-    """Extractor for single posts on pornreactor.cc"""
-    category = "pornreactor"
-    subcategory = "post"
-    pattern = [BASE_PATTERN + r"/post/(\d+)"]
-    test = [
-        ("http://pornreactor.cc/post/863166", {
-            "url": "9e5f7b374605cbbd413f4f4babb9d1af6f95b843",
-            "keyword": "6e9e4bd4e2d4f3f2c7936340ec71f8693129f809",
-            "content": "3e2a09f8b5e5ed7722f51c5f423ff4c9260fb23e",
-        }),
-        ("http://fapreactor.com/post/863166", {
-            "url": "83ff7c87741c05bcf1de6825e2b4739afeb87ed5",
-            "keyword": "cf8159224fde59c1dab86677514b4aedeb533d66",
-        }),
-    ]
diff --git a/gallery_dl/extractor/joyreactor.py b/gallery_dl/extractor/reactor.py
similarity index 62%
rename from gallery_dl/extractor/joyreactor.py
rename to gallery_dl/extractor/reactor.py
index 07fca456..a8dae15e 100644
--- a/gallery_dl/extractor/joyreactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -1,34 +1,40 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2018-2019 Mike Fährmann
+# Copyright 2019 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractors for http://joyreactor.cc/"""
+"""Generic extractors for *reactor sites"""
 
-from .common import Extractor, Message
+from .common import SharedConfigExtractor, Message
 from .. import text
+import urllib.parse
 import json
 
 
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?(joyreactor\.c(?:c|om))"
+BASE_PATTERN = r"(?:https?://)?([^/.]+\.reactor\.cc)"
 
 
-class JoyreactorExtractor(Extractor):
-    """Base class for joyreactor extractors"""
-    category = "joyreactor"
+class ReactorExtractor(SharedConfigExtractor):
+    """Base class for *reactor.cc extractors"""
+    basecategory = "reactor"
     directory_fmt = ["{category}"]
     filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}"
     archive_fmt = "{post_id}_{num}"
 
     def __init__(self, match):
-        Extractor.__init__(self)
+        SharedConfigExtractor.__init__(self)
         self.url = match.group(0)
         self.root = "http://" + match.group(1)
         self.session.headers["Referer"] = self.root
 
+        if not self.category:
+            # set category based on domain name
+            netloc = urllib.parse.urlsplit(self.root).netloc
+            self.category = netloc.rpartition(".")[0]
+
     def items(self):
         data = self.metadata()
         yield Message.Version, 1
@@ -70,6 +76,7 @@ class JoyreactorExtractor(Extractor):
             data = json.loads(script)
         except ValueError:
             try:
+                # remove control characters and escape backslashes
                 mapping = dict.fromkeys(range(32))
                 script = script.translate(mapping).replace("\\", "\\\\")
                 data = json.loads(script)
@@ -115,36 +122,92 @@ class JoyreactorExtractor(Extractor):
             }
 
 
-class JoyreactorTagExtractor(JoyreactorExtractor):
-    """Extractor for tag searches on joyreactor.cc"""
+class ReactorTagExtractor(ReactorExtractor):
+    """Extractor for tag searches on *reactor.cc sites"""
     subcategory = "tag"
     directory_fmt = ["{category}", "{search_tags}"]
     archive_fmt = "{search_tags}_{post_id}_{num}"
     pattern = [BASE_PATTERN + r"/tag/([^/?&#]+)"]
-    test = [
-        ("http://joyreactor.com/tag/Cirno", {
-            "url": "a81382a3146da50b647c475f87427a6ca1d737df",
-            "keyword": "dcd3b101cae0a93fbb91281235de1410faf88455",
-        }),
-        ("http://joyreactor.cc/tag/Advent+Cirno", {
-            "count": ">= 17",
-        }),
-    ]
+    test = [("http://anime.reactor.cc/tag/Anime+Art", None)]
 
     def __init__(self, match):
-        JoyreactorExtractor.__init__(self, match)
+        ReactorExtractor.__init__(self, match)
         self.tag = match.group(2)
 
     def metadata(self):
         return {"search_tags": text.unescape(self.tag).replace("+", " ")}
 
 
-class JoyreactorSearchExtractor(JoyreactorTagExtractor):
-    """Extractor for search results on joyreactor.cc"""
+class ReactorSearchExtractor(ReactorTagExtractor):
+    """Extractor for search results on *reactor.cc sites"""
     subcategory = "search"
     directory_fmt = ["{category}", "search", "{search_tags}"]
     archive_fmt = "s_{search_tags}_{post_id}_{num}"
     pattern = [BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
+    test = [("http://anime.reactor.cc/search?q=Art", None)]
+
+
+class ReactorUserExtractor(ReactorExtractor):
+    """Extractor for all posts of a user on *reactor.cc sites"""
+    subcategory = "user"
+    directory_fmt = ["{category}", "user", "{user}"]
+    pattern = [BASE_PATTERN + r"/user/([^/?&#]+)"]
+    test = [("http://anime.reactor.cc/user/Shuster", None)]
+
+    def __init__(self, match):
+        ReactorExtractor.__init__(self, match)
+        self.user = match.group(2)
+
+    def metadata(self):
+        return {"user": text.unescape(self.user).replace("+", " ")}
+
+
+class ReactorPostExtractor(ReactorExtractor):
+    """Extractor for single posts on *reactor.cc sites"""
+    subcategory = "post"
+    pattern = [BASE_PATTERN + r"/post/(\d+)"]
+    test = [("http://anime.reactor.cc/post/3576250", None)]
+
+    def __init__(self, match):
+        ReactorExtractor.__init__(self, match)
+        self.post_id = match.group(2)
+
+    def items(self):
+        yield Message.Version, 1
+        post = self.request(self.url).text
+        pos = post.find('class="uhead">')
+        for image in self._parse_post(post[pos:]):
+            if image["num"] == 1:
+                yield Message.Directory, image
+            url = image["file_url"]
+            yield Message.Url, url, text.nameext_from_url(url, image)
+
+
+# --------------------------------------------------------------------
+# JoyReactor
+
+JR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(joyreactor\.c(?:c|om))"
+
+
+class JoyreactorTagExtractor(ReactorTagExtractor):
+    """Extractor for tag searches on joyreactor.cc"""
+    category = "joyreactor"
+    pattern = [JR_BASE_PATTERN + r"/tag/([^/?&#]+)"]
+    test = [
+        ("http://joyreactor.com/tag/Cirno", {
+            "url": "a81382a3146da50b647c475f87427a6ca1d737df",
+            "keyword": "dcd3b101cae0a93fbb91281235de1410faf88455",
+        }),
+        ("http://joyreactor.cc/tag/Advent+Cirno", {
+            "count": ">= 17",
+        }),
+    ]
+
+
+class JoyreactorSearchExtractor(ReactorSearchExtractor):
+    """Extractor for search results on joyreactor.cc"""
+    category = "joyreactor"
+    pattern = [JR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
     test = [
         ("http://joyreactor.com/search?q=Cirno+Gifs", {
             "count": 0,  # no search results on joyreactor.com
@@ -156,11 +219,10 @@ class JoyreactorSearchExtractor(JoyreactorTagExtractor):
     ]
 
 
-class JoyreactorUserExtractor(JoyreactorExtractor):
+class JoyreactorUserExtractor(ReactorUserExtractor):
     """Extractor for all posts of a user on joyreactor.cc"""
-    subcategory = "user"
-    directory_fmt = ["{category}", "user", "{user}"]
-    pattern = [BASE_PATTERN + r"/user/([^/?&#]+)"]
+    category = "joyreactor"
+    pattern = [JR_BASE_PATTERN + r"/user/([^/?&#]+)"]
     test = [
         ("http://joyreactor.com/user/Tacoman123", {
             "url": "0444158f17c22f08515ad4e7abf69ad2f3a63b35",
@@ -169,18 +231,11 @@ class JoyreactorUserExtractor(JoyreactorExtractor):
         ("http://joyreactor.cc/user/hemantic", None),
     ]
 
-    def __init__(self, match):
-        JoyreactorExtractor.__init__(self, match)
-        self.user = match.group(2)
-
-    def metadata(self):
-        return {"user": text.unescape(self.user).replace("+", " ")}
-
 
-class JoyreactorPostExtractor(JoyreactorExtractor):
+class JoyreactorPostExtractor(ReactorPostExtractor):
     """Extractor for single posts on joyreactor.cc"""
-    subcategory = "post"
-    pattern = [BASE_PATTERN + r"/post/(\d+)"]
+    category = "joyreactor"
+    pattern = [JR_BASE_PATTERN + r"/post/(\d+)"]
     test = [
         ("http://joyreactor.com/post/3721876", {  # single image
             "url": "904779f6571436f3d5adbce30c2c272f6401e14a",
@@ -204,16 +259,64 @@ class JoyreactorPostExtractor(JoyreactorExtractor):
         }),
     ]
 
-    def __init__(self, match):
-        JoyreactorExtractor.__init__(self, match)
-        self.post_id = match.group(2)
 
-    def items(self):
-        yield Message.Version, 1
-        post = self.request(self.url).text
-        pos = post.find('class="uhead">')
-        for image in self._parse_post(post[pos:]):
-            if image["num"] == 1:
-                yield Message.Directory, image
-            url = image["file_url"]
-            yield Message.Url, url, text.nameext_from_url(url, image)
+# --------------------------------------------------------------------
+# PornReactor
+
+PR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(pornreactor\.cc|fapreactor\.com)"
+
+
+class PornreactorTagExtractor(ReactorTagExtractor):
+    """Extractor for tag searches on pornreactor.cc"""
+    category = "pornreactor"
+    pattern = [PR_BASE_PATTERN + r"/tag/([^/?&#]+)"]
+    test = [
+        ("http://pornreactor.cc/tag/RiceGnat", {
+            "count": ">= 120",
+        }),
+        ("http://fapreactor.com/tag/RiceGnat", None),
+    ]
+
+
+class PornreactorSearchExtractor(ReactorSearchExtractor):
+    """Extractor for search results on pornreactor.cc"""
+    category = "pornreactor"
+    pattern = [PR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
+    test = [
+        ("http://pornreactor.cc/search?q=ecchi+hentai", {
+            "range": "1-25",
+            "count": ">= 20",
+        }),
+        ("http://fapreactor.com/search/ecchi+hentai", None),
+    ]
+
+
+class PornreactorUserExtractor(ReactorUserExtractor):
+    """Extractor for all posts of a user on pornreactor.cc"""
+    category = "pornreactor"
+    pattern = [PR_BASE_PATTERN + r"/user/([^/?&#]+)"]
+    test = [
+        ("http://pornreactor.cc/user/Disillusion", {
+            "url": "7e06f87f8dcce3fc7851b6d13aa55712ab45fb04",
+            "keyword": "edfefb54ea4863e3731c508ae6caeb4140be0d31",
+        }),
+        ("http://fapreactor.com/user/Disillusion", None),
+    ]
+
+
+class PornreactorPostExtractor(ReactorPostExtractor):
+    """Extractor for single posts on pornreactor.cc"""
+    category = "pornreactor"
+    subcategory = "post"
+    pattern = [PR_BASE_PATTERN + r"/post/(\d+)"]
+    test = [
+        ("http://pornreactor.cc/post/863166", {
+            "url": "9e5f7b374605cbbd413f4f4babb9d1af6f95b843",
+            "keyword": "6e9e4bd4e2d4f3f2c7936340ec71f8693129f809",
+            "content": "3e2a09f8b5e5ed7722f51c5f423ff4c9260fb23e",
+        }),
+        ("http://fapreactor.com/post/863166", {
+            "url": "83ff7c87741c05bcf1de6825e2b4739afeb87ed5",
+            "keyword": "cf8159224fde59c1dab86677514b4aedeb533d66",
+        }),
+    ]
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index 2e1d8cb4..586d75e9 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -25,8 +25,8 @@ class SimplyhentaiGalleryExtractor(ChapterExtractor):
     test = [
         (("https://original-work.simply-hentai.com"
           "/amazon-no-hiyaku-amazon-elixir"), {
-            "url": "35f3843d0ea83e6a618df7afaebd2b03f3628db9",
-            "keyword": "1e22ccbe66412eab844f135ad9cd3424b8b064e8",
+            "url": "258289249990502c3138719cb89e995a60861e49",
+            "keyword": "3873c6078ce116e798fac8b7a955e3b3a4f526a6",
         }),
         ("https://www.simply-hentai.com/notfound", {
             "exception": exception.GalleryDLException,