update extractor class hierarchies

- let the GalleryExtractor class inherit directly from Extractor
- make ChapterExtractor a subclass of GalleryExtractor
- change the enumeration field name of GalleryExtractors from 'page' to 'num'
5 years ago · 1693d97bd3
parent 7ebd984e8d
commit 1693d97bd3
13 changed files with 44 additions and 41 deletions
--- a/gallery_dl/extractor/adultempire.py
+++ b/gallery_dl/extractor/adultempire.py
@ -21,12 +21,12 @@ class AdultempireGalleryExtractor(GalleryExtractor):
    test = (
        ("https://www.adultempire.com/5998/gallery.html", {
            "range": "1",
-            "keyword": "25c8171f5623678491a0d7bdf38a7a6ebfa4a361",
+            "keyword": "5b3266e69801db0d78c22181da23bc102886e027",
            "content": "5c6beb31e5e3cdc90ee5910d5c30f9aaec977b9e",
        }),
        ("https://www.adultdvdempire.com/5683/gallery.html", {
            "url": "b12cd1a65cae8019d837505adb4d6a2c1ed4d70d",
-            "keyword": "9634eb16cc6dbf347eb9dcdd9b2a499dfd04d167",
+            "keyword": "8d448d79c4ac5f5b10a3019d5b5129ddb43655e5",
        }),
    )

@ -55,4 +55,4 @@ class AdultempireGalleryExtractor(GalleryExtractor):
            if len(urls) < 24:
                return
            params["page"] += 1
-            page = self.request(self.chapter_url, params=params).text
+            page = self.request(self.gallery_url, params=params).text
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@ -249,24 +249,21 @@ class Extractor():
            yield test


-class ChapterExtractor(Extractor):
+class GalleryExtractor(Extractor):

-    subcategory = "chapter"
-    directory_fmt = (
-        "{category}", "{manga}",
-        "{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}")
-    filename_fmt = (
-        "{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
-    archive_fmt = (
-        "{manga}_{chapter}{chapter_minor}_{page}")
+    subcategory = "gallery"
+    filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
+    directory_fmt = ("{category}", "{gallery_id} {title}")
+    archive_fmt = "{gallery_id}_{num}"
+    enum = "num"

    def __init__(self, match, url=None):
        Extractor.__init__(self, match)
-        self.chapter_url = url or self.root + match.group(1)
+        self.gallery_url = self.root + match.group(1) if url is None else url

    def items(self):
        self.login()
-        page = self.request(self.chapter_url).text
+        page = self.request(self.gallery_url).text
        data = self.metadata(page)
        imgs = self.images(page)

@ -284,7 +281,7 @@ class ChapterExtractor(Extractor):

        yield Message.Version, 1
        yield Message.Directory, data
-        for data["page"], (url, imgdata) in images:
+        for data[self.enum], (url, imgdata) in images:
            if imgdata:
                data.update(imgdata)
            yield Message.Url, url, text.nameext_from_url(url, data)
@ -299,6 +296,19 @@ class ChapterExtractor(Extractor):
        """Return a list of all (image-url, metadata)-tuples"""


+class ChapterExtractor(GalleryExtractor):
+
+    subcategory = "chapter"
+    directory_fmt = (
+        "{category}", "{manga}",
+        "{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}")
+    filename_fmt = (
+        "{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
+    archive_fmt = (
+        "{manga}_{chapter}{chapter_minor}_{page}")
+    enum = "page"
+
+
 class MangaExtractor(Extractor):

    subcategory = "manga"
@ -333,14 +343,6 @@ class MangaExtractor(Extractor):
        """Return a list of all (chapter-url, metadata)-tuples"""


-class GalleryExtractor(ChapterExtractor):
-
-    subcategory = "gallery"
-    filename_fmt = "{category}_{gallery_id}_{page:>03}.{extension}"
-    directory_fmt = ("{category}", "{gallery_id} {title}")
-    archive_fmt = "{gallery_id}_{page}"
-
-
 class AsynchronousMixin():
    """Run info extraction in a separate thread"""

--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@ -44,14 +44,13 @@ class FoolslideBase(SharedConfigMixin):

 class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
    """Base class for chapter extractors for FoOlSlide based sites"""
-    directory_fmt = (
-        "{category}", "{manga}", "{chapter_string}")
+    directory_fmt = ("{category}", "{manga}", "{chapter_string}")
    archive_fmt = "{id}"
    pattern_fmt = r"(/read/[^/?&#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
    decode = "default"

    def items(self):
-        page = self.request(self.chapter_url).text
+        page = self.request(self.gallery_url).text
        data = self.metadata(page)
        imgs = self.images(page)

@ -77,7 +76,7 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
    def metadata(self, page):
        extr = text.extract_from(page)
        extr('<h1 class="tbtitle dnone">', '')
-        return self.parse_chapter_url(self.chapter_url, {
+        return self.parse_chapter_url(self.gallery_url, {
            "manga"         : text.unescape(extr('title="', '"')).strip(),
            "chapter_string": text.unescape(extr('title="', '"')),
        })
--- a/gallery_dl/extractor/fuskator.py
+++ b/gallery_dl/extractor/fuskator.py
@ -42,7 +42,7 @@ class FuskatorGalleryExtractor(GalleryExtractor):

    def metadata(self, page):
        headers = {
-            "Referer"         : self.chapter_url,
+            "Referer"         : self.gallery_url,
            "X-Requested-With": "XMLHttpRequest",
        }
        auth = self.request(
--- a/gallery_dl/extractor/hentaicafe.py
+++ b/gallery_dl/extractor/hentaicafe.py
@ -31,10 +31,10 @@ class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor):
        info = text.unescape(text.extract(page, '<title>', '</title>')[0])
        manga, _, chapter_string = info.partition(" :: ")

-        data = self._data(self.chapter_url.split("/")[5])
+        data = self._data(self.gallery_url.split("/")[5])
        data["manga"] = manga
        data["chapter_string"] = chapter_string.rstrip(" :")
-        return self.parse_chapter_url(self.chapter_url, data)
+        return self.parse_chapter_url(self.gallery_url, data)

    @memcache(keyarg=1)
    def _data(self, manga):
--- a/gallery_dl/extractor/hentaifox.py
+++ b/gallery_dl/extractor/hentaifox.py
@ -24,7 +24,7 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
    test = ("https://hentaifox.com/gallery/56622/", {
        "pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
        "count": 24,
-        "keyword": "38f8517605feb6854d48833297da6b05c6541b69",
+        "keyword": "903ebe227d85e484460382fc6cbab42be7a244d5",
    })

    def __init__(self, match):
--- a/gallery_dl/extractor/hentainexus.py
+++ b/gallery_dl/extractor/hentainexus.py
@ -22,7 +22,7 @@ class HentainexusGalleryExtractor(GalleryExtractor):
    test = (
        ("https://hentainexus.com/view/5688", {
            "url": "746d0043e20030f1171aae5ea113176607302517",
-            "keyword": "b05986369fbaf29cfa08b118960d92c49e59524b",
+            "keyword": "9512cf5f258130e5f75de9954d7a13217c2405e7",
        }),
        ("https://hentainexus.com/read/5688"),
    )
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@ -21,7 +21,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
    test = (
        ("https://hitomi.la/galleries/867789.html", {
            "pattern": r"https://aa.hitomi.la/galleries/867789/\d+.jpg",
-            "keyword": "d097a8db8e810045131b4510c41714004f9eff3a",
+            "keyword": "6701f8f588f119ef84cd29bdf99a399417b0a6a2",
            "count": 16,
        }),
        ("https://hitomi.la/galleries/1401410.html", {
@ -89,7 +89,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
        base = "https://" + subdomain + ".hitomi.la/galleries/"

        # set Referer header before image downloads (#239)
-        self.session.headers["Referer"] = self.chapter_url
+        self.session.headers["Referer"] = self.gallery_url

        # handle Game CG galleries with scenes (#321)
        scenes = text.extract(page, "var scene_indexes = [", "]")[0]
--- a/gallery_dl/extractor/nsfwalbum.py
+++ b/gallery_dl/extractor/nsfwalbum.py
@ -17,14 +17,14 @@ class NsfwalbumAlbumExtractor(GalleryExtractor):
    category = "nsfwalbum"
    subcategory = "album"
    root = "https://nsfwalbum.com"
-    filename_fmt = "{album_id}_{page:>03}_{id}.{extension}"
+    filename_fmt = "{album_id}_{num:>03}_{id}.{extension}"
    directory_fmt = ("{category}", "{album_id} {title}")
    archive_fmt = "{id}"
    pattern = r"(?:https?://)?(?:www\.)?nsfwalbum\.com(/album/(\d+))"
    test = ("https://nsfwalbum.com/album/401611", {
        "range": "1-5",
        "url": "b0481fc7fad5982da397b6359fbed8421b8ba284",
-        "keyword": "fc1ad4ebcd6d4cf32da15203120112b8bcf12eec",
+        "keyword": "e98f9b0d473c00000831618d0235863b1dd78294",
    })

    def __init__(self, match):
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@ -23,7 +23,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
        (("https://original-work.simply-hentai.com"
          "/amazon-no-hiyaku-amazon-elixir"), {
            "url": "21613585ae5ec2f69ea579e9713f536fceab5bd5",
-            "keyword": "bf75f9ff0fb60756b1b9b92403526a72d9178d23",
+            "keyword": "9e87a0973553b2922ddee37958b8f5d87910af72",
        }),
        ("https://www.simply-hentai.com/notfound", {
            "exception": exception.GalleryDLException,
@ -43,7 +43,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
        extr = text.extract_from(page)
        split = text.split_html

-        self.chapter_url = extr('<link rel="canonical" href="', '"')
+        self.gallery_url = extr('<link rel="canonical" href="', '"')
        title = extr('<meta property="og:title" content="', '"')
        if not title:
            raise exception.NotFoundError("gallery")
@ -63,7 +63,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
        return data

    def images(self, _):
-        url = self.chapter_url + "/all-pages"
+        url = self.gallery_url + "/all-pages"
        headers = {"Accept": "application/json"}
        images = self.request(url, headers=headers).json()
        return [
--- a/gallery_dl/extractor/tsumino.py
+++ b/gallery_dl/extractor/tsumino.py
@ -109,7 +109,7 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor):

    def images(self, page):
        url = "{}/Read/Index/{}?page=1".format(self.root, self.gallery_id)
-        headers = {"Referer": self.chapter_url}
+        headers = {"Referer": self.gallery_url}
        response = self.request(url, headers=headers, fatal=False)

        if "/Auth/" in response.url:
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-__version__ = "1.10.7-dev"
+__version__ = "1.11.0-dev"
--- a/test/test_results.py
+++ b/test/test_results.py
@ -27,6 +27,8 @@ TRAVIS_SKIP = {
 # temporary issues, etc.
 BROKEN = {
    "8chan",
+    "hentaifoundry",
+    "luscious",
    "mangapark",
 }