[tumblrgallery] simplify

4 years ago · 38e66940c1
parent 665499924d
commit 38e66940c1
2 changed files with 20 additions and 47 deletions
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -97,6 +97,7 @@ Mangoxo              https://www.mangoxo.com/            Albums, Channels
 mastodon.social      https://mastodon.social/            Images from Statuses, User Profiles                `OAuth <https://github.com/mikf/gallery-dl#oauth>`__
 My Hentai Gallery    https://myhentaigallery.com/        Galleries
 Naver                https://blog.naver.com/             Blogs, Posts
+NaverWebtoon         https://comic.naver.com/            Comics, Episodes
 Newgrounds           https://www.newgrounds.com/         |newgrounds-C|                                     Supported
 Ngomik               http://ngomik.in/                   Chapters
 nhentai              https://nhentai.net/                Galleries, Search Results
@@ -146,6 +147,7 @@ The /b/ Archive      https://thebarchive.com/            Boards, Search Results,
 Thecollection        https://the-collection.booru.org/   Posts, Tag Searches
 Tsumino              https://www.tsumino.com/            Galleries, Search Results                          Supported
 Tumblr               https://www.tumblr.com/             Likes, Posts, Tag Searches, User Profiles          `OAuth <https://github.com/mikf/gallery-dl#oauth>`__
+TumblrGallery        https://tumblrgallery.xyz/          Posts, Search Results, Tumblrblogs
 Turboimagehost       https://www.turboimagehost.com/     individual Images
 Twitter              https://twitter.com/                |twitter-C|                                        Supported
 Unsplash             https://unsplash.com/               |unsplash-C|
--- a/gallery_dl/extractor/tumblrgallery.py
+++ b/gallery_dl/extractor/tumblrgallery.py
@@ -1,50 +1,36 @@
 # -*- coding: utf-8 -*-

-# Copyright 2021 Mike Fährmann
-#
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-"""Extract images from https://tumblrgallery.xyz/"""
+"""Extractors for https://tumblrgallery.xyz/"""

 from .common import GalleryExtractor
 from .. import text

+BASE_PATTERN = r"(?:https?://)?tumblrgallery\.xyz"

-BASE_PATTERN = r"(?:https?://)tumblrgallery\.xyz"

-
-class TumblrgalleryGalleryExtractor(GalleryExtractor):
+class TumblrgalleryExtractor(GalleryExtractor):
    """Base class for tumblrgallery extractors"""
    category = "tumblrgallery"
-    cookiedomain = None
-
-    def __init__(self, match):
-        self.root = "https://tumblrgallery.xyz"
-        GalleryExtractor.__init__(self, match)
+    filename_fmt = "{category}_{gallery_id}_{num:>03}_{id}.{extension}"
+    directory_fmt = ("{category}", "{gallery_id} {title}")
+    root = "https://tumblrgallery.xyz"


-class TumblrgalleryTumblrblogExtractor(TumblrgalleryGalleryExtractor):
+class TumblrgalleryTumblrblogExtractor(TumblrgalleryExtractor):
    """Extractor for Tumblrblog on tumblrgallery.xyz"""
    subcategory = "tumblrblog"
-    pattern = BASE_PATTERN + r"(/tumblrblog/gallery/(\d+).html)"
-    test = (
-        "https://tumblrgallery.xyz/tumblrblog/gallery/103975.html", {
-            "pattern": r"/tumblrblog/gallery/103975.html"
-                       r"103975",
-        }
-    )
-
-    filename_fmt = "{category}_{gallery_id}_{num:>03}_{id}.{extension}"
-    directory_fmt = ("{category}", "{gallery_id} {title}")
+    pattern = BASE_PATTERN + r"(/tumblrblog/gallery/(\d+)\.html)"
+    test = ("https://tumblrgallery.xyz/tumblrblog/gallery/103975.html",)

    def __init__(self, match):
-        TumblrgalleryGalleryExtractor.__init__(self, match)
+        TumblrgalleryExtractor.__init__(self, match)
        self.gallery_id = text.parse_int(match.group(2))

    def metadata(self, page):
-        """Collect metadata for extractor-job"""
        return {
            "title" : text.unescape(text.extract(page, "<h1>", "</h1>"))[0],
            "gallery_id": self.gallery_id,
@@ -76,26 +62,17 @@ class TumblrgalleryTumblrblogExtractor(TumblrgalleryGalleryExtractor):
                }


-class TumblrgalleryPostExtractor(TumblrgalleryGalleryExtractor):
+class TumblrgalleryPostExtractor(TumblrgalleryExtractor):
    """Extractor for Posts on tumblrgallery.xyz"""
    subcategory = "post"
-    pattern = BASE_PATTERN + r"(/post/(\d+).html)"
-    test = (
-        "https://tumblrgallery.xyz/post/405674.html", {
-            "pattern": r"/post/405674.html"
-                       r"405674",
-        }
-    )
-
-    filename_fmt = "{category}_{gallery_id}_{num:>03}_{id}.{extension}"
-    directory_fmt = ("{category}", "{gallery_id} {title}")
+    pattern = BASE_PATTERN + r"(/post/(\d+)\.html)"
+    test = ("https://tumblrgallery.xyz/post/405674.html",)

    def __init__(self, match):
-        TumblrgalleryGalleryExtractor.__init__(self, match)
+        TumblrgalleryExtractor.__init__(self, match)
        self.gallery_id = text.parse_int(match.group(2))

    def metadata(self, page):
-        """Collect metadata for extractor-job"""
        return {
            "title" : text.remove_html(
                text.unescape(text.extract(page, "<title>", "</title>")[0])
@@ -117,25 +94,19 @@ class TumblrgalleryPostExtractor(TumblrgalleryGalleryExtractor):
            }


-class TumblrgallerySearchExtractor(TumblrgalleryGalleryExtractor):
+class TumblrgallerySearchExtractor(TumblrgalleryExtractor):
    """Extractor for Search result on tumblrgallery.xyz"""
    subcategory = "search"
-    pattern = BASE_PATTERN + r"(/s\.php\?q=(.*))"
-    test = (
-        "https://tumblrgallery.xyz/s.php?q=everyday-life", {
-            "pattern": r"everyday-life",
-        }
-    )
-
    filename_fmt = "{category}_{num:>03}_{gallery_id}_{id}_{title}.{extension}"
    directory_fmt = ("{category}", "{search_term}")
+    pattern = BASE_PATTERN + r"(/s\.php\?q=([^&#]+))"
+    test = ("https://tumblrgallery.xyz/s.php?q=everyday-life",)

    def __init__(self, match):
+        TumblrgalleryExtractor.__init__(self, match)
        self.search_term = match.group(2)
-        TumblrgalleryGalleryExtractor.__init__(self, match)

    def metadata(self, page):
-        """Collect metadata for extractor-job"""
        return {
            "search_term": self.search_term,
        }