[tumblrgallery] simplify

pull/1374/head
Mike Fährmann 4 years ago
parent 665499924d
commit 38e66940c1
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -97,6 +97,7 @@ Mangoxo https://www.mangoxo.com/ Albums, Channels
mastodon.social https://mastodon.social/ Images from Statuses, User Profiles `OAuth <https://github.com/mikf/gallery-dl#oauth>`__
My Hentai Gallery https://myhentaigallery.com/ Galleries
Naver https://blog.naver.com/ Blogs, Posts
NaverWebtoon https://comic.naver.com/ Comics, Episodes
Newgrounds https://www.newgrounds.com/ |newgrounds-C| Supported
Ngomik http://ngomik.in/ Chapters
nhentai https://nhentai.net/ Galleries, Search Results
@ -146,6 +147,7 @@ The /b/ Archive https://thebarchive.com/ Boards, Search Results,
Thecollection https://the-collection.booru.org/ Posts, Tag Searches
Tsumino https://www.tsumino.com/ Galleries, Search Results Supported
Tumblr https://www.tumblr.com/ Likes, Posts, Tag Searches, User Profiles `OAuth <https://github.com/mikf/gallery-dl#oauth>`__
TumblrGallery https://tumblrgallery.xyz/ Posts, Search Results, Tumblrblogs
Turboimagehost https://www.turboimagehost.com/ individual Images
Twitter https://twitter.com/ |twitter-C| Supported
Unsplash https://unsplash.com/ |unsplash-C|

@ -1,50 +1,36 @@
# -*- coding: utf-8 -*-
# Copyright 2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract images from https://tumblrgallery.xyz/"""
"""Extractors for https://tumblrgallery.xyz/"""
from .common import GalleryExtractor
from .. import text
BASE_PATTERN = r"(?:https?://)?tumblrgallery\.xyz"
BASE_PATTERN = r"(?:https?://)tumblrgallery\.xyz"
class TumblrgalleryGalleryExtractor(GalleryExtractor):
class TumblrgalleryExtractor(GalleryExtractor):
"""Base class for tumblrgallery extractors"""
category = "tumblrgallery"
cookiedomain = None
def __init__(self, match):
self.root = "https://tumblrgallery.xyz"
GalleryExtractor.__init__(self, match)
filename_fmt = "{category}_{gallery_id}_{num:>03}_{id}.{extension}"
directory_fmt = ("{category}", "{gallery_id} {title}")
root = "https://tumblrgallery.xyz"
class TumblrgalleryTumblrblogExtractor(TumblrgalleryGalleryExtractor):
class TumblrgalleryTumblrblogExtractor(TumblrgalleryExtractor):
"""Extractor for Tumblrblog on tumblrgallery.xyz"""
subcategory = "tumblrblog"
pattern = BASE_PATTERN + r"(/tumblrblog/gallery/(\d+).html)"
test = (
"https://tumblrgallery.xyz/tumblrblog/gallery/103975.html", {
"pattern": r"/tumblrblog/gallery/103975.html"
r"103975",
}
)
filename_fmt = "{category}_{gallery_id}_{num:>03}_{id}.{extension}"
directory_fmt = ("{category}", "{gallery_id} {title}")
pattern = BASE_PATTERN + r"(/tumblrblog/gallery/(\d+)\.html)"
test = ("https://tumblrgallery.xyz/tumblrblog/gallery/103975.html",)
def __init__(self, match):
TumblrgalleryGalleryExtractor.__init__(self, match)
TumblrgalleryExtractor.__init__(self, match)
self.gallery_id = text.parse_int(match.group(2))
def metadata(self, page):
"""Collect metadata for extractor-job"""
return {
"title" : text.unescape(text.extract(page, "<h1>", "</h1>"))[0],
"gallery_id": self.gallery_id,
@ -76,26 +62,17 @@ class TumblrgalleryTumblrblogExtractor(TumblrgalleryGalleryExtractor):
}
class TumblrgalleryPostExtractor(TumblrgalleryGalleryExtractor):
class TumblrgalleryPostExtractor(TumblrgalleryExtractor):
"""Extractor for Posts on tumblrgallery.xyz"""
subcategory = "post"
pattern = BASE_PATTERN + r"(/post/(\d+).html)"
test = (
"https://tumblrgallery.xyz/post/405674.html", {
"pattern": r"/post/405674.html"
r"405674",
}
)
filename_fmt = "{category}_{gallery_id}_{num:>03}_{id}.{extension}"
directory_fmt = ("{category}", "{gallery_id} {title}")
pattern = BASE_PATTERN + r"(/post/(\d+)\.html)"
test = ("https://tumblrgallery.xyz/post/405674.html",)
def __init__(self, match):
TumblrgalleryGalleryExtractor.__init__(self, match)
TumblrgalleryExtractor.__init__(self, match)
self.gallery_id = text.parse_int(match.group(2))
def metadata(self, page):
"""Collect metadata for extractor-job"""
return {
"title" : text.remove_html(
text.unescape(text.extract(page, "<title>", "</title>")[0])
@ -117,25 +94,19 @@ class TumblrgalleryPostExtractor(TumblrgalleryGalleryExtractor):
}
class TumblrgallerySearchExtractor(TumblrgalleryGalleryExtractor):
class TumblrgallerySearchExtractor(TumblrgalleryExtractor):
"""Extractor for Search result on tumblrgallery.xyz"""
subcategory = "search"
pattern = BASE_PATTERN + r"(/s\.php\?q=(.*))"
test = (
"https://tumblrgallery.xyz/s.php?q=everyday-life", {
"pattern": r"everyday-life",
}
)
filename_fmt = "{category}_{num:>03}_{gallery_id}_{id}_{title}.{extension}"
directory_fmt = ("{category}", "{search_term}")
pattern = BASE_PATTERN + r"(/s\.php\?q=([^&#]+))"
test = ("https://tumblrgallery.xyz/s.php?q=everyday-life",)
def __init__(self, match):
TumblrgalleryExtractor.__init__(self, match)
self.search_term = match.group(2)
TumblrgalleryGalleryExtractor.__init__(self, match)
def metadata(self, page):
"""Collect metadata for extractor-job"""
return {
"search_term": self.search_term,
}

Loading…
Cancel
Save