diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a15566df..fc03ef22 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -109,12 +109,6 @@ Consider all sites to be NSFW unless otherwise known. Collections, Galleries, User Profiles - - Blogger - https://www.blogger.com/ - Blogs, Labels, Posts, Search Results - - Bunkr https://bunkrr.su/ @@ -998,6 +992,22 @@ Consider all sites to be NSFW unless otherwise known. + + Blogger Instances + + + Blogspot + https://www.blogger.com/ + Blogs, Labels, Posts, Search Results + + + + MIC MIC IDOL + https://www.micmicidol.club/ + Blogs, Labels, Posts, Search Results + + + Chevereto Instances diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py index d75c3498..58ae59db 100644 --- a/gallery_dl/extractor/blogger.py +++ b/gallery_dl/extractor/blogger.py @@ -8,30 +8,22 @@ """Extractors for Blogger blogs""" -from .common import Extractor, Message +from .common import BaseExtractor, Message from .. import text, util import re -BASE_PATTERN = ( - r"(?:blogger:(?:https?://)?([^/]+)|" - r"(?:https?://)?([\w-]+\.blogspot\.com))") - -class BloggerExtractor(Extractor): +class BloggerExtractor(BaseExtractor): """Base class for blogger extractors""" - category = "blogger" - directory_fmt = ("{category}", "{blog[name]}", + basecategory = "blogger" + directory_fmt = ("blogger", "{blog[name]}", "{post[date]:%Y-%m-%d} {post[title]}") filename_fmt = "{num:>03}.{extension}" archive_fmt = "{post[id]}_{num}" - root = "https://www.blogger.com" - - def __init__(self, match): - Extractor.__init__(self, match) - self.blog = match.group(1) or match.group(2) def _init(self): self.api = BloggerAPI(self) + self.blog = self.root.rpartition("/")[2] self.videos = self.config("videos", True) def items(self): @@ -92,6 +84,18 @@ class BloggerExtractor(Extractor): """Return additional metadata""" +BASE_PATTERN = BloggerExtractor.update({ + "blogspot": { + "root": None, + "pattern": r"[\w-]+\.blogspot\.com", + }, + "micmicidol": { + "root": "https://www.micmicidol.club", + "pattern": r"(?:www\.)?micmicidol\.club", + }, +}) + + class BloggerPostExtractor(BloggerExtractor): """Extractor for a single blog post""" subcategory = "post" @@ -100,7 +104,7 @@ class BloggerPostExtractor(BloggerExtractor): def __init__(self, match): BloggerExtractor.__init__(self, match) - self.path = match.group(3) + self.path = match.group(match.lastindex) def posts(self, blog): return (self.api.post_by_path(blog["id"], self.path),) @@ -124,7 +128,7 @@ class BloggerSearchExtractor(BloggerExtractor): def __init__(self, match): BloggerExtractor.__init__(self, match) - self.query = text.unquote(match.group(3)) + self.query = text.unquote(match.group(match.lastindex)) def posts(self, blog): return self.api.blog_search(blog["id"], self.query) @@ -141,7 +145,7 @@ class BloggerLabelExtractor(BloggerExtractor): def __init__(self, match): BloggerExtractor.__init__(self, match) - self.label = text.unquote(match.group(3)) + self.label = text.unquote(match.group(match.lastindex)) def posts(self, blog): return self.api.blog_posts(blog["id"], self.label) diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 470b629d..cd063f04 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -87,6 +87,7 @@ CATEGORY_MAP = { "mangaread" : "MangaRead", "mangasee" : "MangaSee", "mastodon.social": "mastodon.social", + "micmicidol" : "MIC MIC IDOL", "myhentaigallery": "My Hentai Gallery", "myportfolio" : "Adobe Portfolio", "naverwebtoon" : "NaverWebtoon", @@ -292,6 +293,10 @@ BASE_MAP = { "vichan" : "vichan Imageboards", } +URL_MAP = { + "blogspot": "https://www.blogger.com/", +} + _OAUTH = 'OAuth' _COOKIES = 'Cookies' _APIKEY_DB = \ @@ -362,7 +367,7 @@ IGNORE_LIST = ( def domain(cls): - """Return the web-domain related to an extractor class""" + """Return the domain name associated with an extractor class""" try: url = sys.modules[cls.__module__].__doc__.split()[-1] if url.startswith("http"): @@ -429,10 +434,13 @@ def build_extractor_list(): for category, root in extr.instances: base[category].append(extr.subcategory) if category not in domains: - if not root and results: - # use domain from first matching test - test = results.category(category)[0] - root = test["#class"].from_url(test["#url"]).root + if not root: + if category in URL_MAP: + root = URL_MAP[category].rstrip("/") + elif results: + # use domain from first matching test + test = results.category(category)[0] + root = test["#class"].from_url(test["#url"]).root domains[category] = root + "/" # sort subcategory lists diff --git a/test/results/blogger.py b/test/results/blogger.py index 214d450d..aeb82f76 100644 --- a/test/results/blogger.py +++ b/test/results/blogger.py @@ -8,100 +8,30 @@ from gallery_dl.extractor import blogger __tests__ = ( -{ - "#url" : "https://julianbphotography.blogspot.com/2010/12/moon-rise.html", - "#category": ("", "blogger", "post"), - "#class" : blogger.BloggerPostExtractor, - "#pattern" : "https://3.bp.blogspot.com/.*/s0/Icy-Moonrise-.*.jpg", - "#sha1_url": "9928429fb62f712eb4de80f53625eccecc614aae", - - "blog": { - "date" : "dt:2010-11-21 18:19:42", - "description": "", - "id" : "5623928067739466034", - "kind" : "blogger#blog", - "locale" : dict, - "name" : "Julian Bunker Photography", - "pages" : int, - "posts" : int, - "published" : "2010-11-21T10:19:42-08:00", - "updated" : str, - "url" : "http://julianbphotography.blogspot.com/", - }, - "post": { - "author" : "Julian Bunker", - "content" : str, - "date" : "dt:2010-12-26 01:08:00", - "etag" : str, - "id" : "6955139236418998998", - "kind" : "blogger#post", - "published": "2010-12-25T17:08:00-08:00", - "replies" : "0", - "title" : "Moon Rise", - "updated" : "2011-12-06T05:21:24-08:00", - "url" : r"re:.+/2010/12/moon-rise.html$", - }, - "num" : int, - "url" : str, -}, - { "#url" : "blogger:http://www.julianbunker.com/2010/12/moon-rise.html", - "#category": ("", "blogger", "post"), - "#class" : blogger.BloggerPostExtractor, -}, - -{ - "#url" : "http://cfnmscenesinmovies.blogspot.com/2011/11/cfnm-scene-jenna-fischer-in-office.html", - "#comment" : "video (#587)", - "#category": ("", "blogger", "post"), + "#category": ("blogger", "www.julianbunker.com", "post"), "#class" : blogger.BloggerPostExtractor, - "#pattern" : r"https://.+\.googlevideo\.com/videoplayback", -}, - -{ - "#url" : "https://randomthingsthroughmyletterbox.blogspot.com/2022/01/bitter-flowers-by-gunnar-staalesen-blog.html", - "#comment" : "new image domain (#2204)", - "#category": ("", "blogger", "post"), - "#class" : blogger.BloggerPostExtractor, - "#pattern" : "https://blogger.googleusercontent.com/img/a/.+=s0$", - "#count" : 8, -}, - -{ - "#url" : "https://julianbphotography.blogspot.com/", - "#category": ("", "blogger", "blog"), - "#class" : blogger.BloggerBlogExtractor, - "#pattern" : r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg", - "#range" : "1-25", - "#count" : 25, }, { "#url" : "blogger:https://www.kefblog.com.ng/", - "#category": ("", "blogger", "blog"), + "#category": ("blogger", "www.kefblog.com.ng", "blog"), "#class" : blogger.BloggerBlogExtractor, "#range" : "1-25", "#count" : 25, }, { - "#url" : "https://julianbphotography.blogspot.com/search?q=400mm", - "#category": ("", "blogger", "search"), + "#url" : "blogger:http://www.julianbunker.com/search?q=400mm", + "#category": ("blogger", "1www.julianbunker.com", "search"), "#class" : blogger.BloggerSearchExtractor, - "#count" : "< 10", - - "query": "400mm", }, { - "#url" : "https://dmmagazine.blogspot.com/search/label/D%26D", - "#category": ("", "blogger", "label"), + "#url" : "blogger:http://www.julianbunker.com/search/label/D%26D", + "#category": ("blogger", "www.julianbunker.com", "label"), "#class" : blogger.BloggerLabelExtractor, - "#range" : "1-25", - "#count" : 25, - - "label": "D&D", }, ) diff --git a/test/results/blogspot.py b/test/results/blogspot.py new file mode 100644 index 00000000..83f4e5f7 --- /dev/null +++ b/test/results/blogspot.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import blogger + + +__tests__ = ( +{ + "#url" : "https://julianbphotography.blogspot.com/2010/12/moon-rise.html", + "#category": ("blogger", "blogspot", "post"), + "#class" : blogger.BloggerPostExtractor, + "#urls" : "https://3.bp.blogspot.com/-zlJddJtJOUo/Tt4WooTPNtI/AAAAAAAABG8/dGT2cGp2E7Y/s0/Icy-Moonrise---For-Web.jpg", + + "blog": { + "date" : "dt:2010-11-21 18:19:42", + "description": "", + "id" : "5623928067739466034", + "kind" : "blogger#blog", + "locale" : dict, + "name" : "Julian Bunker Photography", + "pages" : int, + "posts" : int, + "published" : "2010-11-21T10:19:42-08:00", + "updated" : str, + "url" : "http://julianbphotography.blogspot.com/", + }, + "post": { + "author" : "Julian Bunker", + "content" : str, + "date" : "dt:2010-12-26 01:08:00", + "etag" : str, + "id" : "6955139236418998998", + "kind" : "blogger#post", + "published": "2010-12-25T17:08:00-08:00", + "replies" : "0", + "title" : "Moon Rise", + "updated" : "2011-12-06T05:21:24-08:00", + "url" : "http://julianbphotography.blogspot.com/2010/12/moon-rise.html", + }, + "extension": "jpg", + "filename" : "Icy-Moonrise---For-Web", + "num" : 1, + "num" : int, + "url" : "https://3.bp.blogspot.com/-zlJddJtJOUo/Tt4WooTPNtI/AAAAAAAABG8/dGT2cGp2E7Y/s0/Icy-Moonrise---For-Web.jpg", +}, + +{ + "#url" : "http://cfnmscenesinmovies.blogspot.com/2011/11/cfnm-scene-jenna-fischer-in-office.html", + "#comment" : "video (#587)", + "#category": ("blogger", "blogspot", "post"), + "#class" : blogger.BloggerPostExtractor, + "#pattern" : r"https://.+\.googlevideo\.com/videoplayback", +}, + +{ + "#url" : "https://randomthingsthroughmyletterbox.blogspot.com/2022/01/bitter-flowers-by-gunnar-staalesen-blog.html", + "#comment" : "new image domain (#2204)", + "#category": ("blogger", "blogspot", "post"), + "#class" : blogger.BloggerPostExtractor, + "#pattern" : "https://blogger.googleusercontent.com/img/a/.+=s0$", + "#count" : 8, +}, + +{ + "#url" : "https://julianbphotography.blogspot.com/", + "#category": ("blogger", "blogspot", "blog"), + "#class" : blogger.BloggerBlogExtractor, + "#pattern" : r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg", + "#range" : "1-25", + "#count" : 25, +}, + +{ + "#url" : "https://julianbphotography.blogspot.com/search?q=400mm", + "#category": ("blogger", "blogspot", "search"), + "#class" : blogger.BloggerSearchExtractor, + "#count" : "< 10", + + "query": "400mm", +}, + +{ + "#url" : "https://dmmagazine.blogspot.com/search/label/D%26D", + "#category": ("blogger", "blogspot", "label"), + "#class" : blogger.BloggerLabelExtractor, + "#range" : "1-25", + "#count" : 25, + + "label": "D&D", +}, + +) diff --git a/test/results/micmicidol.py b/test/results/micmicidol.py new file mode 100644 index 00000000..f66bbd75 --- /dev/null +++ b/test/results/micmicidol.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import blogger + + +__tests__ = ( +{ + "#url" : "https://www.micmicidol.club/2023/11/weekly-taishu-20231113-cover.html", + "#category": ("blogger", "micmicidol", "post"), + "#class" : blogger.BloggerPostExtractor, + "#urls" : "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhgtpSSdrol9aKP_ztcc_mp9TUUS0U_t2DYJuGX3XCs6X5CkxIb-pM98QlxbkgJFvQj-0e6RbXNBf047qyMDZLcPJsm9dTqAn2XkTVfLhWRaxxVvIYnHYu0R0d7WsAUSFs0MDe4Sotpuqp5DQnjr45T17CXKbWtq9cR3op9dDQh3yiw2a6_HInIjLRm5io/s0/000-micmicidol.jpg", + + "blog": { + "date" : "dt:2023-09-18 19:48:53", + "description": "", + "id" : "7192714164191173242", + "kind" : "blogger#blog", + "locale" : { + "country" : "TW", + "language": "zh", + "variant" : "", + }, + "name" : "MIC MIC IDOL", + "pages" : int, + "posts" : int, + "published" : "2023-09-18T12:48:53-07:00", + "updated" : str, + "url" : "http://www.micmicidol.club/" + }, + "post": { + "author" : "MIC MIC IDOL", + "content" : " ", + "date" : "dt:2023-11-18 08:01:00", + "etag" : str, + "id" : "5395888649239375388", + "kind" : "blogger#post", + "labels" : [ + "- Cover", + "Weekly Taishu", + "Weekly Taishu Cover", + ], + "published": "2023-11-18T00:01:00-08:00", + "replies" : "0", + "title" : "Weekly Taishu 週刊大衆 2023.11.13 Cover", + "updated" : "2023-11-18T03:00:42-08:00", + "url" : "http://www.micmicidol.club/2023/11/weekly-taishu-20231113-cover.html" + }, + "num" : 1, + "url" : "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhgtpSSdrol9aKP_ztcc_mp9TUUS0U_t2DYJuGX3XCs6X5CkxIb-pM98QlxbkgJFvQj-0e6RbXNBf047qyMDZLcPJsm9dTqAn2XkTVfLhWRaxxVvIYnHYu0R0d7WsAUSFs0MDe4Sotpuqp5DQnjr45T17CXKbWtq9cR3op9dDQh3yiw2a6_HInIjLRm5io/s0/000-micmicidol.jpg", +}, + +{ + "#url" : "https://www.micmicidol.club/", + "#category": ("blogger", "micmicidol", "blog"), + "#class" : blogger.BloggerBlogExtractor, + "#range" : "1-25", + "#count" : 25, +}, + +{ + "#url" : "https://www.micmicidol.club/search?q=cover", + "#category": ("blogger", "micmicidol", "search"), + "#class" : blogger.BloggerSearchExtractor, + "#range" : "1-25", + "#count" : 25, + + "query" : "cover", +}, + +{ + "#url" : "https://www.micmicidol.club/search/label/Weekly%20Taishu%20Cover", + "#category": ("blogger", "micmicidol", "label"), + "#class" : blogger.BloggerLabelExtractor, + "#range" : "1-25", + "#count" : 25, + + "label" : "Weekly Taishu Cover", +}, + +)