diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index a15566df..fc03ef22 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -109,12 +109,6 @@ Consider all sites to be NSFW unless otherwise known.
Collections, Galleries, User Profiles |
|
-
- Blogger |
- https://www.blogger.com/ |
- Blogs, Labels, Posts, Search Results |
- |
-
Bunkr |
https://bunkrr.su/ |
@@ -998,6 +992,22 @@ Consider all sites to be NSFW unless otherwise known.
|
+
+ Blogger Instances |
+
+
+ Blogspot |
+ https://www.blogger.com/ |
+ Blogs, Labels, Posts, Search Results |
+ |
+
+
+ MIC MIC IDOL |
+ https://www.micmicidol.club/ |
+ Blogs, Labels, Posts, Search Results |
+ |
+
+
Chevereto Instances |
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index d75c3498..58ae59db 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -8,30 +8,22 @@
"""Extractors for Blogger blogs"""
-from .common import Extractor, Message
+from .common import BaseExtractor, Message
from .. import text, util
import re
-BASE_PATTERN = (
- r"(?:blogger:(?:https?://)?([^/]+)|"
- r"(?:https?://)?([\w-]+\.blogspot\.com))")
-
-class BloggerExtractor(Extractor):
+class BloggerExtractor(BaseExtractor):
"""Base class for blogger extractors"""
- category = "blogger"
- directory_fmt = ("{category}", "{blog[name]}",
+ basecategory = "blogger"
+ directory_fmt = ("blogger", "{blog[name]}",
"{post[date]:%Y-%m-%d} {post[title]}")
filename_fmt = "{num:>03}.{extension}"
archive_fmt = "{post[id]}_{num}"
- root = "https://www.blogger.com"
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.blog = match.group(1) or match.group(2)
def _init(self):
self.api = BloggerAPI(self)
+ self.blog = self.root.rpartition("/")[2]
self.videos = self.config("videos", True)
def items(self):
@@ -92,6 +84,18 @@ class BloggerExtractor(Extractor):
"""Return additional metadata"""
+BASE_PATTERN = BloggerExtractor.update({
+ "blogspot": {
+ "root": None,
+ "pattern": r"[\w-]+\.blogspot\.com",
+ },
+ "micmicidol": {
+ "root": "https://www.micmicidol.club",
+ "pattern": r"(?:www\.)?micmicidol\.club",
+ },
+})
+
+
class BloggerPostExtractor(BloggerExtractor):
"""Extractor for a single blog post"""
subcategory = "post"
@@ -100,7 +104,7 @@ class BloggerPostExtractor(BloggerExtractor):
def __init__(self, match):
BloggerExtractor.__init__(self, match)
- self.path = match.group(3)
+ self.path = match.group(match.lastindex)
def posts(self, blog):
return (self.api.post_by_path(blog["id"], self.path),)
@@ -124,7 +128,7 @@ class BloggerSearchExtractor(BloggerExtractor):
def __init__(self, match):
BloggerExtractor.__init__(self, match)
- self.query = text.unquote(match.group(3))
+ self.query = text.unquote(match.group(match.lastindex))
def posts(self, blog):
return self.api.blog_search(blog["id"], self.query)
@@ -141,7 +145,7 @@ class BloggerLabelExtractor(BloggerExtractor):
def __init__(self, match):
BloggerExtractor.__init__(self, match)
- self.label = text.unquote(match.group(3))
+ self.label = text.unquote(match.group(match.lastindex))
def posts(self, blog):
return self.api.blog_posts(blog["id"], self.label)
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 470b629d..cd063f04 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -87,6 +87,7 @@ CATEGORY_MAP = {
"mangaread" : "MangaRead",
"mangasee" : "MangaSee",
"mastodon.social": "mastodon.social",
+ "micmicidol" : "MIC MIC IDOL",
"myhentaigallery": "My Hentai Gallery",
"myportfolio" : "Adobe Portfolio",
"naverwebtoon" : "NaverWebtoon",
@@ -292,6 +293,10 @@ BASE_MAP = {
"vichan" : "vichan Imageboards",
}
+URL_MAP = {
+ "blogspot": "https://www.blogger.com/",
+}
+
_OAUTH = 'OAuth'
_COOKIES = 'Cookies'
_APIKEY_DB = \
@@ -362,7 +367,7 @@ IGNORE_LIST = (
def domain(cls):
- """Return the web-domain related to an extractor class"""
+ """Return the domain name associated with an extractor class"""
try:
url = sys.modules[cls.__module__].__doc__.split()[-1]
if url.startswith("http"):
@@ -429,10 +434,13 @@ def build_extractor_list():
for category, root in extr.instances:
base[category].append(extr.subcategory)
if category not in domains:
- if not root and results:
- # use domain from first matching test
- test = results.category(category)[0]
- root = test["#class"].from_url(test["#url"]).root
+ if not root:
+ if category in URL_MAP:
+ root = URL_MAP[category].rstrip("/")
+ elif results:
+ # use domain from first matching test
+ test = results.category(category)[0]
+ root = test["#class"].from_url(test["#url"]).root
domains[category] = root + "/"
# sort subcategory lists
diff --git a/test/results/blogger.py b/test/results/blogger.py
index 214d450d..aeb82f76 100644
--- a/test/results/blogger.py
+++ b/test/results/blogger.py
@@ -8,100 +8,30 @@ from gallery_dl.extractor import blogger
__tests__ = (
-{
- "#url" : "https://julianbphotography.blogspot.com/2010/12/moon-rise.html",
- "#category": ("", "blogger", "post"),
- "#class" : blogger.BloggerPostExtractor,
- "#pattern" : "https://3.bp.blogspot.com/.*/s0/Icy-Moonrise-.*.jpg",
- "#sha1_url": "9928429fb62f712eb4de80f53625eccecc614aae",
-
- "blog": {
- "date" : "dt:2010-11-21 18:19:42",
- "description": "",
- "id" : "5623928067739466034",
- "kind" : "blogger#blog",
- "locale" : dict,
- "name" : "Julian Bunker Photography",
- "pages" : int,
- "posts" : int,
- "published" : "2010-11-21T10:19:42-08:00",
- "updated" : str,
- "url" : "http://julianbphotography.blogspot.com/",
- },
- "post": {
- "author" : "Julian Bunker",
- "content" : str,
- "date" : "dt:2010-12-26 01:08:00",
- "etag" : str,
- "id" : "6955139236418998998",
- "kind" : "blogger#post",
- "published": "2010-12-25T17:08:00-08:00",
- "replies" : "0",
- "title" : "Moon Rise",
- "updated" : "2011-12-06T05:21:24-08:00",
- "url" : r"re:.+/2010/12/moon-rise.html$",
- },
- "num" : int,
- "url" : str,
-},
-
{
"#url" : "blogger:http://www.julianbunker.com/2010/12/moon-rise.html",
- "#category": ("", "blogger", "post"),
- "#class" : blogger.BloggerPostExtractor,
-},
-
-{
- "#url" : "http://cfnmscenesinmovies.blogspot.com/2011/11/cfnm-scene-jenna-fischer-in-office.html",
- "#comment" : "video (#587)",
- "#category": ("", "blogger", "post"),
+ "#category": ("blogger", "www.julianbunker.com", "post"),
"#class" : blogger.BloggerPostExtractor,
- "#pattern" : r"https://.+\.googlevideo\.com/videoplayback",
-},
-
-{
- "#url" : "https://randomthingsthroughmyletterbox.blogspot.com/2022/01/bitter-flowers-by-gunnar-staalesen-blog.html",
- "#comment" : "new image domain (#2204)",
- "#category": ("", "blogger", "post"),
- "#class" : blogger.BloggerPostExtractor,
- "#pattern" : "https://blogger.googleusercontent.com/img/a/.+=s0$",
- "#count" : 8,
-},
-
-{
- "#url" : "https://julianbphotography.blogspot.com/",
- "#category": ("", "blogger", "blog"),
- "#class" : blogger.BloggerBlogExtractor,
- "#pattern" : r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
- "#range" : "1-25",
- "#count" : 25,
},
{
"#url" : "blogger:https://www.kefblog.com.ng/",
- "#category": ("", "blogger", "blog"),
+ "#category": ("blogger", "www.kefblog.com.ng", "blog"),
"#class" : blogger.BloggerBlogExtractor,
"#range" : "1-25",
"#count" : 25,
},
{
- "#url" : "https://julianbphotography.blogspot.com/search?q=400mm",
- "#category": ("", "blogger", "search"),
+ "#url" : "blogger:http://www.julianbunker.com/search?q=400mm",
+ "#category": ("blogger", "www.julianbunker.com", "search"),
"#class" : blogger.BloggerSearchExtractor,
- "#count" : "< 10",
-
- "query": "400mm",
},
{
- "#url" : "https://dmmagazine.blogspot.com/search/label/D%26D",
- "#category": ("", "blogger", "label"),
+ "#url" : "blogger:http://www.julianbunker.com/search/label/D%26D",
+ "#category": ("blogger", "www.julianbunker.com", "label"),
"#class" : blogger.BloggerLabelExtractor,
- "#range" : "1-25",
- "#count" : 25,
-
- "label": "D&D",
},
)
diff --git a/test/results/blogspot.py b/test/results/blogspot.py
new file mode 100644
index 00000000..83f4e5f7
--- /dev/null
+++ b/test/results/blogspot.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import blogger
+
+
+__tests__ = (
+{
+ "#url" : "https://julianbphotography.blogspot.com/2010/12/moon-rise.html",
+ "#category": ("blogger", "blogspot", "post"),
+ "#class" : blogger.BloggerPostExtractor,
+ "#urls" : "https://3.bp.blogspot.com/-zlJddJtJOUo/Tt4WooTPNtI/AAAAAAAABG8/dGT2cGp2E7Y/s0/Icy-Moonrise---For-Web.jpg",
+
+ "blog": {
+ "date" : "dt:2010-11-21 18:19:42",
+ "description": "",
+ "id" : "5623928067739466034",
+ "kind" : "blogger#blog",
+ "locale" : dict,
+ "name" : "Julian Bunker Photography",
+ "pages" : int,
+ "posts" : int,
+ "published" : "2010-11-21T10:19:42-08:00",
+ "updated" : str,
+ "url" : "http://julianbphotography.blogspot.com/",
+ },
+ "post": {
+ "author" : "Julian Bunker",
+ "content" : str,
+ "date" : "dt:2010-12-26 01:08:00",
+ "etag" : str,
+ "id" : "6955139236418998998",
+ "kind" : "blogger#post",
+ "published": "2010-12-25T17:08:00-08:00",
+ "replies" : "0",
+ "title" : "Moon Rise",
+ "updated" : "2011-12-06T05:21:24-08:00",
+ "url" : "http://julianbphotography.blogspot.com/2010/12/moon-rise.html",
+ },
+ "extension": "jpg",
+ "filename" : "Icy-Moonrise---For-Web",
+ # "#urls" expects exactly one file for this post, so its number is 1
+ "num" : 1,
+ "url" : "https://3.bp.blogspot.com/-zlJddJtJOUo/Tt4WooTPNtI/AAAAAAAABG8/dGT2cGp2E7Y/s0/Icy-Moonrise---For-Web.jpg",
+},
+
+{
+ "#url" : "http://cfnmscenesinmovies.blogspot.com/2011/11/cfnm-scene-jenna-fischer-in-office.html",
+ "#comment" : "video (#587)",
+ "#category": ("blogger", "blogspot", "post"),
+ "#class" : blogger.BloggerPostExtractor,
+ "#pattern" : r"https://.+\.googlevideo\.com/videoplayback",
+},
+
+{
+ "#url" : "https://randomthingsthroughmyletterbox.blogspot.com/2022/01/bitter-flowers-by-gunnar-staalesen-blog.html",
+ "#comment" : "new image domain (#2204)",
+ "#category": ("blogger", "blogspot", "post"),
+ "#class" : blogger.BloggerPostExtractor,
+ "#pattern" : "https://blogger.googleusercontent.com/img/a/.+=s0$",
+ "#count" : 8,
+},
+
+{
+ "#url" : "https://julianbphotography.blogspot.com/",
+ "#category": ("blogger", "blogspot", "blog"),
+ "#class" : blogger.BloggerBlogExtractor,
+ "#pattern" : r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
+ "#range" : "1-25",
+ "#count" : 25,
+},
+
+{
+ "#url" : "https://julianbphotography.blogspot.com/search?q=400mm",
+ "#category": ("blogger", "blogspot", "search"),
+ "#class" : blogger.BloggerSearchExtractor,
+ "#count" : "< 10",
+
+ "query": "400mm",
+},
+
+{
+ "#url" : "https://dmmagazine.blogspot.com/search/label/D%26D",
+ "#category": ("blogger", "blogspot", "label"),
+ "#class" : blogger.BloggerLabelExtractor,
+ "#range" : "1-25",
+ "#count" : 25,
+
+ "label": "D&D",
+},
+
+)
diff --git a/test/results/micmicidol.py b/test/results/micmicidol.py
new file mode 100644
index 00000000..f66bbd75
--- /dev/null
+++ b/test/results/micmicidol.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import blogger
+
+
+__tests__ = (
+{
+ "#url" : "https://www.micmicidol.club/2023/11/weekly-taishu-20231113-cover.html",
+ "#category": ("blogger", "micmicidol", "post"),
+ "#class" : blogger.BloggerPostExtractor,
+ "#urls" : "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhgtpSSdrol9aKP_ztcc_mp9TUUS0U_t2DYJuGX3XCs6X5CkxIb-pM98QlxbkgJFvQj-0e6RbXNBf047qyMDZLcPJsm9dTqAn2XkTVfLhWRaxxVvIYnHYu0R0d7WsAUSFs0MDe4Sotpuqp5DQnjr45T17CXKbWtq9cR3op9dDQh3yiw2a6_HInIjLRm5io/s0/000-micmicidol.jpg",
+
+ "blog": {
+ "date" : "dt:2023-09-18 19:48:53",
+ "description": "",
+ "id" : "7192714164191173242",
+ "kind" : "blogger#blog",
+ "locale" : {
+ "country" : "TW",
+ "language": "zh",
+ "variant" : "",
+ },
+ "name" : "MIC MIC IDOL",
+ "pages" : int,
+ "posts" : int,
+ "published" : "2023-09-18T12:48:53-07:00",
+ "updated" : str,
+ "url" : "http://www.micmicidol.club/"
+ },
+ "post": {
+ "author" : "MIC MIC IDOL",
+ "content" : " ",
+ "date" : "dt:2023-11-18 08:01:00",
+ "etag" : str,
+ "id" : "5395888649239375388",
+ "kind" : "blogger#post",
+ "labels" : [
+ "- Cover",
+ "Weekly Taishu",
+ "Weekly Taishu Cover",
+ ],
+ "published": "2023-11-18T00:01:00-08:00",
+ "replies" : "0",
+ "title" : "Weekly Taishu 週刊大衆 2023.11.13 Cover",
+ "updated" : "2023-11-18T03:00:42-08:00",
+ "url" : "http://www.micmicidol.club/2023/11/weekly-taishu-20231113-cover.html"
+ },
+ "num" : 1,
+ "url" : "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhgtpSSdrol9aKP_ztcc_mp9TUUS0U_t2DYJuGX3XCs6X5CkxIb-pM98QlxbkgJFvQj-0e6RbXNBf047qyMDZLcPJsm9dTqAn2XkTVfLhWRaxxVvIYnHYu0R0d7WsAUSFs0MDe4Sotpuqp5DQnjr45T17CXKbWtq9cR3op9dDQh3yiw2a6_HInIjLRm5io/s0/000-micmicidol.jpg",
+},
+
+{
+ "#url" : "https://www.micmicidol.club/",
+ "#category": ("blogger", "micmicidol", "blog"),
+ "#class" : blogger.BloggerBlogExtractor,
+ "#range" : "1-25",
+ "#count" : 25,
+},
+
+{
+ "#url" : "https://www.micmicidol.club/search?q=cover",
+ "#category": ("blogger", "micmicidol", "search"),
+ "#class" : blogger.BloggerSearchExtractor,
+ "#range" : "1-25",
+ "#count" : 25,
+
+ "query" : "cover",
+},
+
+{
+ "#url" : "https://www.micmicidol.club/search/label/Weekly%20Taishu%20Cover",
+ "#category": ("blogger", "micmicidol", "label"),
+ "#class" : blogger.BloggerLabelExtractor,
+ "#range" : "1-25",
+ "#count" : 25,
+
+ "label" : "Weekly Taishu Cover",
+},
+
+)