[blogger] inherit from BaseExtractor

- support www.micmicidol.club (#4759)
pull/4841/head
Mike Fährmann 10 months ago
parent 0fa85360a0
commit e17a48fe56
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -109,12 +109,6 @@ Consider all sites to be NSFW unless otherwise known.
<td>Collections, Galleries, User Profiles</td> <td>Collections, Galleries, User Profiles</td>
<td></td> <td></td>
</tr> </tr>
<tr>
<td>Blogger</td>
<td>https://www.blogger.com/</td>
<td>Blogs, Labels, Posts, Search Results</td>
<td></td>
</tr>
<tr> <tr>
<td>Bunkr</td> <td>Bunkr</td>
<td>https://bunkrr.su/</td> <td>https://bunkrr.su/</td>
@ -998,6 +992,22 @@ Consider all sites to be NSFW unless otherwise known.
<td></td> <td></td>
</tr> </tr>
<tr>
<td colspan="4"><strong>Blogger Instances</strong></td>
</tr>
<tr>
<td>Blogspot</td>
<td>https://www.blogger.com/</td>
<td>Blogs, Labels, Posts, Search Results</td>
<td></td>
</tr>
<tr>
<td>MIC MIC IDOL</td>
<td>https://www.micmicidol.club/</td>
<td>Blogs, Labels, Posts, Search Results</td>
<td></td>
</tr>
<tr> <tr>
<td colspan="4"><strong>Chevereto Instances</strong></td> <td colspan="4"><strong>Chevereto Instances</strong></td>
</tr> </tr>

@ -8,30 +8,22 @@
"""Extractors for Blogger blogs""" """Extractors for Blogger blogs"""
from .common import Extractor, Message from .common import BaseExtractor, Message
from .. import text, util from .. import text, util
import re import re
BASE_PATTERN = (
r"(?:blogger:(?:https?://)?([^/]+)|"
r"(?:https?://)?([\w-]+\.blogspot\.com))")
class BloggerExtractor(BaseExtractor):
class BloggerExtractor(Extractor):
"""Base class for blogger extractors""" """Base class for blogger extractors"""
category = "blogger" basecategory = "blogger"
directory_fmt = ("{category}", "{blog[name]}", directory_fmt = ("blogger", "{blog[name]}",
"{post[date]:%Y-%m-%d} {post[title]}") "{post[date]:%Y-%m-%d} {post[title]}")
filename_fmt = "{num:>03}.{extension}" filename_fmt = "{num:>03}.{extension}"
archive_fmt = "{post[id]}_{num}" archive_fmt = "{post[id]}_{num}"
root = "https://www.blogger.com"
def __init__(self, match):
Extractor.__init__(self, match)
self.blog = match.group(1) or match.group(2)
def _init(self): def _init(self):
self.api = BloggerAPI(self) self.api = BloggerAPI(self)
self.blog = self.root.rpartition("/")[2]
self.videos = self.config("videos", True) self.videos = self.config("videos", True)
def items(self): def items(self):
@ -92,6 +84,18 @@ class BloggerExtractor(Extractor):
"""Return additional metadata""" """Return additional metadata"""
# Sites with dedicated Blogger support, keyed by instance category.
# Key order is left untouched because the mapping is handed to update() as-is.
_INSTANCES = {
    "blogspot": {
        # no fixed root; hosts are matched as "<name>.blogspot.com"
        "root"   : None,
        "pattern": r"[\w-]+\.blogspot\.com",
    },
    "micmicidol": {
        "root"   : "https://www.micmicidol.club",
        # the leading "www." is optional
        "pattern": r"(?:www\.)?micmicidol\.club",
    },
}

# NOTE(review): update() is defined outside this view; presumably it
# registers the instances on the class and returns the shared URL
# prefix pattern -- confirm against BaseExtractor.
BASE_PATTERN = BloggerExtractor.update(_INSTANCES)
class BloggerPostExtractor(BloggerExtractor): class BloggerPostExtractor(BloggerExtractor):
"""Extractor for a single blog post""" """Extractor for a single blog post"""
subcategory = "post" subcategory = "post"
@ -100,7 +104,7 @@ class BloggerPostExtractor(BloggerExtractor):
def __init__(self, match): def __init__(self, match):
BloggerExtractor.__init__(self, match) BloggerExtractor.__init__(self, match)
self.path = match.group(3) self.path = match.group(match.lastindex)
def posts(self, blog): def posts(self, blog):
return (self.api.post_by_path(blog["id"], self.path),) return (self.api.post_by_path(blog["id"], self.path),)
@ -124,7 +128,7 @@ class BloggerSearchExtractor(BloggerExtractor):
def __init__(self, match): def __init__(self, match):
BloggerExtractor.__init__(self, match) BloggerExtractor.__init__(self, match)
self.query = text.unquote(match.group(3)) self.query = text.unquote(match.group(match.lastindex))
def posts(self, blog): def posts(self, blog):
return self.api.blog_search(blog["id"], self.query) return self.api.blog_search(blog["id"], self.query)
@ -141,7 +145,7 @@ class BloggerLabelExtractor(BloggerExtractor):
def __init__(self, match): def __init__(self, match):
BloggerExtractor.__init__(self, match) BloggerExtractor.__init__(self, match)
self.label = text.unquote(match.group(3)) self.label = text.unquote(match.group(match.lastindex))
def posts(self, blog): def posts(self, blog):
return self.api.blog_posts(blog["id"], self.label) return self.api.blog_posts(blog["id"], self.label)

@ -87,6 +87,7 @@ CATEGORY_MAP = {
"mangaread" : "MangaRead", "mangaread" : "MangaRead",
"mangasee" : "MangaSee", "mangasee" : "MangaSee",
"mastodon.social": "mastodon.social", "mastodon.social": "mastodon.social",
"micmicidol" : "MIC MIC IDOL",
"myhentaigallery": "My Hentai Gallery", "myhentaigallery": "My Hentai Gallery",
"myportfolio" : "Adobe Portfolio", "myportfolio" : "Adobe Portfolio",
"naverwebtoon" : "NaverWebtoon", "naverwebtoon" : "NaverWebtoon",
@ -292,6 +293,10 @@ BASE_MAP = {
"vichan" : "vichan Imageboards", "vichan" : "vichan Imageboards",
} }
# Fallback site URLs for instance categories that define no root of their
# own; consulted by build_extractor_list() before falling back to test data.
URL_MAP = {
    "blogspot": "https://www.blogger.com/",
}
_OAUTH = '<a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a>' _OAUTH = '<a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a>'
_COOKIES = '<a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a>' _COOKIES = '<a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a>'
_APIKEY_DB = \ _APIKEY_DB = \
@ -362,7 +367,7 @@ IGNORE_LIST = (
def domain(cls): def domain(cls):
"""Return the web-domain related to an extractor class""" """Return the domain name associated with an extractor class"""
try: try:
url = sys.modules[cls.__module__].__doc__.split()[-1] url = sys.modules[cls.__module__].__doc__.split()[-1]
if url.startswith("http"): if url.startswith("http"):
@ -429,10 +434,13 @@ def build_extractor_list():
for category, root in extr.instances: for category, root in extr.instances:
base[category].append(extr.subcategory) base[category].append(extr.subcategory)
if category not in domains: if category not in domains:
if not root and results: if not root:
# use domain from first matching test if category in URL_MAP:
test = results.category(category)[0] root = URL_MAP[category].rstrip("/")
root = test["#class"].from_url(test["#url"]).root elif results:
# use domain from first matching test
test = results.category(category)[0]
root = test["#class"].from_url(test["#url"]).root
domains[category] = root + "/" domains[category] = root + "/"
# sort subcategory lists # sort subcategory lists

@ -8,100 +8,30 @@ from gallery_dl.extractor import blogger
__tests__ = ( __tests__ = (
{
"#url" : "https://julianbphotography.blogspot.com/2010/12/moon-rise.html",
"#category": ("", "blogger", "post"),
"#class" : blogger.BloggerPostExtractor,
"#pattern" : "https://3.bp.blogspot.com/.*/s0/Icy-Moonrise-.*.jpg",
"#sha1_url": "9928429fb62f712eb4de80f53625eccecc614aae",
"blog": {
"date" : "dt:2010-11-21 18:19:42",
"description": "",
"id" : "5623928067739466034",
"kind" : "blogger#blog",
"locale" : dict,
"name" : "Julian Bunker Photography",
"pages" : int,
"posts" : int,
"published" : "2010-11-21T10:19:42-08:00",
"updated" : str,
"url" : "http://julianbphotography.blogspot.com/",
},
"post": {
"author" : "Julian Bunker",
"content" : str,
"date" : "dt:2010-12-26 01:08:00",
"etag" : str,
"id" : "6955139236418998998",
"kind" : "blogger#post",
"published": "2010-12-25T17:08:00-08:00",
"replies" : "0",
"title" : "Moon Rise",
"updated" : "2011-12-06T05:21:24-08:00",
"url" : r"re:.+/2010/12/moon-rise.html$",
},
"num" : int,
"url" : str,
},
{ {
"#url" : "blogger:http://www.julianbunker.com/2010/12/moon-rise.html", "#url" : "blogger:http://www.julianbunker.com/2010/12/moon-rise.html",
"#category": ("", "blogger", "post"), "#category": ("blogger", "www.julianbunker.com", "post"),
"#class" : blogger.BloggerPostExtractor,
},
{
"#url" : "http://cfnmscenesinmovies.blogspot.com/2011/11/cfnm-scene-jenna-fischer-in-office.html",
"#comment" : "video (#587)",
"#category": ("", "blogger", "post"),
"#class" : blogger.BloggerPostExtractor, "#class" : blogger.BloggerPostExtractor,
"#pattern" : r"https://.+\.googlevideo\.com/videoplayback",
},
{
"#url" : "https://randomthingsthroughmyletterbox.blogspot.com/2022/01/bitter-flowers-by-gunnar-staalesen-blog.html",
"#comment" : "new image domain (#2204)",
"#category": ("", "blogger", "post"),
"#class" : blogger.BloggerPostExtractor,
"#pattern" : "https://blogger.googleusercontent.com/img/a/.+=s0$",
"#count" : 8,
},
{
"#url" : "https://julianbphotography.blogspot.com/",
"#category": ("", "blogger", "blog"),
"#class" : blogger.BloggerBlogExtractor,
"#pattern" : r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
"#range" : "1-25",
"#count" : 25,
}, },
{ {
"#url" : "blogger:https://www.kefblog.com.ng/", "#url" : "blogger:https://www.kefblog.com.ng/",
"#category": ("", "blogger", "blog"), "#category": ("blogger", "www.kefblog.com.ng", "blog"),
"#class" : blogger.BloggerBlogExtractor, "#class" : blogger.BloggerBlogExtractor,
"#range" : "1-25", "#range" : "1-25",
"#count" : 25, "#count" : 25,
}, },
{ {
"#url" : "https://julianbphotography.blogspot.com/search?q=400mm", "#url" : "blogger:http://www.julianbunker.com/search?q=400mm",
"#category": ("", "blogger", "search"), "#category": ("blogger", "www.julianbunker.com", "search"),
"#class" : blogger.BloggerSearchExtractor, "#class" : blogger.BloggerSearchExtractor,
"#count" : "< 10",
"query": "400mm",
}, },
{ {
"#url" : "https://dmmagazine.blogspot.com/search/label/D%26D", "#url" : "blogger:http://www.julianbunker.com/search/label/D%26D",
"#category": ("", "blogger", "label"), "#category": ("blogger", "www.julianbunker.com", "label"),
"#class" : blogger.BloggerLabelExtractor, "#class" : blogger.BloggerLabelExtractor,
"#range" : "1-25",
"#count" : 25,
"label": "D&D",
}, },
) )

@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import blogger
# Expected results for Blogger extractors on *.blogspot.com hosts.
# Each entry describes one test: "#"-prefixed keys configure the run
# (URL, expected category triple, extractor class, URL/count checks);
# the remaining keys are expected metadata values or types.
__tests__ = (
    # Single post with one image; blog and post metadata are pinned.
    {
        "#url"     : "https://julianbphotography.blogspot.com/2010/12/moon-rise.html",
        "#category": ("blogger", "blogspot", "post"),
        "#class"   : blogger.BloggerPostExtractor,
        "#urls"    : "https://3.bp.blogspot.com/-zlJddJtJOUo/Tt4WooTPNtI/AAAAAAAABG8/dGT2cGp2E7Y/s0/Icy-Moonrise---For-Web.jpg",

        "blog": {
            "date"       : "dt:2010-11-21 18:19:42",
            "description": "",
            "id"         : "5623928067739466034",
            "kind"       : "blogger#blog",
            "locale"     : dict,
            "name"       : "Julian Bunker Photography",
            "pages"      : int,
            "posts"      : int,
            "published"  : "2010-11-21T10:19:42-08:00",
            "updated"    : str,
            "url"        : "http://julianbphotography.blogspot.com/",
        },
        "post": {
            "author"   : "Julian Bunker",
            "content"  : str,
            "date"     : "dt:2010-12-26 01:08:00",
            "etag"     : str,
            "id"       : "6955139236418998998",
            "kind"     : "blogger#post",
            "published": "2010-12-25T17:08:00-08:00",
            "replies"  : "0",
            "title"    : "Moon Rise",
            "updated"  : "2011-12-06T05:21:24-08:00",
            "url"      : "http://julianbphotography.blogspot.com/2010/12/moon-rise.html",
        },
        "extension": "jpg",
        "filename" : "Icy-Moonrise---For-Web",
        # Only one file is expected (see "#urls"), so the value is pinned.
        # A second `"num": int` entry used to follow; duplicate keys in a
        # dict literal keep the last value, which silently reduced this
        # check to a type check.
        "num"      : 1,
        "url"      : "https://3.bp.blogspot.com/-zlJddJtJOUo/Tt4WooTPNtI/AAAAAAAABG8/dGT2cGp2E7Y/s0/Icy-Moonrise---For-Web.jpg",
    },

    # Post containing a video.
    {
        "#url"     : "http://cfnmscenesinmovies.blogspot.com/2011/11/cfnm-scene-jenna-fischer-in-office.html",
        "#comment" : "video (#587)",
        "#category": ("blogger", "blogspot", "post"),
        "#class"   : blogger.BloggerPostExtractor,
        "#pattern" : r"https://.+\.googlevideo\.com/videoplayback",
    },

    # Post whose images are served from blogger.googleusercontent.com.
    {
        "#url"     : "https://randomthingsthroughmyletterbox.blogspot.com/2022/01/bitter-flowers-by-gunnar-staalesen-blog.html",
        "#comment" : "new image domain (#2204)",
        "#category": ("blogger", "blogspot", "post"),
        "#class"   : blogger.BloggerPostExtractor,
        "#pattern" : "https://blogger.googleusercontent.com/img/a/.+=s0$",
        "#count"   : 8,
    },

    # Whole blog, limited to the first 25 results.
    {
        "#url"     : "https://julianbphotography.blogspot.com/",
        "#category": ("blogger", "blogspot", "blog"),
        "#class"   : blogger.BloggerBlogExtractor,
        "#pattern" : r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
        "#range"   : "1-25",
        "#count"   : 25,
    },

    # Search results for a query string.
    {
        "#url"     : "https://julianbphotography.blogspot.com/search?q=400mm",
        "#category": ("blogger", "blogspot", "search"),
        "#class"   : blogger.BloggerSearchExtractor,
        "#count"   : "< 10",

        "query": "400mm",
    },

    # Label listing; the URL-encoded label "D%26D" is expected as "D&D".
    {
        "#url"     : "https://dmmagazine.blogspot.com/search/label/D%26D",
        "#category": ("blogger", "blogspot", "label"),
        "#class"   : blogger.BloggerLabelExtractor,
        "#range"   : "1-25",
        "#count"   : 25,

        "label": "D&D",
    },
)

@ -0,0 +1,84 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import blogger
# The cover image of the single-post test; it is expected both as the
# only extracted URL ("#urls") and as the file's "url" metadata value.
_COVER_URL = "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhgtpSSdrol9aKP_ztcc_mp9TUUS0U_t2DYJuGX3XCs6X5CkxIb-pM98QlxbkgJFvQj-0e6RbXNBf047qyMDZLcPJsm9dTqAn2XkTVfLhWRaxxVvIYnHYu0R0d7WsAUSFs0MDe4Sotpuqp5DQnjr45T17CXKbWtq9cR3op9dDQh3yiw2a6_HInIjLRm5io/s0/000-micmicidol.jpg"

# Expected results for Blogger extractors on www.micmicidol.club.
# "#"-prefixed keys configure each test run; all other keys are expected
# metadata values or types.
__tests__ = (
    # Single post with one image; blog and post metadata are pinned.
    {
        "#url"     : "https://www.micmicidol.club/2023/11/weekly-taishu-20231113-cover.html",
        "#category": ("blogger", "micmicidol", "post"),
        "#class"   : blogger.BloggerPostExtractor,
        "#urls"    : _COVER_URL,

        "blog": {
            "date"       : "dt:2023-09-18 19:48:53",
            "description": "",
            "id"         : "7192714164191173242",
            "kind"       : "blogger#blog",
            "locale"     : {
                "country" : "TW",
                "language": "zh",
                "variant" : "",
            },
            "name"       : "MIC MIC IDOL",
            "pages"      : int,
            "posts"      : int,
            "published"  : "2023-09-18T12:48:53-07:00",
            "updated"    : str,
            "url"        : "http://www.micmicidol.club/",
        },
        "post": {
            "author"   : "MIC MIC IDOL",
            "content"  : "&nbsp;",
            "date"     : "dt:2023-11-18 08:01:00",
            "etag"     : str,
            "id"       : "5395888649239375388",
            "kind"     : "blogger#post",
            "labels"   : [
                "- Cover",
                "Weekly Taishu",
                "Weekly Taishu Cover",
            ],
            "published": "2023-11-18T00:01:00-08:00",
            "replies"  : "0",
            "title"    : "Weekly Taishu 週刊大衆 2023.11.13 Cover",
            "updated"  : "2023-11-18T03:00:42-08:00",
            "url"      : "http://www.micmicidol.club/2023/11/weekly-taishu-20231113-cover.html",
        },
        "num": 1,
        "url": _COVER_URL,
    },

    # Whole blog, limited to the first 25 results.
    {
        "#url"     : "https://www.micmicidol.club/",
        "#category": ("blogger", "micmicidol", "blog"),
        "#class"   : blogger.BloggerBlogExtractor,
        "#range"   : "1-25",
        "#count"   : 25,
    },

    # Search results for a query string.
    {
        "#url"     : "https://www.micmicidol.club/search?q=cover",
        "#category": ("blogger", "micmicidol", "search"),
        "#class"   : blogger.BloggerSearchExtractor,
        "#range"   : "1-25",
        "#count"   : 25,

        "query": "cover",
    },

    # Label listing; the URL-encoded label is expected with plain spaces.
    {
        "#url"     : "https://www.micmicidol.club/search/label/Weekly%20Taishu%20Cover",
        "#category": ("blogger", "micmicidol", "label"),
        "#class"   : blogger.BloggerLabelExtractor,
        "#range"   : "1-25",
        "#count"   : 25,

        "label": "Weekly Taishu Cover",
    },
)
Loading…
Cancel
Save