[blogger] inherit from BaseExtractor

- support www.micmicidol.club (#4759)
pull/4841/head
Mike Fährmann 10 months ago
parent 0fa85360a0
commit e17a48fe56
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -109,12 +109,6 @@ Consider all sites to be NSFW unless otherwise known.
<td>Collections, Galleries, User Profiles</td>
<td></td>
</tr>
<tr>
<td>Blogger</td>
<td>https://www.blogger.com/</td>
<td>Blogs, Labels, Posts, Search Results</td>
<td></td>
</tr>
<tr>
<td>Bunkr</td>
<td>https://bunkrr.su/</td>
@ -998,6 +992,22 @@ Consider all sites to be NSFW unless otherwise known.
<td></td>
</tr>
<tr>
<td colspan="4"><strong>Blogger Instances</strong></td>
</tr>
<tr>
<td>Blogspot</td>
<td>https://www.blogger.com/</td>
<td>Blogs, Labels, Posts, Search Results</td>
<td></td>
</tr>
<tr>
<td>MIC MIC IDOL</td>
<td>https://www.micmicidol.club/</td>
<td>Blogs, Labels, Posts, Search Results</td>
<td></td>
</tr>
<tr>
<td colspan="4"><strong>Chevereto Instances</strong></td>
</tr>

@ -8,30 +8,22 @@
"""Extractors for Blogger blogs"""
from .common import Extractor, Message
from .common import BaseExtractor, Message
from .. import text, util
import re
BASE_PATTERN = (
r"(?:blogger:(?:https?://)?([^/]+)|"
r"(?:https?://)?([\w-]+\.blogspot\.com))")
class BloggerExtractor(Extractor):
class BloggerExtractor(BaseExtractor):
"""Base class for blogger extractors"""
category = "blogger"
directory_fmt = ("{category}", "{blog[name]}",
basecategory = "blogger"
directory_fmt = ("blogger", "{blog[name]}",
"{post[date]:%Y-%m-%d} {post[title]}")
filename_fmt = "{num:>03}.{extension}"
archive_fmt = "{post[id]}_{num}"
root = "https://www.blogger.com"
def __init__(self, match):
Extractor.__init__(self, match)
self.blog = match.group(1) or match.group(2)
def _init(self):
self.api = BloggerAPI(self)
self.blog = self.root.rpartition("/")[2]
self.videos = self.config("videos", True)
def items(self):
@ -92,6 +84,18 @@ class BloggerExtractor(Extractor):
"""Return additional metadata"""
BASE_PATTERN = BloggerExtractor.update({
"blogspot": {
"root": None,
"pattern": r"[\w-]+\.blogspot\.com",
},
"micmicidol": {
"root": "https://www.micmicidol.club",
"pattern": r"(?:www\.)?micmicidol\.club",
},
})
class BloggerPostExtractor(BloggerExtractor):
"""Extractor for a single blog post"""
subcategory = "post"
@ -100,7 +104,7 @@ class BloggerPostExtractor(BloggerExtractor):
def __init__(self, match):
BloggerExtractor.__init__(self, match)
self.path = match.group(3)
self.path = match.group(match.lastindex)
def posts(self, blog):
return (self.api.post_by_path(blog["id"], self.path),)
@ -124,7 +128,7 @@ class BloggerSearchExtractor(BloggerExtractor):
def __init__(self, match):
BloggerExtractor.__init__(self, match)
self.query = text.unquote(match.group(3))
self.query = text.unquote(match.group(match.lastindex))
def posts(self, blog):
return self.api.blog_search(blog["id"], self.query)
@ -141,7 +145,7 @@ class BloggerLabelExtractor(BloggerExtractor):
def __init__(self, match):
BloggerExtractor.__init__(self, match)
self.label = text.unquote(match.group(3))
self.label = text.unquote(match.group(match.lastindex))
def posts(self, blog):
return self.api.blog_posts(blog["id"], self.label)

@ -87,6 +87,7 @@ CATEGORY_MAP = {
"mangaread" : "MangaRead",
"mangasee" : "MangaSee",
"mastodon.social": "mastodon.social",
"micmicidol" : "MIC MIC IDOL",
"myhentaigallery": "My Hentai Gallery",
"myportfolio" : "Adobe Portfolio",
"naverwebtoon" : "NaverWebtoon",
@ -292,6 +293,10 @@ BASE_MAP = {
"vichan" : "vichan Imageboards",
}
URL_MAP = {
"blogspot": "https://www.blogger.com/",
}
_OAUTH = '<a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a>'
_COOKIES = '<a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a>'
_APIKEY_DB = \
@ -362,7 +367,7 @@ IGNORE_LIST = (
def domain(cls):
"""Return the web-domain related to an extractor class"""
"""Return the domain name associated with an extractor class"""
try:
url = sys.modules[cls.__module__].__doc__.split()[-1]
if url.startswith("http"):
@ -429,10 +434,13 @@ def build_extractor_list():
for category, root in extr.instances:
base[category].append(extr.subcategory)
if category not in domains:
if not root and results:
# use domain from first matching test
test = results.category(category)[0]
root = test["#class"].from_url(test["#url"]).root
if not root:
if category in URL_MAP:
root = URL_MAP[category].rstrip("/")
elif results:
# use domain from first matching test
test = results.category(category)[0]
root = test["#class"].from_url(test["#url"]).root
domains[category] = root + "/"
# sort subcategory lists

@ -8,100 +8,30 @@ from gallery_dl.extractor import blogger
__tests__ = (
{
"#url" : "https://julianbphotography.blogspot.com/2010/12/moon-rise.html",
"#category": ("", "blogger", "post"),
"#class" : blogger.BloggerPostExtractor,
"#pattern" : "https://3.bp.blogspot.com/.*/s0/Icy-Moonrise-.*.jpg",
"#sha1_url": "9928429fb62f712eb4de80f53625eccecc614aae",
"blog": {
"date" : "dt:2010-11-21 18:19:42",
"description": "",
"id" : "5623928067739466034",
"kind" : "blogger#blog",
"locale" : dict,
"name" : "Julian Bunker Photography",
"pages" : int,
"posts" : int,
"published" : "2010-11-21T10:19:42-08:00",
"updated" : str,
"url" : "http://julianbphotography.blogspot.com/",
},
"post": {
"author" : "Julian Bunker",
"content" : str,
"date" : "dt:2010-12-26 01:08:00",
"etag" : str,
"id" : "6955139236418998998",
"kind" : "blogger#post",
"published": "2010-12-25T17:08:00-08:00",
"replies" : "0",
"title" : "Moon Rise",
"updated" : "2011-12-06T05:21:24-08:00",
"url" : r"re:.+/2010/12/moon-rise.html$",
},
"num" : int,
"url" : str,
},
{
"#url" : "blogger:http://www.julianbunker.com/2010/12/moon-rise.html",
"#category": ("", "blogger", "post"),
"#class" : blogger.BloggerPostExtractor,
},
{
"#url" : "http://cfnmscenesinmovies.blogspot.com/2011/11/cfnm-scene-jenna-fischer-in-office.html",
"#comment" : "video (#587)",
"#category": ("", "blogger", "post"),
"#category": ("blogger", "www.julianbunker.com", "post"),
"#class" : blogger.BloggerPostExtractor,
"#pattern" : r"https://.+\.googlevideo\.com/videoplayback",
},
{
"#url" : "https://randomthingsthroughmyletterbox.blogspot.com/2022/01/bitter-flowers-by-gunnar-staalesen-blog.html",
"#comment" : "new image domain (#2204)",
"#category": ("", "blogger", "post"),
"#class" : blogger.BloggerPostExtractor,
"#pattern" : "https://blogger.googleusercontent.com/img/a/.+=s0$",
"#count" : 8,
},
{
"#url" : "https://julianbphotography.blogspot.com/",
"#category": ("", "blogger", "blog"),
"#class" : blogger.BloggerBlogExtractor,
"#pattern" : r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
"#range" : "1-25",
"#count" : 25,
},
{
"#url" : "blogger:https://www.kefblog.com.ng/",
"#category": ("", "blogger", "blog"),
"#category": ("blogger", "www.kefblog.com.ng", "blog"),
"#class" : blogger.BloggerBlogExtractor,
"#range" : "1-25",
"#count" : 25,
},
{
"#url" : "https://julianbphotography.blogspot.com/search?q=400mm",
"#category": ("", "blogger", "search"),
"#url" : "blogger:http://www.julianbunker.com/search?q=400mm",
"#category": ("blogger", "www.julianbunker.com", "search"),
"#class" : blogger.BloggerSearchExtractor,
"#count" : "< 10",
"query": "400mm",
},
{
"#url" : "https://dmmagazine.blogspot.com/search/label/D%26D",
"#category": ("", "blogger", "label"),
"#url" : "blogger:http://www.julianbunker.com/search/label/D%26D",
"#category": ("blogger", "www.julianbunker.com", "label"),
"#class" : blogger.BloggerLabelExtractor,
"#range" : "1-25",
"#count" : 25,
"label": "D&D",
},
)

@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import blogger
# Expected extraction results for Blogger blogs hosted on *.blogspot.com.
# Every entry is categorised as ("blogger", "blogspot", <subcategory>).
__tests__ = (
# Single post: the one expected file URL and the blog/post metadata are
# pinned to exact values where they are stable.
{
    "#url"     : "https://julianbphotography.blogspot.com/2010/12/moon-rise.html",
    "#category": ("blogger", "blogspot", "post"),
    "#class"   : blogger.BloggerPostExtractor,
    "#urls"    : "https://3.bp.blogspot.com/-zlJddJtJOUo/Tt4WooTPNtI/AAAAAAAABG8/dGT2cGp2E7Y/s0/Icy-Moonrise---For-Web.jpg",

    "blog": {
        "date"       : "dt:2010-11-21 18:19:42",
        "description": "",
        "id"         : "5623928067739466034",
        "kind"       : "blogger#blog",
        "locale"     : dict,
        "name"       : "Julian Bunker Photography",
        "pages"      : int,
        "posts"      : int,
        "published"  : "2010-11-21T10:19:42-08:00",
        "updated"    : str,
        "url"        : "http://julianbphotography.blogspot.com/",
    },
    "post": {
        "author"   : "Julian Bunker",
        "content"  : str,
        "date"     : "dt:2010-12-26 01:08:00",
        "etag"     : str,
        "id"       : "6955139236418998998",
        "kind"     : "blogger#post",
        "published": "2010-12-25T17:08:00-08:00",
        "replies"  : "0",
        "title"    : "Moon Rise",
        "updated"  : "2011-12-06T05:21:24-08:00",
        "url"      : "http://julianbphotography.blogspot.com/2010/12/moon-rise.html",
    },
    "extension": "jpg",
    "filename" : "Icy-Moonrise---For-Web",
    # "num" must appear only once: a repeated key in a dict display keeps
    # the last value, which would silently weaken this exact check.
    "num"      : 1,
    "url"      : "https://3.bp.blogspot.com/-zlJddJtJOUo/Tt4WooTPNtI/AAAAAAAABG8/dGT2cGp2E7Y/s0/Icy-Moonrise---For-Web.jpg",
},

# Post containing an embedded video (#587).
{
    "#url"     : "http://cfnmscenesinmovies.blogspot.com/2011/11/cfnm-scene-jenna-fischer-in-office.html",
    "#comment" : "video (#587)",
    "#category": ("blogger", "blogspot", "post"),
    "#class"   : blogger.BloggerPostExtractor,
    "#pattern" : r"https://.+\.googlevideo\.com/videoplayback",
},

# Post whose images are served from blogger.googleusercontent.com (#2204).
{
    "#url"     : "https://randomthingsthroughmyletterbox.blogspot.com/2022/01/bitter-flowers-by-gunnar-staalesen-blog.html",
    "#comment" : "new image domain (#2204)",
    "#category": ("blogger", "blogspot", "post"),
    "#class"   : blogger.BloggerPostExtractor,
    "#pattern" : "https://blogger.googleusercontent.com/img/a/.+=s0$",
    "#count"   : 8,
},

# Whole blog.
{
    "#url"     : "https://julianbphotography.blogspot.com/",
    "#category": ("blogger", "blogspot", "blog"),
    "#class"   : blogger.BloggerBlogExtractor,
    "#pattern" : r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
    "#range"   : "1-25",
    "#count"   : 25,
},

# Search results; the query string is exposed as "query".
{
    "#url"     : "https://julianbphotography.blogspot.com/search?q=400mm",
    "#category": ("blogger", "blogspot", "search"),
    "#class"   : blogger.BloggerSearchExtractor,
    "#count"   : "< 10",

    "query": "400mm",
},

# Label listing; the URL-encoded label "D%26D" is expected back as "D&D".
{
    "#url"     : "https://dmmagazine.blogspot.com/search/label/D%26D",
    "#category": ("blogger", "blogspot", "label"),
    "#class"   : blogger.BloggerLabelExtractor,
    "#range"   : "1-25",
    "#count"   : 25,

    "label": "D&D",
},

)

@ -0,0 +1,84 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import blogger
# Expected extraction results for the Blogger instance at www.micmicidol.club.
# Every entry is categorised as ("blogger", "micmicidol", <subcategory>).
__tests__ = (
# Single post: the one expected file URL plus blog and post metadata.
# Type objects (int, str) stand in for values that are not pinned exactly
# -- presumably matched by type by the test runner; confirm there.
{
"#url" : "https://www.micmicidol.club/2023/11/weekly-taishu-20231113-cover.html",
"#category": ("blogger", "micmicidol", "post"),
"#class" : blogger.BloggerPostExtractor,
"#urls" : "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhgtpSSdrol9aKP_ztcc_mp9TUUS0U_t2DYJuGX3XCs6X5CkxIb-pM98QlxbkgJFvQj-0e6RbXNBf047qyMDZLcPJsm9dTqAn2XkTVfLhWRaxxVvIYnHYu0R0d7WsAUSFs0MDe4Sotpuqp5DQnjr45T17CXKbWtq9cR3op9dDQh3yiw2a6_HInIjLRm5io/s0/000-micmicidol.jpg",
"blog": {
"date" : "dt:2023-09-18 19:48:53",
"description": "",
"id" : "7192714164191173242",
"kind" : "blogger#blog",
"locale" : {
"country" : "TW",
"language": "zh",
"variant" : "",
},
"name" : "MIC MIC IDOL",
"pages" : int,
"posts" : int,
"published" : "2023-09-18T12:48:53-07:00",
"updated" : str,
"url" : "http://www.micmicidol.club/"
},
"post": {
"author" : "MIC MIC IDOL",
"content" : "&nbsp;",
"date" : "dt:2023-11-18 08:01:00",
"etag" : str,
"id" : "5395888649239375388",
"kind" : "blogger#post",
"labels" : [
"- Cover",
"Weekly Taishu",
"Weekly Taishu Cover",
],
"published": "2023-11-18T00:01:00-08:00",
"replies" : "0",
"title" : "Weekly Taishu 週刊大衆 2023.11.13 Cover",
"updated" : "2023-11-18T03:00:42-08:00",
"url" : "http://www.micmicidol.club/2023/11/weekly-taishu-20231113-cover.html"
},
"num" : 1,
"url" : "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhgtpSSdrol9aKP_ztcc_mp9TUUS0U_t2DYJuGX3XCs6X5CkxIb-pM98QlxbkgJFvQj-0e6RbXNBf047qyMDZLcPJsm9dTqAn2XkTVfLhWRaxxVvIYnHYu0R0d7WsAUSFs0MDe4Sotpuqp5DQnjr45T17CXKbWtq9cR3op9dDQh3yiw2a6_HInIjLRm5io/s0/000-micmicidol.jpg",
},
# Whole blog. "#range"/"#count" presumably limit the run to the first 25
# results and expect exactly 25 -- confirm against the test runner.
{
"#url" : "https://www.micmicidol.club/",
"#category": ("blogger", "micmicidol", "blog"),
"#class" : blogger.BloggerBlogExtractor,
"#range" : "1-25",
"#count" : 25,
},
# Search results; the query string is exposed as "query".
{
"#url" : "https://www.micmicidol.club/search?q=cover",
"#category": ("blogger", "micmicidol", "search"),
"#class" : blogger.BloggerSearchExtractor,
"#range" : "1-25",
"#count" : 25,
"query" : "cover",
},
# Label listing; the URL-encoded label ("%20") is expected back with
# plain spaces.
{
"#url" : "https://www.micmicidol.club/search/label/Weekly%20Taishu%20Cover",
"#category": ("blogger", "micmicidol", "label"),
"#class" : blogger.BloggerLabelExtractor,
"#range" : "1-25",
"#count" : 25,
"label" : "Weekly Taishu Cover",
},
)
Loading…
Cancel
Save