[blogger] support new image domain (fixes #2204)

pull/2243/head
Mike Fährmann 3 years ago
parent c587b678d0
commit 698f35215e
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2020 Mike Fährmann
# Copyright 2019-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -41,9 +41,11 @@ class BloggerExtractor(Extractor):
blog["date"] = text.parse_datetime(blog["published"])
del blog["selfLink"]
sub = re.compile(r"/(?:s\d+|w\d+-h\d+)/").sub
sub = re.compile(r"(/|=)(?:s\d+|w\d+-h\d+)(?=/|$)").sub
findall_image = re.compile(
r'src="(https?://\d+\.bp\.blogspot\.com/[^"]+)').findall
r'src="(https?://(?:'
r'blogger\.googleusercontent\.com/img|'
r'\d+\.bp\.blogspot\.com)/[^"]+)').findall
findall_video = re.compile(
r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall
@@ -52,7 +54,7 @@ class BloggerExtractor(Extractor):
files = findall_image(content)
for idx, url in enumerate(files):
files[idx] = sub("/s0/", url).replace("http:", "https:", 1)
files[idx] = sub(r"\1s0", url).replace("http:", "https:", 1)
if self.videos and 'id="BLOG_video-' in content:
page = self.request(post["url"]).text
@@ -137,6 +139,12 @@ class BloggerPostExtractor(BloggerExtractor):
("https://aaaninja.blogspot.com/2020/08/altera-boob-press-2.html", {
"pattern": r"https://1.bp.blogspot.com/.+/s0/altera_.+png",
}),
# new image domain (#2204)
(("https://randomthingsthroughmyletterbox.blogspot.com/2022/01"
"/bitter-flowers-by-gunnar-staalesen-blog.html"), {
"pattern": r"https://blogger.googleusercontent.com/img/a/.+=s0$",
"count": 8,
}),
)
def __init__(self, match):

Loading…
Cancel
Save