[telegraph] fix file URLs (#3506)

pull/3532/head
Mike Fährmann 2 years ago
parent cbaeee9533
commit 2cae4567ba
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -12,7 +12,6 @@ from .. import text
class TelegraphGalleryExtractor(GalleryExtractor):
"""Extractor for articles from telegra.ph"""
category = "telegraph"
root = "https://telegra.ph"
directory_fmt = ("{category}", "{slug}")
@@ -52,6 +51,23 @@ class TelegraphGalleryExtractor(GalleryExtractor):
"url": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
},
}),
+("https://telegra.ph/Vsyo-o-druzyah-moej-sestricy-05-27", {
+"url": "c1f3048e5d94bee53af30a8c27f70b0d3b15438e",
+"pattern": r"^https://pith1\.ru/uploads"
+r"/posts/2019-12/\d+_\d+\.jpg$",
+"keyword": {
+"author": "Shotacon - заходи сюда",
+"caption": "",
+"count": 19,
+"date": "dt:2022-05-27 16:17:27",
+"description": "",
+"num_formatted": r"re:^\d{2}$",
+"post_url": "https://telegra.ph"
+"/Vsyo-o-druzyah-moej-sestricy-05-27",
+"slug": "Vsyo-o-druzyah-moej-sestricy-05-27",
+"title": "Всё о друзьях моей сестрицы",
+},
+}),
)
def metadata(self, page):
@@ -79,11 +95,12 @@ class TelegraphGalleryExtractor(GalleryExtractor):
result = []
for figure in figures:
-src, pos = text.extract(figure, 'src="', '"')
-if src.startswith("/embed/"):
+url, pos = text.extract(figure, 'src="', '"')
+if url.startswith("/embed/"):
continue
+elif url.startswith("/"):
+url = self.root + url
caption, pos = text.extract(figure, "<figcaption>", "<", pos)
-url = self.root + src
num += 1
result.append((url, {

Loading…
Cancel
Save