[archivedmoe] fix thebarchive webm URLs (#5116)

pull/5124/head
Mike Fährmann 8 months ago
parent 34a4ddc399
commit 1f7101d606
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -24,6 +24,8 @@ class FoolfuukaExtractor(BaseExtractor):
BaseExtractor.__init__(self, match)
if self.category == "b4k":
self.remote = self._remote_direct
elif self.category == "archivedmoe":
self.referer = False
def items(self):
yield Message.Directory, self.metadata()
@ -53,9 +55,12 @@ class FoolfuukaExtractor(BaseExtractor):
def remote(self, media):
    """Resolve a remote media link

    Fetches the page at media["remote_media_link"] and returns the
    URL found in its 'http-equiv="Refresh"' redirect.

    thebarchive.com '.webm' redirect targets are reported as broken
    (#5116); for those, the final character is dropped before the URL
    is returned. All other URLs are returned unchanged.
    """
    page = self.request(media["remote_media_link"]).text
    # presumably text.extr() yields the substring between the two
    # markers, i.e. the redirect target -- confirm against text.extr
    url = text.extr(page, 'http-equiv="Refresh" content="0; url=', '"')

    # only thebarchive .webm links need the trailing character removed
    if url.endswith(".webm") and \
            url.startswith("https://thebarchive.com/"):
        return url[:-1]
    return url
@staticmethod
def _remote_direct(media):

@ -23,6 +23,18 @@ __tests__ = (
"#sha1_url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
},
{
"#url" : "https://archived.moe/b/thread/912594917/",
"#comment" : "broken thebarchive .webm URLs (#5116)",
"#category": ("foolfuuka", "archivedmoe", "thread"),
"#class" : foolfuuka.FoolfuukaThreadExtractor,
"#urls" : (
"https://thebarchive.com/b/full_image/1705625299234839.gif",
"https://thebarchive.com/b/full_image/1705625431133806.web",
"https://thebarchive.com/b/full_image/1705626190307840.web",
),
},
{
"#url" : "https://archived.moe/gd/",
"#category": ("foolfuuka", "archivedmoe", "board"),

Loading…
Cancel
Save