[nitter] handle base64-encoded filenames

pull/3353/head
Mike Fährmann 2 years ago
parent b14b33f19e
commit 4287a93202
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -10,6 +10,7 @@
from .common import BaseExtractor, Message
from .. import text
import binascii
class NitterExtractor(BaseExtractor):
@@ -46,7 +47,13 @@ class NitterExtractor(BaseExtractor):
for url in text.extract_iter(
attachments, 'href="', '"'):
name = url.rpartition("%2F")[2]
if "/enc/" in url:
name = binascii.a2b_base64(url.rpartition(
"/")[2]).decode().rpartition("/")[2]
else:
name = url.rpartition("%2F")[2]
if url[0] == "/":
url = self.root + url
file = {"url": url}
@@ -64,7 +71,13 @@ class NitterExtractor(BaseExtractor):
else:
for url in text.extract_iter(
attachments, 'data-url="', '"'):
name = url.rpartition("%2F")[2]
if "/enc/" in url:
name = binascii.a2b_base64(url.rpartition(
"/")[2]).decode().rpartition("/")[2]
else:
name = url.rpartition("%2F")[2]
if url[0] == "/":
url = self.root + url
append({
@@ -400,7 +413,9 @@ class NitterTweetExtractor(NitterExtractor):
}),
# quoted tweet (#526, #854)
("https://nitter.1d4.us/StobiesGalaxy/status/1270755918330896395", {
"pattern": r"https://nitter\.1d4\.us/pic/orig/media%2FEaK.+\.jpg",
"pattern": r"https://nitter\.1d4\.us/pic/orig"
r"/enc/bWVkaWEvRWFL\w+LmpwZw==",
"keyword": {"filename": r"re:EaK.{12}"},
"count": 4,
}),
# deleted quote tweet (#2225)
@@ -413,7 +428,6 @@ class NitterTweetExtractor(NitterExtractor):
}),
# age-restricted (#2354)
("https://nitter.unixfox.eu/mightbecurse/status/1492954264909479936", {
"options": (("syndication", True),),
"keywords": {"date": "dt:2022-02-13 20:10:09"},
"count": 1,
}),

Loading…
Cancel
Save