[nitter] add 'videos' option (#3279)

with the same semantics as the 'videos' option for twitter
pull/3307/head
Mike Fährmann 2 years ago
parent 8c4e21b110
commit e31d12139c
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -1860,6 +1860,20 @@ Description
You can use ``"all"`` instead of listing all values separately.
extractor.nitter.videos
-----------------------
Type
``bool`` or ``string``
Default
``true``
Description
Control video download behavior.
* ``true``: Download videos
* ``"ytdl"``: Download videos using `youtube-dl`_
* ``false``: Skip video Tweets
extractor.oauth.browser
-----------------------
Type

@ -204,6 +204,9 @@
"password": null,
"include": "illustration,doujin"
},
"nitter": {
"videos": true
},
"oauth":
{
"browser": true,

@ -24,31 +24,47 @@ class NitterExtractor(BaseExtractor):
self.user = match.group(match.lastindex)
def items(self):
videos = self.config("videos", True)
ytdl = (videos == "ytdl")
for tweet_html in self.tweets():
tweet = self._tweet_from_html(tweet_html)
attachments_html = tweet.pop("_attach", "")
if attachments_html:
attachments = list(text.extract_iter(
attachments_html, 'href="', '"'))
attachments.extend(text.extract_iter(
attachments_html, 'data-url="', '"'))
attachments = tweet.pop("_attach", "")
if attachments:
files = []
append = files.append
for url in text.extract_iter(
attachments, 'href="', '"'):
if url[0] == "/":
url = self.root + url
append({"url": url})
if videos and not files:
if ytdl:
append({
"url": "ytdl:{}/i/status/{}".format(
self.root, tweet["tweet_id"]),
"extension": None,
})
else:
for url in text.extract_iter(
attachments, 'data-url="', '"'):
if url[0] == "/":
url = self.root + url
append({"url": "ytdl:" + url})
else:
attachments = ()
tweet["count"] = len(attachments)
files = ()
tweet["count"] = len(files)
yield Message.Directory, tweet
for tweet["num"], url in enumerate(attachments, 1):
if url[0] == "/":
url = self.root + url
if "/video/" in url:
url = "ytdl:" + url
tweet["filename"] = url.rpartition(
"%2F")[2].partition(".")[0]
tweet["extension"] = "mp4"
else:
text.nameext_from_url(url, tweet)
yield Message.Url, url, tweet
for tweet["num"], file in enumerate(files, 1):
url = file["url"]
file.update(tweet)
if "extension" not in file:
text.nameext_from_url(url, file)
yield Message.Url, url, file
def _tweet_from_html(self, html):
extr = text.extract_from(html)

Loading…
Cancel
Save