[twitter] add 'avatar' and 'background' extractors (#349, #3023)

pull/3287/head
Mike Fährmann 2 years ago
parent 46d811bac0
commit 08fd1ff835
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -832,7 +832,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>Twitter</td>
<td>https://twitter.com/</td>
<td>Bookmarks, Events, individual Images, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets</td>
<td>Avatars, Backgrounds, Bookmarks, Events, individual Images, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets</td>
<td>Supported</td>
</tr>
<tr>

@ -374,6 +374,24 @@ class TwitterExtractor(Extractor):
except Exception:
yield tweet
def _make_tweet(self, user, id_str, url, timestamp):
return {
"created_at": text.parse_timestamp(timestamp).strftime(
"%a %b %d %H:%M:%S +0000 %Y"),
"id_str": id_str,
"lang": None,
"user": user,
"entities": {},
"extended_entities": {
"media": [
{
"original_info": {},
"media_url": url,
},
],
},
}
def metadata(self):
"""Return general metadata"""
return {}
@ -842,6 +860,76 @@ class TwitterTweetExtractor(TwitterExtractor):
return itertools.chain(buffer, tweets)
class TwitterAvatarExtractor(TwitterExtractor):
subcategory = "avatar"
filename_fmt = "avatar {date}.{extension}"
archive_fmt = "AV_{user[id]}_{date}"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/photo"
test = (
("https://twitter.com/supernaturepics/photo", {
"pattern": r"https://pbs\.twimg\.com/profile_images"
r"/554585280938659841/FLVAlX18\.jpeg",
"keyword": {
"date": "dt:2015-01-12 10:26:49",
"extension": "jpeg",
"filename": "FLVAlX18",
"tweet_id": 554585280938659841,
},
}),
("https://twitter.com/User16/photo", {
"count": 0,
}),
)
def tweets(self):
self.api._user_id_by_screen_name(self.user)
user = self._user_obj
url = user["legacy"]["profile_image_url_https"]
if url == ("https://abs.twimg.com/sticky"
"/default_profile_images/default_profile_normal.png"):
return ()
url = url.replace("_normal.", ".")
id_str = url.rsplit("/", 2)[1]
timestamp = ((int(id_str) >> 22) + 1288834974657) // 1000
return (self._make_tweet(user, id_str, url, timestamp),)
class TwitterBackgroundExtractor(TwitterExtractor):
subcategory = "background"
filename_fmt = "background {date}.{extension}"
archive_fmt = "BG_{user[id]}_{date}"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/header_photo"
test = (
("https://twitter.com/supernaturepics/header_photo", {
"pattern": r"https://pbs\.twimg\.com/profile_banners"
r"/2976459548/1421058583",
"keyword": {
"date": "dt:2015-01-12 10:29:43",
"filename": "1421058583",
"tweet_id": 0,
},
}),
("https://twitter.com/User16/header_photo", {
"count": 0,
}),
)
def tweets(self):
self.api._user_id_by_screen_name(self.user)
user = user = self._user_obj
try:
url = user["legacy"]["profile_banner_url"]
_, timestamp = url.rsplit("/", 1)
except (KeyError, ValueError):
return ()
return (self._make_tweet(user, None, url, timestamp),)
class TwitterImageExtractor(Extractor):
category = "twitter"
subcategory = "image"

Loading…
Cancel
Save