[twitter] add 'user' extractor and 'include' option (#4275)

pull/4403/head
Mike Fährmann 1 year ago
parent 86be197d11
commit 20ed647f6f
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -3102,6 +3102,32 @@ Description
`syndication <extractor.twitter.syndication_>`__ API.
extractor.twitter.include
-------------------------
Type
* ``string``
* ``list`` of ``strings``
Default
``"timeline"``
Example
* ``"avatar,background,media"``
* ``["avatar", "background", "media"]``
Description
A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are
``"avatar"``,
``"background"``,
``"timeline"``,
``"tweets"``,
``"media"``,
``"replies"``,
``"likes"``.
It is possible to use ``"all"`` instead of listing all values separately.
extractor.twitter.transform
---------------------------
Type

@ -874,7 +874,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>Twitter</td>
<td>https://twitter.com/</td>
<td>Avatars, Backgrounds, Bookmarks, Events, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets</td>
<td>Avatars, Backgrounds, Bookmarks, Events, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets, User Profiles</td>
<td>Supported</td>
</tr>
<tr>

@ -461,23 +461,18 @@ class TwitterExtractor(Extractor):
self._update_cookies(_login_impl(self, username, password))
class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for a Twitter user timeline"""
subcategory = "timeline"
class TwitterUserExtractor(TwitterExtractor):
"""Extractor for a Twitter user"""
subcategory = "user"
pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
test = (
("https://twitter.com/supernaturepics", {
"range": "1-40",
"url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
}),
# suspended account (#2216)
("https://twitter.com/OptionalTypo", {
"exception": exception.NotFoundError,
}),
# suspended account user ID
("https://twitter.com/id:772949683521978368", {
"exception": exception.NotFoundError,
"options": (("include", "all"),),
"pattern": r"https://twitter\.com/supernaturepics"
r"/(photo|header_photo|timeline|tweets"
r"|media|with_replies|likes)$",
"count": 7,
}),
("https://mobile.twitter.com/supernaturepics?p=i"),
("https://www.twitter.com/id:2976459548"),
@ -493,6 +488,40 @@ class TwitterTimelineExtractor(TwitterExtractor):
if user_id:
self.user = "id:" + user_id
def items(self):
    """Delegate a user profile URL to the per-section Twitter extractors"""
    profile_root = "{}/{}/".format(self.root, self.user)
    dispatch = self._dispatch_extractors

    # one (extractor class, URL handed to that extractor) pair
    # for every section of the user's profile
    sections = (
        (TwitterAvatarExtractor, profile_root + "photo"),
        (TwitterBackgroundExtractor, profile_root + "header_photo"),
        (TwitterTimelineExtractor, profile_root + "timeline"),
        (TwitterTweetsExtractor, profile_root + "tweets"),
        (TwitterMediaExtractor, profile_root + "media"),
        (TwitterRepliesExtractor, profile_root + "with_replies"),
        (TwitterLikesExtractor, profile_root + "likes"),
    )

    # NOTE(review): the second argument is presumably the selection used
    # when the 'include' option is not set -- confirm against
    # _dispatch_extractors, which is defined outside this view
    return dispatch(sections, ("timeline",))
class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for a Twitter user timeline"""
subcategory = "timeline"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)"
test = (
("https://twitter.com/supernaturepics/timeline", {
"range": "1-40",
"url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
}),
# suspended account (#2216)
("https://twitter.com/OptionalTypo/timeline", {
"exception": exception.NotFoundError,
}),
# suspended account user ID
("https://twitter.com/id:772949683521978368/timeline", {
"exception": exception.NotFoundError,
}),
("https://mobile.twitter.com/supernaturepics/timeline#t"),
("https://www.twitter.com/id:2976459548/timeline"),
)
def tweets(self):
# yield initial batch of (media) tweets
tweet = None

Loading…
Cancel
Save