[twitter] resolve t.co URLs in 'content' (#1532)

pull/1558/head
Mike Fährmann 3 years ago
parent 2b5d80862e
commit 41457dbb1b
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -168,7 +168,6 @@ class TwitterExtractor(Extractor):
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
"user" : self._transform_user(tweet["user"]),
"lang" : tweet["lang"],
"content" : tweet["full_text"],
"favorite_count": tweet["favorite_count"],
"quote_count" : tweet["quote_count"],
"reply_count" : tweet["reply_count"],
@@ -187,6 +186,13 @@ class TwitterExtractor(Extractor):
"nick": u["name"],
} for u in mentions]
content = tweet["full_text"]
urls = entities.get("urls")
if urls:
for url in urls:
content = content.replace(url["url"], url["expanded_url"])
tdata["content"] = content
if "in_reply_to_screen_name" in tweet:
tdata["reply_to"] = tweet["in_reply_to_screen_name"]

Loading…
Cancel
Save