|
|
@@ -110,16 +110,17 @@ class TwitterExtractor(Extractor):
|
|
|
|
twitpics = []
|
|
|
|
twitpics = []
|
|
|
|
for url in tweet["entities"].get("urls", ()):
|
|
|
|
for url in tweet["entities"].get("urls", ()):
|
|
|
|
url = url["expanded_url"]
|
|
|
|
url = url["expanded_url"]
|
|
|
|
if "//twitpic.com/" in url:
|
|
|
|
if "//twitpic.com/" in url and "/photos/" not in url:
|
|
|
|
response = self.request(url, fatal=False)
|
|
|
|
response = self.request(url, fatal=False)
|
|
|
|
if response.status_code >= 400:
|
|
|
|
if response.status_code >= 400:
|
|
|
|
continue
|
|
|
|
continue
|
|
|
|
url = text.extract(
|
|
|
|
url = text.extract(
|
|
|
|
response.text, 'name="twitter:image" value="', '"')[0]
|
|
|
|
response.text, 'name="twitter:image" value="', '"')[0]
|
|
|
|
twitpics.append({
|
|
|
|
if url:
|
|
|
|
"original_info": {},
|
|
|
|
twitpics.append({
|
|
|
|
"media_url" : url,
|
|
|
|
"original_info": {},
|
|
|
|
})
|
|
|
|
"media_url" : url,
|
|
|
|
|
|
|
|
})
|
|
|
|
if twitpics:
|
|
|
|
if twitpics:
|
|
|
|
if "extended_entities" in tweet:
|
|
|
|
if "extended_entities" in tweet:
|
|
|
|
tweet["extended_entities"]["media"].extend(twitpics)
|
|
|
|
tweet["extended_entities"]["media"].extend(twitpics)
|
|
|
|