[newgrounds] make post extraction nonfatal

pull/644/head
Mike Fährmann 5 years ago
parent 823fbeaae6
commit 87d4f83597
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -36,16 +36,17 @@ class NewgroundsExtractor(Extractor):
for post_url in self.posts():
try:
file = self.extract_post(post_url)
url = file["url"]
# except Exception:
post = self.extract_post(post_url)
url = post.get("url")
except OSError:
url = None
if not url:
self.log.warning("Unable to get download URL for %s", post_url)
continue
yield Message.Directory, file
yield Message.Url, url, text.nameext_from_url(url, file)
if url:
yield Message.Directory, post
yield Message.Url, url, text.nameext_from_url(url, post)
else:
self.log.warning(
"Unable to get download URL for '%s'", post_url)
def posts(self):
"""Return urls of all relevant image pages"""
@@ -83,7 +84,10 @@ class NewgroundsExtractor(Extractor):
}
def extract_post(self, post_url):
page = self.request(post_url).text
response = self.request(post_url, fatal=False)
if response.status_code >= 400:
return {}
page = response.text
extr = text.extract_from(page)
if "/art/view/" in post_url:
@@ -98,8 +102,7 @@ class NewgroundsExtractor(Extractor):
data["favorites"] = text.parse_int(extr(
'id="faves_load">', '<').replace(",", ""))
data["score"] = text.parse_float(extr('id="score_number">', '<'))
data["tags"] = text.split_html(extr(
'<dd class="tags">', '</dd>'))
data["tags"] = text.split_html(extr('<dd class="tags">', '</dd>'))
data["artist"] = [
text.extract(user, '//', '.')[0]
for user in text.extract_iter(page, '<div class="item-user">', '>')

Loading…
Cancel
Save