[yaplog] improve metadata extraction (#443)

- provide a fallback if there is no numerical image ID
- add a 'filename' field
- convert 'date' to an actual datetime object
pull/448/head
Mike Fährmann 5 years ago
parent 15af2f8464
commit d4ffd6c952
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -36,11 +36,13 @@ class YaplogExtractor(AsynchronousMixin, Extractor):
 iurl = text.extract(page, '<img src="', '"')[0]
 if iurl[0] == "/":
 iurl = text.urljoin(self.root, iurl)
-iid, _, ext = iurl.rpartition("/")[2].rpartition(".")
+name, _, ext = iurl.rpartition("/")[2].rpartition(".")
+iid = name.rpartition("_")[0] or name
 image = {
 "url" : iurl,
 "num" : num,
-"id" : text.parse_int(iid.partition("_")[0]),
+"id" : text.parse_int(iid, iid),
+"filename" : name,
 "extension": ext,
 "post" : post,
 }
@@ -75,7 +77,7 @@ class YaplogExtractor(AsynchronousMixin, Extractor):
 "id" : text.parse_int(pid),
 "title": text.unescape(title[:-3]),
 "user" : self.user,
-"date" : date,
+"date" : text.parse_datetime(date, "%B %d [%a], %Y, %H:%M"),
 }
@@ -102,7 +104,7 @@ class YaplogPostExtractor(YaplogExtractor):
 test = (
 ("https://yaplog.jp/imamiami0726/image/1299", {
 "url": "896cae20fa718735a57e723c48544e830ff31345",
-"keyword": "f8d8781e61c4c38238a7622d6df6c905f864e5d3",
+"keyword": "22df8ad6cb534514c6bb2ff000381d156769a620",
 }),
 # complete image URLs (#443)
 ("https://yaplog.jp/msjane/archive/246", {

Loading…
Cancel
Save