[ao3] extract detailed 'chapters' metadata (#6013)

master
Mike Fährmann 2 days ago
parent 1d7df9cef2
commit 2dfdc92b21
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -69,6 +69,12 @@ class Ao3WorkExtractor(Ao3Extractor):
url = "{}/works/{}".format(self.root, work_id)
extr = text.extract_from(self.request(url).text)
chapters = {}
cindex = extr(' id="chapter_index"', "</ul>")
for ch in text.extract_iter(cindex, ' value="', "</option>"):
cid, _, cname = ch.partition('">')
chapters[cid] = text.unescape(cname)
fmts = {}
path = ""
download = extr(' class="download"', "</ul>")
@@ -102,8 +108,7 @@ class Ao3WorkExtractor(Ao3Extractor):
path.rpartition("updated_at=")[2]),
"words" : text.parse_int(
extr('<dd class="words">', "<").replace(",", "")),
"chapters" : text.parse_int(
extr('<dd class="chapters">', "/")),
"chapters" : chapters,
"comments" : text.parse_int(
extr('<dd class="comments">', "<").replace(",", "")),
"likes" : text.parse_int(

@@ -16,7 +16,35 @@ __tests__ = (
"author" : "Flowers_for_ghouls",
"bookmarks": range(100, 300),
"chapters" : 27,
"chapters": {
"120506833": "1. Showtime",
"120866506": "2. A Comedy of Errors",
"121739140": "3. Gifts",
"121941313": "4. Date Night",
"123054364": "5. Breaking the News",
"123579898": "6. Isolated Events",
"124258153": "7. The Home Stretch",
"124886536": "8. Domestic Bliss",
"125335270": "9. The Offer",
"125871166": "10. The Promise",
"126223879": "11. Gifts II",
"126692398": "12. On the Move",
"127471375": "13. The Fruit Vignettes",
"128496448": "14. Respite",
"128994919": "15. Changes",
"129492154": "16. Halloween",
"130379002": "17. GIfts III",
"131066743": "18. R.A.S.B.E.W.",
"131884072": "19. The Longest Night",
"132730264": "20. Meeting the Pack",
"133714876": "21. A Mystery",
"134663854": "22. Growing Pains",
"135499822": "23. Presentation Day",
"136500946": "24. Revelations",
"137857876": "25. The Retirement Plan",
"139463056": "26. Two Birds, One Stone",
"141697141": "27. New Management",
},
"comments" : range(800, 2000),
"date" : "dt:2023-06-11 00:00:00",
"date_completed": "dt:2024-05-10 00:00:00",

Loading…
Cancel
Save