From 638a676495bb1b3c3e91f6ee13a837df321e1dc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 15 Sep 2024 22:34:56 +0200 Subject: [PATCH] [ao3] add initial support (#6013) --- docs/configuration.rst | 14 +++ docs/supportedsites.md | 6 + gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/ao3.py | 200 +++++++++++++++++++++++++++++++ scripts/supportedsites.py | 6 + test/results/ao3.py | 191 +++++++++++++++++++++++++++++ 6 files changed, 418 insertions(+) create mode 100644 gallery_dl/extractor/ao3.py create mode 100644 test/results/ao3.py diff --git a/docs/configuration.rst b/docs/configuration.rst index 3574e2c3..b18ea3d8 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1279,6 +1279,20 @@ Extractor-specific Options ========================== +extractor.ao3.formats +--------------------- +Type + * ``string`` + * ``list`` of ``strings`` +Default + ``"pdf"`` +Example + * ``"azw3,epub,mobi,pdf,html"`` + * ``["azw3", "epub", "mobi", "pdf", "html"]`` +Description + Format(s) to download. + + extractor.artstation.external ----------------------------- Type diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 868c3b2f..d303f599 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -103,6 +103,12 @@ Consider all listed sites to potentially be NSFW. Firms, Projects + + Archive of Our Own + https://archiveofourown.org/ + Search Results, Series, Tag Searches, User Profiles, Bookmarks, Works + + ArtStation https://www.artstation.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index b6432cea..34b78c85 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -23,6 +23,7 @@ modules = [ "8muses", "adultempire", "agnph", + "ao3", "architizer", "artstation", "aryion", diff --git a/gallery_dl/extractor/ao3.py b/gallery_dl/extractor/ao3.py new file mode 100644 index 00000000..d5128a4c --- /dev/null +++ b/gallery_dl/extractor/ao3.py @@ -0,0 +1,200 @@ +# -*- coding: utf-8 -*- + +# Copyright 2024 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://archiveofourown.org/""" + +from .common import Extractor, Message +from .. import text, util + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?archiveofourown.org" + + +class Ao3Extractor(Extractor): + """Base class for ao3 extractors""" + category = "ao3" + root = "https://archiveofourown.org" + categorytransfer = True + request_interval = (0.5, 1.5) + + def items(self): + base = self.root + "/works/" + data = {"_extractor": Ao3WorkExtractor} + + for work_id in self.works(): + yield Message.Queue, base + work_id, data + + def works(self): + return self._pagination(self.groups[0]) + + def _pagination(self, path, needle='