From 0dedbe759c67381c388a4d3951380d0359118139 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 12 Sep 2017 16:19:00 +0200 Subject: [PATCH] enable '--chapter-filter' The same filter infrastructure that can be applied to image URLs now also works for manga chapters and other delegated URLs. TODO: actually provide any metadata (currently only deviantart and imagefap are supported). --- gallery_dl/__init__.py | 2 +- gallery_dl/extractor/common.py | 8 ++++++-- gallery_dl/extractor/deviantart.py | 7 ++++--- gallery_dl/extractor/imagefap.py | 17 +++++++++++++---- gallery_dl/extractor/pinterest.py | 2 +- gallery_dl/extractor/pixiv.py | 2 +- gallery_dl/extractor/recursive.py | 2 +- gallery_dl/extractor/reddit.py | 2 +- gallery_dl/extractor/test.py | 2 +- gallery_dl/job.py | 24 +++++++++++++----------- gallery_dl/option.py | 2 +- 11 files changed, 43 insertions(+), 27 deletions(-) diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index ddd4eae4..73b4d4eb 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -153,7 +153,7 @@ def main(): prepare_range(args.image_range, "image") prepare_range(args.chapter_range, "chapter") prepare_filter(args.image_filter, "image") - # prepare_filter(args.chapter_filter, "chapter") + prepare_filter(args.chapter_filter, "chapter") pformat = config.get(("output", "progress"), True) if pformat and len(urls) > 1 and args.loglevel < logging.ERROR: diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 116fc36b..fa8bb990 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -161,8 +161,12 @@ class MangaExtractor(Extractor): chapters.reverse() yield Message.Version, 1 - for chapter in chapters: - yield Message.Queue, chapter + try: + for chapter, data in chapters: + yield Message.Queue, chapter, data + except ValueError: + for chapter in chapters: + yield Message.Queue, chapter, {} def login(self): """Login and set necessary cookies""" diff 
--git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 662c5239..b122d7a8 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -44,8 +44,9 @@ class DeviantartExtractor(Extractor): def items(self): yield Message.Version, 1 for deviation in self.deviations(): - if isinstance(deviation, str): - yield Message.Queue, deviation + if isinstance(deviation, tuple): + url, data = deviation + yield Message.Queue, url, data continue self.prepare(deviation) @@ -159,7 +160,7 @@ class DeviantartExtractor(Extractor): def _folder_urls(self, folders, category): url = "https://{}.deviantart.com/{}/0/".format(self.user, category) - return [url + folder["name"] for folder in folders] + return [(url + folder["name"], folder) for folder in folders] class DeviantartGalleryExtractor(DeviantartExtractor): diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index 86abd91a..003559a4 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -157,10 +157,12 @@ class ImagefapUserExtractor(Extractor): def items(self): yield Message.Version, 1 - for gallery in self.get_gallery_ids(): - yield Message.Queue, "http://www.imagefap.com/gallery/" + gallery + for gid, name in self.get_gallery_data(): + url = "http://www.imagefap.com/gallery/" + gid + data = {"gallery_id": int(gid), "name": name} + yield Message.Queue, url, data - def get_gallery_ids(self): + def get_gallery_data(self): """Yield all gallery_ids of a specific user""" folders = self.get_gallery_folders() url = "http://www.imagefap.com/ajax_usergallery_folder.php" @@ -168,7 +170,14 @@ class ImagefapUserExtractor(Extractor): for folder_id in folders: params["id"] = folder_id page = self.request(url, params=params).text - yield from text.extract_iter(page, '", "<", pos) + yield gid, name def get_gallery_folders(self): """Create a list of all folder_ids of a specific user""" diff --git 
a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index f7e49cb8..a5a34472 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -129,7 +129,7 @@ class PinterestPinitExtractor(PinterestExtractor): if not location or location in ("https://api.pinterest.com/None", "https://www.pinterest.com"): raise exception.NotFoundError("pin") - yield Message.Queue, location + yield Message.Queue, location, {} class PinterestAPI(): diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 301801f9..233310c5 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -174,7 +174,7 @@ class PixivMeExtractor(PixivExtractor): if response.status_code == 404: raise exception.NotFoundError("user") yield Message.Version, 1 - yield Message.Queue, response.headers["Location"] + yield Message.Queue, response.headers["Location"], {} class PixivWorkExtractor(PixivExtractor): diff --git a/gallery_dl/extractor/recursive.py b/gallery_dl/extractor/recursive.py index d7b33419..4b1b404b 100644 --- a/gallery_dl/extractor/recursive.py +++ b/gallery_dl/extractor/recursive.py @@ -33,4 +33,4 @@ class RecursiveExtractor(Extractor): yield Message.Version, 1 with extractor.blacklist(blist): for match in re.finditer(r"https?://[^\s\"']+", page): - yield Message.Queue, match.group(0) + yield Message.Queue, match.group(0), {} diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index 171fba6a..50c0928b 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -45,7 +45,7 @@ class RedditExtractor(Extractor): if match: extra.append(match.group(1)) else: - yield Message.Queue, url + yield Message.Queue, url, {} if not extra or depth == self.max_depth: return diff --git a/gallery_dl/extractor/test.py b/gallery_dl/extractor/test.py index 36e6a6ae..80710ab0 100644 --- a/gallery_dl/extractor/test.py +++ b/gallery_dl/extractor/test.py @@ -69,7 +69,7 @@ class 
TestExtractor(Extractor): yield Message.Version, 1 for test in tests: - yield Message.Queue, test[0] + yield Message.Queue, test[0], {} @staticmethod def __contains__(_): diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 42894401..4e321015 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -96,8 +96,9 @@ class Job(): self.handle_directory(msg[1]) elif msg[0] == Message.Queue: - if self.pred_queue(msg[1], None): - self.handle_queue(msg[1]) + _, url, kwds = msg + if self.pred_queue(url, kwds): + self.handle_queue(url, kwds) elif msg[0] == Message.Version: if msg[1] != 1: @@ -112,7 +113,7 @@ class Job(): def handle_directory(self, keywords): """Handle Message.Directory""" - def handle_queue(self, url): + def handle_queue(self, url, keywords): """Handle Message.Queue""" def update_kwdict(self, kwdict): @@ -147,7 +148,7 @@ class DownloadJob(Job): """Set and create the target directory for downloads""" self.pathfmt.set_directory(keywords) - def handle_queue(self, url): + def handle_queue(self, url, keywords): try: DownloadJob(url).run() except exception.NoExtractorError: @@ -181,10 +182,10 @@ class KeywordJob(Job): print("-----------------------------") self.print_keywords(keywords) - def handle_queue(self, url): - print("This extractor transfers work to other extractors and does not " - "provide any keywords on its own. 
Try " - "'gallery-dl --list-keywords \"", url, "\"' instead.", sep="") + def handle_queue(self, url, keywords): + print("Keywords for chapter filters:") + print("-----------------------------") + self.print_keywords(keywords) raise exception.StopExtraction() @staticmethod @@ -218,13 +219,13 @@ class UrlJob(Job): Job.__init__(self, url) self.depth = depth if depth == self.maxdepth: - self.handle_queue = print + self.handle_queue = self.handle_url @staticmethod def handle_url(url, _): print(url) - def handle_queue(self, url): + def handle_queue(self, url, _): try: UrlJob(url, self.depth + 1).run() except exception.NoExtractorError: @@ -277,8 +278,9 @@ class TestJob(DownloadJob): def handle_directory(self, keywords): self.update_keyword(keywords) - def handle_queue(self, url): + def handle_queue(self, url, keywords): self.update_url(url) + self.update_keyword(keywords) def update_url(self, url): """Update the URL hash""" diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 3c554b6d..fc0c0b85 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -213,7 +213,7 @@ def build_parser(): selection.add_argument( "--chapter-filter", metavar="EXPR", dest="chapter_filter", - help="Same as '--filter' except for chapters (not yet implemented)", + help="Same as '--filter' except for chapters", ) parser.add_argument(