From 6208d9dd79d95d8e8a31da2c79e090f26c5dd65a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 23 Feb 2017 21:51:29 +0100 Subject: [PATCH] implement '--images' and '--chapters' options - the former '--items' has been renamed to '--chapters' - #6 --- gallery_dl/__init__.py | 22 ++++++++++----- gallery_dl/exception.py | 6 ++++- gallery_dl/job.py | 60 ++++++++++++++++++++++------------------- gallery_dl/util.py | 59 +++++++++++++++++++++++----------------- test/test_extractors.py | 2 +- 5 files changed, 89 insertions(+), 60 deletions(-) diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 1f4bd70d..8e9a5d4b 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -52,11 +52,17 @@ def build_cmdline_parser(): help="download URLs found in local FILE", ) parser.add_argument( - "--items", - metavar="ITEM-SPEC", dest="items", - help=("specify which items to download through a comma seperated list " - "of indices or index-ranges; for example '--items -2,4,6-8,10-' " - "will download items 1, 2, 4, 6, 7, 8 and 10 up to the last one") + "--images", + metavar="ITEM-SPEC", dest="images", + help=("specify which images to download through a comma seperated list" + " of indices or index-ranges; " + "for example '--images -2,4,6-8,10-' will download images with " + "index 1, 2, 4, 6, 7, 8 and 10 up to the last one") + ) + parser.add_argument( + "--chapters", + metavar="ITEM-SPEC", dest="chapters", + help=("same as '--images' except for chapters") ) parser.add_argument( "-c", "--config", @@ -127,8 +133,10 @@ def main(): config.set(("username",), args.username) if args.password: config.set(("password",), args.password) - if args.items: - config.set(("items",), args.items) + if args.images: + config.set(("images",), args.images) + if args.chapters: + config.set(("chapters",), args.chapters) for opt in args.option: parse_option(opt) diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py index 8a9d5bc9..24769064 100644 --- a/gallery_dl/exception.py +++ b/gallery_dl/exception.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015, 2016 Mike Fährmann +# Copyright 2015-2017 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -21,3 +21,7 @@ class AuthorizationError(Exception): class NotFoundError(Exception): """Requested resource (gallery/image) does not exist""" + + +class StopExtraction(Exception): + """Extraction should stop""" diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 8fac59a0..02e6ccb8 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -20,32 +20,41 @@ class Job(): if self.extractor is None: raise exception.NoExtractorError(url) - def run(self): - """Execute or run the job""" - for msg in self.extractor: - if msg[0] == Message.Url: - self.update_kwdict(msg[2]) - self.handle_url(msg[1], msg[2]) - - elif msg[0] == Message.Directory: - self.update_kwdict(msg[1]) - self.handle_directory(msg[1]) - - elif msg[0] == Message.Queue: - self.handle_queue(msg[1]) + items = config.get(("images",)) + self.pred_url = util.RangePredicate(items) if items else True - elif msg[0] == Message.Headers: - self.handle_headers(msg[1]) + items = config.get(("chapters",)) + self.pred_queue = util.RangePredicate(items) if items else True - elif msg[0] == Message.Cookies: - self.handle_cookies(msg[1]) - - elif msg[0] == Message.Version: - if msg[1] != 1: - raise "unsupported message-version ({}, {})".format( - self.extractor.category, msg[1] - ) - # TODO: support for multiple message versions + def run(self): + """Execute or run the job""" + try: + for msg in self.extractor: + if msg[0] == Message.Url and self.pred_url: + self.update_kwdict(msg[2]) + self.handle_url(msg[1], msg[2]) + + elif msg[0] == Message.Directory: + self.update_kwdict(msg[1]) + self.handle_directory(msg[1]) + + elif msg[0] == Message.Queue and self.pred_queue: + self.handle_queue(msg[1]) + + elif msg[0] == Message.Headers: + self.handle_headers(msg[1]) + + elif msg[0] == Message.Cookies: + self.handle_cookies(msg[1]) + + elif msg[0] == Message.Version: + if msg[1] != 1: + raise "unsupported message-version ({}, {})".format( + self.extractor.category, msg[1] + ) + # TODO: support for multiple message versions + except exception.StopExtraction: + pass def handle_url(self, url, kexwords): """Handle Message.Url""" @@ -81,9 +90,6 @@ class DownloadJob(Job): def run(self): Job.run(self) if self.queue: - itemspec = config.get(("items",)) - if itemspec: - self.queue = util.apply_range(self.queue, str(itemspec)) for url in self.queue: try: DownloadJob(url).run() diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 86151802..c1fe005e 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -8,40 +8,51 @@ """Utility functions""" - -def apply_range(iterable, rangespec): - """Return a new iterable containing only the items specified in the given - integer range - """ - try: - maxval = len(iterable) - except TypeError: - maxval = 0 - rset = parse_range(rangespec, maxval) - return ( - item - for index, item in enumerate(iterable, 1) - if index in rset - ) +import sys +from . import exception -def parse_range(rangespec, maxval=0): - """Parse an integer range and return the resulting set +def parse_range(rangespec): + """Parse an integer range and return the resulting ranges and upper limit Examples - parse_range("-2,4,6-8,10-", 12) -> set(1, 2, 4, 6, 7, 8, 10, 11, 12) - parse_range(" - 3 , 4- 4, 6-2") -> set(1, 2, 3, 4) + parse_range("-2,4,6-8,10-") + -> [(1,2), (4,4), (6,8), (10,INTMAX)], INTMAX + + parse_range(" - 3 , 4- 4, 2-6") + -> [(1,3), (4,4), (2,6)], 6 """ - result = set() + ranges = [] + limit = 0 for group in rangespec.split(","): parts = group.split("-", maxsplit=1) try: if len(parts) == 1: - result.add(int(parts[0])) + beg = int(parts[0]) + end = beg else: beg = int(parts[0]) if parts[0].strip() else 1 - end = int(parts[1]) if parts[1].strip() else maxval - result.update(range(beg, end+1)) + end = int(parts[1]) if parts[1].strip() else sys.maxsize + ranges.append((beg, end)) + limit = max(limit, end) except ValueError: pass - return result + return ranges, limit + + +class RangePredicate(): + """Predicate; is True if the current index is in the given range""" + def __init__(self, rangespec): + self.ranges, self.limit = parse_range(rangespec) + self.index = 0 + + def __bool__(self): + self.index += 1 + + if self.index > self.limit: + raise exception.StopExtraction() + + for lower, upper in self.ranges: + if lower <= self.index <= upper: + return True + return False diff --git a/test/test_extractors.py b/test/test_extractors.py index e8293a8d..84f4bb9f 100644 --- a/test/test_extractors.py +++ b/test/test_extractors.py @@ -48,7 +48,7 @@ def _generate_test(extr, tcase): # enable selective testing for direct calls -skip = ["3dbooru", "exhentai", "kissmanga", "mangafox"] +skip = ["4chan", "exhentai", "kissmanga", "mangafox"] if __name__ == '__main__' and len(sys.argv) > 1: extractors = [ extr for extr in extractor.extractors()