implement '--images' and '--chapters' options

- the former '--items' has been renamed to '--chapters'
- #6
pull/13/head
Mike Fährmann 8 years ago
parent 38aa496ecb
commit 6208d9dd79
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -52,11 +52,17 @@ def build_cmdline_parser():
help="download URLs found in local FILE",
)
parser.add_argument(
"--items",
metavar="ITEM-SPEC", dest="items",
help=("specify which items to download through a comma seperated list "
"of indices or index-ranges; for example '--items -2,4,6-8,10-' "
"will download items 1, 2, 4, 6, 7, 8 and 10 up to the last one")
"--images",
metavar="ITEM-SPEC", dest="images",
help=("specify which images to download through a comma seperated list"
" of indices or index-ranges; "
"for example '--images -2,4,6-8,10-' will download images with "
"index 1, 2, 4, 6, 7, 8 and 10 up to the last one")
)
parser.add_argument(
"--chapters",
metavar="ITEM-SPEC", dest="chapters",
help=("same as '--images' except for chapters")
)
parser.add_argument(
"-c", "--config",
@ -127,8 +133,10 @@ def main():
config.set(("username",), args.username)
if args.password:
config.set(("password",), args.password)
if args.items:
config.set(("items",), args.items)
if args.images:
config.set(("images",), args.images)
if args.chapters:
config.set(("chapters",), args.chapters)
for opt in args.option:
parse_option(opt)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015, 2016 Mike Fährmann
# Copyright 2015-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -21,3 +21,7 @@ class AuthorizationError(Exception):
class NotFoundError(Exception):
    """Raised when the requested resource (gallery/image) does not exist"""
class StopExtraction(Exception):
    """Extraction should stop

    Raised by util.RangePredicate once its running index has moved past the
    upper limit of the requested range; Job.run() catches it and ends its
    message loop without treating it as an error.
    """

@ -20,32 +20,41 @@ class Job():
if self.extractor is None:
raise exception.NoExtractorError(url)
def run(self):
"""Execute or run the job"""
for msg in self.extractor:
if msg[0] == Message.Url:
self.update_kwdict(msg[2])
self.handle_url(msg[1], msg[2])
elif msg[0] == Message.Directory:
self.update_kwdict(msg[1])
self.handle_directory(msg[1])
elif msg[0] == Message.Queue:
self.handle_queue(msg[1])
items = config.get(("images",))
self.pred_url = util.RangePredicate(items) if items else True
elif msg[0] == Message.Headers:
self.handle_headers(msg[1])
items = config.get(("chapters",))
self.pred_queue = util.RangePredicate(items) if items else True
elif msg[0] == Message.Cookies:
self.handle_cookies(msg[1])
elif msg[0] == Message.Version:
if msg[1] != 1:
raise "unsupported message-version ({}, {})".format(
self.extractor.category, msg[1]
)
# TODO: support for multiple message versions
def run(self):
    """Execute or run the job

    Iterates over the messages produced by the extractor and dispatches
    each one to its handler. Url and Queue messages are only handled if
    the respective predicate ('pred_url' / 'pred_queue') evaluates to true;
    these are either the constant True or a util.RangePredicate, whose
    truth test advances its internal index. The predicate is therefore
    evaluated exactly once per matching message.

    A StopExtraction exception raised while processing ends the loop
    quietly. An unsupported message-version raises ValueError.
    """
    try:
        for msg in self.extractor:
            if msg[0] == Message.Url and self.pred_url:
                self.update_kwdict(msg[2])
                self.handle_url(msg[1], msg[2])

            elif msg[0] == Message.Directory:
                self.update_kwdict(msg[1])
                self.handle_directory(msg[1])

            elif msg[0] == Message.Queue and self.pred_queue:
                self.handle_queue(msg[1])

            elif msg[0] == Message.Headers:
                self.handle_headers(msg[1])

            elif msg[0] == Message.Cookies:
                self.handle_cookies(msg[1])

            elif msg[0] == Message.Version:
                if msg[1] != 1:
                    # A plain string cannot be raised in Python 3 (it
                    # fails with an unrelated TypeError), so wrap the
                    # message in a real exception.
                    raise ValueError(
                        "unsupported message-version ({}, {})".format(
                            self.extractor.category, msg[1]
                        )
                    )
                # TODO: support for multiple message versions
    except exception.StopExtraction:
        # Deliberate: signals that nothing further needs to be extracted
        pass
def handle_url(self, url, kexwords):
"""Handle Message.Url"""
@ -81,9 +90,6 @@ class DownloadJob(Job):
def run(self):
Job.run(self)
if self.queue:
itemspec = config.get(("items",))
if itemspec:
self.queue = util.apply_range(self.queue, str(itemspec))
for url in self.queue:
try:
DownloadJob(url).run()

@ -8,40 +8,51 @@
"""Utility functions"""
def apply_range(iterable, rangespec):
    """Return a new iterable containing only the items specified in the given
    integer range

    'rangespec' is handed to parse_range() together with the length of
    'iterable'; the items whose 1-based position is contained in the parsed
    result are then yielded lazily by a generator expression.
    """
    try:
        maxval = len(iterable)
    except TypeError:
        # 'iterable' has no len(); fall back to 0 as upper bound
        # NOTE(review): with maxval == 0 an open-ended range such as '10-'
        # presumably selects nothing -- confirm against parse_range()
        maxval = 0
    # relies on the two-argument form parse_range(rangespec, maxval), whose
    # result must support the 'in' operator
    rset = parse_range(rangespec, maxval)
    return (
        item
        for index, item in enumerate(iterable, 1)  # positions start at 1
        if index in rset
    )
import sys
from . import exception
def parse_range(rangespec):
    """Parse an integer range and return the resulting ranges and upper limit

    'rangespec' is a comma-separated list of groups, each being either a
    single index ('4') or an index-range ('6-8'). A missing lower bound
    defaults to 1 and a missing upper bound to sys.maxsize. Whitespace
    around numbers is ignored. Groups which cannot be parsed as integers
    are skipped.

    Returns a tuple (ranges, limit): 'ranges' is a list of inclusive
    (lower, upper) tuples in the order they were specified and 'limit' is
    the largest upper bound of all of them (0 if there are none).

    Examples
        parse_range("-2,4,6-8,10-")
            -> [(1,2), (4,4), (6,8), (10,INTMAX)], INTMAX
        parse_range(" - 3 , 4- 4, 2-6")
            -> [(1,3), (4,4), (2,6)], 6
    """
    ranges = []
    limit = 0

    for group in rangespec.split(","):
        parts = group.split("-", maxsplit=1)
        try:
            if len(parts) == 1:
                # single index: lower and upper bound are the same
                beg = end = int(parts[0])
            else:
                beg = int(parts[0]) if parts[0].strip() else 1
                end = int(parts[1]) if parts[1].strip() else sys.maxsize
        except ValueError:
            # best-effort parsing: ignore malformed groups
            continue
        ranges.append((beg, end))
        limit = max(limit, end)

    return ranges, limit
class RangePredicate():
    """Predicate; is True if the current index is in the given range

    The predicate is stateful: every truth-value test (bool(pred),
    'if pred:', 'x and pred') advances an internal index by one, starting
    at 1, and reports whether that index lies in one of the parsed ranges.
    It must therefore be evaluated exactly once per item.
    """

    def __init__(self, rangespec):
        """Parse 'rangespec' (e.g. '-2,4,6-8,10-') and reset the index"""
        self.ranges, self.limit = parse_range(rangespec)
        self.index = 0

    def __bool__(self):
        """Advance to the next index and test it against the ranges

        Raises exception.StopExtraction as soon as the index exceeds the
        largest upper bound, since no later index can match anymore.
        """
        self.index += 1

        if self.index > self.limit:
            raise exception.StopExtraction()

        index = self.index
        return any(lower <= index <= upper for lower, upper in self.ranges)

@ -48,7 +48,7 @@ def _generate_test(extr, tcase):
# enable selective testing for direct calls
skip = ["3dbooru", "exhentai", "kissmanga", "mangafox"]
skip = ["4chan", "exhentai", "kissmanga", "mangafox"]
if __name__ == '__main__' and len(sys.argv) > 1:
extractors = [
extr for extr in extractor.extractors()

Loading…
Cancel
Save