make --filter and --range config-file options

The functionality of --(chapter-)filter and --(chapter-)range is now
also exposed as the following config-file options:

- extractor.*.image-filter
- extractor.*.image-range
- extractor.*.chapter-filter
- extractor.*.chapter-range

TODO: update configuration.rst
pull/133/head
Mike Fährmann 6 years ago
parent 4a348990f4
commit 0514d6a0ae
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -116,25 +116,6 @@ def progress(urls, pformat):
yield pinfo["url"]
def prepare_range(rangespec, target):
    """Parse 'rangespec' and store it as config value ("_", target, "range")

    An empty or missing 'rangespec' is ignored. If parsing and optimizing
    yields no ranges at all, a warning is logged and nothing is stored.
    """
    if not rangespec:
        return

    ranges = util.optimize_range(util.parse_range(rangespec))
    if not ranges:
        log.warning("invalid/empty %s range", target)
        return

    config.set(("_", target, "range"), ranges)
def prepare_filter(filterexpr, target):
    """Compile 'filterexpr' and store it as config value ("_", target, "filter")

    An empty or missing 'filterexpr' is ignored. SyntaxError, ValueError
    and TypeError are not propagated; they are logged as warnings instead.
    """
    if not filterexpr:
        return

    try:
        # the pseudo-filename shows up in tracebacks of the compiled code
        source_name = "<{} filter>".format(target)
        compiled = compile(filterexpr, source_name, "eval")
        config.set(("_", target, "filter"), compiled)
    except (SyntaxError, ValueError, TypeError) as exc:
        log.warning(exc)
def parse_inputfile(file):
"""Filter and process strings from an input file.
@ -317,11 +298,6 @@ def main():
ulog.propagate = False
job.Job.ulog = ulog
prepare_range(args.image_range, "image")
prepare_range(args.chapter_range, "chapter")
prepare_filter(args.image_filter, "image")
prepare_filter(args.chapter_filter, "chapter")
pformat = config.get(("output", "progress"), True)
if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
urls = progress(urls, pformat)

@ -29,25 +29,12 @@ class Job():
"Using %s for '%s'", self.extractor.__class__.__name__, url)
# url predicates
predicates = [util.UniquePredicate()]
image = config.get(("_", "image"), {})
if "filter" in image:
predicates.append(util.FilterPredicate(image["filter"]))
if "range" in image:
pred = util.RangePredicate(image["range"])
if pred.lower > 1 and "filter" not in image:
pred.index += self.extractor.skip(pred.lower - 1)
predicates.append(pred)
self.pred_url = util.build_predicate(predicates)
self.pred_url = self._prepare_predicates(
"image", [util.UniquePredicate()], True)
# queue predicates
predicates = []
chapter = config.get(("_", "chapter"), {})
if "filter" in chapter:
predicates.append(util.FilterPredicate(chapter["filter"]))
if "range" in chapter:
predicates.append(util.RangePredicate(chapter["range"]))
self.pred_queue = util.build_predicate(predicates)
self.pred_queue = self._prepare_predicates(
"chapter", [], False)
# category transfer
if parent and parent.extractor.categorytransfer:
@ -148,6 +135,30 @@ class Job():
if self.userkwds:
kwdict.update(self.userkwds)
def _prepare_predicates(self, target, predicates, skip=True):
    """Extend 'predicates' from the extractor's filter/range options

    Reads the '<target>-filter' and '<target>-range' extractor options,
    appends a FilterPredicate and/or RangePredicate to 'predicates' for
    each one that is set and valid, and returns the result of
    util.build_predicate() for the extended list.

    Invalid option values are reported through the extractor's logger
    and otherwise ignored.

    If 'skip' is true, no filter option is set and the range starts
    after the first item, extractor.skip() is called with the number of
    leading items outside the range and its return value is added to the
    range predicate's index.
    """
    extractor = self.extractor

    filterexpr = extractor.config(target + "-filter")
    filter_pred = None
    if filterexpr:
        try:
            filter_pred = util.FilterPredicate(filterexpr, target)
        except (SyntaxError, ValueError, TypeError) as exc:
            extractor.log.warning(exc)
    if filter_pred is not None:
        predicates.append(filter_pred)

    rangespec = extractor.config(target + "-range")
    range_pred = None
    if rangespec:
        try:
            range_pred = util.RangePredicate(rangespec)
        except ValueError as exc:
            extractor.log.warning(
                "invalid %s range: %s", target, exc)
    if range_pred is not None:
        # skipping is only attempted when no filter option is set
        # (presumably because a filter may reject some of the skipped
        # items and shift the indices - TODO confirm)
        if skip and range_pred.lower > 1 and not filterexpr:
            range_pred.index += extractor.skip(range_pred.lower - 1)
        predicates.append(range_pred)

    return util.build_predicate(predicates)
def _write_unsupported(self, url):
if self.ulog:
self.ulog.info(url)

@ -134,7 +134,7 @@ def build_parser():
output.add_argument(
"--write-log",
metavar="FILE", dest="logfile", action=ConfigAction,
help=("Write logging output to FILE"),
help="Write logging output to FILE",
)
output.add_argument(
"--write-unsupported",
@ -183,18 +183,18 @@ def build_parser():
help="Additional configuration files",
)
configuration.add_argument(
"--config-yaml",
metavar="CFG", dest="yamlfiles", action="append",
help="Additional configuration files (YAML format)",
"-o", "--option",
metavar="OPT", action=ParseAction, dest="options", default=[],
help="Additional '<key>=<value>' option values",
)
configuration.add_argument(
"--ignore-config", dest="load_config", action="store_false",
help="Do not read the default configuration files",
)
configuration.add_argument(
"-o", "--option",
metavar="OPT", action=ParseAction, dest="options", default=[],
help="Additional '<key>=<value>' option values",
"--config-yaml",
metavar="CFG", dest="yamlfiles", action="append",
help=argparse.SUPPRESS,
)
authentication = parser.add_argument_group("Authentication Options")
@ -223,31 +223,30 @@ def build_parser():
)
selection.add_argument(
"--range",
metavar="RANGE", dest="image_range",
help=("Specify which images to download through a comma seperated list"
" of indices or index-ranges; "
"for example '--range -2,4,6-8,10-' will download images with "
metavar="RANGE", dest="image-range", action=ConfigAction,
help=("Index-range specifying which images to download. "
"For example '--range -2,4,6-8,10-' will download images with "
"index 1, 2, 4, 6, 7, 8 and 10 up to the last one"),
)
selection.add_argument(
"--chapter-range",
metavar="RANGE", dest="chapter_range",
help=("Same as '--range' except for chapters "
metavar="RANGE", dest="chapter-range", action=ConfigAction,
help=("Like '--range', but applies to manga-chapters "
"and other delegated URLs"),
)
selection.add_argument(
"--filter",
metavar="EXPR", dest="image_filter",
help=("Python expression controlling which images to download. Images "
"for which the expression evaluates to False are ignored. "
metavar="EXPR", dest="image-filter", action=ConfigAction,
help=("Python expression controlling which images to download. "
"Files for which the expression evaluates to False are ignored. "
"Available keys are the filename-specific ones listed by '-K'. "
"Example: --filter \"image_width >= 1000 and "
"rating in ('s', 'q')\""),
)
selection.add_argument(
"--chapter-filter",
metavar="EXPR", dest="chapter_filter",
help=("Same as '--filter' except for chapters "
metavar="EXPR", dest="chapter-filter", action=ConfigAction,
help=("Like '--filter', but applies to manga-chapters "
"and other delegated URLs"),
)

@ -22,55 +22,6 @@ import urllib.parse
from . import text, exception
def parse_range(rangespec):
    """Parse an integer range string and return the resulting ranges

    Groups are separated by ","; a group is either a single index or
    "<first>-<last>", where a missing <first> defaults to 1 and a missing
    <last> to sys.maxsize. Reversed bounds get swapped and groups which
    cannot be parsed as integers are silently dropped.

    Examples:
        parse_range("-2,4,6-8,10-") -> [(1,2), (4,4), (6,8), (10,INTMAX)]
        parse_range(" - 3 , 4- 4, 2-6") -> [(1,3), (4,4), (2,6)]
    """
    result = []

    for part in rangespec.split(","):
        lower, dash, upper = part.partition("-")
        try:
            if dash:
                start = int(lower) if lower.strip() else 1
                stop = int(upper) if upper.strip() else sys.maxsize
            else:
                start = stop = int(lower)
        except ValueError:
            # ignore groups that are not valid integers / integer ranges
            continue

        if start > stop:
            start, stop = stop, start
        result.append((start, stop))

    return result
def optimize_range(ranges):
    """Simplify/Combine a parsed list of ranges

    Overlapping and directly adjacent (lower, upper) ranges are merged
    and the result is returned in ascending order. The 'ranges' argument
    itself is never modified; a list with at most one element is
    returned as is.

    Examples:
        optimize_range([(2,4), (4,6), (5,8)]) -> [(2,8)]
        optimize_range([(1,1), (2,2), (3,6), (8,9)]) -> [(1,6), (8,9)]
    """
    if len(ranges) <= 1:
        return ranges

    # sorted() instead of list.sort(), so the caller's list keeps its order
    riter = iter(sorted(ranges))
    result = []

    beg, end = next(riter)
    for lower, upper in riter:
        if lower > end + 1:
            # gap between the current and the next range: start a new one
            result.append((beg, end))
            beg, end = lower, upper
        elif upper > end:
            # overlapping or adjacent: extend the current range
            end = upper
    result.append((beg, end))

    return result
def bencode(num, alphabet="0123456789"):
"""Encode an integer into a base-N encoded string"""
data = ""
@ -186,9 +137,10 @@ def build_predicate(predicates):
class RangePredicate():
"""Predicate; True if the current index is in the given range"""
def __init__(self, ranges):
self.ranges = ranges
def __init__(self, rangespec):
self.ranges = self.optimize_range(self.parse_range(rangespec))
self.index = 0
if self.ranges:
self.lower, self.upper = self.ranges[0][0], self.ranges[-1][1]
else:
@ -205,6 +157,54 @@ class RangePredicate():
return True
return False
@staticmethod
def parse_range(rangespec):
    """Parse an integer range string and return the resulting ranges

    Groups are separated by ","; empty groups are skipped. A group is
    either a single index or "<first>-<last>", where a missing <first>
    defaults to 1 and a missing <last> to sys.maxsize. Reversed bounds
    get swapped.

    Raises ValueError if a group contains something other than integers.

    Examples:
        parse_range("-2,4,6-8,10-") -> [(1,2), (4,4), (6,8), (10,INTMAX)]
        parse_range(" - 3 , 4- 4, 2-6") -> [(1,3), (4,4), (2,6)]
    """
    result = []

    for part in rangespec.split(","):
        if part:
            lower, dash, upper = part.partition("-")
            if dash:
                start = int(lower) if lower.strip() else 1
                stop = int(upper) if upper.strip() else sys.maxsize
            else:
                start = stop = int(lower)

            if start > stop:
                start, stop = stop, start
            result.append((start, stop))

    return result
@staticmethod
def optimize_range(ranges):
    """Simplify/Combine a parsed list of ranges

    Overlapping and directly adjacent (lower, upper) ranges are merged
    and the result is returned in ascending order. The 'ranges' argument
    itself is never modified; a list with at most one element is
    returned as is.

    Examples:
        optimize_range([(2,4), (4,6), (5,8)]) -> [(2,8)]
        optimize_range([(1,1), (2,2), (3,6), (8,9)]) -> [(1,6), (8,9)]
    """
    if len(ranges) <= 1:
        return ranges

    # sorted() instead of list.sort(), so the caller's list keeps its order
    riter = iter(sorted(ranges))
    result = []

    beg, end = next(riter)
    for lower, upper in riter:
        if lower > end + 1:
            # gap between the current and the next range: start a new one
            result.append((beg, end))
            beg, end = lower, upper
        elif upper > end:
            # overlapping or adjacent: extend the current range
            end = upper
    result.append((beg, end))

    return result
class UniquePredicate():
"""Predicate; True if given URL has not been encountered before"""
@ -230,8 +230,9 @@ class FilterPredicate():
"re": re,
}
def __init__(self, codeobj):
self.codeobj = codeobj
def __init__(self, filterexpr, target="image"):
name = "<{} filter>".format(target)
self.codeobj = compile(filterexpr, name, "eval")
def __call__(self, url, kwds):
try:

@ -17,38 +17,38 @@ import string
class TestRange(unittest.TestCase):
def test_parse_range(self):
def test_parse_range(self, f=util.RangePredicate.parse_range):
self.assertEqual(
util.parse_range(""),
f(""),
[])
self.assertEqual(
util.parse_range("1-2"),
f("1-2"),
[(1, 2)])
self.assertEqual(
util.parse_range("-"),
f("-"),
[(1, sys.maxsize)])
self.assertEqual(
util.parse_range("-2,4,6-8,10-"),
f("-2,4,6-8,10-"),
[(1, 2), (4, 4), (6, 8), (10, sys.maxsize)])
self.assertEqual(
util.parse_range(" - 3 , 4- 4, 2-6"),
f(" - 3 , 4- 4, 2-6"),
[(1, 3), (4, 4), (2, 6)])
def test_optimize_range(self):
def test_optimize_range(self, f=util.RangePredicate.optimize_range):
self.assertEqual(
util.optimize_range([]),
f([]),
[])
self.assertEqual(
util.optimize_range([(2, 4)]),
f([(2, 4)]),
[(2, 4)])
self.assertEqual(
util.optimize_range([(2, 4), (6, 8), (10, 12)]),
f([(2, 4), (6, 8), (10, 12)]),
[(2, 4), (6, 8), (10, 12)])
self.assertEqual(
util.optimize_range([(2, 4), (4, 6), (5, 8)]),
f([(2, 4), (4, 6), (5, 8)]),
[(2, 8)])
self.assertEqual(
util.optimize_range([(1, 1), (2, 2), (3, 6), (8, 9)]),
f([(1, 1), (2, 2), (3, 6), (8, 9)]),
[(1, 6), (8, 9)])
@ -57,13 +57,13 @@ class TestPredicate(unittest.TestCase):
def test_range_predicate(self):
dummy = None
pred = util.RangePredicate(util.parse_range(" - 3 , 4- 4, 2-6"))
pred = util.RangePredicate(" - 3 , 4- 4, 2-6")
for i in range(6):
self.assertTrue(pred(dummy, dummy))
with self.assertRaises(exception.StopExtraction):
bool(pred(dummy, dummy))
pred = util.RangePredicate(util.parse_range("1, 3, 5"))
pred = util.RangePredicate("1, 3, 5")
self.assertTrue(pred(dummy, dummy))
self.assertFalse(pred(dummy, dummy))
self.assertTrue(pred(dummy, dummy))
@ -72,7 +72,7 @@ class TestPredicate(unittest.TestCase):
with self.assertRaises(exception.StopExtraction):
bool(pred(dummy, dummy))
pred = util.RangePredicate(util.parse_range(""))
pred = util.RangePredicate("")
with self.assertRaises(exception.StopExtraction):
bool(pred(dummy, dummy))

Loading…
Cancel
Save