change config specifiers in input file format

Instead of a dictionary/object, input file options are now specified as
'key=value' pairs: a line starting with '-' sets an option that applies only
to the next URL, and a line starting with '-G' sets a Global option that
applies to all following URLs.

See the docstring of parse_inputfile() for details.

Example option specifiers:

- filename = "{id}.{extension}"
- extractor.pixiv.user.directory = ["Pixiv Users", "{user[id]}"]
-spaces="are_optional"
-G keywords = {"global": "option"}
pull/79/head
Mike Fährmann 7 years ago
parent f970a8f13c
commit b50bdbf3d7
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -72,25 +72,34 @@ def prepare_filter(filterexpr, target):
def parse_inputfile(file):
    """Filter and process strings from an input file.

    Lines starting with '#' and empty lines will be ignored.
    Lines starting with '-' will be interpreted as a key-value pair separated
    by an '=', where 'key' is a dot-separated option name and 'value' is a
    JSON-parsable value for it. These config options will be applied while
    processing the next URL.
    Lines starting with '-G' are the same as above, except these options will
    be valid for all following URLs, i.e. they are Global.
    Everything else will be used as potential URL.

    Yields either the plain URL string (no pending options) or a
    util.ExtendedUrl carrying the pending global and local options as lists
    of (key-path, value) tuples. Malformed option lines are logged as
    warnings and skipped.

    Example input file:

    # this is a comment
    # setting global options
    -G base-directory = "/tmp/"
    -G skip = false

    # setting local options for the next URL
    -filename="spaces_are_optional.jpg"
    -skip = true

    https://example.org/

    # next URL uses default filename and 'skip' is false.
    https://example.com/index.htm
    """
    gconf = []
    lconf = []

    for line in file:
        line = line.strip()

        if not line or line[0] == "#":
            # empty line or comment
            continue

        if line[0] != "-":
            # url
            if gconf or lconf:
                yield util.ExtendedUrl(line, gconf, lconf)
                # start fresh lists; the yielded object keeps the old ones
                gconf = []
                lconf = []
            else:
                yield line
            continue

        # config spec: '-G' selects the global list, a bare '-' the local one
        if line.startswith("-G"):
            conf = gconf
            line = line[2:]
        else:
            conf = lconf
            line = line[1:]

        key, sep, value = line.partition("=")
        key = key.strip()
        if not sep or not key:
            # no '=' at all, or nothing usable in front of it
            log.warning("input file: invalid <key>=<value> pair: %s", line)
            continue

        try:
            value = json.loads(value.strip())
        except ValueError as exc:
            log.warning("input file: unable to parse '%s': %s", value, exc)
            continue

        # dot-separated option name -> list of nested config keys
        conf.append((key.split("."), value))
def main():
@ -231,7 +250,9 @@ def main():
try:
log.debug("Starting %s for '%s'", jobtype.__name__, url)
if isinstance(url, util.ExtendedUrl):
with config.apply(url.config):
for key, value in url.gconfig:
config.set(key, value)
with config.apply(url.lconfig):
jobtype(url.value).run()
else:
jobtype(url).run()

@ -126,22 +126,32 @@ def setdefault(keys, value, conf=_config):
return conf.setdefault(keys[-1], value)
def unset(keys, conf=_config):
    """Unset the value of property 'key'

    'keys' is a list of nested keys; every element except the last is used
    to descend into 'conf', and the last one is deleted from the container
    reached that way. If the path does not exist, nothing happens.
    """
    try:
        for k in keys[:-1]:
            conf = conf[k]
        del conf[keys[-1]]
    except (KeyError, TypeError, AttributeError):
        # KeyError: some part of the path is not set.
        # TypeError: the path runs through a value that is not a container
        # (e.g. a str or int), so there is nothing to unset either.
        pass
class apply():
    """Context Manager to temporarily apply a collection of key-value pairs

    'kvlist' is an iterable of (keys, value) tuples, where 'keys' is a list
    of nested config keys as accepted by get(), set() and unset().
    On entering the context every value is set; on leaving it, each touched
    key gets its previous value back or is unset if it had none.
    """
    _sentinel = object()

    def __init__(self, kvlist):
        self.original = []
        self.kvlist = kvlist

    def __enter__(self):
        for key, value in self.kvlist:
            # remember the current value (or the sentinel if there is none)
            # before overwriting it
            self.original.append((key, get(key, self._sentinel)))
            set(key, value)

    def __exit__(self, etype, value, traceback):
        # Undo in reverse order: if a key occurs more than once in 'kvlist',
        # or a parent key is followed by one of its children, the entry
        # recorded first holds the real original and must be restored last.
        # Popping also empties the list so the instance can be reused.
        while self.original:
            key, previous = self.original.pop()
            if previous is self._sentinel:
                unset(key)
            else:
                set(key, previous)

@ -261,10 +261,9 @@ class ChainPredicate():
class ExtendedUrl():
    """URL with attached config key-value pairs

    value   -- the URL string itself
    gconfig -- options meant to stay in effect for all following URLs
    lconfig -- options meant to apply only while processing this URL
    """

    def __init__(self, url, gconf, lconf):
        self.value, self.gconfig, self.lconfig = url, gconf, lconf

    def __str__(self):
        return self.value

@ -31,6 +31,15 @@ class TestConfig(unittest.TestCase):
self.assertEqual(config.get(["d"]), None)
self.assertEqual(config.get(["e", "f", "g"], 123), 123)
def test_interpolate(self):
    """Check config.interpolate() results before and after setting 'd'"""
    self.assertEqual(config.interpolate(["a"]), "1")
    self.assertEqual(config.interpolate(["b", "a"]), "1")
    self.assertEqual(config.interpolate(["b", "c"], "2"), "text")
    # 'd' is not set anywhere yet, so the supplied default comes back
    self.assertEqual(config.interpolate(["b", "d"], "2"), "2")
    config.set(["d"], 123)
    # once a top-level 'd' exists, it is returned for both paths below
    self.assertEqual(config.interpolate(["b", "d"], "2"), 123)
    self.assertEqual(config.interpolate(["d", "d"], "2"), 123)
def test_set(self):
config.set(["b", "c"], [1, 2, 3])
config.set(["e", "f", "g"], value=234)
@ -43,14 +52,29 @@ class TestConfig(unittest.TestCase):
self.assertEqual(config.get(["b", "c"]), "text")
self.assertEqual(config.get(["e", "f", "g"]), 234)
def test_interpolate(self):
self.assertEqual(config.interpolate(["a"]), "1")
self.assertEqual(config.interpolate(["b", "a"]), "1")
self.assertEqual(config.interpolate(["b", "c"], "2"), "text")
self.assertEqual(config.interpolate(["b", "d"], "2"), "2")
config.set(["d"], 123)
self.assertEqual(config.interpolate(["b", "d"], "2"), 123)
self.assertEqual(config.interpolate(["d", "d"], "2"), 123)
def test_unset(self):
    """Check that config.unset() removes only the addressed values"""
    config.unset(["a"])
    config.unset(["b", "c"])
    # this call must not raise, whatever the state of 'c'
    # (presumably 'c' is not set by the fixture; verify against setUp)
    config.unset(["c", "d"])
    self.assertIsNone(config.get(["a"]))
    # sibling of the removed ["b", "c"] has to survive
    self.assertEqual(config.get(["b", "a"]), 2)
    self.assertIsNone(config.get(["b", "c"]))
def test_apply(self):
    """Check that config.apply() sets values only inside its context"""
    options = (
        (["b", "c"], [1, 2, 3]),
        (["e", "f", "g"], 234),
    )

    # state before entering the context
    self.assertEqual(config.get(["b", "c"]), "text")
    self.assertIsNone(config.get(["e", "f", "g"]))

    with config.apply(options):
        self.assertEqual(config.get(["b", "c"]), [1, 2, 3])
        self.assertEqual(config.get(["e", "f", "g"]), 234)

    # leaving the context restores the overwritten value and removes
    # the one that did not exist before
    self.assertEqual(config.get(["b", "c"]), "text")
    self.assertIsNone(config.get(["e", "f", "g"]))
if __name__ == '__main__':

Loading…
Cancel
Save