|
|
|
@ -7,7 +7,6 @@
|
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
|
|
|
|
|
import sys
|
|
|
|
|
import json
|
|
|
|
|
import logging
|
|
|
|
|
from . import version, config, option, output, extractor, job, util, exception
|
|
|
|
|
|
|
|
|
@ -32,81 +31,6 @@ def progress(urls, pformat):
|
|
|
|
|
yield pinfo["url"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_inputfile(file, log):
    """Filter and process strings from an input file.

    Lines starting with '#' and empty lines will be ignored.
    Lines starting with '-' will be interpreted as a key-value pair separated
    by an '=', where 'key' is a dot-separated option name and 'value' is a
    JSON-parsable value. These configuration options will be applied while
    processing the next URL.
    Lines starting with '-G' are the same as above, except these options will
    be applied for *all* following URLs, i.e. they are Global.
    Everything else will be used as a potential URL.

    A trailing comment on a URL line starts at the first '#' that is
    preceded by a space or a tab; it and any whitespace before it are removed.

    Example input file:

    # settings global options
    -G base-directory = "/tmp/"
    -G skip = false

    # setting local options for the next URL
    -filename="spaces_are_optional.jpg"
    -skip = true

    https://example.org/

    # next URL uses default filename and 'skip' is false.
    https://example.com/index.htm # comment1
    https://example.com/404.htm   # comment2

    Arguments:
        file: an iterable of text lines (e.g. an open text file or stdin)
        log: a logger-like object; its 'warning' method is called for
            option lines that cannot be parsed

    Yields:
        The plain URL string when no options are pending, otherwise
        'util.ExtendedUrl(url, gconf, lconf)' carrying the collected global
        and local option tuples '(path, name, value)'. Both option lists are
        reset after they have been handed out; how they get applied is up to
        the consumer of the yielded objects.
    """
    gconf = []
    lconf = []

    for line in file:
        line = line.strip()

        if not line or line[0] == "#":
            # empty line or comment
            continue

        if line[0] == "-":
            # config spec; '-G' selects the global list, '-' the local one
            if line[1:2] == "G":
                conf = gconf
                line = line[2:]
            else:
                conf = lconf
                line = line[1:]

            key, sep, value = line.partition("=")
            if not sep:
                log.warning("input file: invalid <key>=<value> pair: %s", line)
                continue

            try:
                value = json.loads(value.strip())
            except ValueError as exc:
                log.warning("input file: unable to parse '%s': %s", value, exc)
                continue

            # "a.b.c" -> (["a", "b"], "c", value)
            key = key.strip().split(".")
            conf.append((key[:-1], key[-1], value))
            continue

        # url
        # Cut at the EARLIEST comment marker. Checking " #" before "\t#"
        # (as an if/elif) would leave a tab-delimited comment in the URL
        # whenever a space-delimited one follows it on the same line.
        markers = [
            pos
            for pos in (line.find(" #"), line.find("\t#"))
            if pos >= 0
        ]
        if markers:
            line = line[:min(markers)].rstrip()

        if gconf or lconf:
            yield util.ExtendedUrl(line, gconf, lconf)
            # start fresh lists; the yielded object keeps the old ones
            gconf = []
            lconf = []
        else:
            yield line
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
|
|
|
|
|
try:
|
|
|
|
|
if sys.stdout and sys.stdout.encoding.lower() != "utf-8":
|
|
|
|
@ -275,12 +199,12 @@ def main():
|
|
|
|
|
try:
|
|
|
|
|
if inputfile == "-":
|
|
|
|
|
if sys.stdin:
|
|
|
|
|
urls += parse_inputfile(sys.stdin, log)
|
|
|
|
|
urls += util.parse_inputfile(sys.stdin, log)
|
|
|
|
|
else:
|
|
|
|
|
log.warning("input file: stdin is not readable")
|
|
|
|
|
else:
|
|
|
|
|
with open(inputfile, encoding="utf-8") as file:
|
|
|
|
|
urls += parse_inputfile(file, log)
|
|
|
|
|
urls += util.parse_inputfile(file, log)
|
|
|
|
|
except OSError as exc:
|
|
|
|
|
log.warning("input file: %s", exc)
|
|
|
|
|
|
|
|
|
|