You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
gallery-dl/scripts/export_tests.py

248 lines
6.4 KiB

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
import os
import re
import sys
import itertools
import collections
import util
from pyprint import pyprint
from gallery_dl import extractor
# Template for the source text of one generated results module.
# export_tests() fills the "{imports}" and "{tests}" placeholders
# through str.format().
FORMAT = '''\
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
{imports}
__tests__ = (
{tests}\
)
'''
def extract_tests_from_source(lines):
    """Map every test URL found in module source code to its source lines.

    'lines' is the list of lines of a module (as returned by readlines()).
    A test definition is recognized by a quoted URL directly following an
    opening parenthesis. Each URL is mapped to the slice of 'lines' that
    starts one line before the definition (a comment may be placed there)
    and ends with the line that closes the definition.

    Returns a dict of URL -> list of source lines.
    """
    tests = {}

    # NOTE(review): the whitespace at the start of both patterns has to
    # mirror the indentation used by the scanned modules - confirm.
    match_url = re.compile(
        r''' (?:test = | )?\(\(?"([^"]+)"(.*)''').match
    match_end = re.compile(
        r" (\}\)| \}\),)\n$").match

    # Index of the first line of the multi-line test that is currently
    # open. None (and not 0) marks "no open test", so that a definition
    # starting on the very first line is still recorded.
    first = None
    url = ""

    for index, line in enumerate(lines):
        if first is not None and match_end(line):
            # max() keeps the slice start from wrapping around to -1
            tests[url] = lines[max(first - 1, 0):index + 1]
            first = None

        elif (m := match_url(line)):
            # A URL literal split across lines has nothing after its
            # closing quote; glue continuation lines on until it does.
            offset = index
            while not m[2]:
                offset += 1
                following = lines[offset]
                line = line[:-2] + following[following.index('"') + 1:]
                m = match_url(line)
            url = m[1]

            if m[2] in (",)", "),"):
                # the definition is closed right after the URL
                tests[url] = lines[max(index - 1, 0):index + 1]
                first = None
            else:
                first = index

    return tests
def get_test_source(extr, *, cache={}):
    """Return the source lines of the test definition for 'extr.url'.

    The file of the module that defines 'extr' is read and handed to
    extract_tests_from_source() once; the resulting URL -> lines mapping
    is stored in 'cache' under the module's name. The mutable default of
    'cache' is intentional: it is what keeps results between calls.

    Returns ("",) when no source lines are known for the URL.
    """
    module = extr.__module__

    try:
        tests = cache[module]
    except KeyError:
        path = sys.modules[module].__file__
        # Python source is UTF-8 by default; do not rely on the
        # locale's preferred encoding when reading it back
        with open(path, encoding="utf-8") as fp:
            lines = fp.readlines()
        tests = cache[module] = extract_tests_from_source(lines)

    return tests.get(extr.url) or ("",)
def comment_from_source(source):
    """Return the text of the '#' comment on the first line of 'source'.

    The first line has to consist of whitespace followed by '#'; the
    result is everything after the '#' and any whitespace following it.
    An empty string is returned when the first line is no such comment.
    """
    found = re.match(r"\s+#\s*(.+)", source[0])
    if found is None:
        return ""
    return found.group(1)
def build_test(extr, data):
    """Convert one legacy test definition into its exported parts.

    extr: extractor instance created from the test URL
    data: result dict of the test, or a false value when it has none;
          the dict is left unmodified

    Returns a (head, instr, metadata) tuple:
      head     - dict with "#url", "#category" and "#class", plus
                 "#comment" when the test's source has a leading comment
      instr    - dict of "#..." entries derived from the keys of 'data'
      metadata - the "keyword" value of 'data' (None when absent);
                 replaced by {} when that value is a 40 character
                 string, which is stored as "#sha1_metadata" instead

    Prints the extractor and the offending keys and exits with status 1
    when 'data' contains keys that are not handled here.
    """
    source = get_test_source(extr)
    comment = comment_from_source(source)

    head = {"#url": extr.url}
    if comment:
        head["#comment"] = comment.replace('"', "'")
    head["#category"] = (extr.basecategory,
                         extr.category,
                         extr.subcategory)
    head["#class"] = extr.__class__

    # Work on a copy: the pop() calls below would otherwise strip the
    # caller's dict of all its entries.
    data = dict(data) if data else {}

    instr = {}

    if (options := data.pop("options", None)):
        instr["#options"] = dict(options)

    if (pattern := data.pop("pattern", None)):
        # PATTERNS stays None until main() fills it; without it the
        # pattern is exported verbatim
        if PATTERNS and pattern in PATTERNS:
            cls = PATTERNS[pattern]
            pattern = f"lit:{pyprint(cls)}.pattern"
        instr["#pattern"] = pattern

    # exported only when set to a true value
    for key in ("exception", "range"):
        if (value := data.pop(key, None)):
            instr[f"#{key}"] = value

    # exported whenever present, so that values like 0 or False survive
    for key in ("count", "archive", "extractor"):
        if (value := data.pop(key, None)) is not None:
            instr[f"#{key}"] = value

    if (url := data.pop("url", None)):
        instr["#sha1_url"] = url

    if (metadata := data.pop("keyword", None)):
        if isinstance(metadata, str) and len(metadata) == 40:
            instr["#sha1_metadata"] = metadata
            metadata = {}

    if (content := data.pop("content", None)):
        if isinstance(content, tuple):
            content = list(content)
        instr["#sha1_content"] = content

    if data:
        # anything still left is a key this converter does not know
        print(extr)
        for k in data:
            print(k)
        # sys.exit() instead of the interactive-only exit() helper, and
        # a non-zero status since the export was aborted
        sys.exit(1)

    return head, instr, metadata
def collect_patterns():
    """Build a lookup table from pattern string to extractor class.

    Each class yielded by extractor._list_classes() is stored under
    'cls.pattern.pattern' (presumably the source string of its compiled
    URL regex - confirm). A class listed later replaces an earlier one
    that uses the same key.
    """
    by_pattern = {}
    for cls in extractor._list_classes():
        by_pattern[cls.pattern.pattern] = cls
    return by_pattern
def collect_tests(whitelist=None):
    """Convert the tests of all extractor classes, grouped by category.

    whitelist: container of category names to restrict the result to;
               a false value (the default) selects every category

    Returns a defaultdict(list) that maps a category name to the list of
    build_test() results for that category.
    """
    grouped = collections.defaultdict(list)

    for cls in extractor._list_classes():
        for url, data in cls._get_tests():
            extr = cls.from_url(url)
            if not whitelist or extr.category in whitelist:
                converted = build_test(extr, data)
                grouped[extr.category].append(converted)

    return grouped
def export_tests(data):
    """Render converted tests as the source text of a results module.

    data: iterable of (head, instr, metadata) tuples as produced by
          build_test()

    Every class found among the values of the three dicts, except for
    builtins, contributes one import statement. The text of each test is
    assembled from the pyprint() output of its parts, and everything is
    inserted into FORMAT.

    Note that "type:datetime" values inside a 'metadata' dict are
    replaced in place by "lit:datetime.datetime".
    """
    import_stmts = {}
    rendered = []

    for head, instr, metadata in data:
        values = list(head.values())
        if instr:
            values.extend(instr.values())
        if metadata:
            values.extend(metadata.values())

        for v in values:
            # only non-builtin classes need an import
            if not isinstance(v, type):
                continue
            if v.__module__ == "builtins":
                continue

            module, _, name = v.__module__.rpartition(".")
            if name[0].isdecimal():
                # a module name starting with a digit cannot be written
                # in an import statement; load it by its name string
                stmt = f'''\
{module.partition(".")[0]} = __import__("{v.__module__}")
_{name} = getattr({module}, "{name}")'''
            elif module:
                stmt = f"from {module} import {name}"
            else:
                stmt = f"import {name}"
            import_stmts[v.__module__] = stmt

        text = pyprint(head)

        if instr:
            # cut the end of the previous part and the first character
            # of the next one so that both merge into a single literal
            text = f"{text[:-2]}{pyprint(instr)[1:]}"

        if metadata:
            for key, value in metadata.items():
                if value == "type:datetime":
                    import_stmts["datetime"] = "import datetime"
                    metadata[key] = "lit:datetime.datetime"
            text = f"{text[:-1]}{pyprint(metadata, lmin=0)[1:]}"

        rendered.append(f"{text},\n\n")

    return FORMAT.format(
        imports="\n".join(import_stmts.values()),
        tests="".join(rendered),
    )
# Lookup table of pattern string -> extractor class. It is None until
# main() assigns the result of collect_patterns(); build_test() reads it.
PATTERNS = None
# NOTE(review): not referenced by any of the visible code - possibly a
# leftover; confirm before relying on or removing it.
DIRECTORY = "/tmp/_/results"
def main():
    """Command-line entry point: write one results module per category.

    Options:
      -t/--target    directory to write to; defaults to "test/results"
                     inside the parent of this script's directory
      -c/--category  category to export; may be given several times,
                     all categories are exported when it is omitted
    """
    global PATTERNS

    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-t", "--target",
        help="target directory",
    )
    cli.add_argument(
        "-c", "--category", action="append",
        help="extractor categories to export",
    )
    args = cli.parse_args()

    target = args.target
    if not target:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        target = os.path.join(os.path.dirname(script_dir), "test", "results")

    PATTERNS = collect_patterns()

    os.makedirs(target, exist_ok=True)

    collected = collect_tests(args.category)
    for category, tests in collected.items():
        # dots are removed from the category to form the file name
        filename = category.replace(".", "")
        with util.lazy(f"{target}/{filename}.py") as fp:
            fp.write(export_tests(tests))
# Run the export only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()