From dd314279fbeeaac5f5fed154e8de530179cfef91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 24 Mar 2018 17:24:34 +0100 Subject: [PATCH] [test] add unit tests for extractor module functions --- CHANGELOG.md | 2 + gallery_dl/extractor/__init__.py | 2 +- gallery_dl/version.py | 2 +- scripts/run_tests.sh | 2 +- test/test_extractor.py | 138 +++++++++++++++++++++++++++++++ 5 files changed, 143 insertions(+), 3 deletions(-) create mode 100644 test/test_extractor.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a48aab85..1a927e1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # Changelog +## Unreleased + ## 1.3.2 - 2018-03-23 - Added extractors for `artstation` albums, challenges and search results - Improved URL and metadata extraction for `hitomi`and `nhentai` diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 0fc1845b..c18c3dbd 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -107,7 +107,7 @@ def find(url): def add(klass): """Add 'klass' to the list of available extractors""" - for pattern in klass: + for pattern in klass.pattern: _cache.append((re.compile(pattern), klass)) diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 586ce54f..48fb3f38 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.3.2" +__version__ = "1.3.3-dev" diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index bd457c12..82991b1a 100755 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -1,6 +1,6 @@ #!/bin/bash -TESTS_CORE=(config cookies oauth text util) +TESTS_CORE=(config cookies extractor oauth text util) TESTS_RESULTS=(results) diff --git a/test/test_extractor.py b/test/test_extractor.py new file mode 100644 index 00000000..9c8b9a14 --- /dev/null +++ b/test/test_extractor.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2018 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +import sys +import unittest + +import gallery_dl.extractor as extractor +from gallery_dl.extractor.common import Extractor, Message +from gallery_dl.extractor.directlink import DirectlinkExtractor as DLExtractor + + +class FakeExtractor(Extractor): + category = "fake" + subcategory = "test" + pattern = ["fake:"] + + def __init__(self, match=None): + Extractor.__init__(self) + + def items(self): + yield Message.Version, 1 + yield Message.Url, "text:foobar", {} + + +class TestExtractor(unittest.TestCase): + + def setUp(self): + extractor._cache.clear() + extractor._module_iter = iter(extractor.modules) + + def test_find(self): + valid_uris = ( + "https://example.org/file.jpg", + "tumblr:foobar", + "oauth:flickr", + "test:pixiv:", + "recursive:https://example.org/document.html", + ) + + for uri in valid_uris: + result = extractor.find(uri) + self.assertIsInstance(result, Extractor, uri) + + for not_found in ("", "/tmp/file.ext"): + self.assertIsNone(extractor.find(not_found)) + + for invalid in (None, [], {}, 123, b"test:"): + with self.assertRaises(TypeError): + extractor.find(invalid) + + def test_add(self): + uri = "fake:foobar" + self.assertIsNone(extractor.find(uri)) + + extractor.add(FakeExtractor) + self.assertIsInstance(extractor.find(uri), FakeExtractor) + + def test_add_module(self): + uri = "fake:foobar" + self.assertIsNone(extractor.find(uri)) + + tuples = extractor.add_module(sys.modules[__name__]) + self.assertEqual(len(tuples), 1) + self.assertEqual(tuples[0][0].pattern, FakeExtractor.pattern[0]) + self.assertEqual(tuples[0][1], FakeExtractor) + self.assertIsInstance(extractor.find(uri), FakeExtractor) + + def test_blacklist(self): + link_uri = "https://example.org/file.jpg" + test_uri = "test:" + fake_uri = "fake:" + + self.assertIsInstance(extractor.find(link_uri), DLExtractor) + self.assertIsInstance(extractor.find(test_uri), Extractor) + self.assertIsNone(extractor.find(fake_uri)) + + with extractor.blacklist(["directlink"]): + self.assertIsNone(extractor.find(link_uri)) + self.assertIsInstance(extractor.find(test_uri), Extractor) + self.assertIsNone(extractor.find(fake_uri)) + + with extractor.blacklist([], [DLExtractor, FakeExtractor]): + self.assertIsNone(extractor.find(link_uri)) + self.assertIsInstance(extractor.find(test_uri), Extractor) + self.assertIsNone(extractor.find(fake_uri)) + + with extractor.blacklist(["test"], [DLExtractor]): + self.assertIsNone(extractor.find(link_uri)) + self.assertIsNone(extractor.find(test_uri)) + self.assertIsNone(extractor.find(fake_uri)) + + def test_unique_pattern_matches(self): + test_urls = [] + + # collect testcase URLs + for extr in extractor.extractors(): + if not hasattr(extr, "test"): + continue + for testcase in extr.test: + test_urls.append((testcase[0], extr)) + + # iterate over all testcase URLs + for url, extr1 in test_urls: + matches = [] + + # ... and apply all regex patterns to each one + for pattern, extr2 in extractor._cache: + + # skip DirectlinkExtractor pattern if it isn't tested + if extr1 != DLExtractor and extr2 == DLExtractor: + continue + + match = pattern.match(url) + if match: + self.assertEqual(extr1, extr2) + matches.append(match) + + # fail if more or less than 1 match happened + if len(matches) > 1: + msg = "'{}' gets matched by more than one pattern:".format(url) + for match in matches: + msg += "\n- " + msg += match.re.pattern + self.fail(msg) + + if len(matches) < 1: + msg = "'{}' isn't matched by any pattern".format(url) + self.fail(msg) + + +if __name__ == "__main__": + unittest.main()