# Patch summary (commentary only; everything before the first
# "diff --git" line is not part of any hunk):
#
# * gallery_dl/extractor/__init__.py
#     Adds "directlink" to the `modules` list, between "imagehosts" and
#     "recursive".
#
# * gallery_dl/extractor/directlink.py (new file)
#     Adds DirectlinkExtractor.  Its pattern accepts http(s) URLs whose
#     part before any '?', '&' or '#' contains one of the extensions
#     .jpg/.jpeg/.png/.gif/.gifv/.webm/.mp4.  The constructor stores
#     `match.string` (the complete string the pattern was applied to,
#     not just the matched span), and items() yields a Version, a
#     Directory and a Url message using the data returned by
#     text.nameext_from_url().
#     NOTE(review): the pattern has no end anchor, so text may follow
#     the extension -- confirm this is intended for inputs such as
#     ".../a.png/comments".
#
# * gallery_dl/extractor/recursive.py
#     Imports `extractor` next to `adapter` and wraps the loop that
#     queues every URL found in the page in
#     `with extractor.blacklist("directlink"):`.
#     NOTE(review): presumably this keeps queued URLs from being claimed
#     by the direct-link extractor; blacklist() itself is not part of
#     this patch -- verify against gallery_dl/extractor/__init__.py.
#
# * test/test_extractors.py
#     Replaces the "e621" entry of the `skip` list with an empty line,
#     so that category is no longer skipped.
#
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 525edcb1..5631bf72 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -69,6 +69,7 @@ modules = [
     "yandere",
     "yonkouprod",
     "imagehosts",
+    "directlink",
     "recursive",
     "test",
 ]
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
new file mode 100644
index 00000000..1368a625
--- /dev/null
+++ b/gallery_dl/extractor/directlink.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Direct link handling"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class DirectlinkExtractor(Extractor):
+    """Extractor for direct links to images"""
+    category = "directlink"
+    directory_fmt = []
+    filename_fmt = "{filename}"
+    pattern = [r"https?://[^?&#]+\.(?:jpe?g|png|gifv?|webm|mp4)"]
+    test = [("https://i.imgur.com/21yMxCS.png", {
+        "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
+        "keyword": "6a9636d8dd6f71f14d6d20d24153fc83a9895ed9",
+        "content": "0c8768055e4e20e7c7259608b67799171b691140",
+    })]
+
+    def __init__(self, match):
+        Extractor.__init__(self)
+        self.url = match.string
+
+    def items(self):
+        data = text.nameext_from_url(self.url)
+        yield Message.Version, 1
+        yield Message.Directory, data
+        yield Message.Url, self.url, data
diff --git a/gallery_dl/extractor/recursive.py b/gallery_dl/extractor/recursive.py
index 01c54e2c..0592bf42 100644
--- a/gallery_dl/extractor/recursive.py
+++ b/gallery_dl/extractor/recursive.py
@@ -10,7 +10,7 @@
 import re
 
 from .common import Extractor, Message
-from .. import adapter
+from .. import extractor, adapter
 
 
 class RecursiveExtractor(Extractor):
@@ -29,5 +29,6 @@ class RecursiveExtractor(Extractor):
     def items(self):
         page = self.request(self.url).text
         yield Message.Version, 1
-        for match in re.finditer(r"https?://[^\s\"']+", page):
-            yield Message.Queue, match.group(0)
+        with extractor.blacklist("directlink"):
+            for match in re.finditer(r"https?://[^\s\"']+", page):
+                yield Message.Queue, match.group(0)
diff --git a/test/test_extractors.py b/test/test_extractors.py
index 61e338e3..8c296400 100644
--- a/test/test_extractors.py
+++ b/test/test_extractors.py
@@ -51,7 +51,7 @@ skip = [
     # dont work on travis-ci
     "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie",
     # temporary issues
-    "e621",
+
 ]
 # enable selective testing for direct calls
 if __name__ == '__main__' and len(sys.argv) > 1: