From 6f1e34ec69235db12f3f4cccb8f5ea286852e115 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 19 Apr 2023 15:28:26 +0200 Subject: [PATCH] [vipergirls] add 'thread' and 'post' extractors (#731, #2720, #3812) --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/vipergirls.py | 108 +++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) create mode 100644 gallery_dl/extractor/vipergirls.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 207050b4..c40ea813 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -883,6 +883,12 @@ Consider all sites to be NSFW unless otherwise known. Files + + Vipergirls + https://vipergirls.to/ + Posts, Threads + + Vipr https://vipr.im/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 553a1104..7cc12108 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -156,6 +156,7 @@ modules = [ "urlshortener", "vanillarock", "vichan", + "vipergirls", "vk", "vsco", "wallhaven", diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py new file mode 100644 index 00000000..1cebdf75 --- /dev/null +++ b/gallery_dl/extractor/vipergirls.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- + +# Copyright 2023 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://vipergirls.to/""" + +from .common import Extractor, Message +from .. import text, exception + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?vipergirls\.to" + + +class VipergirlsExtractor(Extractor): + """Base class for vipergirls extractors""" + category = "vipergirls" + root = "https://vipergirls.to" + + def __init__(self, match): + Extractor.__init__(self, match) + self.session.headers["Referer"] = self.root + + def items(self): + for html in self.posts(): + + pos = html.find('")[2].strip()), + } + + yield Message.Directory, data + for href in text.extract_iter(html, '', '') + + url = text.extr(page, '