# -*- coding: utf-8 -*- # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://tumblrgallery.xyz/""" from .common import GalleryExtractor from .. import text BASE_PATTERN = r"(?:https?://)?tumblrgallery\.xyz" class TumblrgalleryExtractor(GalleryExtractor): """Base class for tumblrgallery extractors""" category = "tumblrgallery" filename_fmt = "{category}_{gallery_id}_{num:>03}_{id}.{extension}" directory_fmt = ("{category}", "{gallery_id} {title}") root = "https://tumblrgallery.xyz" @staticmethod def _urls_from_page(page): return text.extract_iter( page, '
", "")), "gallery_id": self.gallery_id, } def images(self, _): page_num = 1 while True: url = "{}/tumblrblog/gallery/{}/{}.html".format( self.root, self.gallery_id, page_num) response = self.request(url, allow_redirects=False, fatal=False) if response.status_code >= 300: return for url in self._urls_from_page(response.text): yield url, self._data_from_url(url) page_num += 1 class TumblrgalleryPostExtractor(TumblrgalleryExtractor): """Extractor for Posts on tumblrgallery.xyz""" subcategory = "post" pattern = BASE_PATTERN + r"(/post/(\d+)\.html)" example = "https://tumblrgallery.xyz/post/12345.html" def __init__(self, match): TumblrgalleryExtractor.__init__(self, match) self.gallery_id = text.parse_int(match.group(2)) def metadata(self, page): return { "title" : text.remove_html( text.unescape(text.extr(page, "", "")) ).replace("_", "-"), "gallery_id": self.gallery_id, } def images(self, page): for url in self._urls_from_page(page): yield url, self._data_from_url(url) class TumblrgallerySearchExtractor(TumblrgalleryExtractor): """Extractor for Search result on tumblrgallery.xyz""" subcategory = "search" filename_fmt = "{category}_{num:>03}_{gallery_id}_{id}_{title}.{extension}" directory_fmt = ("{category}", "{search_term}") pattern = BASE_PATTERN + r"(/s\.php\?q=([^&#]+))" example = "https://tumblrgallery.xyz/s.php?q=QUERY" def __init__(self, match): TumblrgalleryExtractor.__init__(self, match) self.search_term = match.group(2) def metadata(self, page): return { "search_term": self.search_term, } def images(self, _): page_url = "s.php?q=" + self.search_term while True: page = self.request(self.root + "/" + page_url).text for gallery_id in text.extract_iter( page, '
", "") )).replace("_", "-") yield url, data next_url = text.extr( page, '