# -*- coding: utf-8 -*- # Copyright 2016-2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://luscious.net/""" from .common import GalleryExtractor, Extractor, Message from .. import text, exception from ..cache import cache class LusciousBase(Extractor): """Base class for luscious extractors""" category = "luscious" cookiedomain = ".luscious.net" root = "https://members.luscious.net" def login(self): """Login and set necessary cookies""" username, password = self._get_auth_info() if username: self._update_cookies(self._login_impl(username, password)) @cache(maxage=14*24*3600, keyarg=1) def _login_impl(self, username, password): self.log.info("Logging in as %s", username) url = "https://members.luscious.net/accounts/login/" headers = {"Referer": "https://members.luscious.net/login/"} data = { "login": username, "password": password, "remember": "on", "next": "/", } response = self.request(url, method="POST", headers=headers, data=data) if "/accounts/login/" in response.url or not response.history: raise exception.AuthenticationError() for cookie in response.history[0].cookies: if cookie.name.startswith("sessionid_"): return {cookie.name: cookie.value} raise exception.AuthenticationError() @staticmethod def _parse_tags(tags): return [ text.unescape(tag.replace(":_", ":")) for tag in text.extract_iter(tags or "", "/tags/", "/") ] class LusciousAlbumExtractor(LusciousBase, GalleryExtractor): """Extractor for image albums from luscious.net""" subcategory = "album" archive_fmt = "{gallery_id}_{image_id}" pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net" r"/(?:albums|pictures/c/[^/?&#]+/album)/([^/?&#]+_(\d+))") test = ( ("https://luscious.net/albums/okinami-no-koigokoro_277031/", { "url": "7e4984a271a1072ac6483e4228a045895aff86f3", "keyword": "c597c132834f4990f90bf5dee5de2a9d4ba263a4", "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3", }), ("https://luscious.net/albums/virgin-killer-sweater_282582/", { "url": "21cc68a7548f4d71dfd67d8caf96349dde7e791c", "keyword": "e1202078b504adeccd521aa932f456a5a85479a0", }), ("https://luscious.net/albums/not-found_277035/", { "exception": exception.NotFoundError, }), ("https://members.luscious.net/albums/login-required_323871/", { "options": (("username", None),), "exception": exception.AuthorizationError, }), ("https://www.luscious.net/albums/okinami_277031/"), ("https://members.luscious.net/albums/okinami_277031/"), ("https://luscious.net/pictures/c/video_game_manga/album" "/okinami-no-koigokoro_277031/sorted/position/id/16528978/@_1"), ) def __init__(self, match): path, self.gallery_id = match.groups() url = "{}/albums/{}/".format(self.root, path) GalleryExtractor.__init__(self, match, url) def metadata(self, page): pos = page.find("

404 Not Found

") if pos >= 0: msg = text.extract(page, '

', '

', pos)[0] if msg and "content is not available" in msg: raise exception.AuthorizationError() raise exception.NotFoundError("album") title, pos = text.extract(page, '"og:title" content="', '"') info , pos = text.extract(page, '

', "", pos) if info is None: count, pos = text.extract(page, '>Pages:', '<', pos) else: count, pos = text.extract(page, '

', ' ', pos) genre, pos = text.extract(page, '

Genre:', '

', pos) adnce, pos = text.extract(page, '

Audience:', '

', pos) tags , pos = text.extract(page, '"tag_list static">', '', pos) return { "gallery_id": text.parse_int(self.gallery_id), "title" : text.unescape(title or ""), "count" : text.parse_int(count), "genre" : text.remove_html(genre), "audience" : text.remove_html(adnce), "tags" : self._parse_tags(tags), } def images(self, page): extr = text.extract url = "{}/pictures/album/x_{}/sorted/old/page/1/".format( self.root, self.gallery_id) page = self.request(url).text pos = page.find('

', '', pos) _ , pos = extr(page, '

", "") if data["paginator_complete"]: return pnum += 1 def parse_album(self, album): url , pos = text.extract(album, 'href="', '"') title, pos = text.extract(album, ">", "<", pos) count, pos = text.extract(album, "# of pictures:", "<", pos) date , pos = text.extract(album, "Updated: ", "<", pos) desc , pos = text.extract(album, "class='desc'>", "<", pos) tags , pos = text.extract(album, "