# -*- coding: utf-8 -*- # Copyright 2015-2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extract images from https://nijie.info/""" from .common import Extractor, Message, AsynchronousMixin from .. import text, exception from ..cache import cache class NijieExtractor(AsynchronousMixin, Extractor): """Base class for nijie extractors""" category = "nijie" directory_fmt = ("{category}", "{user_id}") filename_fmt = "{category}_{artist_id}_{image_id}_p{index:>02}.{extension}" archive_fmt = "{image_id}_{index}" cookiedomain = "nijie.info" cookienames = ("nemail", "nlogin") root = "https://nijie.info" view_url = "https://nijie.info/view.php?id=" popup_url = "https://nijie.info/view_popup.php?id=" def __init__(self, match): Extractor.__init__(self, match) self.user_id = match.group(1) self.session.headers["Referer"] = self.root + "/" def items(self): self.login() data = self.get_job_metadata() yield Message.Version, 1 yield Message.Directory, data for image_id in self.get_image_ids(): for image_url, image_data in self.get_image_data(image_id): image_data.update(data) if not image_data["extension"]: image_data["extension"] = "jpg" yield Message.Url, image_url, image_data def get_job_metadata(self): """Collect metadata for extractor-job""" return {"user_id": text.parse_int(self.user_id)} def get_image_ids(self): """Collect all relevant image-ids""" def get_image_data(self, image_id): """Get URL and metadata for images specified by 'image_id'""" page = self.request(self.view_url + image_id).text return self.extract_image_data(page, image_id) def extract_image_data(self, page, image_id): """Get URL and metadata for images from 'page'""" title, pos = text.extract( page, '= 18", }) def get_image_ids(self): return self._pagination("members_dojin") class NijieFavoriteExtractor(NijieExtractor): """Extractor for all favorites/bookmarks of a nijie-user""" subcategory = "favorite" directory_fmt = ("{category}", "bookmarks", "{user_id}") archive_fmt = "f_{user_id}_{image_id}_{index}" pattern = (r"(?:https?://)?(?:www\.)?nijie\.info" r"/user_like_illust_view\.php\?id=(\d+)") test = ("https://nijie.info/user_like_illust_view.php?id=44", { "count": ">= 16", }) def get_image_ids(self): return self._pagination("user_like_illust_view") class NijieImageExtractor(NijieExtractor): """Extractor for a work/image from nijie.info""" subcategory = "image" pattern = (r"(?:https?://)?(?:www\.)?nijie\.info" r"/view(?:_popup)?\.php\?id=(\d+)") test = ( ("https://nijie.info/view.php?id=70720", { "url": "a10d4995645b5f260821e32c60a35f73546c2699", "keyword": "408393d010307c76d52cbd0a4368d6d357805aea", "content": "d85e3ea896ed5e4da0bca2390ad310a4df716ca6", }), ("https://nijie.info/view.php?id=70724", { "exception": exception.NotFoundError, }), ("https://nijie.info/view_popup.php?id=70720"), ) def __init__(self, match): NijieExtractor.__init__(self, match) self.image_id = match.group(1) self.page = "" def get_job_metadata(self): response = self.request(self.view_url + self.image_id, expect=(404,)) if response.status_code == 404: raise exception.NotFoundError("image") self.page = response.text self.user_id = text.extract( self.page, '"sameAs": "https://nijie.info/members.php?id=', '"')[0] return NijieExtractor.get_job_metadata(self) def get_image_ids(self): return (self.image_id,) def get_image_data(self, _): return self.extract_image_data(self.page, self.image_id)