# -*- coding: utf-8 -*- # Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://www.pornhub.com/""" from .common import Extractor, Message from .. import text, exception BASE_PATTERN = r"(?:https?://)?(?:[^.]+\.)?pornhub\.com" class PornhubExtractor(Extractor): """Base class for pornhub extractors""" category = "pornhub" root = "https://www.pornhub.com" class PornhubGalleryExtractor(PornhubExtractor): """Extractor for image galleries on pornhub.com""" subcategory = "gallery" directory_fmt = ("{category}", "{user}", "{gallery[id]} {gallery[title]}") filename_fmt = "{num:>03}_{id}.{extension}" archive_fmt = "{id}" pattern = BASE_PATTERN + r"/album/(\d+)" test = ( ("https://www.pornhub.com/album/19289801", { "pattern": r"https://\w+.phncdn.com/pics/albums/\d+/\d+/\d+/\d+/", "count": ">= 300", "keyword": { "id" : int, "num" : int, "score" : int, "views" : int, "caption": str, "user" : "Danika Mori", "gallery": { "id" : 19289801, "score": int, "views": int, "tags" : list, "title": "Danika Mori Best Moments", }, }, }), ("https://www.pornhub.com/album/69040172", { "exception": exception.AuthorizationError, }), ) def __init__(self, match): PornhubExtractor.__init__(self, match) self.gallery_id = match.group(1) self._first = None def items(self): data = self.metadata() yield Message.Directory, data for num, image in enumerate(self.images(), 1): url = image["url"] image.update(data) image["num"] = num yield Message.Url, url, text.nameext_from_url(url, image) def metadata(self): url = "{}/album/{}".format( self.root, self.gallery_id) extr = text.extract_from(self.request(url).text) title = extr("", "") score = extr('
', '<') tags = extr('
= 6", }), ("https://www.pornhub.com/users/flyings0l0/"), ("https://www.pornhub.com/users/flyings0l0/photos/public"), ("https://www.pornhub.com/users/flyings0l0/photos/private"), ("https://www.pornhub.com/users/flyings0l0/photos/favorites"), ("https://www.pornhub.com/model/bossgirl/photos"), ) def __init__(self, match): PornhubExtractor.__init__(self, match) self.type, self.user, self.cat = match.groups() def items(self): url = "{}/{}/{}/photos/{}/ajax".format( self.root, self.type, self.user, self.cat or "public") params = {"page": 1} headers = { "Referer": url[:-5], "X-Requested-With": "XMLHttpRequest", } data = {"_extractor": PornhubGalleryExtractor} while True: page = self.request( url, method="POST", headers=headers, params=params).text if not page: return for gid in text.extract_iter(page, 'id="albumphoto', '"'): yield Message.Queue, self.root + "/album/" + gid, data params["page"] += 1