# gallery-dl/gallery_dl/extractor/imagefap.py

# -*- coding: utf-8 -*-

# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.imagefap.com/"""

from .common import Extractor, Message
from .. import text, util, exception

# Common URL prefix for all extractor patterns in this module:
# optional http(s) scheme, optional "www." or "beta." subdomain,
# followed by the imagefap.com host name
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com"


class ImagefapExtractor(Extractor):
    """Common settings and request handling for imagefap extractors"""
    category = "imagefap"
    root = "https://www.imagefap.com"
    directory_fmt = ("{category}", "{gallery_id} {title}")
    filename_fmt = "{category}_{gallery_id}_{filename}.{extension}"
    archive_fmt = "{gallery_id}_{image_id}"
    request_interval = (2.0, 4.0)

    def __init__(self, match):
        Extractor.__init__(self, match)
        # use the site root as Referer for all requests of this session
        self.session.headers["Referer"] = self.root

    def request(self, url, **kwargs):
        """Send a request and check for '/human-verification' redirects

        Raises exception.StopExtraction with the page's message when the
        response was redirected to a URL ending in '/human-verification'
        and a '<div class="mt-4' element is found on that page.
        If no such element is found, a warning is logged and the
        response is returned as usual.
        """
        response = Extractor.request(self, url, **kwargs)

        # regular response: not redirected to the verification page
        if not response.history or \
                not response.url.endswith("/human-verification"):
            return response

        notice = text.extr(response.text, '<div class="mt-4', '<')
        if not notice:
            self.log.warning("HTTP redirect to %s", response.url)
            return response

        # drop the rest of the opening tag and collapse whitespace
        content = notice.partition(">")[2]
        raise exception.StopExtraction("'%s'", " ".join(content.split()))


class ImagefapGalleryExtractor(ImagefapExtractor):
    """Extractor for all images of a gallery on imagefap.com"""
    subcategory = "gallery"
    pattern = BASE_PATTERN + r"/(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)"

    test = (
        ("https://www.imagefap.com/gallery/7102714", {
            "pattern": r"https://cdnh?\.imagefap\.com"
                       r"/images/full/\d+/\d+/\d+\.jpg",
            "keyword": "bdcb75b1e4b9dddc718f3d66e1a58afa9d81a38b",
            "content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab",
        }),
        ("https://www.imagefap.com/gallery/7876223", {
            "pattern": r"https://cdnh?\.imagefap\.com"
                       r"/images/full/\d+/\d+/\d+\.jpg",
            "keyword": {
                "categories": ["Asses", "Softcore", "Pornstars"],
                "count": 44,
                "description": "",
                "gallery_id": 7876223,
                "image_id": int,
                "num": int,
                "tags": ["big ass", "panties", "horny",
                         "pussy", "exposed", "outdoor"],
                "title": "Kelsi Monroe in lingerie",
                "uploader": "BdRachel",
            },
            "count": 44,
        }),
        # description (#3905)
        ("https://www.imagefap.com/gallery/6180555", {
            "range": "1",
            "keyword": {
                "categories": ["Amateur", "Softcore", "Homemade"],
                "count": 36,
                "description": "Nude and dressed sluts showing off the goods",
                "gallery_id": 6180555,
                "image_id": int,
                "num": int,
                "tags": [],
                "title": "Dressed or Undressed MG*",
                "uploader": "splitopen",
            },
        }),
        ("https://www.imagefap.com/pictures/7102714"),
        ("https://www.imagefap.com/gallery.php?gid=7102714"),
        ("https://beta.imagefap.com/gallery.php?gid=7102714"),
    )

    def __init__(self, match):
        ImagefapExtractor.__init__(self, match)
        self.gid = match.group(1)
        # replaced by get_job_metadata() with a value from the gallery page
        self.image_id = ""

    def items(self):
        """Yield one Directory message followed by one Url message per image

        The same metadata dict is updated in place and re-yielded
        for every image.
        """
        gallery_url = "{}/gallery/{}".format(self.root, self.gid)
        data = self.get_job_metadata(self.request(gallery_url).text)
        yield Message.Directory, data

        for image_url, image in self.get_images():
            data.update(image)
            yield Message.Url, image_url, data

    def get_job_metadata(self, page):
        """Extract gallery metadata from the gallery's HTML 'page'

        Besides returning the metadata dict, this stores 'self.image_id'
        and 'self._count', which get_images() reads afterwards.
        """
        extr = text.extract_from(page)

        # 'extr' is called once per field, in this exact order
        uploader = extr("porn picture gallery by ", " to see hottest")
        title = extr("<title>", "<")
        description = extr('id="gdesc_text"', '<')
        categories = extr('id="cnt_cats"', '</div>')
        tags = extr('id="cnt_tags"', '</div>')
        count = extr(' 1 of ', ' pics"')
        image_id = extr('id="img_ed_', '"')

        data = {
            "gallery_id": text.parse_int(self.gid),
            "uploader": uploader,
            "title": text.unescape(title),
            "description": text.unescape(description.partition(">")[2]),
            "categories": text.split_html(categories)[1::2],
            "tags": text.split_html(tags)[1::2],
            "count": text.parse_int(count),
        }

        self.image_id = image_id
        self._count = data["count"]

        return data

    def get_images(self):
        """Yield (URL, metadata) pairs for the images of this gallery

        Requests the partial photo listing repeatedly, advancing the
        'idx' parameter by the number of links found in each response.
        """
        url = "{}/photo/{}/".format(self.root, self.image_id)
        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "X-Requested-With": "XMLHttpRequest",
            "Referer": "{}?pgid=&gid={}&page=0".format(url, self.image_id)
        }
        params = {"gid": self.gid, "idx": 0, "partial": "true"}

        total = self._count
        seen = 0
        while True:
            page = self.request(url, params=params, headers=headers).text

            seen_before = seen
            for image_url in text.extract_iter(page, '<a href="', '"'):
                seen += 1
                info = text.nameext_from_url(image_url)
                info["num"] = seen
                info["image_id"] = text.parse_int(info["filename"])
                yield image_url, info

            found = seen - seen_before
            # stop when a response contains no links at all ...
            if not found:
                return
            # ... or fewer than 24 links after reaching the expected total
            if found < 24 and seen >= total:
                return
            params["idx"] += found


class ImagefapImageExtractor(ImagefapExtractor):
    """Extractor for a single image page on imagefap.com"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"/photo/(\d+)"
    test = (
        ("https://www.imagefap.com/photo/1962981893", {
            "pattern": r"https://cdnh?\.imagefap\.com"
                       r"/images/full/65/196/1962981893\.jpg",
            "keyword": {
                "date": "21/08/2014",
                "gallery_id": 7876223,
                "height": 1600,
                "image_id": 1962981893,
                "title": "Kelsi Monroe in lingerie",
                "uploader": "BdRachel",
                "width": 1066,
            },
        }),
        ("https://beta.imagefap.com/photo/1962981893"),
    )

    def __init__(self, match):
        ImagefapExtractor.__init__(self, match)
        self.image_id = match.group(1)

    def items(self):
        """Yield a Directory and a Url message for the image"""
        image_url, metadata = self.get_image()
        yield Message.Directory, metadata
        yield Message.Url, image_url, metadata

    def get_image(self):
        """Return the image URL and its metadata as a (url, dict) tuple

        The metadata is read from the photo page's 'application/ld+json'
        script block and from its 'imageid_input' / 'galleryid_input'
        form values.
        """
        page_url = "{}/photo/{}/".format(self.root, self.image_id)
        page = self.request(page_url).text

        # each search continues at the position where the previous one ended
        raw_info, pos = text.extract(
            page, '<script type="application/ld+json">', '</script>')
        image_id, pos = text.extract(
            page, 'id="imageid_input" value="', '"', pos)
        gallery_id, _ = text.extract(
            page, 'id="galleryid_input" value="', '"', pos)

        info = util.json_loads(raw_info)
        image_url = info["contentUrl"]

        metadata = {
            "title": text.unescape(info["name"]),
            "uploader": info["author"],
            "date": info["datePublished"],
            "width": text.parse_int(info["width"]),
            "height": text.parse_int(info["height"]),
            "gallery_id": text.parse_int(gallery_id),
            "image_id": text.parse_int(image_id),
        }
        return image_url, text.nameext_from_url(image_url, metadata)


class ImagefapFolderExtractor(ImagefapExtractor):
    """Extractor for the galleries inside an imagefap user folder"""
    subcategory = "folder"
    pattern = (BASE_PATTERN + r"/(?:organizer/|"
               r"(?:usergallery\.php\?user(id)?=([^&#]+)&"
               r"|profile/([^/?#]+)/galleries\?)folderid=)(\d+|-1)")
    test = (
        ("https://www.imagefap.com/organizer/409758", {
            "pattern": r"https://www\.imagefap\.com/gallery/7876223",
            "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
            "count": 1,
        }),
        (("https://www.imagefap.com/usergallery.php"
          "?userid=1981976&folderid=409758"), {
            "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
        }),
        (("https://www.imagefap.com/usergallery.php"
          "?user=BdRachel&folderid=409758"), {
            "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
        }),
        ("https://www.imagefap.com/profile/BdRachel/galleries?folderid=-1", {
            "pattern": ImagefapGalleryExtractor.pattern,
            "range": "1-40",
        }),
        (("https://www.imagefap.com/usergallery.php"
          "?userid=1981976&folderid=-1"), {
            "pattern": ImagefapGalleryExtractor.pattern,
            "range": "1-40",
        }),
        (("https://www.imagefap.com/usergallery.php"
          "?user=BdRachel&folderid=-1"), {
            "pattern": ImagefapGalleryExtractor.pattern,
            "range": "1-40",
        }),
    )

    def __init__(self, match):
        ImagefapExtractor.__init__(self, match)
        # groups: "id" suffix of 'userid', usergallery.php user value,
        # profile name, folder ID
        id_suffix, user, profile, folder_id = match.groups()
        self._id = id_suffix
        self.folder_id = folder_id
        self.user = user or profile

    def items(self):
        """Yield a Queue message for every gallery of the folder"""
        for gallery_id, name in self.galleries(self.folder_id):
            gallery_url = "{}/gallery/{}".format(self.root, gallery_id)
            yield Message.Queue, gallery_url, {
                "gallery_id": gallery_id,
                "title": text.unescape(name),
                "_extractor": ImagefapGalleryExtractor,
            }

    def galleries(self, folder_id):
        """Yield (gallery ID, title) pairs for the folder 'folder_id'"""
        if folder_id != "-1":
            url = "{}/organizer/{}/".format(self.root, folder_id)
        elif self._id:
            # the input URL used 'userid=', so 'self.user' holds that value
            url = "{}/usergallery.php?userid={}&folderid=-1".format(
                self.root, self.user)
        else:
            url = "{}/profile/{}/galleries?folderid=-1".format(
                self.root, self.user)

        params = {"page": 0}
        while True:
            extr = text.extract_from(self.request(url, params=params).text)
            found = 0

            gid = extr('<a  href="/gallery/', '"')
            while gid:
                yield gid, extr("<b>", "<")
                found += 1
                gid = extr('<a  href="/gallery/', '"')

            # fewer than 20 entries on a page ends the pagination
            if found < 20:
                return
            params["page"] += 1


class ImagefapUserExtractor(ImagefapExtractor):
    """Extractor for the folders of an imagefap user profile"""
    subcategory = "user"
    pattern = (BASE_PATTERN +
               r"/(?:profile(?:\.php\?user=|/)([^/?#]+)(?:/galleries)?"
               r"|usergallery\.php\?userid=(\d+))(?:$|#)")
    test = (
        ("https://www.imagefap.com/profile/BdRachel", {
            "pattern": ImagefapFolderExtractor.pattern,
            "count": ">= 18",
        }),
        ("https://www.imagefap.com/usergallery.php?userid=1862791", {
            "pattern": r"https://www\.imagefap\.com"
                       r"/profile/LucyRae/galleries\?folderid=-1",
            "count": 1,
        }),
        ("https://www.imagefap.com/profile/BdRachel/galleries"),
        ("https://www.imagefap.com/profile.php?user=BdRachel"),
        ("https://beta.imagefap.com/profile.php?user=BdRachel"),
    )

    def __init__(self, match):
        ImagefapExtractor.__init__(self, match)
        user, user_id = match.groups()
        self.user = user
        self.user_id = user_id

    def items(self):
        """Yield a Queue message for every folder of the user"""
        # one shared metadata dict is sent with every queued URL
        data = {"_extractor": ImagefapFolderExtractor}

        for folder_id in self.folders():
            if folder_id != "-1":
                url = "{}/organizer/{}/".format(self.root, folder_id)
            else:
                # 'self.user' has been updated by folders() at this point
                url = "{}/profile/{}/galleries?folderid=-1".format(
                    self.root, self.user)
            yield Message.Queue, url, data

    def folders(self):
        """Return a list of folder IDs of a user

        Also overwrites 'self.user' with the second-to-last path
        segment of the final response URL.
        """
        if not self.user:
            url = "{}/usergallery.php?userid={}".format(
                self.root, self.user_id)
        else:
            url = "{}/profile/{}/galleries".format(self.root, self.user)

        response = self.request(url)
        self.user = response.url.rsplit("/", 2)[-2]

        # 'tgl_all' holds the folder IDs as a '|'-separated value
        id_list = text.extr(response.text, ' id="tgl_all" value="', '"')
        return id_list.rstrip("|").split("|")
[imagefap] add extractor 8 years ago			`# -- coding: utf-8 --`

[imagefap] use default delay between requests (#1140) 2 years ago			`# Copyright 2016-2023 Mike Fährmann`
[imagefap] add extractor 8 years ago			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License version 2 as`
			`# published by the Free Software Foundation.`

update extractor test results - tumblr: remove deleted post - jaiminisbox: replace removed manga/chapters - smugmug: one inconsequential field got removed 4 years ago			`"""Extractors for https://www.imagefap.com/"""`
[imagefap] add extractor 8 years ago
			`from .common import Extractor, Message`
replace json.loads with direct calls to JSONDecoder.decode 2 years ago			`from .. import text, util, exception`
[imagefap] add extractor 8 years ago
[imagefap] generalize URL patterns and add tests (#552) 5 years ago			`BASE_PATTERN = r"(?:https?://)?(?:www\.\|beta\.)?imagefap\.com"`


set 'archive_fmt' values These are going to be used to create an unique id for each image. 7 years ago			`class ImagefapExtractor(Extractor):`
			`"""Base class for imagefap extractors"""`
[imagefap] add extractor 8 years ago			`category = "imagefap"`
[imagefap] use default delay between requests (#1140) 2 years ago			`root = "https://www.imagefap.com"`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`directory_fmt = ("{category}", "{gallery_id} {title}")`
change results of text.nameext_from_url() Instead of getting a complete 'filename' from an URL and splitting that into 'name' and 'extension', the new approach gets rid of the complete version and renames 'name' to 'filename'. (Using anything other than {extension} for a filename extension doesn't really work anyway) Example: "https://example.org/path/filename.ext" before: - filename : filename.ext - name : filename - extension: ext now: - filename : filename - extension: ext 6 years ago			`filename_fmt = "{category}_{gallery_id}_{filename}.{extension}"`
set 'archive_fmt' values These are going to be used to create an unique id for each image. 7 years ago			`archive_fmt = "{gallery_id}_{image_id}"`
[imagefap] use default delay between requests (#1140) 2 years ago			`request_interval = (2.0, 4.0)`
set 'archive_fmt' values These are going to be used to create an unique id for each image. 7 years ago
[imagefap] include Referer headers 5 years ago			`def __init__(self, match):`
			`Extractor.__init__(self, match)`
			`self.session.headers["Referer"] = self.root`

[imagefap] warn about redirects to '/human-verification' (#1140) 2 years ago			`def request(self, url, **kwargs):`
			`response = Extractor.request(self, url, **kwargs)`

			`if response.history and response.url.endswith("/human-verification"):`
			`msg = text.extr(response.text, '<div class="mt-4', '<')`
			`if msg:`
			`msg = " ".join(msg.partition(">")[2].split())`
			`raise exception.StopExtraction("'%s'", msg)`
			`self.log.warning("HTTP redirect to %s", response.url)`

			`return response`

set 'archive_fmt' values These are going to be used to create an unique id for each image. 7 years ago
			`class ImagefapGalleryExtractor(ImagefapExtractor):`
			`"""Extractor for image galleries from imagefap.com"""`
			`subcategory = "gallery"`
[imagefap] generalize URL patterns and add tests (#552) 5 years ago			`pattern = BASE_PATTERN + r"/(?:gallery\.php\?gid=\|gallery/\|pictures/)(\d+)"`
Handle beta.imagefap.com URLs (#552) 5 years ago
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`test = (`
[imagefap] update 'gallery' URLs (#3595) 2 years ago			`("https://www.imagefap.com/gallery/7102714", {`
update extractor test results 2 years ago			`"pattern": r"https://cdnh?\.imagefap\.com"`
update extractor test results 3 years ago			`r"/images/full/\d+/\d+/\d+\.jpg",`
[imagefap] extract 'categories' metadata and fix empty 'tags' 1 year ago			`"keyword": "bdcb75b1e4b9dddc718f3d66e1a58afa9d81a38b",`
[imagefap] update unit tests old gallery/image has been deleted 7 years ago			`"content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab",`
[imagefap] don't rely on image-count (fixes #9) 8 years ago			`}),`
[imagefap] add 'folder' extractor (#3504) 2 years ago			`("https://www.imagefap.com/gallery/7876223", {`
update extractor test results 2 years ago			`"pattern": r"https://cdnh?\.imagefap\.com"`
update extractor test results 3 years ago			`r"/images/full/\d+/\d+/\d+\.jpg",`
[imagefap] add 'folder' extractor (#3504) 2 years ago			`"keyword": {`
[imagefap] extract 'categories' metadata and fix empty 'tags' 1 year ago			`"categories": ["Asses", "Softcore", "Pornstars"],`
[imagefap] add 'folder' extractor (#3504) 2 years ago			`"count": 44,`
[imagefap] extract 'description' metadata (#3905) 1 year ago			`"description": "",`
[imagefap] add 'folder' extractor (#3504) 2 years ago			`"gallery_id": 7876223,`
			`"image_id": int,`
			`"num": int,`
			`"tags": ["big ass", "panties", "horny",`
			`"pussy", "exposed", "outdoor"],`
			`"title": "Kelsi Monroe in lingerie",`
			`"uploader": "BdRachel",`
			`},`
			`"count": 44,`
[imagefap] don't rely on image-count (fixes #9) 8 years ago			`}),`
[imagefap] extract 'description' metadata (#3905) 1 year ago			`# description (#3905)`
			`("https://www.imagefap.com/gallery/6180555", {`
			`"range": "1",`
			`"keyword": {`
[imagefap] extract 'categories' metadata and fix empty 'tags' 1 year ago			`"categories": ["Amateur", "Softcore", "Homemade"],`
[imagefap] extract 'description' metadata (#3905) 1 year ago			`"count": 36,`
			`"description": "Nude and dressed sluts showing off the goods",`
			`"gallery_id": 6180555,`
			`"image_id": int,`
			`"num": int,`
[imagefap] extract 'categories' metadata and fix empty 'tags' 1 year ago			`"tags": [] ,`
[imagefap] extract 'description' metadata (#3905) 1 year ago			`"title": "Dressed or Undressed MG*",`
			`"uploader": "splitopen",`
			`},`
			`}),`
[imagefap] update 'gallery' URLs (#3595) 2 years ago			`("https://www.imagefap.com/pictures/7102714"),`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`("https://www.imagefap.com/gallery.php?gid=7102714"),`
[imagefap] generalize URL patterns and add tests (#552) 5 years ago			`("https://beta.imagefap.com/gallery.php?gid=7102714"),`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`)`
[imagefap] add extractor 8 years ago
			`def __init__(self, match):`
propagate 'match' to base extractor constructor 6 years ago			`ImagefapExtractor.__init__(self, match)`
[imagefap] add extractor 8 years ago			`self.gid = match.group(1)`
consistent extractor naming scheme + docstrings 8 years ago			`self.image_id = ""`
[imagefap] add extractor 8 years ago
			`def items(self):`
[imagefap] update 'gallery' URLs (#3595) 2 years ago			`url = "{}/gallery/{}".format(self.root, self.gid)`
[imagefap] add extractor 8 years ago			`page = self.request(url).text`
			`data = self.get_job_metadata(page)`
			`yield Message.Directory, data`
[imagefap] don't rely on image-count (fixes #9) 8 years ago			`for url, image in self.get_images():`
[imagefap] add extractor 8 years ago			`data.update(image)`
[imagefap] deal with long filenames 8 years ago			`yield Message.Url, url, data`
[imagefap] add extractor 8 years ago
			`def get_job_metadata(self, page):`
			`"""Collect metadata for extractor-job"""`
[imagefap] fix metadata extraction 2 years ago			`extr = text.extract_from(page)`

			`data = {`
[imagefap] fix extraction also adds tags to gallery-metadata and converts suitable values to int 6 years ago			`"gallery_id": text.parse_int(self.gid),`
[imagefap] fix metadata extraction 2 years ago			`"uploader": extr("porn picture gallery by ", " to see hottest"),`
			`"title": text.unescape(extr("<title>", "<")),`
[imagefap] extract 'description' metadata (#3905) 1 year ago			`"description": text.unescape(extr(`
			`'id="gdesc_text"', '<').partition(">")[2]),`
[imagefap] extract 'categories' metadata and fix empty 'tags' 1 year ago			`"categories": text.split_html(extr(`
			`'id="cnt_cats"', '</div>'))[1::2],`
			`"tags": text.split_html(extr(`
			`'id="cnt_tags"', '</div>'))[1::2],`
[imagefap] fix metadata extraction 2 years ago			`"count": text.parse_int(extr(' 1 of ', ' pics"')),`
[imagefap] fix extraction also adds tags to gallery-metadata and converts suitable values to int 6 years ago			`}`
[imagefap] add extractor 8 years ago
[imagefap] fix metadata extraction 2 years ago			`self.image_id = extr('id="img_ed_', '"')`
			`self._count = data["count"]`

			`return data`

[imagefap] don't rely on image-count (fixes #9) 8 years ago			`def get_images(self):`
[imagefap] deal with long filenames 8 years ago			`"""Collect image-urls and -metadata"""`
[imagefap] use HTTPS + update test results 6 years ago			`url = "{}/photo/{}/".format(self.root, self.image_id)`
[imagefap] deal with long filenames 8 years ago			`params = {"gid": self.gid, "idx": 0, "partial": "true"}`
[imagefap] fix and improve gallery pagination (#3013) 2 years ago			`headers = {`
			`"Content-Type": "application/x-www-form-urlencoded",`
			`"X-Requested-With": "XMLHttpRequest",`
			`"Referer": "{}?pgid=&gid={}&page=0".format(url, self.image_id)`
			`}`

			`num = 0`
			`total = self._count`
[imagefap] add extractor 8 years ago			`while True:`
[imagefap] fix and improve gallery pagination (#3013) 2 years ago			`page = self.request(url, params=params, headers=headers).text`

			`cnt = 0`
			`for image_url in text.extract_iter(page, '<a href="', '"'):`
[imagefap] deal with long filenames 8 years ago			`num += 1`
[imagefap] fix and improve gallery pagination (#3013) 2 years ago			`cnt += 1`
			`data = text.nameext_from_url(image_url)`
[imagefap] adapt to new image URL format 5 years ago			`data["num"] = num`
			`data["image_id"] = text.parse_int(data["filename"])`
[imagefap] fix and improve gallery pagination (#3013) 2 years ago			`yield image_url, data`

[imagefap] fix infinite pagination loop (#3594) 2 years ago			`if not cnt or cnt < 24 and num >= total:`
[imagefap] fix and improve gallery pagination (#3013) 2 years ago			`return`
			`params["idx"] += cnt`
[imagefap] add single-image extractor 8 years ago

set 'archive_fmt' values These are going to be used to create an unique id for each image. 7 years ago			`class ImagefapImageExtractor(ImagefapExtractor):`
consistent extractor naming scheme + docstrings 8 years ago			`"""Extractor for single images from imagefap.com"""`
[imagefap] add single-image extractor 8 years ago			`subcategory = "image"`
[imagefap] generalize URL patterns and add tests (#552) 5 years ago			`pattern = BASE_PATTERN + r"/photo/(\d+)"`
			`test = (`
[imagefap] add 'folder' extractor (#3504) 2 years ago			`("https://www.imagefap.com/photo/1962981893", {`
update extractor test results 2 years ago			`"pattern": r"https://cdnh?\.imagefap\.com"`
[imagefap] add 'folder' extractor (#3504) 2 years ago			`r"/images/full/65/196/1962981893\.jpg",`
			`"keyword": {`
			`"date": "21/08/2014",`
			`"gallery_id": 7876223,`
			`"height": 1600,`
			`"image_id": 1962981893,`
			`"title": "Kelsi Monroe in lingerie",`
			`"uploader": "BdRachel",`
			`"width": 1066,`
			`},`
[imagefap] generalize URL patterns and add tests (#552) 5 years ago			`}),`
[imagefap] add 'folder' extractor (#3504) 2 years ago			`("https://beta.imagefap.com/photo/1962981893"),`
[imagefap] generalize URL patterns and add tests (#552) 5 years ago			`)`
[imagefap] add single-image extractor 8 years ago
			`def __init__(self, match):`
propagate 'match' to base extractor constructor 6 years ago			`ImagefapExtractor.__init__(self, match)`
[imagefap] add single-image extractor 8 years ago			`self.image_id = match.group(1)`

			`def items(self):`
[imagefap] adapt to new image URL format 5 years ago			`url, data = self.get_image()`
[imagefap] add single-image extractor 8 years ago			`yield Message.Directory, data`
[imagefap] adapt to new image URL format 5 years ago			`yield Message.Url, url, data`
[imagefap] add single-image extractor 8 years ago
[imagefap] adapt to new image URL format 5 years ago			`def get_image(self):`
[imagefap] fix extraction also adds tags to gallery-metadata and converts suitable values to int 6 years ago			`url = "{}/photo/{}/".format(self.root, self.image_id)`
			`page = self.request(url).text`
[imagefap] adapt to new image URL format 5 years ago
			`info, pos = text.extract(`
			`page, '<script type="application/ld+json">', '</script>')`
			`image_id, pos = text.extract(`
			`page, 'id="imageid_input" value="', '"', pos)`
			`gallery_id, pos = text.extract(`
			`page, 'id="galleryid_input" value="', '"', pos)`
replace json.loads with direct calls to JSONDecoder.decode 2 years ago			`info = util.json_loads(info)`
[imagefap] adapt to new image URL format 5 years ago			`url = info["contentUrl"]`

			`return url, text.nameext_from_url(url, {`
[imagefap] add single-image extractor 8 years ago			`"title": text.unescape(info["name"]),`
			`"uploader": info["author"],`
			`"date": info["datePublished"],`
[imagefap] fix extraction also adds tags to gallery-metadata and converts suitable values to int 6 years ago			`"width": text.parse_int(info["width"]),`
			`"height": text.parse_int(info["height"]),`
[imagefap] adapt to new image URL format 5 years ago			`"gallery_id": text.parse_int(gallery_id),`
			`"image_id": text.parse_int(image_id),`
[imagefap] add single-image extractor 8 years ago			`})`

[imagefap] add user extractor 8 years ago
[imagefap] add 'folder' extractor (#3504) 2 years ago			`class ImagefapFolderExtractor(ImagefapExtractor):`
			`"""Extractor for imagefap user folders"""`
			`subcategory = "folder"`
			`pattern = (BASE_PATTERN + r"/(?:organizer/\|"`
			`r"(?:usergallery\.php\?user(id)?=([^&#]+)&"`
			`r"\|profile/([^/?#]+)/galleries\?)folderid=)(\d+\|-1)")`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`test = (`
[imagefap] add 'folder' extractor (#3504) 2 years ago			`("https://www.imagefap.com/organizer/409758", {`
			`"pattern": r"https://www\.imagefap\.com/gallery/7876223",`
			`"url": "37822523e6e4a56feb9dea35653760c86b44ff89",`
			`"count": 1,`
adjust some tests 6 years ago			`}),`
[imagefap] add 'folder' extractor (#3504) 2 years ago			`(("https://www.imagefap.com/usergallery.php"`
			`"?userid=1981976&folderid=409758"), {`
			`"url": "37822523e6e4a56feb9dea35653760c86b44ff89",`
			`}),`
			`(("https://www.imagefap.com/usergallery.php"`
			`"?user=BdRachel&folderid=409758"), {`
			`"url": "37822523e6e4a56feb9dea35653760c86b44ff89",`
			`}),`
			`("https://www.imagefap.com/profile/BdRachel/galleries?folderid=-1", {`
			`"pattern": ImagefapGalleryExtractor.pattern,`
			`"range": "1-40",`
			`}),`
			`(("https://www.imagefap.com/usergallery.php"`
			`"?userid=1981976&folderid=-1"), {`
			`"pattern": ImagefapGalleryExtractor.pattern,`
			`"range": "1-40",`
			`}),`
			`(("https://www.imagefap.com/usergallery.php"`
			`"?user=BdRachel&folderid=-1"), {`
			`"pattern": ImagefapGalleryExtractor.pattern,`
			`"range": "1-40",`
adjust some tests 6 years ago			`}),`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`)`
[imagefap] add user extractor 8 years ago
			`def __init__(self, match):`
propagate 'match' to base extractor constructor 6 years ago			`ImagefapExtractor.__init__(self, match)`
[imagefap] add 'folder' extractor (#3504) 2 years ago			`self._id, user, profile, self.folder_id = match.groups()`
			`self.user = user or profile`
[imagefap] add user extractor 8 years ago
			`def items(self):`
[imagefap] add 'folder' extractor (#3504) 2 years ago			`for gallery_id, name in self.galleries(self.folder_id):`
			`url = "{}/gallery/{}".format(self.root, gallery_id)`
			`data = {`
			`"gallery_id": gallery_id,`
			`"title" : text.unescape(name),`
			`"_extractor": ImagefapGalleryExtractor,`
			`}`
			`yield Message.Queue, url, data`
[imagefap] fix and improve folder extraction (#3013) 2 years ago
			`def galleries(self, folder_id):`
[imagefap] add 'folder' extractor (#3504) 2 years ago			`"""Yield gallery IDs and titles of a folder"""`
[imagefap] fix and improve folder extraction (#3013) 2 years ago			`if folder_id == "-1":`
[imagefap] add 'folder' extractor (#3504) 2 years ago			`if self._id:`
			`url = "{}/usergallery.php?userid={}&folderid=-1".format(`
			`self.root, self.user)`
			`else:`
			`url = "{}/profile/{}/galleries?folderid=-1".format(`
			`self.root, self.user)`
[imagefap] fix and improve folder extraction (#3013) 2 years ago			`else:`
			`url = "{}/organizer/{}/".format(self.root, folder_id)`

[imagefap] add 'folder' extractor (#3504) 2 years ago			`params = {"page": 0}`
[imagefap] fix and improve folder extraction (#3013) 2 years ago			`while True:`
			`extr = text.extract_from(self.request(url, params=params).text)`
			`cnt = 0`

			`while True:`
			`gid = extr('<a href="/gallery/', '"')`
			`if not gid:`
			`break`
			`yield gid, extr("<b>", "<")`
			`cnt += 1`

[imagefap] fix pagination (#3013) 1 year ago			`if cnt < 20:`
[imagefap] fix and improve folder extraction (#3013) 2 years ago			`break`
			`params["page"] += 1`
[imagefap] add 'folder' extractor (#3504) 2 years ago

			`class ImagefapUserExtractor(ImagefapExtractor):`
			`"""Extractor for an imagefap user profile"""`
			`subcategory = "user"`
			`pattern = (BASE_PATTERN +`
			`r"/(?:profile(?:\.php\?user=\|/)([^/?#]+)(?:/galleries)?"`
			`r"\|usergallery\.php\?userid=(\d+))(?:$\|#)")`
			`test = (`
			`("https://www.imagefap.com/profile/BdRachel", {`
			`"pattern": ImagefapFolderExtractor.pattern,`
			`"count": ">= 18",`
			`}),`
			`("https://www.imagefap.com/usergallery.php?userid=1862791", {`
			`"pattern": r"https://www\.imagefap\.com"`
			`r"/profile/LucyRae/galleries\?folderid=-1",`
			`"count": 1,`
			`}),`
			`("https://www.imagefap.com/profile/BdRachel/galleries"),`
			`("https://www.imagefap.com/profile.php?user=BdRachel"),`
			`("https://beta.imagefap.com/profile.php?user=BdRachel"),`
			`)`

			`def __init__(self, match):`
			`ImagefapExtractor.__init__(self, match)`
			`self.user, self.user_id = match.groups()`

			`def items(self):`
			`data = {"_extractor": ImagefapFolderExtractor}`

			`for folder_id in self.folders():`
			`if folder_id == "-1":`
			`url = "{}/profile/{}/galleries?folderid=-1".format(`
			`self.root, self.user)`
			`else:`
			`url = "{}/organizer/{}/".format(self.root, folder_id)`
			`yield Message.Queue, url, data`

			`def folders(self):`
			`"""Return a list of folder IDs of a user"""`
			`if self.user:`
			`url = "{}/profile/{}/galleries".format(self.root, self.user)`
			`else:`
			`url = "{}/usergallery.php?userid={}".format(`
			`self.root, self.user_id)`

			`response = self.request(url)`
			`self.user = response.url.split("/")[-2]`
			`folders = text.extr(response.text, ' id="tgl_all" value="', '"')`
			`return folders.rstrip("\|").split("\|")`