gallery-dl/gallery_dl/extractor/slickpic.py

# -*- coding: utf-8 -*-

# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.slickpic.com/"""

from .common import Extractor, Message
from .. import text
import time

# Optional http(s) scheme followed by a "<subdomain>.slickpic.com" host;
# capture group 1 is the subdomain ([\w-]+).
BASE_PATTERN = r"(?:https?://)?([\w-]+)\.slickpic\.com"


class SlickpicExtractor(Extractor):
    """Common base class shared by all slickpic extractors"""
    category = "slickpic"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # first capture group of the URL pattern: the account's subdomain
        user = match.group(1)
        self.root = "https://{}.slickpic.com".format(user)
        self.user = user


class SlickpicAlbumExtractor(SlickpicExtractor):
    """Extractor for albums on slickpic.com"""
    subcategory = "album"
    directory_fmt = ("{category}", "{user[name]}",
                     "{album[id]} {album[title]}")
    filename_fmt = "{num:>03}_{id}{title:?_//}.{extension}"
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"/albums/([^/?#]+)"
    example = "https://USER.slickpic.com/albums/TITLE/"

    def __init__(self, match):
        SlickpicExtractor.__init__(self, match)
        # second capture group: the album's path segment after '/albums/'
        self.album = match.group(2)

    def items(self):
        """Yield a Directory message followed by one Url message per image"""
        data = self.metadata()
        imgs = self.images(data)

        # 'title' and 'user' were already HTML-unescaped in metadata().
        # Unescaping a second time is not idempotent (page text '&amp;amp;'
        # would end up as '&' instead of '&amp;'), so use them as they are.
        data = {
            "album": {
                "id"   : text.parse_int(data["aid"]),
                "title": data["title"],
            },
            "user": {
                "id"  : text.parse_int(data["uid"]),
                "name": data["user"],
                "nick": self.user
            },
            "count": len(imgs),
        }

        yield Message.Directory, data
        for num, img in enumerate(imgs, 1):
            # file URL is built from the entry's 'url_rsz' base and 'fname'
            url = img["url_rsz"] + "/o/" + img["fname"]
            img = text.nameext_from_url(img["fname"], {
                "url"        : url,
                "num"        : num,
                "id"         : text.parse_int(img["id"]),
                "width"      : text.parse_int(img["width"]),
                "height"     : text.parse_int(img["height"]),
                "title"      : img["title"],
                "description": img["descr"],
            })
            # attach the shared album/user/count metadata to every image
            img.update(data)
            yield Message.Url, url, img

    def metadata(self):
        """Fetch the album page and return the values scraped from it

        Returns a dict with the (unescaped) album 'title' and 'user' name
        taken from the page's <title>, plus the raw 'tk', 'shd', 'aid' and
        'uid' strings that images() sends back to the site.
        """
        url = "{}/albums/{}/?wallpaper".format(self.root, self.album)
        extr = text.extract_from(self.request(url).text)

        # <title> has the form "<album title> by <user>"; split on the
        # last " by " so an album title containing " by " stays intact
        title = text.unescape(extr("<title>", "</title>"))
        title, _, user = title.rpartition(" by ")

        # NOTE(review): the extr() calls are kept in their original order;
        # presumably each one continues searching after the previous
        # match - confirm against text.extract_from
        return {
            "title": title,
            "user" : user,
            "tk"   : extr('tk = "', '"'),
            "shd"  : extr('shd = "', '"'),
            "aid"  : extr('data-aid="', '"'),
            "uid"  : extr('data-uid="', '"'),
        }

    def images(self, data):
        """Return the 'list' entry of the album's photo-list JSON response

        'data' is the dict produced by metadata(); its 'tk', 'shd', 'aid'
        and 'uid' values are forwarded in the POST form.
        """
        url = self.root + "/xhr/photo/get/list"
        data = {
            "tm"    : time.time(),  # current timestamp
            "tk"    : data["tk"],
            "shd"   : data["shd"],
            "aid"   : data["aid"],
            "uid"   : data["uid"],
            "col"   : "0",
            "sys"   : self.album,
            "vw"    : "1280",
            "vh"    : "1024",
            "skey"  : "",
            "viewer": "false",
            "pub"   : "1",
            "sng"   : "0",
            "whq"   : "1",
        }
        return self.request(url, method="POST", data=data).json()["list"]


class SlickpicUserExtractor(SlickpicExtractor):
    """Extractor for all albums linked from a slickpic user's gallery"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"(?:/gallery)?/?(?:$|[?#])"
    example = "https://USER.slickpic.com/"

    def items(self):
        """Yield a Queue message for every album link on the gallery page"""
        response = self.request(self.root + "/gallery?viewer")

        albums_root = self.root + "/albums/"
        needle = 'href="' + albums_root
        # hand each queued URL to the album extractor
        meta = {"_extractor": SlickpicAlbumExtractor}

        for slug in text.extract_iter(response.text, needle, '"'):
            yield Message.Queue, albums_root + slug, meta
[slickpic] add album extractor (#249) 5 years ago			`# -- coding: utf-8 --`

remove test results in extractor modules and add generic example URLs 1 year ago			`# Copyright 2019-2023 Mike Fährmann`
[slickpic] add album extractor (#249) 5 years ago			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License version 2 as`
			`# published by the Free Software Foundation.`

			`"""Extractors for https://www.slickpic.com/"""`

			`from .common import Extractor, Message`
			`from .. import text`
			`import time`

generic extractor (#735) * Generic extractor, see issue #683 * Fix failed test_names test, no subcategory needed * Prefix directory_fmt with "generic" * Relax regex (would break some urls) * Flake8 compliance * pattern: don't require a scheme This fixes a bug when we force the generic extractor on urls without a scheme (that are allowed by all other extractors). * Fix using g: and r: on urls without http(s) scheme Almost all extractors accept urls without an initial http(s) scheme. Many extractors also allow for generic subdomains in their "pattern" variable; some of them implement this with the regex character class "[^.]+" (everything but a dot). This leads to a problem when the extractor is given a url starting with g: or r: (to force using the generic or recursive extractor) and without the http(s) scheme: e.g. with "r:foobar.tumblr.com" the "r:" is wrongly considered part of the subdomain. This commit fixes the bug, replacing the too generic "[^.]+" with the more specific "[\w-]+" (letters, digits and "-", the only characters allowed in domain names), which is already used by some extractors. * Relax imageurl_pattern_ext: allow relative urls * First round of small suggested changes * Support image urls starting with "//" * self.baseurl: remove trailing slash * Relax regexp (didn't catch some image urls) * Some fixes and cleanup * Fix domain pattern; option to enable extractor Fixed the domain section for "pattern", to pass "test_add" and "test_add_module" tests. Added the "enabled" configuration option (default False) to enable the generic extractor. Using "g(eneric):URL" forces using the extractor. 3 years ago			`BASE_PATTERN = r"(?:https?://)?([\w-]+)\.slickpic\.com"`
[slickpic] add album extractor (#249) 5 years ago

			`class SlickpicExtractor(Extractor):`
			`"""Base class for slickpic extractors"""`
			`category = "slickpic"`

			`def __init__(self, match):`
			`Extractor.__init__(self, match)`
			`self.user = match.group(1)`
			`self.root = "https://{}.slickpic.com".format(self.user)`


			`class SlickpicAlbumExtractor(SlickpicExtractor):`
			`"""Extractor for albums on slickpic.com"""`
			`subcategory = "album"`
			`directory_fmt = ("{category}", "{user[name]}",`
			`"{album[id]} {album[title]}")`
			`filename_fmt = "{num:>03}_{id}{title:?_//}.{extension}"`
			`archive_fmt = "{id}"`
remove '&' from URL patterns '/?&#' -> '/?#' and '?&#' -> '?#' According to https://www.ietf.org/rfc/rfc3986.txt, URLs are "organized hierarchically" by using "the slash ("/"), question mark ("?"), and number sign ("#") characters to delimit components" 4 years ago			`pattern = BASE_PATTERN + r"/albums/([^/?#]+)"`
remove test results in extractor modules and add generic example URLs 1 year ago			`example = "https://USER.slickpic.com/albums/TITLE/"`
[slickpic] add album extractor (#249) 5 years ago
			`def __init__(self, match):`
			`SlickpicExtractor.__init__(self, match)`
			`self.album = match.group(2)`

			`def items(self):`
			`data = self.metadata()`
			`imgs = self.images(data)`

			`data = {`
			`"album": {`
			`"id" : text.parse_int(data["aid"]),`
			`"title": text.unescape(data["title"]),`
			`},`
			`"user": {`
			`"id" : text.parse_int(data["uid"]),`
			`"name": text.unescape(data["user"]),`
			`"nick": self.user`
			`},`
			`"count": len(imgs),`
			`}`

			`yield Message.Directory, data`
			`for num, img in enumerate(imgs, 1):`
			`url = img["url_rsz"] + "/o/" + img["fname"]`
			`img = text.nameext_from_url(img["fname"], {`
			`"url" : url,`
			`"num" : num,`
			`"id" : text.parse_int(img["id"]),`
			`"width" : text.parse_int(img["width"]),`
			`"height" : text.parse_int(img["height"]),`
			`"title" : img["title"],`
			`"description": img["descr"],`
			`})`
			`img.update(data)`
			`yield Message.Url, url, img`

			`def metadata(self):`
			`url = "{}/albums/{}/?wallpaper".format(self.root, self.album)`
			`extr = text.extract_from(self.request(url).text)`

			`title = text.unescape(extr("<title>", "</title>"))`
			`title, _, user = title.rpartition(" by ")`

			`return {`
			`"title": title,`
			`"user" : user,`
			`"tk" : extr('tk = "', '"'),`
			`"shd" : extr('shd = "', '"'),`
			`"aid" : extr('data-aid="', '"', ),`
			`"uid" : extr('data-uid="', '"', ),`
			`}`

			`def images(self, data):`
			`url = self.root + "/xhr/photo/get/list"`
			`data = {`
			`"tm" : time.time(),`
			`"tk" : data["tk"],`
			`"shd" : data["shd"],`
			`"aid" : data["aid"],`
			`"uid" : data["uid"],`
			`"col" : "0",`
			`"sys" : self.album,`
			`"vw" : "1280",`
			`"vh" : "1024",`
			`"skey" : "",`
			`"viewer": "false",`
			`"pub" : "1",`
			`"sng" : "0",`
			`"whq" : "1",`
			`}`
			`return self.request(url, method="POST", data=data).json()["list"]`
[slickpic] add user extractor (#249) 5 years ago

			`class SlickpicUserExtractor(SlickpicExtractor):`
			`subcategory = "user"`
			`pattern = BASE_PATTERN + r"(?:/gallery)?/?(?:$|[?#])"`
remove test results in extractor modules and add generic example URLs 1 year ago			`example = "https://USER.slickpic.com/"`
[slickpic] add user extractor (#249) 5 years ago
			`def items(self):`
			`page = self.request(self.root + "/gallery?viewer").text`
			`data = {"_extractor": SlickpicAlbumExtractor}`
			`base = self.root + "/albums/"`

			`for album in text.extract_iter(page, 'href="' + base, '"'):`
			`yield Message.Queue, base + album, data`