[smugmug] added image and album extractor

Just some initial code that still requires a lot of work. TODO: folders; old-style albums (which are nearly all of them); images from users; OAuth. It could also happen that the API credentials used here become invalid once my 14-day trial period ends (7 days remaining), but that would only require users to supply their own.
6 years ago · 16e014baaa
parent d96b3474e5
commit 16e014baaa
8 changed files with 229 additions and 15 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,6 +1,11 @@
 # Changelog
 ## Unreleased
 - Added support for:
  - `smugmug` - https://www.smugmug.com/
 - Added title information for `mangadex` chapters
 - Improved the `pinterest` API implementation (#83)
 - Removed `gomanga` and `puremashiro`
 ## 1.3.4 - 2018-04-20
 - Added support for custom OAuth2 credentials for `pinterest`
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@ -68,6 +68,7 @@ Sea Otter Scans      https://reader.seaotterscans.com/   Chapters, Manga
 Sen Manga            http://raw.senmanga.com/            Chapters
 Sense-Scans          http://sensescans.com/              Chapters, Manga
 SlideShare           https://www.slideshare.net/         Presentations
 SmugMug              https://www.smugmug.com/            Albums, individual Images, Nodes
 Subapics             https://subapics.com/               Chapters, Manga
 The /b/ Archive      https://thebarchive.com/            Threads
 Tumblr               https://www.tumblr.com/             Images from Users, Likes, Posts, Tag-Searches      Optional (OAuth)
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@ -75,6 +75,7 @@ modules = [
    "senmanga",
    "sensescans",
    "slideshare",
    "smugmug",
    "subapics",
    "thebarchive",
    "tumblr",
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@ -21,10 +21,9 @@ class DirectlinkExtractor(Extractor):
               r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
               r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$"]
    test = [
-        (("https://photos.smugmug.com/The-World/Hawaii/"
+        (("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), {
-          "i-SWz2K6n/2/X3/IMG_0311-X3.jpg"), {
+            "url": "18c5d00077332e98e53be9fed2ee4be66154b88d",
-            "url": "32ee1045881e17ef3f13a9958595afa42421ec6c",
+            "keyword": "66bce3a0a6872d8497e1984eb49d54a3ed0d3d5e",
            "keyword": "2427b68c14006489df1776bb1bcd3bc24be25e10",
        }),
        # more complex example
        ("https://example.org/path/file.webm?que=1&ry=2#fragment", {
--- a/gallery_dl/extractor/powermanga.py
+++ b/gallery_dl/extractor/powermanga.py
@ -18,7 +18,7 @@ class PowermangaChapterExtractor(foolslide.FoolslideChapterExtractor):
    test = [(("https://read.powermanga.org"
              "/read/one_piece_digital_colour_comics/en/0/75/"), {
        "url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
-        "keyword": "9bf211d435060d1e38d3d13e4aaaa5a87381bfad",
+        "keyword": "9985bcb78491dff9c725958b06bba606be51b6d3",
    })]
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@ -0,0 +1,214 @@
 # -*- coding: utf-8 -*-
 # Copyright 2018 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 """Extract images from https://www.smugmug.com/"""
 from .common import Extractor, Message
 from .. import text, util, exception
 from ..cache import memcache
 # Shared URL prefix for the extractor patterns below. It contributes two
 # capture groups, of which at most one participates in a match:
 #   1) the host of an explicit "smugmug:[http(s)://]<host>" URL
 #   2) the "<name>.smugmug.com" host of a regular URL
 # A pattern built on top of it therefore finds its own first group at
 # index 3.
 BASE_PATTERN = (
    r"(?:smugmug:(?:https?://)?([^/]+)|"
    r"(?:https?://)?([^.]+\.smugmug\.com))")
 class SmugmugExtractor(Extractor):
    """Common base for all smugmug extractors; owns the API client"""
    category = "smugmug"
    filename_fmt = "{category}_{Owner[Name]}_{Image[ImageKey]}.{extension}"

    def __init__(self):
        Extractor.__init__(self)
        self.api = SmugmugAPI(self)

    def update_image(self, image):
        """Return a (download URL, image metadata) pair for 'image'

        An image that carries an "ArchivedUri" is downloaded from there and
        returned unchanged. Otherwise the largest available rendition is
        requested from the API and its "Url", "Width", "Height", "MD5" and
        "Size" values (those that are present) are copied into 'image',
        which is modified in place.
        """
        if "ArchivedUri" in image:
            return image["ArchivedUri"], image

        largest = self.api.image_largest(image["ImageKey"])
        image.update(
            (field, largest[field])
            for field in ("Url", "Width", "Height", "MD5", "Size")
            if field in largest
        )
        return image["Url"], image
 class SmugmugAlbumExtractor(SmugmugExtractor):
    """Extractor for all images of a smugmug album, given its album key"""
    subcategory = "album"
    directory_fmt = ["{category}", "{Owner[Name]}", "{Album[Name]}"]
    archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}"
    pattern = [r"smugmug:album:([^:]+)$"]
    test = [("smugmug:album:xgkb4C", {
        "url": "eb6133445064115ad83d32cbc6472520a2d24d53",
        "content": "864f6953cb04121290407a579611bc5087d117ee",
    })]

    def __init__(self, match):
        SmugmugExtractor.__init__(self)
        self.album_id = match.group(1)

    def items(self):
        """Yield the directory message and one URL message per image"""
        album_info = self.api.album(self.album_id)
        image_iter = self.api.album_images(self.album_id)

        # the owner's name is the last path segment of the album's user URI
        owner_uri = album_info["Uris"]["User"]["Uri"]
        _, _, owner_name = owner_uri.rpartition("/")
        metadata = {
            "Album": album_info,
            "Owner": self.api.user(owner_name),
        }

        yield Message.Version, 1
        yield Message.Directory, metadata

        # 'metadata' is reused for every image; only its "Image" entry
        # (and whatever nameext_from_url() sets) changes per iteration
        for entry in image_iter:
            file_url, metadata["Image"] = self.update_image(entry)
            text.nameext_from_url(file_url, metadata)
            yield Message.Url, file_url, metadata
 class SmugmugImageExtractor(SmugmugExtractor):
    """Extractor for a single smugmug image, identified by its image key"""
    subcategory = "image"
    directory_fmt = ["{category}", "{Owner[Name]}"]
    archive_fmt = "{Image[ImageKey]}"
    pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"]
    test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", {
        "url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4",
        "keyword": "d53df829d493ec3e31b8fe300872beb968812bfd",
        "content": "626fe50d25fe49beeda15e116938db36e163c01f",
    })]

    def __init__(self, match):
        SmugmugExtractor.__init__(self)
        # groups 1 and 2 belong to BASE_PATTERN; group 3 is the image key
        self.image_id = match.group(3)

    def items(self):
        """Yield the directory message and the URL message for the image"""
        image = self.api.image(self.image_id)
        # the owner's name is the last path segment of the image-owner URI
        username = image["Uris"]["ImageOwner"]["Uri"].rpartition("/")[2]
        owner = self.api.user(username)
        url, image = self.update_image(image)

        # Strip the "Uris" link tables from the emitted metadata.
        # api.user() is wrapped in memcache, so the owner dict is presumably
        # shared between calls: deleting the key in place would raise
        # KeyError for a second image of the same owner. Work on a filtered
        # copy instead and leave the cached object untouched.
        image.pop("Uris", None)
        owner = {key: value for key, value in owner.items() if key != "Uris"}

        data = {
            "Image": image,
            "Owner": owner,
        }
        text.nameext_from_url(url, data)

        yield Message.Version, 1
        yield Message.Directory, data
        yield Message.Url, url, data
 class SmugmugNodeExtractor(SmugmugExtractor):
    """Extractor for smugmug nodes; album nodes are queued as albums"""
    subcategory = "node"
    directory_fmt = ["{category}"]
    archive_fmt = "n_{Node[NodeID]}_{Image[ImageID]}"
    pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/n-([^/?&#]+)$"]
    test = [("https://mikf.smugmug.com/Test/n-xnNH3s", {
        "pattern": "^smugmug:album:xgkb4C$",
    })]

    def __init__(self, match):
        SmugmugExtractor.__init__(self)
        # groups 1 and 2 belong to BASE_PATTERN; group 3 is the node ID
        self.node_id = match.group(3)

    def items(self):
        """Queue a 'smugmug:album:<key>' URL if the node is an album"""
        yield Message.Version, 1

        node = self.api.node(self.node_id)
        if node["Type"] != "Album":
            # TODO: node types other than albums are not handled yet
            return

        # the album key is the last path segment of the node's album URI
        album_uri = node["Uris"]["Album"]["Uri"]
        album_key = album_uri.rpartition("/")[2]
        yield Message.Queue, "smugmug:album:" + album_key, node
 class SmugmugAPI():
    """Minimal interface for the smugmug API v2

    Requests go through an OAuth-signing session when an API key, API
    secret, access token and access token secret are all available;
    otherwise the extractor's own session is used and the API key is sent
    as the 'APIKey' query parameter.
    """
    API_URL = "https://api.smugmug.com/api/v2/"
    # built-in default credentials; overridden by the 'api-key' and
    # 'api-secret' config values read in __init__
    API_KEY = "DFqxg4jf7GrtsQ5PnbNB8899zKfnDrdK"
    API_SECRET = ("fknV35p9r9BwZC4XbTzvCXpcSJRdD83S"
                  "9nMFQm25ndGBzNPnwRDbRnnVBvqt4xTq")
    HEADERS = {"Accept": "application/json"}

    def __init__(self, extractor):
        """Pick session and credentials based on the extractor's config"""
        import logging

        # _call() reports API errors through this logger; reuse the
        # extractor's logger when it has one
        self.log = (getattr(extractor, "log", None) or
                    logging.getLogger(extractor.category))

        api_key = extractor.config("api-key", self.API_KEY)
        api_secret = extractor.config("api-secret", self.API_SECRET)
        token = extractor.config("access-token")
        token_secret = extractor.config("access-token-secret")

        if api_key and api_secret and token and token_secret:
            self.session = util.OAuthSession(
                extractor.session,
                api_key, api_secret,
                token, token_secret,
            )
            # signed requests do not need the 'APIKey' query parameter
            self.api_key = None
        else:
            self.session = extractor.session
            self.api_key = api_key

    def album(self, album_id):
        """Return the 'Album' object for the given album key"""
        return self._call("album/" + album_id)["Album"]

    def album_images(self, album_id):
        """Return a generator over all image objects of an album"""
        return self._pagination("album/" + album_id + "!images")

    def image(self, image_id):
        """Return the 'Image' object for the given image key"""
        return self._call("image/" + image_id)["Image"]

    def image_largest(self, image_id):
        """Return the 'LargestImage' object for the given image key"""
        endpoint = "image/" + image_id + "!largestimage"
        return self._call(endpoint)["LargestImage"]

    def image_sizes(self, image_id):
        """Return the full response of the '!sizedetails' endpoint"""
        return self._call("image/" + image_id + "!sizedetails")

    def node(self, node_id):
        """Return the 'Node' object for the given node ID"""
        return self._call("node/" + node_id)["Node"]

    @memcache(keyarg=1)
    def user(self, username):
        """Return the 'User' object for 'username' (cached per username)"""
        return self._call("user/" + username)["User"]

    def _call(self, endpoint, params=None):
        """Send a GET request to 'endpoint' and return its 'Response' part

        Raises exception.NotFoundError for code 404 and
        exception.StopExtraction for every other code outside of 200-399,
        so callers never receive None in place of a response.
        """
        url = self.API_URL + endpoint
        # work on a copy: the API key (and, for OAuth sessions, the
        # signature parameters) must not leak into the caller's dict
        params = dict(params) if params else {}
        if self.api_key:
            params["APIKey"] = self.api_key

        response = self.session.get(url, params=params, headers=self.HEADERS)
        data = response.json()
        code = data["Code"]

        if 200 <= code < 400:
            return data["Response"]
        if code == 404:
            raise exception.NotFoundError()
        if code == 429:
            self.log.error("Rate limit reached")
        else:
            self.log.error("API request failed (%s: %s)",
                           code, data.get("Message", ""))
        raise exception.StopExtraction()

    def _pagination(self, endpoint):
        """Yield every object of a paginated endpoint, 100 per request"""
        params = {
            "start": 1,
            "count": 100,
        }
        while True:
            response = self._call(endpoint, params)
            # 'Locator' names the response key that holds the payload
            obj = response[response["Locator"]]
            if isinstance(obj, list):
                yield from obj
            else:
                yield obj
            if "NextPage" not in response["Pages"]:
                return
            params["start"] += params["count"]
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@ -536,11 +536,11 @@ class OAuthSession():
        self.params["oauth_signature_method"] = "HMAC-SHA1"
        self.params["oauth_version"] = "1.0"
-    def get(self, url, params):
+    def get(self, url, params, **kwargs):
        params.update(self.params)
        params["oauth_nonce"] = self.nonce(16)
        params["oauth_timestamp"] = int(time.time())
-        return self.session.get(url + self.sign(url, params))
+        return self.session.get(url + self.sign(url, params), **kwargs)
    def sign(self, url, params):
        """Generate 'oauth_signature' value and return query string"""
--- a/scripts/build_supportedsites.py
+++ b/scripts/build_supportedsites.py
@ -5,7 +5,7 @@ import os.path
 ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.insert(0, os.path.realpath(ROOTDIR))
-import gallery_dl.extractor
+import gallery_dl.extractor  # noqa
 CATEGORY_MAP = {
@ -20,7 +20,6 @@ CATEGORY_MAP = {
    "e621"           : "e621",
    "exhentai"       : "ExHentai",
    "fallenangels"   : "Fallen Angels Scans",
    "gomanga"        : "GoManga",
    "hbrowse"        : "HBrowse",
    "hentai2read"    : "Hentai2Read",
    "hentaifoundry"  : "Hentai Foundry",
@ -30,14 +29,11 @@ CATEGORY_MAP = {
    "imagebam"       : "ImageBam",
    "imagefap"       : "ImageFap",
    "imgbox"         : "imgbox",
    "imgchili"       : "imgChili",
    "imgth"          : "imgth",
    "imgur"          : "imgur",
    "jaiminisbox"    : "Jaimini's Box",
    "kireicake"      : "Kirei Cake",
    "kisscomic"      : "KissComic",
    "kissmanga"      : "KissManga",
    "loveisover"     : "Love is Over Archive",
    "mangadex"       : "MangaDex",
    "mangafox"       : "Manga Fox",
    "mangahere"      : "Manga Here",
@ -48,7 +44,6 @@ CATEGORY_MAP = {
    "nyafuu"         : "Nyafuu Archive",
    "paheal"         : "rule #34",
    "powermanga"     : "PowerManga",
    "puremashiro"    : "Pure Mashiro",
    "readcomiconline": "Read Comic Online",
    "rbt"            : "RebeccaBlackTech",
    "rule34"         : "Rule 34",
@ -58,10 +53,9 @@ CATEGORY_MAP = {
    "senmanga"       : "Sen Manga",
    "sensescans"     : "Sense-Scans",
    "slideshare"     : "SlideShare",
-    "spectrumnexus"  : "Spectrum Nexus",
+    "smugmug"        : "SmugMug",
    "thebarchive"    : "The /b/ Archive",
    "worldthree"     : "World Three",
    "yeet"           : "YEET Archive",
    "xvideos"        : "XVideos",
 }