From 16e014baaae566323610c304a267e19b64e8dc20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Sun, 29 Apr 2018 21:27:25 +0200
Subject: [PATCH] [smugmug] added image and album extractor

just some initial code that still requires a lot of work ...

TODO:
- folders
- old-style albums (which are nearly all of them ...)
- images from users
- OAuth

The bundled API credentials may also become invalid once my 14-day trial
period ends (7 days remaining), in which case users would simply have to
supply their own.
---
 CHANGELOG.md                       |   5 +
 docs/supportedsites.rst            |   1 +
 gallery_dl/extractor/__init__.py   |   1 +
 gallery_dl/extractor/directlink.py |   7 +-
 gallery_dl/extractor/powermanga.py |   2 +-
 gallery_dl/extractor/smugmug.py    | 214 +++++++++++++++++++++++++++++
 gallery_dl/util.py                 |   4 +-
 scripts/build_supportedsites.py    |  10 +-
 8 files changed, 229 insertions(+), 15 deletions(-)
 create mode 100644 gallery_dl/extractor/smugmug.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 844de878..89481b61 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,11 @@
 # Changelog
 
 ## Unreleased
+- Added support for:
+  - `smugmug` - https://www.smugmug.com/
+- Added title information for `mangadex` chapters
+- Improved the `pinterest` API implementation (#83)
+- Removed `gomanga` and `puremashiro`
 
 ## 1.3.4 - 2018-04-20
 - Added support for custom OAuth2 credentials for `pinterest`
diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
index eaa14a72..ba5f8256 100644
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -68,6 +68,7 @@ Sea Otter Scans      https://reader.seaotterscans.com/   Chapters, Manga
 Sen Manga            http://raw.senmanga.com/            Chapters
 Sense-Scans          http://sensescans.com/              Chapters, Manga
 SlideShare           https://www.slideshare.net/         Presentations
+SmugMug              https://www.smugmug.com/            Albums, individual Images, Nodes
 Subapics             https://subapics.com/               Chapters, Manga
 The /b/ Archive      https://thebarchive.com/            Threads
 Tumblr               https://www.tumblr.com/             Images from Users, Likes, Posts, Tag-Searches      Optional (OAuth)
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 3787f8d8..59213681 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -75,6 +75,7 @@ modules = [
     "senmanga",
     "sensescans",
     "slideshare",
+    "smugmug",
     "subapics",
     "thebarchive",
     "tumblr",
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index b9e0c868..5cb76f34 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -21,10 +21,9 @@ class DirectlinkExtractor(Extractor):
                r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
                r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$"]
     test = [
-        (("https://photos.smugmug.com/The-World/Hawaii/"
-          "i-SWz2K6n/2/X3/IMG_0311-X3.jpg"), {
-            "url": "32ee1045881e17ef3f13a9958595afa42421ec6c",
-            "keyword": "2427b68c14006489df1776bb1bcd3bc24be25e10",
+        (("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), {
+            "url": "18c5d00077332e98e53be9fed2ee4be66154b88d",
+            "keyword": "66bce3a0a6872d8497e1984eb49d54a3ed0d3d5e",
         }),
         # more complex example
         ("https://example.org/path/file.webm?que=1&ry=2#fragment", {
diff --git a/gallery_dl/extractor/powermanga.py b/gallery_dl/extractor/powermanga.py
index 281e20db..3b3be1a1 100644
--- a/gallery_dl/extractor/powermanga.py
+++ b/gallery_dl/extractor/powermanga.py
@@ -18,7 +18,7 @@ class PowermangaChapterExtractor(foolslide.FoolslideChapterExtractor):
     test = [(("https://read.powermanga.org"
               "/read/one_piece_digital_colour_comics/en/0/75/"), {
         "url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
-        "keyword": "9bf211d435060d1e38d3d13e4aaaa5a87381bfad",
+        "keyword": "9985bcb78491dff9c725958b06bba606be51b6d3",
     })]
 
 
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
new file mode 100644
index 00000000..23b29150
--- /dev/null
+++ b/gallery_dl/extractor/smugmug.py
@@ -0,0 +1,214 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images from https://www.smugmug.com/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+from ..cache import memcache
+
+BASE_PATTERN = (
+    r"(?:smugmug:(?:https?://)?([^/]+)|"        # group 1: explicit host
+    r"(?:https?://)?([^.]+\.smugmug\.com))")    # group 2: *.smugmug.com
+
+
+class SmugmugExtractor(Extractor):
+    """Base class for smugmug extractors"""
+    category = "smugmug"
+    filename_fmt = "{category}_{Owner[Name]}_{Image[ImageKey]}.{extension}"
+
+    def __init__(self):
+        Extractor.__init__(self)
+        self.api = SmugmugAPI(self)  # API wrapper bound to this extractor
+
+    def update_image(self, image):  # -> (download URL, updated 'image')
+        if "ArchivedUri" not in image:  # fall back to '!largestimage' data
+            largest = self.api.image_largest(image["ImageKey"])
+            for key in ("Url", "Width", "Height", "MD5", "Size"):
+                if key in largest:
+                    image[key] = largest[key]  # modifies 'image' in place
+            return image["Url"], image  # NOTE(review): assumes "Url" is set
+        return image["ArchivedUri"], image
+
+
+class SmugmugAlbumExtractor(SmugmugExtractor):  # 'smugmug:album:<key>' URLs
+    subcategory = "album"
+    directory_fmt = ["{category}", "{Owner[Name]}", "{Album[Name]}"]
+    archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}"
+    pattern = [r"smugmug:album:([^:]+)$"]
+    test = [("smugmug:album:xgkb4C", {
+        "url": "eb6133445064115ad83d32cbc6472520a2d24d53",
+        "content": "864f6953cb04121290407a579611bc5087d117ee",
+    })]
+
+    def __init__(self, match):
+        SmugmugExtractor.__init__(self)
+        self.album_id = match.group(1)  # album key captured by 'pattern'
+
+    def items(self):
+        album = self.api.album(self.album_id)
+        images = self.api.album_images(self.album_id)  # lazy generator
+        username = album["Uris"]["User"]["Uri"].rpartition("/")[2]
+        owner = self.api.user(username)
+
+        data = {
+            "Album": album,
+            "Owner": owner,
+        }
+
+        yield Message.Version, 1
+        yield Message.Directory, data
+
+        for image in images:
+            url, image = self.update_image(image)
+            data["Image"] = image  # the same 'data' dict is reused per image
+            yield Message.Url, url, text.nameext_from_url(url, data)
+
+
+class SmugmugImageExtractor(SmugmugExtractor):
+    """Extractor for individual smugmug images"""
+    subcategory = "image"
+    directory_fmt = ["{category}", "{Owner[Name]}"]
+    archive_fmt = "{Image[ImageKey]}"
+    pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"]
+    test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", {
+        "url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4",
+        "keyword": "d53df829d493ec3e31b8fe300872beb968812bfd",
+        "content": "626fe50d25fe49beeda15e116938db36e163c01f",
+    })]
+
+    def __init__(self, match):
+        SmugmugExtractor.__init__(self)
+        self.image_id = match.group(3)  # groups 1-2 belong to BASE_PATTERN
+
+    def items(self):
+        image = self.api.image(self.image_id)
+        username = image["Uris"]["ImageOwner"]["Uri"].rpartition("/")[2]
+        owner = dict(self.api.user(username))  # copy: user() is memcache'd
+
+        url, image = self.update_image(image)
+
+        data = {
+            "Image": image,
+            "Owner": owner,
+        }
+        del image["Uris"], owner["Uris"]  # strip "Uris" from the metadata
+        text.nameext_from_url(url, data)
+
+        yield Message.Version, 1
+        yield Message.Directory, data
+        yield Message.Url, url, data
+
+
+class SmugmugNodeExtractor(SmugmugExtractor):
+    """Extractor for smugmug nodes (only 'Album' nodes so far)"""
+    subcategory = "node"
+    directory_fmt = ["{category}"]
+    archive_fmt = "n_{Node[NodeID]}_{Image[ImageID]}"
+    pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/n-([^/?&#]+)$"]
+    test = [("https://mikf.smugmug.com/Test/n-xnNH3s", {
+        "pattern": "^smugmug:album:xgkb4C$",
+    })]
+
+    def __init__(self, match):
+        SmugmugExtractor.__init__(self)
+        self.node_id = match.group(3)  # groups 1-2 belong to BASE_PATTERN
+
+    def items(self):
+        yield Message.Version, 1
+
+        data = self.api.node(self.node_id)
+        if data["Type"] == "Album":
+            album_id = data["Uris"]["Album"]["Uri"].rpartition("/")[2]
+            yield Message.Queue, "smugmug:album:" + album_id, data
+        # TODO: node types other than 'Album' (e.g. folders) yield nothing
+
+
+class SmugmugAPI():
+    """Minimal interface for the smugmug API v2"""
+    API_URL = "https://api.smugmug.com/api/v2/"
+    API_KEY = "DFqxg4jf7GrtsQ5PnbNB8899zKfnDrdK"
+    API_SECRET = ("fknV35p9r9BwZC4XbTzvCXpcSJRdD83S"
+                  "9nMFQm25ndGBzNPnwRDbRnnVBvqt4xTq")
+    HEADERS = {"Accept": "application/json"}
+
+    def __init__(self, extractor):
+        api_key = extractor.config("api-key", self.API_KEY)
+        api_secret = extractor.config("api-secret", self.API_SECRET)
+        token = extractor.config("access-token")
+        token_secret = extractor.config("access-token-secret")
+        self.log = extractor.log  # needed by _call() for error reporting
+
+        # use OAuth-signed requests only when all four credentials are set
+        if api_key and api_secret and token and token_secret:
+            self.session = util.OAuthSession(
+                extractor.session, api_key, api_secret, token, token_secret)
+            self.api_key = None  # no 'APIKey' parameter for signed requests
+        else:
+            self.session = extractor.session
+            self.api_key = api_key
+
+    def album(self, album_id):
+        return self._call("album/" + album_id)["Album"]
+
+    def album_images(self, album_id):
+        return self._pagination("album/" + album_id + "!images")
+
+    def image(self, image_id):
+        return self._call("image/" + image_id)["Image"]
+
+    def image_largest(self, image_id):
+        endpoint = "image/" + image_id + "!largestimage"
+        return self._call(endpoint)["LargestImage"]
+
+    def image_sizes(self, image_id):
+        return self._call("image/" + image_id + "!sizedetails")
+
+    def node(self, node_id):
+        return self._call("node/" + node_id)["Node"]
+
+    @memcache(keyarg=1)
+    def user(self, username):
+        return self._call("user/" + username)["User"]
+
+    def _call(self, endpoint, params=None):
+        """Call 'endpoint' and return the 'Response' part of its reply"""
+        url = self.API_URL + endpoint
+        params = params or {}
+        if self.api_key:
+            params["APIKey"] = self.api_key
+
+        response = self.session.get(url, params=params, headers=self.HEADERS)
+        data = response.json()
+
+        if 200 <= data["Code"] < 400:
+            return data["Response"]
+
+        if data["Code"] == 404:
+            raise exception.NotFoundError()
+        if data["Code"] == 429:
+            self.log.error("Rate limit reached")
+        else:
+            self.log.error("API request failed (code %s)", data["Code"])
+        raise exception.StopExtraction()  # never return None to callers
+
+    def _pagination(self, endpoint):
+        """Yield all objects of a paginated endpoint, 100 per request"""
+        params = {"start": 1, "count": 100}
+        while True:
+            response = self._call(endpoint, params)
+
+            obj = response[response["Locator"]]
+            if isinstance(obj, list):
+                yield from obj
+            else:
+                yield obj
+
+            if "NextPage" not in response["Pages"]:
+                return
+            params["start"] += params["count"]
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 8341e084..d566bfca 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -536,11 +536,11 @@ class OAuthSession():
         self.params["oauth_signature_method"] = "HMAC-SHA1"
         self.params["oauth_version"] = "1.0"
 
-    def get(self, url, params):
+    def get(self, url, params, **kwargs):
         params.update(self.params)
         params["oauth_nonce"] = self.nonce(16)
         params["oauth_timestamp"] = int(time.time())
-        return self.session.get(url + self.sign(url, params))
+        return self.session.get(url + self.sign(url, params), **kwargs)
 
     def sign(self, url, params):
         """Generate 'oauth_signature' value and return query string"""
diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py
index 398d3623..594f58d2 100755
--- a/scripts/build_supportedsites.py
+++ b/scripts/build_supportedsites.py
@@ -5,7 +5,7 @@ import os.path
 
 ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.insert(0, os.path.realpath(ROOTDIR))
-import gallery_dl.extractor
+import gallery_dl.extractor  # noqa
 
 
 CATEGORY_MAP = {
@@ -20,7 +20,6 @@ CATEGORY_MAP = {
     "e621"           : "e621",
     "exhentai"       : "ExHentai",
     "fallenangels"   : "Fallen Angels Scans",
-    "gomanga"        : "GoManga",
     "hbrowse"        : "HBrowse",
     "hentai2read"    : "Hentai2Read",
     "hentaifoundry"  : "Hentai Foundry",
@@ -30,14 +29,11 @@ CATEGORY_MAP = {
     "imagebam"       : "ImageBam",
     "imagefap"       : "ImageFap",
     "imgbox"         : "imgbox",
-    "imgchili"       : "imgChili",
     "imgth"          : "imgth",
     "imgur"          : "imgur",
     "jaiminisbox"    : "Jaimini's Box",
     "kireicake"      : "Kirei Cake",
-    "kisscomic"      : "KissComic",
     "kissmanga"      : "KissManga",
-    "loveisover"     : "Love is Over Archive",
     "mangadex"       : "MangaDex",
     "mangafox"       : "Manga Fox",
     "mangahere"      : "Manga Here",
@@ -48,7 +44,6 @@ CATEGORY_MAP = {
     "nyafuu"         : "Nyafuu Archive",
     "paheal"         : "rule #34",
     "powermanga"     : "PowerManga",
-    "puremashiro"    : "Pure Mashiro",
     "readcomiconline": "Read Comic Online",
     "rbt"            : "RebeccaBlackTech",
     "rule34"         : "Rule 34",
@@ -58,10 +53,9 @@ CATEGORY_MAP = {
     "senmanga"       : "Sen Manga",
     "sensescans"     : "Sense-Scans",
     "slideshare"     : "SlideShare",
-    "spectrumnexus"  : "Spectrum Nexus",
+    "smugmug"        : "SmugMug",
     "thebarchive"    : "The /b/ Archive",
     "worldthree"     : "World Three",
-    "yeet"           : "YEET Archive",
     "xvideos"        : "XVideos",
 }