From 16e014baaae566323610c304a267e19b64e8dc20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 29 Apr 2018 21:27:25 +0200 Subject: [PATCH] [smugmug] added image and album extractor just some initial code that still requires a lot of work ... TODO: - folders - old-style albums (which are nearly all of them ...) - images from users - OAuth It could also happen that the API credentials used will become invalid whenever my 14 day trial period ends (7 days remaining), but that would just require users to supply their own. --- CHANGELOG.md | 5 + docs/supportedsites.rst | 1 + gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/directlink.py | 7 +- gallery_dl/extractor/powermanga.py | 2 +- gallery_dl/extractor/smugmug.py | 214 +++++++++++++++++++++++++++++ gallery_dl/util.py | 4 +- scripts/build_supportedsites.py | 10 +- 8 files changed, 229 insertions(+), 15 deletions(-) create mode 100644 gallery_dl/extractor/smugmug.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 844de878..89481b61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # Changelog ## Unreleased +- Added support for: + - `smugmug` - https://www.smugmug.com/ +- Added title information for `mangadex` chapters +- Improved the `pinterest` API implementation (#83) +- Removed `gomanga` and `puremashiro` ## 1.3.4 - 2018-04-20 - Added support for custom OAuth2 credentials for `pinterest` diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index eaa14a72..ba5f8256 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -68,6 +68,7 @@ Sea Otter Scans https://reader.seaotterscans.com/ Chapters, Manga Sen Manga http://raw.senmanga.com/ Chapters Sense-Scans http://sensescans.com/ Chapters, Manga SlideShare https://www.slideshare.net/ Presentations +SmugMug https://www.smugmug.com/ Albums, individual Images, Nodes Subapics https://subapics.com/ Chapters, Manga The /b/ Archive https://thebarchive.com/ Threads Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 3787f8d8..59213681 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -75,6 +75,7 @@ modules = [ "senmanga", "sensescans", "slideshare", + "smugmug", "subapics", "thebarchive", "tumblr", diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py index b9e0c868..5cb76f34 100644 --- a/gallery_dl/extractor/directlink.py +++ b/gallery_dl/extractor/directlink.py @@ -21,10 +21,9 @@ class DirectlinkExtractor(Extractor): r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))" r"(?:\?(?P[^/?#]*))?(?:#(?P.*))?$"] test = [ - (("https://photos.smugmug.com/The-World/Hawaii/" - "i-SWz2K6n/2/X3/IMG_0311-X3.jpg"), { - "url": "32ee1045881e17ef3f13a9958595afa42421ec6c", - "keyword": "2427b68c14006489df1776bb1bcd3bc24be25e10", + (("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), { + "url": "18c5d00077332e98e53be9fed2ee4be66154b88d", + "keyword": "66bce3a0a6872d8497e1984eb49d54a3ed0d3d5e", }), # more complex example ("https://example.org/path/file.webm?que=1&ry=2#fragment", { diff --git a/gallery_dl/extractor/powermanga.py b/gallery_dl/extractor/powermanga.py index 281e20db..3b3be1a1 100644 --- a/gallery_dl/extractor/powermanga.py +++ b/gallery_dl/extractor/powermanga.py @@ -18,7 +18,7 @@ class PowermangaChapterExtractor(foolslide.FoolslideChapterExtractor): test = [(("https://read.powermanga.org" "/read/one_piece_digital_colour_comics/en/0/75/"), { "url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384", - "keyword": "9bf211d435060d1e38d3d13e4aaaa5a87381bfad", + "keyword": "9985bcb78491dff9c725958b06bba606be51b6d3", })] diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py new file mode 100644 index 00000000..23b29150 --- /dev/null +++ b/gallery_dl/extractor/smugmug.py @@ -0,0 +1,214 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract images from https://www.smugmug.com/""" + +from .common import Extractor, Message +from .. import text, util, exception +from ..cache import memcache + +BASE_PATTERN = ( + r"(?:smugmug:(?:https?://)?([^/]+)|" + r"(?:https?://)?([^.]+\.smugmug\.com))") + + +class SmugmugExtractor(Extractor): + """Base class for smugmug extractors""" + category = "smugmug" + filename_fmt = "{category}_{Owner[Name]}_{Image[ImageKey]}.{extension}" + + def __init__(self): + Extractor.__init__(self) + self.api = SmugmugAPI(self) + + def update_image(self, image): + if "ArchivedUri" not in image: + largest = self.api.image_largest(image["ImageKey"]) + for key in ("Url", "Width", "Height", "MD5", "Size"): + if key in largest: + image[key] = largest[key] + return image["Url"], image + return image["ArchivedUri"], image + + +class SmugmugAlbumExtractor(SmugmugExtractor): + subcategory = "album" + directory_fmt = ["{category}", "{Owner[Name]}", "{Album[Name]}"] + archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}" + pattern = [r"smugmug:album:([^:]+)$"] + test = [("smugmug:album:xgkb4C", { + "url": "eb6133445064115ad83d32cbc6472520a2d24d53", + "content": "864f6953cb04121290407a579611bc5087d117ee", + })] + + def __init__(self, match): + SmugmugExtractor.__init__(self) + self.album_id = match.group(1) + + def items(self): + album = self.api.album(self.album_id) + images = self.api.album_images(self.album_id) + username = album["Uris"]["User"]["Uri"].rpartition("/")[2] + owner = self.api.user(username) + + data = { + "Album": album, + "Owner": owner, + } + + yield Message.Version, 1 + yield Message.Directory, data + + for image in images: + url, image = self.update_image(image) + data["Image"] = image + yield Message.Url, url, text.nameext_from_url(url, data) + + +class SmugmugImageExtractor(SmugmugExtractor): + subcategory = "image" + directory_fmt = ["{category}", "{Owner[Name]}"] + archive_fmt = "{Image[ImageKey]}" + pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"] + test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", { + "url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4", + "keyword": "d53df829d493ec3e31b8fe300872beb968812bfd", + "content": "626fe50d25fe49beeda15e116938db36e163c01f", + })] + + def __init__(self, match): + SmugmugExtractor.__init__(self) + self.image_id = match.group(3) + + def items(self): + image = self.api.image(self.image_id) + username = image["Uris"]["ImageOwner"]["Uri"].rpartition("/")[2] + owner = self.api.user(username) + + url, image = self.update_image(image) + + data = { + "Image": image, + "Owner": owner, + } + del image["Uris"] + del owner["Uris"] + text.nameext_from_url(url, data) + + yield Message.Version, 1 + yield Message.Directory, data + yield Message.Url, url, data + + +class SmugmugNodeExtractor(SmugmugExtractor): + """ """ + subcategory = "node" + directory_fmt = ["{category}"] + archive_fmt = "n_{Node[NodeID]}_{Image[ImageID]}" + pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/n-([^/?&#]+)$"] + test = [("https://mikf.smugmug.com/Test/n-xnNH3s", { + "pattern": "^smugmug:album:xgkb4C$", + })] + + def __init__(self, match): + SmugmugExtractor.__init__(self) + self.node_id = match.group(3) + + def items(self): + yield Message.Version, 1 + + data = self.api.node(self.node_id) + if data["Type"] == "Album": + album_id = data["Uris"]["Album"]["Uri"].rpartition("/")[2] + yield Message.Queue, "smugmug:album:" + album_id, data + # ... + + +class SmugmugAPI(): + """Minimal interface for the smugmug API v2""" + API_URL = "https://api.smugmug.com/api/v2/" + API_KEY = "DFqxg4jf7GrtsQ5PnbNB8899zKfnDrdK" + API_SECRET = ("fknV35p9r9BwZC4XbTzvCXpcSJRdD83S" + "9nMFQm25ndGBzNPnwRDbRnnVBvqt4xTq") + HEADERS = {"Accept": "application/json"} + + def __init__(self, extractor): + api_key = extractor.config("api-key", self.API_KEY) + api_secret = extractor.config("api-secret", self.API_SECRET) + token = extractor.config("access-token") + token_secret = extractor.config("access-token-secret") + + if api_key and api_secret and token and token_secret: + self.session = util.OAuthSession( + extractor.session, + api_key, api_secret, + token, token_secret, + ) + self.api_key = None + else: + self.session = extractor.session + self.api_key = api_key + + def album(self, album_id): + return self._call("album/" + album_id)["Album"] + + def album_images(self, album_id): + return self._pagination("album/" + album_id + "!images") + + def image(self, image_id): + return self._call("image/" + image_id)["Image"] + + def image_largest(self, image_id): + endpoint = "image/" + image_id + "!largestimage" + return self._call(endpoint)["LargestImage"] + + def image_sizes(self, image_id): + return self._call("image/" + image_id + "!sizedetails") + + def node(self, node_id): + return self._call("node/" + node_id)["Node"] + + @memcache(keyarg=1) + def user(self, username): + return self._call("user/" + username)["User"] + + def _call(self, endpoint, params=None): + url = self.API_URL + endpoint + params = params or {} + if self.api_key: + params["APIKey"] = self.api_key + + response = self.session.get(url, params=params, headers=self.HEADERS) + data = response.json() + + if 200 <= data["Code"] < 400: + return data["Response"] + + if data["Code"] == 404: + raise exception.NotFoundError() + if data["Code"] == 429: + self.log.error("Rate limit reached") + raise exception.StopExtraction() + + def _pagination(self, endpoint): + params = { + "start": 1, + "count": 100, + } + while True: + response = self._call(endpoint, params) + + obj = response[response["Locator"]] + if isinstance(obj, list): + yield from obj + else: + yield obj + + if "NextPage" not in response["Pages"]: + return + params["start"] += params["count"] diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 8341e084..d566bfca 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -536,11 +536,11 @@ class OAuthSession(): self.params["oauth_signature_method"] = "HMAC-SHA1" self.params["oauth_version"] = "1.0" - def get(self, url, params): + def get(self, url, params, **kwargs): params.update(self.params) params["oauth_nonce"] = self.nonce(16) params["oauth_timestamp"] = int(time.time()) - return self.session.get(url + self.sign(url, params)) + return self.session.get(url + self.sign(url, params), **kwargs) def sign(self, url, params): """Generate 'oauth_signature' value and return query string""" diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py index 398d3623..594f58d2 100755 --- a/scripts/build_supportedsites.py +++ b/scripts/build_supportedsites.py @@ -5,7 +5,7 @@ import os.path ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.realpath(ROOTDIR)) -import gallery_dl.extractor +import gallery_dl.extractor # noqa CATEGORY_MAP = { @@ -20,7 +20,6 @@ CATEGORY_MAP = { "e621" : "e621", "exhentai" : "ExHentai", "fallenangels" : "Fallen Angels Scans", - "gomanga" : "GoManga", "hbrowse" : "HBrowse", "hentai2read" : "Hentai2Read", "hentaifoundry" : "Hentai Foundry", @@ -30,14 +29,11 @@ CATEGORY_MAP = { "imagebam" : "ImageBam", "imagefap" : "ImageFap", "imgbox" : "imgbox", - "imgchili" : "imgChili", "imgth" : "imgth", "imgur" : "imgur", "jaiminisbox" : "Jaimini's Box", "kireicake" : "Kirei Cake", - "kisscomic" : "KissComic", "kissmanga" : "KissManga", - "loveisover" : "Love is Over Archive", "mangadex" : "MangaDex", "mangafox" : "Manga Fox", "mangahere" : "Manga Here", @@ -48,7 +44,6 @@ CATEGORY_MAP = { "nyafuu" : "Nyafuu Archive", "paheal" : "rule #34", "powermanga" : "PowerManga", - "puremashiro" : "Pure Mashiro", "readcomiconline": "Read Comic Online", "rbt" : "RebeccaBlackTech", "rule34" : "Rule 34", @@ -58,10 +53,9 @@ CATEGORY_MAP = { "senmanga" : "Sen Manga", "sensescans" : "Sense-Scans", "slideshare" : "SlideShare", - "spectrumnexus" : "Spectrum Nexus", + "smugmug" : "SmugMug", "thebarchive" : "The /b/ Archive", "worldthree" : "World Three", - "yeet" : "YEET Archive", "xvideos" : "XVideos", }