[smugmug] added image and album extractor

just some initial code that still requires a lot of work ...

TODO:
- folders
- old-style albums (which are nearly all of them ...)
- images from users
- OAuth

The bundled API credentials might also become invalid once my 14-day
trial period ends (7 days remaining), but that would only require
users to supply their own.
pull/86/head
Mike Fährmann 6 years ago
parent d96b3474e5
commit 16e014baaa
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -1,6 +1,11 @@
# Changelog # Changelog
## Unreleased ## Unreleased
- Added support for:
- `smugmug` - https://www.smugmug.com/
- Added title information for `mangadex` chapters
- Improved the `pinterest` API implementation (#83)
- Removed `gomanga` and `puremashiro`
## 1.3.4 - 2018-04-20 ## 1.3.4 - 2018-04-20
- Added support for custom OAuth2 credentials for `pinterest` - Added support for custom OAuth2 credentials for `pinterest`

@ -68,6 +68,7 @@ Sea Otter Scans https://reader.seaotterscans.com/ Chapters, Manga
Sen Manga http://raw.senmanga.com/ Chapters Sen Manga http://raw.senmanga.com/ Chapters
Sense-Scans http://sensescans.com/ Chapters, Manga Sense-Scans http://sensescans.com/ Chapters, Manga
SlideShare https://www.slideshare.net/ Presentations SlideShare https://www.slideshare.net/ Presentations
SmugMug https://www.smugmug.com/ Albums, individual Images, Nodes
Subapics https://subapics.com/ Chapters, Manga Subapics https://subapics.com/ Chapters, Manga
The /b/ Archive https://thebarchive.com/ Threads The /b/ Archive https://thebarchive.com/ Threads
Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth) Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth)

@ -75,6 +75,7 @@ modules = [
"senmanga", "senmanga",
"sensescans", "sensescans",
"slideshare", "slideshare",
"smugmug",
"subapics", "subapics",
"thebarchive", "thebarchive",
"tumblr", "tumblr",

@ -21,10 +21,9 @@ class DirectlinkExtractor(Extractor):
r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))" r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$"] r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$"]
test = [ test = [
(("https://photos.smugmug.com/The-World/Hawaii/" (("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), {
"i-SWz2K6n/2/X3/IMG_0311-X3.jpg"), { "url": "18c5d00077332e98e53be9fed2ee4be66154b88d",
"url": "32ee1045881e17ef3f13a9958595afa42421ec6c", "keyword": "66bce3a0a6872d8497e1984eb49d54a3ed0d3d5e",
"keyword": "2427b68c14006489df1776bb1bcd3bc24be25e10",
}), }),
# more complex example # more complex example
("https://example.org/path/file.webm?que=1&ry=2#fragment", { ("https://example.org/path/file.webm?que=1&ry=2#fragment", {

@ -18,7 +18,7 @@ class PowermangaChapterExtractor(foolslide.FoolslideChapterExtractor):
test = [(("https://read.powermanga.org" test = [(("https://read.powermanga.org"
"/read/one_piece_digital_colour_comics/en/0/75/"), { "/read/one_piece_digital_colour_comics/en/0/75/"), {
"url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384", "url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
"keyword": "9bf211d435060d1e38d3d13e4aaaa5a87381bfad", "keyword": "9985bcb78491dff9c725958b06bba606be51b6d3",
})] })]

@ -0,0 +1,214 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract images from https://www.smugmug.com/"""
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import memcache
# Common URL prefix for the web-URL based extractors. It accepts either an
# explicit "smugmug:" prefix followed by an arbitrary host (capture group 1)
# or a plain "<name>.smugmug.com" host (capture group 2); the scheme is
# optional in both cases. Patterns built on top of it therefore find their
# own first capture in group 3.
BASE_PATTERN = "".join((
    r"(?:",
    r"smugmug:(?:https?://)?([^/]+)",
    r"|",
    r"(?:https?://)?([^.]+\.smugmug\.com)",
    r")",
))
class SmugmugExtractor(Extractor):
    """Common base class of all smugmug extractors"""
    category = "smugmug"
    filename_fmt = "{category}_{Owner[Name]}_{Image[ImageKey]}.{extension}"

    def __init__(self):
        Extractor.__init__(self)
        # every subclass talks to the service through this API wrapper
        self.api = SmugmugAPI(self)

    def update_image(self, image):
        """Return a (download URL, image metadata) pair for 'image'

        An image that carries an "ArchivedUri" is returned as is, paired
        with that URI. For any other image the "largest image" record is
        requested from the API, its "Url", "Width", "Height", "MD5" and
        "Size" entries (as far as present) are copied into 'image', and
        the resulting image["Url"] is used as download URL.
        """
        if "ArchivedUri" in image:
            return image["ArchivedUri"], image

        largest = self.api.image_largest(image["ImageKey"])
        image.update(
            (field, largest[field])
            for field in ("Url", "Width", "Height", "MD5", "Size")
            if field in largest
        )
        return image["Url"], image
class SmugmugAlbumExtractor(SmugmugExtractor):
    """Extractor for all images of a smugmug album given by its key"""
    subcategory = "album"
    directory_fmt = ["{category}", "{Owner[Name]}", "{Album[Name]}"]
    archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}"
    pattern = [r"smugmug:album:([^:]+)$"]
    test = [("smugmug:album:xgkb4C", {
        "url": "eb6133445064115ad83d32cbc6472520a2d24d53",
        "content": "864f6953cb04121290407a579611bc5087d117ee",
    })]

    def __init__(self, match):
        SmugmugExtractor.__init__(self)
        self.album_id = match.group(1)

    def items(self):
        """Yield the version, directory and one URL message per image"""
        api = self.api
        album = api.album(self.album_id)
        images = api.album_images(self.album_id)

        # the owner's name is the last path segment of the album's user URI
        user_uri = album["Uris"]["User"]["Uri"]
        owner = api.user(user_uri.rpartition("/")[2])

        data = {"Album": album, "Owner": owner}

        yield Message.Version, 1
        yield Message.Directory, data

        # 'data' is one shared dict; only its "Image" entry (and whatever
        # nameext_from_url() stores in it) changes from image to image
        for entry in images:
            url, data["Image"] = self.update_image(entry)
            yield Message.Url, url, text.nameext_from_url(url, data)
class SmugmugImageExtractor(SmugmugExtractor):
    """Extractor for a single smugmug image"""
    subcategory = "image"
    directory_fmt = ["{category}", "{Owner[Name]}"]
    archive_fmt = "{Image[ImageKey]}"
    pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"]
    test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", {
        "url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4",
        "keyword": "d53df829d493ec3e31b8fe300872beb968812bfd",
        "content": "626fe50d25fe49beeda15e116938db36e163c01f",
    })]

    def __init__(self, match):
        SmugmugExtractor.__init__(self)
        # groups 1 and 2 belong to BASE_PATTERN; group 3 is the image key
        self.image_id = match.group(3)

    def items(self):
        """Yield the version, directory and URL message for the image"""
        image = self.api.image(self.image_id)

        # the owner's name is the last path segment of the image-owner URI
        username = image["Uris"]["ImageOwner"]["Uri"].rpartition("/")[2]
        owner = self.api.user(username)
        url, image = self.update_image(image)

        # Strip the "Uris" entries from the emitted metadata.
        # SmugmugAPI.user() is wrapped in memcache, so 'owner' is presumably
        # the very same dict on a later lookup of the same user and would
        # then no longer contain "Uris". A plain 'del' raises KeyError in
        # that case; pop() with a default can safely be repeated.
        image.pop("Uris", None)
        owner.pop("Uris", None)

        data = {
            "Image": image,
            "Owner": owner,
        }
        text.nameext_from_url(url, data)

        yield Message.Version, 1
        yield Message.Directory, data
        yield Message.Url, url, data
class SmugmugNodeExtractor(SmugmugExtractor):
    """Extractor for smugmug nodes; album nodes get queued as albums"""
    subcategory = "node"
    directory_fmt = ["{category}"]
    archive_fmt = "n_{Node[NodeID]}_{Image[ImageID]}"
    pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/n-([^/?&#]+)$"]
    test = [("https://mikf.smugmug.com/Test/n-xnNH3s", {
        "pattern": "^smugmug:album:xgkb4C$",
    })]

    def __init__(self, match):
        SmugmugExtractor.__init__(self)
        # groups 1 and 2 belong to BASE_PATTERN; group 3 is the node ID
        self.node_id = match.group(3)

    def items(self):
        """Yield the version message and, for album nodes, a queue message"""
        yield Message.Version, 1

        node = self.api.node(self.node_id)
        if node["Type"] != "Album":
            # other node types are not handled (yet)
            return

        # hand the album over to the album extractor via its "smugmug:album:"
        # pseudo-URL; the album key is the last segment of the album URI
        album_key = node["Uris"]["Album"]["Uri"].rpartition("/")[2]
        yield Message.Queue, "smugmug:album:" + album_key, node
class SmugmugAPI():
    """Minimal interface for the smugmug API v2"""
    API_URL = "https://api.smugmug.com/api/v2/"
    # default credentials; both can be overridden through the extractor's
    # "api-key" and "api-secret" config values
    API_KEY = "DFqxg4jf7GrtsQ5PnbNB8899zKfnDrdK"
    API_SECRET = ("fknV35p9r9BwZC4XbTzvCXpcSJRdD83S"
                  "9nMFQm25ndGBzNPnwRDbRnnVBvqt4xTq")
    HEADERS = {"Accept": "application/json"}

    def __init__(self, extractor):
        """Set up the session used for API requests

        When an API key, API secret, access token and access token secret
        are all available, requests go through an OAuth session; otherwise
        the extractor's plain session is used and the API key is sent as
        'APIKey' query parameter.
        """
        import logging

        api_key = extractor.config("api-key", self.API_KEY)
        api_secret = extractor.config("api-secret", self.API_SECRET)
        token = extractor.config("access-token")
        token_secret = extractor.config("access-token-secret")

        # _call() reports API errors through 'self.log'; reuse the
        # extractor's logger if it has one
        self.log = (getattr(extractor, "log", None) or
                    logging.getLogger(extractor.category))

        if api_key and api_secret and token and token_secret:
            self.session = util.OAuthSession(
                extractor.session,
                api_key, api_secret,
                token, token_secret,
            )
            self.api_key = None
        else:
            self.session = extractor.session
            self.api_key = api_key

    def album(self, album_id):
        """Return the "Album" object for 'album_id'"""
        return self._call("album/" + album_id)["Album"]

    def album_images(self, album_id):
        """Return an iterator over the image objects of an album"""
        return self._pagination("album/" + album_id + "!images")

    def image(self, image_id):
        """Return the "Image" object for 'image_id'"""
        return self._call("image/" + image_id)["Image"]

    def image_largest(self, image_id):
        """Return the "LargestImage" object for 'image_id'"""
        endpoint = "image/" + image_id + "!largestimage"
        return self._call(endpoint)["LargestImage"]

    def image_sizes(self, image_id):
        """Return the complete "!sizedetails" response for 'image_id'"""
        return self._call("image/" + image_id + "!sizedetails")

    def node(self, node_id):
        """Return the "Node" object for 'node_id'"""
        return self._call("node/" + node_id)["Node"]

    @memcache(keyarg=1)
    def user(self, username):
        """Return the "User" object for 'username'"""
        return self._call("user/" + username)["User"]

    def _call(self, endpoint, params=None):
        """Send a GET request to 'endpoint' and return its "Response" part

        Raises exception.NotFoundError for a 404 result code and
        exception.StopExtraction for every other error code.
        """
        url = self.API_URL + endpoint
        # work on a copy: the API key (and, inside an OAuth session, the
        # OAuth parameters) must not leak into the caller's dict
        params = dict(params) if params else {}
        if self.api_key:
            params["APIKey"] = self.api_key

        response = self.session.get(url, params=params, headers=self.HEADERS)
        data = response.json()
        code = data["Code"]

        if 200 <= code < 400:
            return data["Response"]
        if code == 404:
            raise exception.NotFoundError()
        if code == 429:
            self.log.error("Rate limit reached")
        else:
            self.log.error("API request failed (code %s)", code)
        # never fall through and return None; callers index the result
        raise exception.StopExtraction()

    def _pagination(self, endpoint):
        """Yield all objects of a paginated 'endpoint'"""
        params = {
            "start": 1,
            "count": 100,
        }
        while True:
            response = self._call(endpoint, params)
            # "Locator" names the key under which the payload is stored
            obj = response[response["Locator"]]
            if isinstance(obj, list):
                yield from obj
            else:
                yield obj
            if "NextPage" not in response["Pages"]:
                return
            params["start"] += params["count"]

@ -536,11 +536,11 @@ class OAuthSession():
self.params["oauth_signature_method"] = "HMAC-SHA1" self.params["oauth_signature_method"] = "HMAC-SHA1"
self.params["oauth_version"] = "1.0" self.params["oauth_version"] = "1.0"
def get(self, url, params): def get(self, url, params, **kwargs):
params.update(self.params) params.update(self.params)
params["oauth_nonce"] = self.nonce(16) params["oauth_nonce"] = self.nonce(16)
params["oauth_timestamp"] = int(time.time()) params["oauth_timestamp"] = int(time.time())
return self.session.get(url + self.sign(url, params)) return self.session.get(url + self.sign(url, params), **kwargs)
def sign(self, url, params): def sign(self, url, params):
"""Generate 'oauth_signature' value and return query string""" """Generate 'oauth_signature' value and return query string"""

@ -5,7 +5,7 @@ import os.path
ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.realpath(ROOTDIR)) sys.path.insert(0, os.path.realpath(ROOTDIR))
import gallery_dl.extractor import gallery_dl.extractor # noqa
CATEGORY_MAP = { CATEGORY_MAP = {
@ -20,7 +20,6 @@ CATEGORY_MAP = {
"e621" : "e621", "e621" : "e621",
"exhentai" : "ExHentai", "exhentai" : "ExHentai",
"fallenangels" : "Fallen Angels Scans", "fallenangels" : "Fallen Angels Scans",
"gomanga" : "GoManga",
"hbrowse" : "HBrowse", "hbrowse" : "HBrowse",
"hentai2read" : "Hentai2Read", "hentai2read" : "Hentai2Read",
"hentaifoundry" : "Hentai Foundry", "hentaifoundry" : "Hentai Foundry",
@ -30,14 +29,11 @@ CATEGORY_MAP = {
"imagebam" : "ImageBam", "imagebam" : "ImageBam",
"imagefap" : "ImageFap", "imagefap" : "ImageFap",
"imgbox" : "imgbox", "imgbox" : "imgbox",
"imgchili" : "imgChili",
"imgth" : "imgth", "imgth" : "imgth",
"imgur" : "imgur", "imgur" : "imgur",
"jaiminisbox" : "Jaimini's Box", "jaiminisbox" : "Jaimini's Box",
"kireicake" : "Kirei Cake", "kireicake" : "Kirei Cake",
"kisscomic" : "KissComic",
"kissmanga" : "KissManga", "kissmanga" : "KissManga",
"loveisover" : "Love is Over Archive",
"mangadex" : "MangaDex", "mangadex" : "MangaDex",
"mangafox" : "Manga Fox", "mangafox" : "Manga Fox",
"mangahere" : "Manga Here", "mangahere" : "Manga Here",
@ -48,7 +44,6 @@ CATEGORY_MAP = {
"nyafuu" : "Nyafuu Archive", "nyafuu" : "Nyafuu Archive",
"paheal" : "rule #34", "paheal" : "rule #34",
"powermanga" : "PowerManga", "powermanga" : "PowerManga",
"puremashiro" : "Pure Mashiro",
"readcomiconline": "Read Comic Online", "readcomiconline": "Read Comic Online",
"rbt" : "RebeccaBlackTech", "rbt" : "RebeccaBlackTech",
"rule34" : "Rule 34", "rule34" : "Rule 34",
@ -58,10 +53,9 @@ CATEGORY_MAP = {
"senmanga" : "Sen Manga", "senmanga" : "Sen Manga",
"sensescans" : "Sense-Scans", "sensescans" : "Sense-Scans",
"slideshare" : "SlideShare", "slideshare" : "SlideShare",
"spectrumnexus" : "Spectrum Nexus", "smugmug" : "SmugMug",
"thebarchive" : "The /b/ Archive", "thebarchive" : "The /b/ Archive",
"worldthree" : "World Three", "worldthree" : "World Three",
"yeet" : "YEET Archive",
"xvideos" : "XVideos", "xvideos" : "XVideos",
} }

Loading…
Cancel
Save