diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 275dead0..5f3ebfa8 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -50,6 +50,14 @@
         "gelbooru":
         {
             "filename": "{category}_{id:>07}_{md5}.{extension}"
+        },
+        "reddit":
+        {
+            "refresh-token": null,
+            "comments": 500,
+            "date-min": 0,
+            "date-max": 253402210800,
+            "recursion": 0
         }
     }
 }
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index cfbd522a..4a6c0c97 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -73,6 +73,7 @@ modules = [
     "imagehosts",
     "directlink",
     "recursive",
+    "oauth",
     "test",
 ]
 
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
new file mode 100644
index 00000000..ed29b7fa
--- /dev/null
+++ b/gallery_dl/extractor/oauth.py
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Utility classes to setup OAuth"""
+
+from .common import Extractor, Message
+from . import reddit
+import random
+import socket
+import string
+import webbrowser
+import urllib.parse
+
+
+class OAuthBase(Extractor):
+    """Base class for extractors that obtain OAuth tokens via a redirect"""
+    category = "oauth"
+    redirect_uri = "http://localhost:6414/"
+
+    def __init__(self):
+        Extractor.__init__(self)
+        self.client = None
+
+    def recv(self):
+        """Wait for one HTTP request and return its query parameters"""
+        print("Waiting for response. (Cancel with Ctrl+c)")
+        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        try:
+            server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+            server.bind(("localhost", 6414))
+            server.listen(1)
+            self.client = server.accept()[0]
+        finally:
+            # release the port even if bind/accept is interrupted (Ctrl+c)
+            server.close()
+
+        data = self.client.recv(1024).decode()
+        # the request line has the form "<method> <path> <version>"
+        parts = data.split(" ", 2)
+        path = parts[1] if len(parts) > 1 else ""
+        query = path.partition("?")[2]
+        return dict(urllib.parse.parse_qsl(query, keep_blank_values=True))
+
+    def send(self, msg):
+        """Print 'msg', send it as HTTP response and close the connection"""
+        print(msg)
+        self.client.sendall(b"HTTP/1.1 200 OK\r\n\r\n" + msg.encode())
+        self.client.close()
+
+
+class OAuthReddit(OAuthBase):
+    """Interactively request a reddit refresh token"""
+    subcategory = "reddit"
+    pattern = ["oauth:reddit$"]
+
+    def __init__(self, match):
+        OAuthBase.__init__(self)
+        self.session.headers["User-Agent"] = reddit.RedditAPI.USER_AGENT
+        self.client_id = reddit.RedditAPI.CLIENT_ID
+        # 'state' guards the redirect, so draw it from the OS entropy source
+        rng = random.SystemRandom()
+        self.state = "gallery-dl:{}:{}".format(
+            self.subcategory,
+            "".join(rng.choice(string.ascii_letters) for _ in range(8)),
+        )
+
+    def items(self):
+        """Run the authorization-code flow and show the refresh token"""
+        yield Message.Version, 1
+
+        params = {
+            "client_id": self.client_id,
+            "response_type": "code",
+            "state": self.state,
+            "redirect_uri": self.redirect_uri,
+            "duration": "permanent",
+            "scope": "read",
+        }
+        url = "https://www.reddit.com/api/v1/authorize?"
+        url += urllib.parse.urlencode(params)
+        webbrowser.open(url)
+
+        params = self.recv()
+
+        if self.state != params.get("state"):
+            self.send("'state' mismatch: expected {}, got {}.".format(
+                self.state, params.get("state")))
+            return
+        if "error" in params:
+            self.send(params["error"])
+            return
+
+        url = "https://www.reddit.com/api/v1/access_token"
+        data = {
+            "grant_type": "authorization_code",
+            "code": params["code"],
+            "redirect_uri": self.redirect_uri,
+        }
+        response = self.session.post(url, data=data, auth=(self.client_id, ""))
+        data = response.json()
+
+        if "error" in data:
+            self.send(data["error"])
+        else:
+            self.send(REDDIT_MSG_TEMPLATE.format(token=data["refresh_token"]))
+
+
+REDDIT_MSG_TEMPLATE = """
+Your Refresh Token is
+
+{token}
+
+Put this value into your configuration file as 'extractor.reddit.refresh-token'.
+
+Example:
+{{
+    "extractor": {{
+        "reddit": {{
+            "refresh-token": "{token}"
+        }}
+    }}
+}}
+"""
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 8b65b765..056beffb 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -11,6 +11,7 @@
 from .common import Extractor, Message
 from .. import text, extractor, exception
 from ..cache import cache
+import time
 import re
 
 
@@ -101,13 +102,16 @@ class RedditSubmissionExtractor(RedditExtractor):
 
 class RedditAPI():
     """Minimal interface for the reddit API"""
-    def __init__(self, extractor, client_id="6N9uN0krSDE-ig"):
+    CLIENT_ID = "6N9uN0krSDE-ig"
+    USER_AGENT = "Python:gallery-dl:0.8.4 (by /u/mikf1)"
+
+    def __init__(self, extractor):
         self.extractor = extractor
-        self.client_id = extractor.config("client-id", client_id)
-        self.comments  = extractor.config("comments", 500)
+        self.comments = extractor.config("comments", 500)
+        self.refresh_token = extractor.config("refresh-token")
+        self.log = extractor.log
         self.session = extractor.session
-        self.session.headers["User-Agent"] = ("Python:gallery-dl:0.8.4"
-                                              " (by /u/mikf1)")
+        self.session.headers["User-Agent"] = self.USER_AGENT
 
     def submission(self, submission_id):
         """Fetch the (submission, comments)=-tuple for a submission id"""
@@ -124,18 +128,23 @@ class RedditAPI():
 
     def authenticate(self):
         """Authenticate the application by requesting an access token"""
-        access_token = self._authenticate_impl(self.client_id)
+        access_token = self._authenticate_impl(self.refresh_token)
         self.session.headers["Authorization"] = access_token
 
-    @cache(maxage=3600, keyarg=1)
-    def _authenticate_impl(self, client_id):
+    @cache(maxage=3590, keyarg=1)
+    def _authenticate_impl(self, refresh_token=None):
         """Actual authenticate implementation"""
         url = "https://www.reddit.com/api/v1/access_token"
-        data = {
-            "grant_type": "https://oauth.reddit.com/grants/installed_client",
-            "device_id": "DO_NOT_TRACK_THIS_DEVICE",
-        }
-        response = self.session.post(url, data=data, auth=(client_id, ""))
+        if refresh_token:
+            self.log.info("Refreshing access token")
+            data = {"grant_type": "refresh_token",
+                    "refresh_token": refresh_token}
+        else:
+            self.log.info("Requesting public access token")
+            data = {"grant_type": ("https://oauth.reddit.com/"
+                                   "grants/installed_client"),
+                    "device_id": "DO_NOT_TRACK_THIS_DEVICE"}
+        response = self.session.post(url, data=data, auth=(self.CLIENT_ID, ""))
         if response.status_code != 200:
             raise exception.AuthenticationError()
         return "Bearer " + response.json()["access_token"]
@@ -144,7 +153,14 @@ class RedditAPI():
         url = "https://oauth.reddit.com" + endpoint
         params["raw_json"] = 1
         self.authenticate()
-        data = self.session.get(url, params=params).json()
+        response = self.session.get(url, params=params)
+        # pause until the ratelimit window resets once it is nearly used up
+        remaining = response.headers.get("x-ratelimit-remaining")
+        if remaining and float(remaining) < 2:
+            wait = int(response.headers["x-ratelimit-reset"])
+            self.log.info("Waiting %d seconds for ratelimit reset", wait)
+            time.sleep(wait)
+        data = response.json()
         if "error" in data:
             if data["error"] == 403:
                 raise exception.AuthorizationError()