From 247f785af18bb69f49f679452753946d3055981e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Sun, 13 May 2018 22:24:21 +0200
Subject: [PATCH] [pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyway.
---
 gallery_dl/extractor/pixiv.py | 358 +++++++++++-----------------------
 1 file changed, 119 insertions(+), 239 deletions(-)

diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index e94f4105..c1f8f011 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -11,7 +11,7 @@
 from .common import Extractor, Message
 from .. import text, exception
 from ..cache import cache
-import re
+from datetime import datetime, timedelta
 
 
 class PixivExtractor(Extractor):
@@ -20,11 +20,10 @@ class PixivExtractor(Extractor):
     directory_fmt = ["{category}", "{user[id]} {user[account]}"]
     filename_fmt = "{category}_{user[id]}_{id}{num}.{extension}"
     archive_fmt = "{id}{num}.{extension}"
-    illust_url = "https://www.pixiv.net/member_illust.php?mode=medium"
 
     def __init__(self):
         Extractor.__init__(self)
-        self.api = PixivAPI(self)
+        self.api = PixivAppAPI(self)
         self.user_id = -1
         self.load_ugoira = self.config("ugoira", True)
 
@@ -35,70 +34,54 @@ class PixivExtractor(Extractor):
         yield Message.Directory, metadata
 
         for work in self.works():
-            work = self.prepare_work(work)
+            if not work["user"]["id"]:
+                continue
+
+            meta_single_page = work["meta_single_page"]
+            meta_pages = work["meta_pages"]
+            del work["meta_single_page"]
+            del work["image_urls"]
+            del work["meta_pages"]
+            work["num"] = ""
+            work["tags"] = [tag["name"] for tag in work["tags"]]
+            work.update(metadata)
 
             if work["type"] == "ugoira":
                 if not self.load_ugoira:
                     continue
-                url, framelist = self.parse_ugoira(work)
+                ugoira = self.api.ugoira_metadata(work["id"])
+
+                url = ugoira["zip_urls"]["medium"].replace(
+                    "_ugoira600x600", "_ugoira1920x1080")
                 work["extension"] = "zip"
                 yield Message.Url, url, work
+
+                framelist = "".join(
+                    "{file} {delay}\n".format_map(frame)
+                    for frame in ugoira["frames"]
+                )
                 work["extension"] = "txt"
-                yield Message.Url, "text:"+framelist, work
+                yield Message.Url, "text:" + framelist, work
 
             elif work["page_count"] == 1:
-                yield Message.Url, work["url"], work
+                url = meta_single_page["original_image_url"]
+                work["extension"] = url.rpartition(".")[2]
+                yield Message.Url, url, work
 
             else:
-                url, _, ext = work["url"].rpartition("_p0")
-                for i in range(work["page_count"]):
-                    work["num"] = "_p{:02}".format(i)
-                    yield Message.Url, "{}_p{}{}".format(url, i, ext), work
+                for num, img in enumerate(meta_pages):
+                    url = img["image_urls"]["original"]
+                    work["num"] = "_p{:02}".format(num)
+                    work["extension"] = url.rpartition(".")[2]
+                    yield Message.Url, url, work
 
     def works(self):
         """Return an iterable containing all relevant 'work'-objects"""
-        return []
-
-    def prepare_work(self, work):
-        """Prepare a work-dictionary with additional keywords"""
-        url = work["image_urls"]["large"]
-        del work["image_urls"]
-        work["num"] = ""
-        work["url"] = url
-        work["extension"] = url.rpartition(".")[2]
-        return work
-
-    def parse_ugoira(self, data):
-        """Parse ugoira data"""
-        # get illust page
-        page = self.request(
-            self.illust_url,
-            params={"illust_id": data["id"]},
-            headers={"User-Agent": "Mozilla/5.0"},
-        ).text
-
-        # parse page
-        frames = text.extract(page, ',"frames":[', ']')[0]
-
-        # build url
-        url = re.sub(
-            r"/img-original/(.+/\d+)[^/]+",
-            r"/img-zip-ugoira/\g<1>_ugoira1920x1080.zip",
-            data["url"]
-        )
-
-        # build framelist
-        framelist = re.sub(
-            r'\{"file":"([^"]+)","delay":(\d+)\},?',
-            r'\1 \2\n', frames
-        )
-
-        return url, framelist
 
     def get_metadata(self, user=None):
         """Collect metadata for extractor-job"""
         if not user:
-            user = self.api.user(self.user_id)[0]
+            user = self.api.user_detail(self.user_id)
         return {"user": user}
 
 
@@ -135,11 +118,11 @@ class PixivUserExtractor(PixivExtractor):
             self.works = self._tagged_works
 
     def works(self):
-        return self.api.user_works(self.user_id)
+        return self.api.user_illusts(self.user_id)
 
     def _tagged_works(self):
-        for work in self.api.user_works(self.user_id):
-            if self.tag in [tag.lower() for tag in work["tags"]]:
+        for work in self.api.user_illusts(self.user_id):
+            if self.tag in [tag["name"].lower() for tag in work["tags"]]:
                 yield work
 
 
@@ -188,14 +171,6 @@ class PixivWorkExtractor(PixivExtractor):
           "?mode=medium&illust_id=966411"), {
             "exception": exception.NotFoundError,
         }),
-        (("http://i1.pixiv.net/c/600x600/img-master/"
-          "img/2008/06/13/00/29/13/966412_p0_master1200.jpg"), {
-            "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
-        }),
-        (("https://i.pximg.net/img-original/"
-          "img/2017/04/25/07/33/29/62568267_p0.png"), {
-            "url": "71b8bbd070d6b03a75ca4afb89f64d1445b2278d",
-        }),
         # ugoira
         (("https://www.pixiv.net/member_illust.php"
           "?mode=medium&illust_id=66806629"), {
@@ -203,6 +178,10 @@ class PixivWorkExtractor(PixivExtractor):
                         r"66806629_ugoira1920x1080\.zip|text:.+"),
             "count": 2,
         }),
+        (("http://i1.pixiv.net/c/600x600/img-master/"
+          "img/2008/06/13/00/29/13/966412_p0_master1200.jpg"), None),
+        (("https://i.pximg.net/img-original/"
+          "img/2017/04/25/07/33/29/62568267_p0.png"), None),
         ("https://www.pixiv.net/i/966412", None),
         ("http://img.pixiv.net/img/soundcross/42626136.jpg", None),
         ("http://i2.pixiv.net/img76/img/snailrin/42672235.jpg", None),
@@ -218,15 +197,16 @@ class PixivWorkExtractor(PixivExtractor):
         return (self.work,)
 
     def get_metadata(self, user=None):
-        self.work = self.api.work(self.illust_id)[0]
+        self.work = self.api.illust_detail(self.illust_id)
         return PixivExtractor.get_metadata(self, self.work["user"])
 
 
 class PixivFavoriteExtractor(PixivExtractor):
     """Extractor for all favorites/bookmarks of a pixiv-user"""
     subcategory = "favorite"
-    directory_fmt = ["{category}", "bookmarks", "{user[id]} {user[account]}"]
-    archive_fmt = "f_{bookmark[id]}{num}.{extension}"
+    directory_fmt = ["{category}", "bookmarks",
+                     "{user_bookmark[id]} {user_bookmark[account]}"]
+    archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}"
     pattern = [r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
                r"/bookmark\.php\?id=(\d+)"]
     test = [
@@ -239,16 +219,14 @@ class PixivFavoriteExtractor(PixivExtractor):
     def __init__(self, match):
         PixivExtractor.__init__(self)
         self.user_id = match.group(1)
+        self.user = None
 
     def works(self):
-        return self.api.user_favorite_works(self.user_id)
+        return self.api.user_bookmarks_illust(self.user_id)
 
-    def prepare_work(self, work):
-        work["work"]["bookmark"] = {
-            key: work[key]
-            for key in ("id", "comment", "tags", "publicity")
-        }
-        return PixivExtractor.prepare_work(self, work["work"])
+    def get_metadata(self, user=None):
+        self.user = user or self.api.user_detail(self.user_id)
+        return {"user_bookmark": self.user}
 
 
 class PixivBookmarkExtractor(PixivFavoriteExtractor):
@@ -264,178 +242,76 @@ class PixivBookmarkExtractor(PixivFavoriteExtractor):
         self.api.login()
         user = self.api.user_info
         self.user_id = user["id"]
-        return PixivExtractor.get_metadata(self, user)
+        return PixivFavoriteExtractor.get_metadata(self, user)
 
 
 class PixivRankingExtractor(PixivExtractor):
     """Extractor for pixiv ranking pages"""
     subcategory = "ranking"
     archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}"
-    directory_fmt = ["{category}", "rankings", "{mode}", "{date}"]
+    directory_fmt = ["{category}", "rankings",
+                     "{ranking[mode]}", "{ranking[date]}"]
     pattern = [r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
                r"/ranking\.php(?:\?([^#]*))?"]
     test = [
         (("https://www.pixiv.net/ranking.php"
-          "?mode=daily&content=illust&date=20170818"), None),
+          "?mode=daily&date=20170818"), None),
         ("https://www.pixiv.net/ranking.php", None),
         ("https://touch.pixiv.net/ranking.php", None),
     ]
 
     def __init__(self, match):
         PixivExtractor.__init__(self)
-        self.ranking_info = None
-        self._iter = None
-        self._first = None
 
-        query = text.parse_query(match.group(1))
-        self.mode = query.get("mode", "daily")
-        self.content = query.get("content", "all")
-        self.date = query.get("date")
-
-        if self.date:
-            if len(self.date) == 8 and self.date.isdecimal():
-                self.date = (self.date[0:4] + "-" +
-                             self.date[4:6] + "-" +
-                             self.date[6:8])
-            else:
-                self.log.warning("invalid date '%s'", self.date)
-                self.date = None
-
-        if self.content not in ("all", "illust", "manga", "ugoira"):
-            self.log.warning("unrecognized content value '%s' - "
-                             "falling back to 'all'", self.content)
-            self.content = "all"
-
-    def works(self):
-        yield from self._first["works"]
-        for page in self._iter:
-            yield from page["works"]
-
-    def get_metadata(self, user=None):
-        self._iter = self.api.ranking(self.mode, self.content, self.date)
-        self._first = next(self._iter)
-        self.ranking_info = {
-            key: self._first[key]
-            for key in ("mode", "content", "date")
+        modes = {
+            "daily": "day",
+            "daily_r18": "day_r18",
+            "weekly": "week",
+            "weekly_r18": "week_r18",
+            "monthly": "month",
+            "male": "day_male",
+            "male_r18": "day_male_r18",
+            "female": "day_female",
+            "female_r18": "day_female_r18",
+            "original": "week_original",
+            "rookie": "week_rookie",
+            "r18g": "week_r18g",
         }
-        return self.ranking_info.copy()
-
-    def prepare_work(self, work):
-        work["work"]["rank"] = work["rank"]
-        work["work"]["ranking"] = self.ranking_info
-        return PixivExtractor.prepare_work(self, work["work"])
-
 
-class PixivAPI():
-    """Minimal interface for the Pixiv Public-API for mobile devices
-
-    For a better and more complete implementation, see
-    - https://github.com/upbit/pixivpy
-    For in-depth information regarding the Pixiv Public-API, see
-    - http://blog.imaou.com/opensource/2014/10/09/pixiv_api_for_ios_update.html
-    - https://gist.github.com/ZipFile/e14ff1a7e6d01456188a
-    """
-    def __init__(self, extractor):
-        self.session = extractor.session
-        self.log = extractor.log
-        self.username, self.password = extractor._get_auth_info()
-        self.user_info = None
-        self.session.headers.update({
-            "Referer": "https://www.pixiv.net/",
-            'App-OS': 'ios',
-            'App-OS-Version': '10.3.1',
-            'App-Version': '6.7.1',
-            'User-Agent': 'PixivIOSApp/6.7.1 (iOS 10.3.1; iPhone8,1)',
-        })
-
-    def user(self, user_id):
-        """Query information about a pixiv user"""
-        endpoint = "users/" + user_id
-        return self._call(endpoint, {})["response"]
-
-    def work(self, illust_id):
-        """Query information about a single pixiv work/illustration"""
-        endpoint = "works/" + illust_id
-        params = {"image_sizes": "large"}
-        return self._call(endpoint, params)["response"]
-
-    def user_works(self, user_id):
-        """Query information about the works of a pixiv user"""
-        endpoint = "users/{user}/works".format(user=user_id)
-        params = {"image_sizes": "large"}
-        return self._pagination(endpoint, params)
-
-    def user_favorite_works(self, user_id):
-        """Query information about the favorite works of a pixiv user"""
-        endpoint = "users/{user}/favorite_works".format(user=user_id)
-        params = {"image_sizes": "large", "include_stats": False}
-        return self._pagination(endpoint, params)
-
-    def ranking(self, mode, content="all", date=None):
-        """Query pixiv's ranking lists"""
-        endpoint = "ranking/" + content
-        params = {"image_sizes": "large", "mode": mode, "date": date}
-        return self._pagination(endpoint, params)
-
-    def login(self):
-        """Login and gain a Pixiv Public-API access token"""
-        self.user_info, access_token = self._login_impl(
-            self.username, self.password)
-        self.session.headers["Authorization"] = access_token
-
-    @cache(maxage=50*60, keyarg=1)
-    def _login_impl(self, username, password):
-        """Actual login implementation"""
-        self.log.info("Logging in as %s", username)
-        data = {
-            "username": username,
-            "password": password,
-            "grant_type": "password",
-            "client_id": "bYGKuGVw91e0NMfPGp44euvGt59s",
-            "client_secret": "HP3RmkgAmEGro0gn1x9ioawQE8WMfvLXDz3ZqxpK",
-            "get_secure_url": 1,
-        }
-        response = self.session.post(
-            "https://oauth.secure.pixiv.net/auth/token", data=data
-        )
-        if response.status_code != 200:
-            raise exception.AuthenticationError()
-        try:
-            response = response.json()["response"]
-            token = response["access_token"]
-            user = response["user"]
-        except KeyError:
-            raise Exception("Get token error! Response: %s" % (response))
-        return user, "Bearer " + token
+        query = text.parse_query(match.group(1))
 
-    def _call(self, endpoint, params, _empty=[None]):
-        url = "https://public-api.secure.pixiv.net/v1/" + endpoint + ".json"
+        mode = query.get("mode", "daily").lower()
+        if mode not in modes:
+            self.log.warning("invalid mode '%s'", mode)
+            mode = "daily"
+        self.mode = modes[mode]
 
-        self.login()
-        data = self.session.get(url, params=params).json()
+        date = query.get("date")
+        if date:
+            if len(date) == 8 and date.isdecimal():
+                date = "{}-{}-{}".format(date[0:4], date[4:6], date[6:8])
+            else:
+                self.log.warning("invalid date '%s'", date)
+                date = None
+        if not date:
+            date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
+        self.date = date
 
-        status = data.get("status")
-        response = data.get("response", _empty)
-        if status == "failure" or response == _empty:
-            raise exception.NotFoundError()
-        return data
+        self.ranking_info = {"mode": mode, "date": self.date}
 
-    def _pagination(self, endpoint, params):
-        while True:
-            data = self._call(endpoint, params)
-            yield from data["response"]
+    def works(self):
+        return self.api.illust_ranking(self.mode, self.date)
 
-            pinfo = data["pagination"]
-            if pinfo["current"] == pinfo["pages"]:
-                return
-            params["page"] = pinfo["next"]
+    def get_metadata(self, user=None):
+        return {"ranking": self.ranking_info}
 
 
 class PixivAppAPI():
-    """Minimal interface for the Pixiv App-API for mobile devices
+    """Minimal interface for the Pixiv App API for mobile devices
 
-    For a more complete implementation, see
+    For a more complete implementation or documentation, see
     - https://github.com/upbit/pixivpy
+    - https://gist.github.com/ZipFile/3ba99b47162c23f8aea5d5942bb557b1
     """
     CLIENT_ID = "MOBrBDS8blbauoSck0ZfDbtuzpyT"
     CLIENT_SECRET = "lsACyCD94FhDUtGTXi3QzcFE2uU1hqtDaKeqrdwj"
@@ -456,37 +332,17 @@ class PixivAppAPI():
             "App-OS-Version": "10.3.1",
             "App-Version": "6.7.1",
             "User-Agent": "PixivIOSApp/6.7.1 (iOS 10.3.1; iPhone8,1)",
+            "Referer": "https://app-api.pixiv.net/",
         })
 
-    def illust_detail(self, illust_id):
-        params = {"illust_id": illust_id}
-        return self._call("v1/illust/detail", params)
-
-    def illust_ranking(self, mode="day", date=None):
-        params = {"mode": mode, "date": date}
-        return self._pagination("v1/illust/ranking", params)
-
-    def user_detail(self, user_id):
-        params = {"user_id": user_id}
-        return self._call("v1/user/detail", params)
-
-    def user_illusts(self, user_id, illust_type=None):
-        params = {"user_id": user_id, "type": illust_type}
-        return self._pagination("v1/user/illusts", params)
-
-    def ugoira_metadata(self, illust_id):
-        params = {"illust_id": illust_id}
-        return self._call("v1/ugoira/metadata", params)
-
-    def authenticate(self):
-        """Authenticate the application by requesting an access token"""
-        self.user_info, auth = self._authenticate_impl(
+    def login(self):
+        """Login and gain an access token"""
+        self.user_info, auth = self._login_impl(
             self.username, self.password)
         self.session.headers["Authorization"] = auth
 
     @cache(maxage=3590, keyarg=1)
-    def _authenticate_impl(self, username, password):
-        """Actual authenticate implementation"""
+    def _login_impl(self, username, password):
         self.log.info("Logging in as %s", username)
 
         url = "https://oauth.secure.pixiv.net/auth/token"
@@ -506,10 +362,34 @@ class PixivAppAPI():
         data = response.json()["response"]
         return data["user"], "Bearer " + data["access_token"]
 
+    def illust_detail(self, illust_id):
+        params = {"illust_id": illust_id}
+        return self._call("v1/illust/detail", params)["illust"]
+
+    def illust_ranking(self, mode="day", date=None):
+        params = {"mode": mode, "date": date}
+        return self._pagination("v1/illust/ranking", params)
+
+    def user_bookmarks_illust(self, user_id, tag=None):
+        params = {"user_id": user_id, "restrict": "public", "tag": tag}
+        return self._pagination("v1/user/bookmarks/illust", params)
+
+    def user_detail(self, user_id):
+        params = {"user_id": user_id}
+        return self._call("v1/user/detail", params)["user"]
+
+    def user_illusts(self, user_id, illust_type=None):
+        params = {"user_id": user_id, "type": illust_type}
+        return self._pagination("v1/user/illusts", params)
+
+    def ugoira_metadata(self, illust_id):
+        params = {"illust_id": illust_id}
+        return self._call("v1/ugoira/metadata", params)["ugoira_metadata"]
+
     def _call(self, endpoint, params=None):
         url = "https://app-api.pixiv.net/" + endpoint
 
-        self.authenticate()
+        self.login()
         response = self.session.get(url, params=params)
 
         if 200 <= response.status_code < 400: