[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is no longer any bookmark ID information), but the favorites endpoint of the public API was already gone anyway.
6 years ago · 247f785af1
parent 7a58151566
commit 247f785af1
1 changed files with 119 additions and 239 deletions
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@ -11,7 +11,7 @@
 from .common import Extractor, Message
 from .. import text, exception
 from ..cache import cache
-import re
+from datetime import datetime, timedelta


 class PixivExtractor(Extractor):
@ -20,11 +20,10 @@ class PixivExtractor(Extractor):
    directory_fmt = ["{category}", "{user[id]} {user[account]}"]
    filename_fmt = "{category}_{user[id]}_{id}{num}.{extension}"
    archive_fmt = "{id}{num}.{extension}"
-    illust_url = "https://www.pixiv.net/member_illust.php?mode=medium"

    def __init__(self):
        Extractor.__init__(self)
-        self.api = PixivAPI(self)
+        self.api = PixivAppAPI(self)
        self.user_id = -1
        self.load_ugoira = self.config("ugoira", True)

@ -35,70 +34,54 @@ class PixivExtractor(Extractor):
        yield Message.Directory, metadata

        for work in self.works():
-            work = self.prepare_work(work)
+            if not work["user"]["id"]:
+                continue
+
+            meta_single_page = work["meta_single_page"]
+            meta_pages = work["meta_pages"]
+            del work["meta_single_page"]
+            del work["image_urls"]
+            del work["meta_pages"]
+            work["num"] = ""
+            work["tags"] = [tag["name"] for tag in work["tags"]]
+            work.update(metadata)

            if work["type"] == "ugoira":
                if not self.load_ugoira:
                    continue
-                url, framelist = self.parse_ugoira(work)
+                ugoira = self.api.ugoira_metadata(work["id"])
+
+                url = ugoira["zip_urls"]["medium"].replace(
+                    "_ugoira600x600", "_ugoira1920x1080")
                work["extension"] = "zip"
                yield Message.Url, url, work
+
+                framelist = "".join(
+                    "{file} {delay}\n".format_map(frame)
+                    for frame in ugoira["frames"]
+                )
                work["extension"] = "txt"
-                yield Message.Url, "text:"+framelist, work
+                yield Message.Url, "text:" + framelist, work

            elif work["page_count"] == 1:
-                yield Message.Url, work["url"], work
+                url = meta_single_page["original_image_url"]
+                work["extension"] = url.rpartition(".")[2]
+                yield Message.Url, url, work

            else:
-                url, _, ext = work["url"].rpartition("_p0")
-                for i in range(work["page_count"]):
-                    work["num"] = "_p{:02}".format(i)
-                    yield Message.Url, "{}_p{}{}".format(url, i, ext), work
+                for num, img in enumerate(meta_pages):
+                    url = img["image_urls"]["original"]
+                    work["num"] = "_p{:02}".format(num)
+                    work["extension"] = url.rpartition(".")[2]
+                    yield Message.Url, url, work

    def works(self):
        """Return an iterable containing all relevant 'work'-objects"""
-        return []
-
-    def prepare_work(self, work):
-        """Prepare a work-dictionary with additional keywords"""
-        url = work["image_urls"]["large"]
-        del work["image_urls"]
-        work["num"] = ""
-        work["url"] = url
-        work["extension"] = url.rpartition(".")[2]
-        return work
-
-    def parse_ugoira(self, data):
-        """Parse ugoira data"""
-        # get illust page
-        page = self.request(
-            self.illust_url,
-            params={"illust_id": data["id"]},
-            headers={"User-Agent": "Mozilla/5.0"},
-        ).text
-
-        # parse page
-        frames = text.extract(page, ',"frames":[', ']')[0]
-
-        # build url
-        url = re.sub(
-            r"/img-original/(.+/\d+)[^/]+",
-            r"/img-zip-ugoira/\g<1>_ugoira1920x1080.zip",
-            data["url"]
-        )
-
-        # build framelist
-        framelist = re.sub(
-            r'\{"file":"([^"]+)","delay":(\d+)\},?',
-            r'\1 \2\n', frames
-        )
-
-        return url, framelist

    def get_metadata(self, user=None):
        """Collect metadata for extractor-job"""
        if not user:
-            user = self.api.user(self.user_id)[0]
+            user = self.api.user_detail(self.user_id)
        return {"user": user}


@ -135,11 +118,11 @@ class PixivUserExtractor(PixivExtractor):
            self.works = self._tagged_works

    def works(self):
-        return self.api.user_works(self.user_id)
+        return self.api.user_illusts(self.user_id)

    def _tagged_works(self):
-        for work in self.api.user_works(self.user_id):
-            if self.tag in [tag.lower() for tag in work["tags"]]:
+        for work in self.api.user_illusts(self.user_id):
+            if self.tag in [tag["name"].lower() for tag in work["tags"]]:
                yield work


@ -188,14 +171,6 @@ class PixivWorkExtractor(PixivExtractor):
          "?mode=medium&illust_id=966411"), {
            "exception": exception.NotFoundError,
        }),
-        (("http://i1.pixiv.net/c/600x600/img-master/"
-          "img/2008/06/13/00/29/13/966412_p0_master1200.jpg"), {
-            "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
-        }),
-        (("https://i.pximg.net/img-original/"
-          "img/2017/04/25/07/33/29/62568267_p0.png"), {
-            "url": "71b8bbd070d6b03a75ca4afb89f64d1445b2278d",
-        }),
        # ugoira
        (("https://www.pixiv.net/member_illust.php"
          "?mode=medium&illust_id=66806629"), {
@ -203,6 +178,10 @@ class PixivWorkExtractor(PixivExtractor):
                        r"66806629_ugoira1920x1080\.zip|text:.+"),
            "count": 2,
        }),
+        (("http://i1.pixiv.net/c/600x600/img-master/"
+          "img/2008/06/13/00/29/13/966412_p0_master1200.jpg"), None),
+        (("https://i.pximg.net/img-original/"
+          "img/2017/04/25/07/33/29/62568267_p0.png"), None),
        ("https://www.pixiv.net/i/966412", None),
        ("http://img.pixiv.net/img/soundcross/42626136.jpg", None),
        ("http://i2.pixiv.net/img76/img/snailrin/42672235.jpg", None),
@ -218,15 +197,16 @@ class PixivWorkExtractor(PixivExtractor):
        return (self.work,)

    def get_metadata(self, user=None):
-        self.work = self.api.work(self.illust_id)[0]
+        self.work = self.api.illust_detail(self.illust_id)
        return PixivExtractor.get_metadata(self, self.work["user"])


 class PixivFavoriteExtractor(PixivExtractor):
    """Extractor for all favorites/bookmarks of a pixiv-user"""
    subcategory = "favorite"
-    directory_fmt = ["{category}", "bookmarks", "{user[id]} {user[account]}"]
-    archive_fmt = "f_{bookmark[id]}{num}.{extension}"
+    directory_fmt = ["{category}", "bookmarks",
+                     "{user_bookmark[id]} {user_bookmark[account]}"]
+    archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}"
    pattern = [r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
               r"/bookmark\.php\?id=(\d+)"]
    test = [
@ -239,16 +219,14 @@ class PixivFavoriteExtractor(PixivExtractor):
    def __init__(self, match):
        PixivExtractor.__init__(self)
        self.user_id = match.group(1)
+        self.user = None

    def works(self):
-        return self.api.user_favorite_works(self.user_id)
+        return self.api.user_bookmarks_illust(self.user_id)

-    def prepare_work(self, work):
-        work["work"]["bookmark"] = {
-            key: work[key]
-            for key in ("id", "comment", "tags", "publicity")
-        }
-        return PixivExtractor.prepare_work(self, work["work"])
+    def get_metadata(self, user=None):
+        self.user = user or self.api.user_detail(self.user_id)
+        return {"user_bookmark": self.user}


 class PixivBookmarkExtractor(PixivFavoriteExtractor):
@ -264,178 +242,76 @@ class PixivBookmarkExtractor(PixivFavoriteExtractor):
        self.api.login()
        user = self.api.user_info
        self.user_id = user["id"]
-        return PixivExtractor.get_metadata(self, user)
+        return PixivFavoriteExtractor.get_metadata(self, user)


 class PixivRankingExtractor(PixivExtractor):
    """Extractor for pixiv ranking pages"""
    subcategory = "ranking"
    archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}"
-    directory_fmt = ["{category}", "rankings", "{mode}", "{date}"]
+    directory_fmt = ["{category}", "rankings",
+                     "{ranking[mode]}", "{ranking[date]}"]
    pattern = [r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
               r"/ranking\.php(?:\?([^#]*))?"]
    test = [
        (("https://www.pixiv.net/ranking.php"
-          "?mode=daily&content=illust&date=20170818"), None),
+          "?mode=daily&date=20170818"), None),
        ("https://www.pixiv.net/ranking.php", None),
        ("https://touch.pixiv.net/ranking.php", None),
    ]

    def __init__(self, match):
        PixivExtractor.__init__(self)
-        self.ranking_info = None
-        self._iter = None
-        self._first = None

-        query = text.parse_query(match.group(1))
-        self.mode = query.get("mode", "daily")
-        self.content = query.get("content", "all")
-        self.date = query.get("date")
-
-        if self.date:
-            if len(self.date) == 8 and self.date.isdecimal():
-                self.date = (self.date[0:4] + "-" +
-                             self.date[4:6] + "-" +
-                             self.date[6:8])
-            else:
-                self.log.warning("invalid date '%s'", self.date)
-                self.date = None
-
-        if self.content not in ("all", "illust", "manga", "ugoira"):
-            self.log.warning("unrecognized content value '%s' - "
-                             "falling back to 'all'", self.content)
-            self.content = "all"
-
-    def works(self):
-        yield from self._first["works"]
-        for page in self._iter:
-            yield from page["works"]
-
-    def get_metadata(self, user=None):
-        self._iter = self.api.ranking(self.mode, self.content, self.date)
-        self._first = next(self._iter)
-        self.ranking_info = {
-            key: self._first[key]
-            for key in ("mode", "content", "date")
+        modes = {
+            "daily": "day",
+            "daily_r18": "day_r18",
+            "weekly": "week",
+            "weekly_r18": "week_r18",
+            "monthly": "month",
+            "male": "day_male",
+            "male_r18": "day_male_r18",
+            "female": "day_female",
+            "female_r18": "day_female_r18",
+            "original": "week_original",
+            "rookie": "week_rookie",
+            "r18g": "week_r18g",
        }
-        return self.ranking_info.copy()
-
-    def prepare_work(self, work):
-        work["work"]["rank"] = work["rank"]
-        work["work"]["ranking"] = self.ranking_info
-        return PixivExtractor.prepare_work(self, work["work"])
-

-class PixivAPI():
-    """Minimal interface for the Pixiv Public-API for mobile devices
-
-    For a better and more complete implementation, see
-    - https://github.com/upbit/pixivpy
-    For in-depth information regarding the Pixiv Public-API, see
-    - http://blog.imaou.com/opensource/2014/10/09/pixiv_api_for_ios_update.html
-    - https://gist.github.com/ZipFile/e14ff1a7e6d01456188a
-    """
-    def __init__(self, extractor):
-        self.session = extractor.session
-        self.log = extractor.log
-        self.username, self.password = extractor._get_auth_info()
-        self.user_info = None
-        self.session.headers.update({
-            "Referer": "https://www.pixiv.net/",
-            'App-OS': 'ios',
-            'App-OS-Version': '10.3.1',
-            'App-Version': '6.7.1',
-            'User-Agent': 'PixivIOSApp/6.7.1 (iOS 10.3.1; iPhone8,1)',
-        })
-
-    def user(self, user_id):
-        """Query information about a pixiv user"""
-        endpoint = "users/" + user_id
-        return self._call(endpoint, {})["response"]
-
-    def work(self, illust_id):
-        """Query information about a single pixiv work/illustration"""
-        endpoint = "works/" + illust_id
-        params = {"image_sizes": "large"}
-        return self._call(endpoint, params)["response"]
-
-    def user_works(self, user_id):
-        """Query information about the works of a pixiv user"""
-        endpoint = "users/{user}/works".format(user=user_id)
-        params = {"image_sizes": "large"}
-        return self._pagination(endpoint, params)
-
-    def user_favorite_works(self, user_id):
-        """Query information about the favorite works of a pixiv user"""
-        endpoint = "users/{user}/favorite_works".format(user=user_id)
-        params = {"image_sizes": "large", "include_stats": False}
-        return self._pagination(endpoint, params)
-
-    def ranking(self, mode, content="all", date=None):
-        """Query pixiv's ranking lists"""
-        endpoint = "ranking/" + content
-        params = {"image_sizes": "large", "mode": mode, "date": date}
-        return self._pagination(endpoint, params)
-
-    def login(self):
-        """Login and gain a Pixiv Public-API access token"""
-        self.user_info, access_token = self._login_impl(
-            self.username, self.password)
-        self.session.headers["Authorization"] = access_token
-
-    @cache(maxage=50*60, keyarg=1)
-    def _login_impl(self, username, password):
-        """Actual login implementation"""
-        self.log.info("Logging in as %s", username)
-        data = {
-            "username": username,
-            "password": password,
-            "grant_type": "password",
-            "client_id": "bYGKuGVw91e0NMfPGp44euvGt59s",
-            "client_secret": "HP3RmkgAmEGro0gn1x9ioawQE8WMfvLXDz3ZqxpK",
-            "get_secure_url": 1,
-        }
-        response = self.session.post(
-            "https://oauth.secure.pixiv.net/auth/token", data=data
-        )
-        if response.status_code != 200:
-            raise exception.AuthenticationError()
-        try:
-            response = response.json()["response"]
-            token = response["access_token"]
-            user = response["user"]
-        except KeyError:
-            raise Exception("Get token error! Response: %s" % (response))
-        return user, "Bearer " + token
+        query = text.parse_query(match.group(1))

-    def _call(self, endpoint, params, _empty=[None]):
-        url = "https://public-api.secure.pixiv.net/v1/" + endpoint + ".json"
+        mode = query.get("mode", "daily").lower()
+        if mode not in modes:
+            self.log.warning("invalid mode '%s'", mode)
+            mode = "daily"
+        self.mode = modes[mode]

-        self.login()
-        data = self.session.get(url, params=params).json()
+        date = query.get("date")
+        if date:
+            if len(date) == 8 and date.isdecimal():
+                date = "{}-{}-{}".format(date[0:4], date[4:6], date[6:8])
+            else:
+                self.log.warning("invalid date '%s'", date)
+                date = None
+        if not date:
+            date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
+        self.date = date

-        status = data.get("status")
-        response = data.get("response", _empty)
-        if status == "failure" or response == _empty:
-            raise exception.NotFoundError()
-        return data
+        self.ranking_info = {"mode": mode, "date": self.date}

-    def _pagination(self, endpoint, params):
-        while True:
-            data = self._call(endpoint, params)
-            yield from data["response"]
+    def works(self):
+        return self.api.illust_ranking(self.mode, self.date)

-            pinfo = data["pagination"]
-            if pinfo["current"] == pinfo["pages"]:
-                return
-            params["page"] = pinfo["next"]
+    def get_metadata(self, user=None):
+        return {"ranking": self.ranking_info}


 class PixivAppAPI():
-    """Minimal interface for the Pixiv App-API for mobile devices
+    """Minimal interface for the Pixiv App API for mobile devices

-    For a more complete implementation, see
+    For a more complete implementation or documentation, see
    - https://github.com/upbit/pixivpy
+    - https://gist.github.com/ZipFile/3ba99b47162c23f8aea5d5942bb557b1
    """
    CLIENT_ID = "MOBrBDS8blbauoSck0ZfDbtuzpyT"
    CLIENT_SECRET = "lsACyCD94FhDUtGTXi3QzcFE2uU1hqtDaKeqrdwj"
@ -456,37 +332,17 @@ class PixivAppAPI():
            "App-OS-Version": "10.3.1",
            "App-Version": "6.7.1",
            "User-Agent": "PixivIOSApp/6.7.1 (iOS 10.3.1; iPhone8,1)",
+            "Referer": "https://app-api.pixiv.net/",
        })

-    def illust_detail(self, illust_id):
-        params = {"illust_id": illust_id}
-        return self._call("v1/illust/detail", params)
-
-    def illust_ranking(self, mode="day", date=None):
-        params = {"mode": mode, "date": date}
-        return self._pagination("v1/illust/ranking", params)
-
-    def user_detail(self, user_id):
-        params = {"user_id": user_id}
-        return self._call("v1/user/detail", params)
-
-    def user_illusts(self, user_id, illust_type=None):
-        params = {"user_id": user_id, "type": illust_type}
-        return self._pagination("v1/user/illusts", params)
-
-    def ugoira_metadata(self, illust_id):
-        params = {"illust_id": illust_id}
-        return self._call("v1/ugoira/metadata", params)
-
-    def authenticate(self):
-        """Authenticate the application by requesting an access token"""
-        self.user_info, auth = self._authenticate_impl(
+    def login(self):
+        """Login and gain an access token"""
+        self.user_info, auth = self._login_impl(
            self.username, self.password)
        self.session.headers["Authorization"] = auth

    @cache(maxage=3590, keyarg=1)
-    def _authenticate_impl(self, username, password):
-        """Actual authenticate implementation"""
+    def _login_impl(self, username, password):
        self.log.info("Logging in as %s", username)

        url = "https://oauth.secure.pixiv.net/auth/token"
@ -506,10 +362,34 @@ class PixivAppAPI():
        data = response.json()["response"]
        return data["user"], "Bearer " + data["access_token"]

+    def illust_detail(self, illust_id):
+        params = {"illust_id": illust_id}
+        return self._call("v1/illust/detail", params)["illust"]
+
+    def illust_ranking(self, mode="day", date=None):
+        params = {"mode": mode, "date": date}
+        return self._pagination("v1/illust/ranking", params)
+
+    def user_bookmarks_illust(self, user_id, tag=None):
+        params = {"user_id": user_id, "restrict": "public", "tag": tag}
+        return self._pagination("v1/user/bookmarks/illust", params)
+
+    def user_detail(self, user_id):
+        params = {"user_id": user_id}
+        return self._call("v1/user/detail", params)["user"]
+
+    def user_illusts(self, user_id, illust_type=None):
+        params = {"user_id": user_id, "type": illust_type}
+        return self._pagination("v1/user/illusts", params)
+
+    def ugoira_metadata(self, illust_id):
+        params = {"illust_id": illust_id}
+        return self._call("v1/ugoira/metadata", params)["ugoira_metadata"]
+
    def _call(self, endpoint, params=None):
        url = "https://app-api.pixiv.net/" + endpoint

-        self.authenticate()
+        self.login()
        response = self.session.get(url, params=params)

        if 200 <= response.status_code < 400: