[pixiv] respect more query parameters for user URLs

The API endpoint responsible for user illustrations does not
provide sufficient filter capabilities* to match the actual
website, so we are spinning our own filters.

Respected parameters are
    'type': illust, manga, ugoira
    'tag' : any image tag (this was already supported)
    'p'   : the page to start on

*
- API can filter for illustrations and manga, but not for ugoira.
- 'offset' is applied before filtering
- no 'tag' filter
pull/86/head
Mike Fährmann 6 years ago
parent b8e53b8c6b
commit 0a1863fce3
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -9,7 +9,7 @@
"""Extract images and ugoira from https://www.pixiv.net/"""
from .common import Extractor, Message
from .. import text, exception
from .. import text, util, exception
from ..cache import cache
from datetime import datetime, timedelta
@ -89,7 +89,7 @@ class PixivUserExtractor(PixivExtractor):
"""Extractor for works of a pixiv-user"""
subcategory = "user"
pattern = [(r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/member(?:_illust)?\.php\?id=(\d+)(?:.*&tag=([^&#]+))?"),
r"/member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"),
(r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+)()")]
test = [
@ -100,6 +100,10 @@ class PixivUserExtractor(PixivExtractor):
"&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
"url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
}),
(("https://www.pixiv.net/member_illust.php?id=3137110"
"&tag=%E3%83%96%E3%82%A4%E3%82%BA&type=illust&p=2"), {
"count": ">= 55",
}),
("http://www.pixiv.net/member_illust.php?id=173531", {
"exception": exception.NotFoundError,
}),
@ -112,18 +116,32 @@ class PixivUserExtractor(PixivExtractor):
def __init__(self, match):
PixivExtractor.__init__(self)
self.user_id, self.tag = match.groups()
self.user_id, self.query = match.groups()
def works(self):
if self.tag:
return self._tagged_works()
return self.api.user_illusts(self.user_id)
works = self.api.user_illusts(self.user_id)
if self.query:
qdict = text.parse_query(self.query)
if "type" in qdict:
type_ = qdict["type"].lower()
works = filter(self._is_type(type_), works)
if "tag" in qdict:
tag = text.unquote(qdict["tag"]).lower()
works = filter(self._has_tag(tag), works)
if "p" in qdict: # apply page-offset last
offset = (text.parse_int(qdict["p"], 1) - 1) * 20
works = util.advance(works, offset)
return works
def _tagged_works(self):
tag = text.unquote(self.tag).lower()
for work in self.api.user_illusts(self.user_id):
if tag in [tag["name"].lower() for tag in work["tags"]]:
yield work
@staticmethod
def _has_tag(tag):
return lambda work: tag in [t["name"].lower() for t in work["tags"]]
@staticmethod
def _is_type(type_):
return lambda work: work["type"] == type_
class PixivMeExtractor(PixivExtractor):
@ -466,8 +484,8 @@ class PixivAppAPI():
params = {"user_id": user_id}
return self._call("v1/user/detail", params)["user"]
def user_illusts(self, user_id, illust_type=None):
params = {"user_id": user_id, "type": illust_type}
def user_illusts(self, user_id):
params = {"user_id": user_id}
return self._pagination("v1/user/illusts", params)
def ugoira_metadata(self, illust_id):

@ -22,7 +22,6 @@ TRAVIS_SKIP = {
# temporary issues, etc.
BROKEN = {
"pixiv", # /users/<id>/favorite_works API endpoint is gone
}

Loading…
Cancel
Save