You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
gallery-dl/gallery_dl/extractor/bcy.py

207 lines
7.1 KiB

# -*- coding: utf-8 -*-
# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://bcy.net/"""
from .common import Extractor, Message
from .. import text, util, exception
import re
class BcyExtractor(Extractor):
    """Shared logic for all bcy.net extractors

    Subclasses provide the posts to process by overriding posts();
    items() turns each of them into directory and URL messages.
    """
    category = "bcy"
    directory_fmt = ("{category}", "{user[id]} {user[name]}")
    filename_fmt = "{post[id]} {id}.{extension}"
    archive_fmt = "{post[id]}_{id}"
    root = "https://bcy.net"

    def __init__(self, match):
        """Store the ID captured by the URL pattern and set the Referer"""
        Extractor.__init__(self, match)
        self.session.headers["Referer"] = self.root + "/"
        self.item_id = match.group(1)

    def items(self):
        """Yield one directory message per post and URL messages per image

        Posts with an empty 'image_list' are skipped. The same metadata
        dict is updated in place and re-yielded for every image.
        """
        image_root = "https://img-bcy-qn.pstatp.com"
        # rewrites the signed / byteimg host prefix of an image URL
        to_image_root = re.compile(
            r"^https?://p\d+-bcy"
            r"(?:-sign\.bcyimg\.com|\.byteimg\.com/img)"
            r"/banciyuan").sub
        want_noop = self.config("noop")

        for post in self.posts():
            images = post["image_list"]
            if not images:
                continue

            tags = post.get("post_tags") or ()
            user_info = {
                "id": post["uid"],
                "name": post["uname"],
                "avatar": to_image_root(
                    image_root, post["avatar"].partition("~")[0]),
            }
            post_info = {
                "id": text.parse_int(post["item_id"]),
                "tags": [tag["tag_name"] for tag in tags],
                "date": text.parse_timestamp(post["ctime"]),
                "parody": post["work"],
                "content": post["plain"],
                "likes": post["like_count"],
                "shares": post["share_count"],
                "replies": post["reply_count"],
            }
            data = {"user": user_info, "post": post_info}

            # 'multi' list from the post's detail page; only requested
            # once an image without a file extension shows up
            detail_images = None

            yield Message.Directory, data

            for num, image in enumerate(images, 1):
                data["num"] = num
                data["id"] = image["mid"]
                data["width"] = image["w"]
                data["height"] = image["h"]

                # drop everything from the first '~' onwards
                url = image["path"].partition("~")[0]
                text.nameext_from_url(url, data)

                if data["extension"]:
                    # full-resolution image without watermark
                    if not url.startswith(image_root):
                        url = to_image_root(image_root, url)
                    data["filter"] = ""
                    yield Message.Url, url, data
                    continue

                # watermarked image & low quality noop filter
                if detail_images is None:
                    detail_images = self._data_from_post(
                        post["item_id"])["post_data"]["multi"]
                fallback = detail_images[num - 1]

                if fallback["origin"]:
                    data["filter"] = "watermark"
                    yield Message.Url, fallback["origin"], data

                if want_noop:
                    data["extension"] = ""
                    data["filter"] = "noop"
                    yield Message.Url, fallback["original_path"], data

    def posts(self):
        """Returns an iterable with all relevant 'post' objects"""

    def _data_from_post(self, post_id):
        """Return the 'detail' object embedded in a post's detail page

        The JSON text is taken from between 'JSON.parse("' and '");'
        in the page source and unescaped before it is parsed.
        """
        url = "{}/item/detail/{}".format(self.root, post_id)
        page = self.request(url, notfound="post").text

        payload = text.extr(page, 'JSON.parse("', '");')
        payload = payload.replace('\\\\u002F', '/')
        payload = payload.replace('\\"', '"')

        try:
            parsed = util.json_loads(payload)
        except ValueError:
            # retry after a second round of quote unescaping
            parsed = util.json_loads(payload.replace('\\"', '"'))
        return parsed["detail"]
class BcyUserExtractor(BcyExtractor):
    """Extractor for the posts of a bcy.net user"""
    subcategory = "user"
    pattern = r"(?:https?://)?bcy\.net/u/(\d+)"
    test = (
        ("https://bcy.net/u/1933712", {
            "pattern": r"https://img-bcy-qn.pstatp.com/\w+/\d+/post/\w+/.+jpg",
            "count": ">= 20",
        }),
        ("https://bcy.net/u/109282764041", {
            "pattern": r"https://p\d-bcy-sign\.bcyimg\.com/banciyuan/[0-9a-f]+"
                       r"~tplv-bcyx-yuan-logo-v1:.+\.image",
            "range": "1-25",
            "count": 25,
        }),
    )

    def posts(self):
        """Yield the 'item_detail' object of every item in the user's feed

        Pages are requested repeatedly, passing the 'since' value of the
        last received item, until a response has no 'data' / 'items' key
        or its item list is empty.
        """
        endpoint = self.root + "/apiv3/user/selfPosts"
        query = {"uid": self.item_id, "since": None}

        while True:
            response = self.request(endpoint, params=query).json()

            try:
                entries = response["data"]["items"]
            except KeyError:
                return
            if not entries:
                return

            for entry in entries:
                yield entry["item_detail"]
            # continue after the last item of this page
            query["since"] = entry["since"]
class BcyPostExtractor(BcyExtractor):
    """Extractor for a single bcy.net post"""
    subcategory = "post"
    pattern = r"(?:https?://)?bcy\.net/item/detail/(\d+)"
    test = (
        ("https://bcy.net/item/detail/6355835481002893070", {
            "url": "301202375e61fd6e0e2e35de6c3ac9f74885dec3",
            "count": 1,
            "keyword": {
                "user": {
                    "id"     : 1933712,
                    "name"   : "wukloo",
                    "avatar" : "re:https://img-bcy-qn.pstatp.com/Public/",
                },
                "post": {
                    "id"     : 6355835481002893070,
                    "tags"   : list,
                    "date"   : "dt:2016-11-22 08:47:46",
                    "parody" : "东方PROJECT",
                    "content": "re:根据微博的建议稍微做了点修改",
                    "likes"  : int,
                    "shares" : int,
                    "replies": int,
                },
                "id": 8330182,
                "num": 1,
                "width" : 3000,
                "height": 1687,
                "filename": "712e0780b09011e696f973c3d1568337",
                "extension": "jpg",
            },
        }),
        # only watermarked images available
        ("https://bcy.net/item/detail/6950136331708144648", {
            "pattern": r"https://p\d-bcy-sign\.bcyimg\.com/banciyuan/[0-9a-f]+"
                       r"~tplv-bcyx-yuan-logo-v1:.+\.image",
            "count": 10,
            "keyword": {"filter": "watermark"},
        }),
        # deleted
        ("https://bcy.net/item/detail/6780546160802143237", {
            "exception": exception.NotFoundError,
            "count": 0,
        }),
        # only visible to logged in users
        ("https://bcy.net/item/detail/6747523535150783495", {
            "count": 0,
        }),
        # JSON decode error (#3321)
        ("https://bcy.net/item/detail/7166939271872388110", {
            "count": 0,
        }),
    )

    def posts(self):
        """Return a 1-tuple with the requested post, or an empty tuple

        An empty tuple is returned when loading the post's detail data
        raises KeyError.
        """
        try:
            detail = self._data_from_post(self.item_id)
        except KeyError:
            return ()

        post = detail["post_data"]
        # expose the fields under the key names items() reads;
        # both values are computed before either key is written
        post.update(
            image_list=post["multi"],
            plain=text.parse_unicode_escapes(post["plain"]),
        )
        # merge the 'detail_user' fields into the post object
        post.update(detail["detail_user"])
        return (post,)