From 46b64251ebd6654adf726fa0892949d45684552f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 26 Oct 2022 15:33:07 +0200 Subject: [PATCH] [bcy] fix extraction (#3103) - fix regex for non-watermarked images - fetch data from '/item/detail' pages for all other posts, since '/apiv3/user/selfPosts' only has incomplete data --- gallery_dl/extractor/bcy.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/gallery_dl/extractor/bcy.py b/gallery_dl/extractor/bcy.py index 47e51b30..79828819 100644 --- a/gallery_dl/extractor/bcy.py +++ b/gallery_dl/extractor/bcy.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020-2021 Mike Fährmann +# Copyright 2020-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -25,9 +25,12 @@ class BcyExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.item_id = match.group(1) + self.session.headers["Referer"] = self.root + "/" def items(self): - sub = re.compile(r"^https?://p\d+-bcy\.byteimg\.com/img/banciyuan").sub + sub = re.compile(r"^https?://p\d+-bcy" + r"(?:-sign\.bcyimg\.com|\.byteimg\.com/img)" + r"/banciyuan").sub iroot = "https://img-bcy-qn.pstatp.com" noop = self.config("noop") @@ -64,19 +67,18 @@ class BcyExtractor(Extractor): url = image["path"].partition("~")[0] text.nameext_from_url(url, data) + # full-resolution image without watermark if data["extension"]: if not url.startswith(iroot): url = sub(iroot, url) data["filter"] = "" yield Message.Url, url, data + # watermarked image & low quality noop filter else: - if not multi: - if len(post["multi"]) < len(post["image_list"]): - multi = self._data_from_post(post["item_id"]) - multi = multi["post_data"]["multi"] - else: - multi = post["multi"] + if multi is None: + multi = self._data_from_post( + post["item_id"])["post_data"]["multi"] image = multi[data["num"] - 1] if image["origin"]: @@ -111,8 +113,8 @@ class BcyUserExtractor(BcyExtractor): "count": ">= 20", }), ("https://bcy.net/u/109282764041", { - "pattern": r"https://p\d-bcy.byteimg.com/img/banciyuan/[0-9a-f]+" - r"~tplv-banciyuan-logo-v3:.+\.image", + "pattern": r"https://p\d-bcy-sign\.bcyimg\.com/banciyuan/[0-9a-f]+" + r"~tplv-bcyx-yuan-logo-v1:.+\.image", "range": "1-25", "count": 25, }), @@ -171,13 +173,13 @@ class BcyPostExtractor(BcyExtractor): }), # only watermarked images available ("https://bcy.net/item/detail/6950136331708144648", { - "pattern": r"https://p\d-bcy.byteimg.com/img/banciyuan/[0-9a-f]+" - r"~tplv-banciyuan-logo-v3:.+\.image", - "count": 8, + "pattern": r"https://p\d-bcy-sign\.bcyimg\.com/banciyuan/[0-9a-f]+" + r"~tplv-bcyx-yuan-logo-v1:.+\.image", + "count": 10, "keyword": {"filter": "watermark"}, }), # deleted - ("https://bcy.net/item/detail/6780546160802143236", { + ("https://bcy.net/item/detail/6780546160802143237", { "exception": exception.NotFoundError, "count": 0, }),