From 09f227152838ecb98b08a2533feb74c2c3ec585f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Tue, 24 Mar 2020 01:45:13 +0100
Subject: [PATCH] [35photo] add 'tag' extractor

---
 docs/supportedsites.rst         |  3 ++-
 gallery_dl/extractor/35photo.py | 38 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
index f29102ec..945b2267 100644
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -5,7 +5,7 @@ Unless otherwise known, assume all sites to be NSFW
 ==================== =================================== ================================================== ================
 Site URL Capabilities Authentication
 ==================== =================================== ================================================== ================
-35PHOTO https://35photo.pro/ Genres, individual Images, User Profiles
+35PHOTO https://35photo.pro/ |35photo-C|
 3dbooru http://behoimi.org/ Pools, Popular Images, Posts, Tag-Searches
 4chan https://www.4chan.org/ Boards, Threads
 4plebs https://archive.4plebs.org/ Threads
@@ -141,6 +141,7 @@ Turboimagehost https://www.turboimagehost.com/ individual Images
 半次元 https://bcy.net/ Posts, User Profiles
 ==================== =================================== ================================================== ================
 
+.. |35photo-C| replace:: Genres, individual Images, Tag-Searches, User Profiles
 .. |artstation-C| replace:: Albums, Artwork Listings, Challenges, individual Images, Likes, Search Results, User Profiles
 .. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh, User Profiles
 .. |flickr-C| replace:: Albums, Favorites, Galleries, Groups, individual Images, Search Results, User Profiles
diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py
index 1bbd93ec..e33aa2dd 100644
--- a/gallery_dl/extractor/35photo.py
+++ b/gallery_dl/extractor/35photo.py
@@ -101,7 +101,7 @@ class _35photoUserExtractor(_35photoExtractor):
     """Extractor for all images of a user on 35photo.pro"""
     subcategory = "user"
     pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro"
-               r"/(?!photo_|genre_|rating/)([^/?&#]+)")
+               r"/(?!photo_|genre_|tags/|rating/)([^/?&#]+)")
     test = (
         ("https://35photo.pro/liya", {
             "pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg",
@@ -137,6 +137,42 @@ class _35photoUserExtractor(_35photoExtractor):
         })
 
 
+class _35photoTagExtractor(_35photoExtractor):
+    """Extractor for all photos from a tag listing"""
+    subcategory = "tag"
+    directory_fmt = ("{category}", "Tags", "{search_tag}")
+    archive_fmt = "t{search_tag}_{id}_{num}"
+    pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/tags/([^/?&#]+)"
+    test = ("https://35photo.pro/tags/landscape/", {
+        "range": "1-25",
+        "count": 25,
+    })
+
+    def __init__(self, match):
+        _35photoExtractor.__init__(self, match)
+        self.tag = match.group(1)
+
+    def metadata(self):
+        tag = text.unquote(self.tag)
+        return {"search_tag": tag.lower()}
+
+    def photos(self):
+        """Yield photo IDs from consecutive listing pages of this tag"""
+        pnum = 1
+        while True:
+            url = "{}/tags/{}/list_{}/".format(self.root, self.tag, pnum)
+            html = self.request(url).text
+            last = None
+            for pid in text.extract_iter(html, "35photo.pro/photo_", "/"):
+                if pid == last:  # consecutive duplicate
+                    continue
+                last = pid
+                yield pid
+            if not last:
+                return
+            pnum += 1
+
+
 class _35photoGenreExtractor(_35photoExtractor):
     """Extractor for images of a specific genre on 35photo.pro"""
     subcategory = "genre"