[35photo] add 'tag' extractor

pull/658/head
Mike Fährmann 5 years ago
parent 77fda8190c
commit 09f2271528
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -5,7 +5,7 @@ Unless otherwise known, assume all sites to be NSFW
==================== =================================== ================================================== ================
Site URL Capabilities Authentication
==================== =================================== ================================================== ================
35PHOTO https://35photo.pro/ Genres, individual Images, User Profiles
35PHOTO https://35photo.pro/ |35photo-C|
3dbooru http://behoimi.org/ Pools, Popular Images, Posts, Tag-Searches
4chan https://www.4chan.org/ Boards, Threads
4plebs https://archive.4plebs.org/ Threads
@ -141,6 +141,7 @@ Turboimagehost https://www.turboimagehost.com/ individual Images
半次元 https://bcy.net/ Posts, User Profiles
==================== =================================== ================================================== ================
.. |35photo-C| replace:: Genres, individual Images, Tag-Searches, User Profiles
.. |artstation-C| replace:: Albums, Artwork Listings, Challenges, individual Images, Likes, Search Results, User Profiles
.. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh, User Profiles
.. |flickr-C| replace:: Albums, Favorites, Galleries, Groups, individual Images, Search Results, User Profiles

@ -101,7 +101,7 @@ class _35photoUserExtractor(_35photoExtractor):
"""Extractor for all images of a user on 35photo.pro"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro"
r"/(?!photo_|genre_|rating/)([^/?&#]+)")
r"/(?!photo_|genre_|tags/|rating/)([^/?&#]+)")
test = (
("https://35photo.pro/liya", {
"pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg",
@ -137,6 +137,42 @@ class _35photoUserExtractor(_35photoExtractor):
})
class _35photoTagExtractor(_35photoExtractor):
    """Extractor for all photos from a tag listing"""
    subcategory = "tag"
    directory_fmt = ("{category}", "Tags", "{search_tag}")
    archive_fmt = "t{search_tag}_{id}_{num}"
    pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/tags/([^/?&#]+)"
    test = ("https://35photo.pro/tags/landscape/", {
        "range": "1-25",
        "count": 25,
    })

    def __init__(self, match):
        _35photoExtractor.__init__(self, match)
        # First capture group of 'pattern': the tag exactly as it
        # appears in the URL path (kept verbatim for building requests).
        self.tag = match.group(1)

    def metadata(self):
        """Return a dict with the tag under the 'search_tag' key

        The value is the URL tag passed through text.unquote() and
        lowercased; it fills '{search_tag}' in directory_fmt/archive_fmt.
        """
        search_tag = text.unquote(self.tag).lower()
        return {"search_tag": search_tag}

    def photos(self):
        """Yield photo IDs found on successive '/tags/<tag>/list_<n>/' pages

        Pages are requested starting at n=1. Iteration ends after the
        first page on which the last ID seen is empty or missing.
        """
        template = "{}/tags/{}/list_{}/"
        page_num = 0

        while True:
            page_num += 1
            listing = self.request(
                template.format(self.root, self.tag, page_num)).text

            # Tracks the most recently yielded ID on this page so that
            # directly repeated IDs are emitted only once.
            last_id = None
            for current_id in text.extract_iter(
                    listing, "35photo.pro/photo_", "/"):
                if current_id == last_id:
                    continue
                last_id = current_id
                yield current_id

            # Nothing (truthy) was found on this page: stop paginating.
            if not last_id:
                return
class _35photoGenreExtractor(_35photoExtractor):
"""Extractor for images of a specific genre on 35photo.pro"""
subcategory = "genre"

Loading…
Cancel
Save