[35photo] add 'tag' extractor

5 years ago · 09f2271528
parent 77fda8190c
commit 09f2271528
2 changed files with 39 additions and 2 deletions
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -5,7 +5,7 @@ Unless otherwise known, assume all sites to be NSFW
 ==================== =================================== ================================================== ================
 Site                 URL                                 Capabilities                                       Authentication
 ==================== =================================== ================================================== ================
-35PHOTO              https://35photo.pro/                Genres, individual Images, User Profiles
+35PHOTO              https://35photo.pro/                |35photo-C|
 3dbooru              http://behoimi.org/                 Pools, Popular Images, Posts, Tag-Searches
 4chan                https://www.4chan.org/              Boards, Threads
 4plebs               https://archive.4plebs.org/         Threads
@@ -141,6 +141,7 @@ Turboimagehost       https://www.turboimagehost.com/     individual Images
 半次元                  https://bcy.net/                    Posts, User Profiles
 ==================== =================================== ================================================== ================

+.. |35photo-C| replace:: Genres, individual Images, Tag-Searches, User Profiles
 .. |artstation-C| replace:: Albums, Artwork Listings, Challenges, individual Images, Likes, Search Results, User Profiles
 .. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh, User Profiles
 .. |flickr-C| replace:: Albums, Favorites, Galleries, Groups, individual Images, Search Results, User Profiles
--- a/gallery_dl/extractor/35photo.py
+++ b/gallery_dl/extractor/35photo.py
@@ -101,7 +101,7 @@ class _35photoUserExtractor(_35photoExtractor):
    """Extractor for all images of a user on 35photo.pro"""
    subcategory = "user"
    pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro"
-               r"/(?!photo_|genre_|rating/)([^/?&#]+)")
+               r"/(?!photo_|genre_|tags/|rating/)([^/?&#]+)")
    test = (
        ("https://35photo.pro/liya", {
            "pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg",
@@ -137,6 +137,42 @@ class _35photoUserExtractor(_35photoExtractor):
        })


+class _35photoTagExtractor(_35photoExtractor):
+    """Extractor for all photos from a tag listing on 35photo.pro"""
+    subcategory = "tag"
+    directory_fmt = ("{category}", "Tags", "{search_tag}")
+    archive_fmt = "t{search_tag}_{id}_{num}"
+    pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/tags/([^/?&#]+)"
+    test = ("https://35photo.pro/tags/landscape/", {
+        "range": "1-25",
+        "count": 25,
+    })
+
+    def __init__(self, match):
+        _35photoExtractor.__init__(self, match)
+        self.tag = match.group(1)  # tag segment captured by 'pattern'
+
+    def metadata(self):
+        return {"search_tag": text.unquote(self.tag).lower()}
+
+    def photos(self):
+        num = 1  # number of the listing page to fetch next
+
+        while True:
+            url = "{}/tags/{}/list_{}/".format(self.root, self.tag, num)
+            page = self.request(url).text
+            prev = None  # last ID yielded from this page
+
+            for photo_id in text.extract_iter(page, "35photo.pro/photo_", "/"):
+                if photo_id != prev:  # skip back-to-back repeats
+                    prev = photo_id
+                    yield photo_id
+
+            if not prev:  # page produced no IDs; stop paginating
+                return
+            num += 1
+
+
 class _35photoGenreExtractor(_35photoExtractor):
    """Extractor for images of a specific genre on 35photo.pro"""
    subcategory = "genre"