merge #4886: [urlgalleries] add 'gallery' extractor (#919, #1184, #2905)

10 months ago · bf74eb5c46
parent 042a9da451 ade93c5397
commit bf74eb5c46
4 changed files with 111 additions and 0 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -889,6 +889,12 @@ Consider all sites to be NSFW unless otherwise known.
    <td>Files</td>
    <td></td>
 </tr>
+<tr>
+    <td>Urlgalleries</td>
+    <td>https://urlgalleries.net/</td>
+    <td>Galleries</td>
+    <td></td>
+</tr>
 <tr>
    <td>Vipergirls</td>
    <td>https://vipergirls.to/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -155,6 +155,7 @@ modules = [
    "tumblrgallery",
    "twibooru",
    "twitter",
+    "urlgalleries",
    "unsplash",
    "uploadir",
    "urlshortener",
--- a/gallery_dl/extractor/urlgalleries.py
+++ b/gallery_dl/extractor/urlgalleries.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://urlgalleries.net/"""
+
+from .common import GalleryExtractor, Message
+from .. import text
+
+
+class UrlgalleriesGalleryExtractor(GalleryExtractor):
+    """Base class for Urlgalleries extractors"""
+    category = "urlgalleries"
+    root = "urlgalleries.net"
+    request_interval = (0.5, 1.0)
+    pattern = r"(?:https?://)(?:(\w+)\.)?urlgalleries\.net/(?:[\w-]+-)?(\d+)"
+    example = "https://blog.urlgalleries.net/gallery-12345/TITLE"
+
+    def __init__(self, match):
+        self.blog, self.gallery_id = match.groups()
+        url = "https://{}.urlgalleries.net/porn-gallery-{}/?a=10000".format(
+            self.blog, self.gallery_id)
+        GalleryExtractor.__init__(self, match, url)
+
+    def items(self):
+        page = self.request(self.gallery_url).text
+        imgs = self.images(page)
+        data = self.metadata(page)
+        data["count"] = len(imgs)
+        del page
+
+        root = "https://{}.urlgalleries.net".format(self.blog)
+        yield Message.Directory, data
+        for data["num"], img in enumerate(imgs, 1):
+            response = self.request(
+                root + img, method="HEAD", allow_redirects=False)
+            yield Message.Queue, response.headers["Location"], data
+
+    def metadata(self, page):
+        extr = text.extract_from(page)
+        return {
+            "gallery_id": self.gallery_id,
+            "_site": extr(' title="', '"'),  # site name
+            "blog" : text.unescape(extr(' title="', '"')),
+            "_rprt": extr(' title="', '"'),  # report button
+            "title": text.unescape(extr(' title="', '"').strip()),
+            "date" : text.parse_datetime(
+                extr(" images in gallery | ", "<"), "%B %d, %Y %H:%M"),
+        }
+
+    def images(self, page):
+        imgs = text.extr(page, 'id="wtf"', "</div>")
+        return list(text.extract_iter(imgs, " href='", "'"))
--- a/test/results/urlgalleries.py
+++ b/test/results/urlgalleries.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import urlgalleries
+
+
+# Result definitions for urlgalleries.UrlgalleriesGalleryExtractor.
+# NOTE(review): keys starting with "#" look like directives for the test
+# runner (input URL, expected class, item range, expected URLs) and the
+# remaining keys like expected metadata values -- confirm against the
+# test harness.
+__tests__ = (
+{
+    "#url"     : "https://photos2q.urlgalleries.net/porn-gallery-7851311/clarice-window-8",
+    "#category": ("", "urlgalleries", "gallery"),
+    "#class"   : urlgalleries.UrlgalleriesGalleryExtractor,
+    "#range"   : "1-3",
+    "#urls"    : (
+        "https://fappic.com/x207mqkn2463/4gq1yv.jpg",
+        "https://fappic.com/q684ua2rp0j9/4gq1xv.jpg",
+        "https://fappic.com/8vf3n8fgz9po/4gq1ya.jpg",
+    ),
+
+    "blog"      : "photos2q",
+    "count"     : 39,
+    "date"      : "dt:2023-12-08 13:59:00",
+    "gallery_id": "7851311",
+    "num"       : range(1, 3),
+    "title"     : "Clarice window 8",
+},
+
+# Short URL form: only the numeric gallery ID follows the host.
+{
+    "#url"     : "https://dreamer.urlgalleries.net/7645840",
+    "#category": ("", "urlgalleries", "gallery"),
+    "#class"   : urlgalleries.UrlgalleriesGalleryExtractor,
+    "#range"   : "1-3",
+    "#urls"    : (
+        "https://www.fappic.com/vj7up04ny487/AmourAngels-0001.jpg",
+        "https://www.fappic.com/zfgsmpm36iyv/AmourAngels-0002.jpg",
+        "https://www.fappic.com/rqpt37rdbwa5/AmourAngels-0003.jpg",
+    ),
+
+    "blog"      : "Dreamer",
+    "count"     : 105,
+    "date"      : "dt:2020-03-10 21:17:00",
+    "gallery_id": "7645840",
+    "num"       : range(1, 3),
+    "title"     : "Angelika - Rustic Charm - AmourAngels 2016-09-27",
+},
+
+)