[booru] add generalized extractors for *booru sites

similar to cc15fbe7
pull/1195/head
Mike Fährmann 4 years ago
parent 5f23441e12
commit a3a863fc13
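The interesting piece of this commit is generate_extractors() from common.py (introduced in cc15fbe7, which this commit is "similar to"): instead of one module per site, the new booru.py feeds a data dictionary to that helper and lets it build the per-site extractor classes. Below is a rough sketch of what the helper presumably does, inferred only from its call site and the class attributes (subcategory, pattern_fmt, the "test-*" keys) visible in this diff; the names and details are assumptions, not gallery-dl's actual implementation.

    # Sketch only: inferred from the generate_extractors(EXTRACTORS, ...) call
    # in the new booru.py; not the actual code in gallery_dl/extractor/common.py.
    import re

    def generate_extractors(extractor_data, symtable, classes):
        """Generate one subclass per (site, base class) pair and export it."""
        for category, info in extractor_data.items():
            root = info["root"]
            domain = re.escape(root.partition("://")[2])   # e.g. r"rule34\.xxx"
            for cls in classes:
                # "rule34" + "tag" -> "Rule34TagExtractor"
                name = (category.capitalize()
                        + cls.subcategory.capitalize() + "Extractor")
                symtable[name] = type(name, (cls,), {
                    "category": category,
                    "root"    : root,
                    # site prefix plus the per-subcategory URL suffix
                    "pattern" : r"(?:https?://)?(?:www\.)?"
                                + domain + cls.pattern_fmt,
                    "test"    : info.get("test-" + cls.subcategory),
                })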

gallery_dl/extractor/__init__.py

@@ -92,11 +92,8 @@ modules = [
     "pururin",
     "reactor",
     "readcomiconline",
-    "realbooru",
     "reddit",
     "redgifs",
-    "rule34",
-    "safebooru",
     "sankaku",
     "sankakucomplex",
     "seiga",
@@ -122,6 +119,7 @@ modules = [
     "xhamster",
     "xvideos",
     "yuki",
+    "booru",
     "moebooru",
     "foolfuuka",
     "foolslide",

gallery_dl/extractor/booru.py

@@ -1,247 +1,248 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2020 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Base classes for extractors for danbooru and co"""
+"""Extractors for *booru sites"""
 
-from .common import Extractor, Message
-from .. import text, exception
+from .common import Extractor, Message, generate_extractors
+from .. import text, util, exception
 from xml.etree import ElementTree
 import collections
-import datetime
-import operator
 import re
 
 
 class BooruExtractor(Extractor):
-    """Base class for all booru extractors"""
+    """Base class for *booru extractors"""
     basecategory = "booru"
     filename_fmt = "{category}_{id}_{md5}.{extension}"
-    api_url = ""
-    post_url = ""
-    per_page = 50
-    page_start = 1
-    page_limit = None
-    sort = False
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params = {}
-        self.extags = self.post_url and self.config("tags", False)
+    page_start = 0
+    per_page = 100
+
+    def items(self):
+        self.login()
+        extended_tags = self.config("tags", False)
+        data = self.metadata()
+        for post in self.posts():
+            try:
+                url = self._prepare_post(post, extended_tags)
+            except KeyError:
+                continue
+            post.update(data)
+            text.nameext_from_url(url, post)
+            yield Message.Directory, post
+            yield Message.Url, url, post
 
     def skip(self, num):
         pages = num // self.per_page
-        if self.page_limit and pages + self.page_start > self.page_limit:
-            pages = self.page_limit - self.page_start
         self.page_start += pages
         return pages * self.per_page
 
-    def items(self):
-        yield Message.Version, 1
-        data = self.get_metadata()
-
-        self.reset_page()
-        while True:
-            images = self.parse_response(
-                self.request(self.api_url, params=self.params))
-
-            for image in images:
-                try:
-                    url = self.get_file_url(image)
-                except KeyError:
-                    continue
-                if url.startswith("/"):
-                    url = text.urljoin(self.api_url, url)
-                image.update(data)
-                text.nameext_from_url(url, image)
-                if self.extags:
-                    self.extended_tags(image)
-                yield Message.Directory, image
-                yield Message.Url, url, image
-
-            if len(images) < self.per_page:
-                return
-            self.update_page(image)
-
-    def reset_page(self):
-        """Initialize params to point to the first page"""
-        self.params["page"] = self.page_start
-
-    def update_page(self, data):
-        """Update params to point to the next page"""
-
-    def parse_response(self, response):
-        """Parse JSON API response"""
-        images = response.json()
-        if self.sort:
-            images.sort(key=operator.itemgetter("score", "id"),
-                        reverse=True)
-        return images
-
-    def get_metadata(self):
-        """Collect metadata for extractor-job"""
-        return {}
-
-    @staticmethod
-    def get_file_url(image):
-        return image["file_url"]
-
-    def extended_tags(self, image, page=None):
-        """Retrieve extended tag information"""
-        if not page:
-            url = self.post_url.format(image["id"])
-            page = self.request(url).text
-        tags = collections.defaultdict(list)
-        tags_html = text.extract(page, '<ul id="tag-', '</ul>')[0]
-        pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"']+)", re.S)
-        for tag_type, tag_name in pattern.findall(tags_html or ""):
-            tags[tag_type].append(text.unquote(tag_name))
-        for key, value in tags.items():
-            image["tags_" + key] = " ".join(value)
-
-
-class XmlParserMixin():
-    """Mixin for XML based API responses"""
-    def parse_response(self, response):
-        root = ElementTree.fromstring(response.text)
-        return [post.attrib for post in root]
-
-
-class MoebooruPageMixin():
-    """Pagination for Moebooru and Danbooru v1"""
-    def update_page(self, data):
-        if self.page_limit:
-            self.params["page"] = None
-            self.params["before_id"] = data["id"]
-        else:
-            self.params["page"] += 1
-
-
-class GelbooruPageMixin():
-    """Pagination for Gelbooru-like sites"""
-    page_start = 0
-
-    def reset_page(self):
-        self.params["pid"] = self.page_start
-
-    def update_page(self, data):
-        self.params["pid"] += 1
-
-
-class TagMixin():
-    """Extraction of images based on search-tags"""
+    def login(self):
+        """Login and set necessary cookies"""
+
+    def metadata(self):
+        """Return a dict with general metadata"""
+        return ()
+
+    def posts(self):
+        """Return an iterable with post objects"""
+        return ()
+
+    def _prepare_post(self, post, extended_tags=False):
+        url = post["file_url"]
+        if url[0] == "/":
+            url = self.root + url
+        if extended_tags:
+            self._fetch_extended_tags(post)
+        post["date"] = text.parse_datetime(
+            post["created_at"], "%a %b %d %H:%M:%S %z %Y")
+        return url
+
+    def _fetch_extended_tags(self, post, page=None):
+        if not page:
+            url = "{}/index.php?page=post&s=view&id={}".format(
+                self.root, post["id"])
+            page = self.request(url).text
+        html = text.extract(page, '<ul id="tag-', '</ul>')[0]
+        if html:
+            tags = collections.defaultdict(list)
+            pattern = re.compile(
+                r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)", re.S)
+            for tag_type, tag_name in pattern.findall(html):
+                tags[tag_type].append(text.unquote(tag_name))
+            for key, value in tags.items():
+                post["tags_" + key] = " ".join(value)
+
+    def _api_request(self, params):
+        url = self.root + "/index.php?page=dapi&s=post&q=index"
+        return ElementTree.fromstring(self.request(url, params=params).text)
+
+    def _pagination(self, params):
+        params["pid"] = self.page_start
+        params["limit"] = self.per_page
+
+        while True:
+            root = self._api_request(params)
+            for post in root:
+                yield post.attrib
+            if len(root) < self.per_page:
+                return
+            params["pid"] += 1
+
+
+class BooruPostExtractor(BooruExtractor):
+    subcategory = "post"
+    archive_fmt = "{id}"
+    pattern_fmt = r"/index\.php\?page=post&s=view&id=(\d+)"
+
+    def __init__(self, match):
+        BooruExtractor.__init__(self, match)
+        self.post_id = match.group(1)
+
+    def posts(self):
+        return self._pagination({"id": self.post_id})
+
+
+class BooruTagExtractor(BooruExtractor):
     subcategory = "tag"
     directory_fmt = ("{category}", "{search_tags}")
     archive_fmt = "t_{search_tags}_{id}"
+    pattern_fmt = r"/index\.php\?page=post&s=list&tags=([^&#]+)"
 
     def __init__(self, match):
-        super().__init__(match)
-        self.tags = text.unquote(match.group("tags").replace("+", " "))
-        self.params["tags"] = self.tags
-        self.params["limit"] = self.per_page
+        BooruExtractor.__init__(self, match)
+        self.tags = text.unquote(match.group(1).replace("+", " "))
 
-    def get_metadata(self):
+    def metadata(self):
         return {"search_tags": self.tags}
 
+    def posts(self):
+        return self._pagination({"tags" : self.tags})
 
-class PoolMixin():
-    """Extraction of image-pools"""
+
+class BooruPoolExtractor(BooruExtractor):
     subcategory = "pool"
     directory_fmt = ("{category}", "pool", "{pool}")
     archive_fmt = "p_{pool}_{id}"
+    pattern_fmt = r"/index\.php\?page=pool&s=show&id=(\d+)"
 
     def __init__(self, match):
-        super().__init__(match)
-        self.pool = match.group("pool")
-        self.params["tags"] = "pool:" + self.pool
-        self.params["limit"] = self.per_page
-
-    def get_metadata(self):
-        return {"pool": text.parse_int(self.pool)}
-
-    def skip(self, num):
-        self.page_start += num
-        return num
-
-
-class GelbooruPoolMixin(PoolMixin):
-    """Image-pool extraction for Gelbooru-like sites"""
-    per_page = 1
-
-    def get_metadata(self):
-        page = self.request(self.pool_url.format(self.pool)).text
-        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
-        if not name:
-            name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
+        BooruExtractor.__init__(self, match)
+        self.pool_id = match.group(1)
+        self.post_ids = ()
+
+    def metadata(self):
+        url = "{}/index.php?page=pool&s=show&id={}".format(
+            self.root, self.pool_id)
+        page = self.request(url).text
+        name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
         if not name:
             raise exception.NotFoundError("pool")
-        self.posts = list(text.extract_iter(
-            page, 'class="thumb" id="p', '"', pos))
+        self.post_ids = text.extract_iter(
+            page, 'class="thumb" id="p', '"', pos)
         return {
-            "pool": text.parse_int(self.pool),
+            "pool": text.parse_int(self.pool_id),
             "pool_name": text.unescape(name),
-            "count": len(self.posts),
         }
 
-    def reset_page(self):
-        self.index = self.page_start
-        self.update_page(None)
-
-    def update_page(self, data):
-        try:
-            post = self.posts[self.index]
-            self.index += 1
-        except IndexError:
-            post = "0"
-        self.params["tags"] = "id:" + post
-
-
-class PostMixin():
-    """Extraction of a single image-post"""
-    subcategory = "post"
-    archive_fmt = "{id}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.post = match.group("post")
-        self.params["tags"] = "id:" + self.post
-
-
-class MoebooruPopularMixin():
-    """Extraction and metadata handling for Moebooru and Danbooru v1"""
-    subcategory = "popular"
-    directory_fmt = ("{category}", "popular", "{scale}", "{date}")
-    archive_fmt = "P_{scale[0]}_{date}_{id}"
-    page_start = None
-    sort = True
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update(text.parse_query(match.group("query")))
-        self.scale = match.group("scale")
-
-    def get_metadata(self, fmt="%Y-%m-%d"):
-        date = self.get_date() or datetime.date.today().isoformat()
-        scale = self.get_scale() or "day"
-
-        if scale == "week":
-            date = datetime.date.fromisoformat(date)
-            date = (date - datetime.timedelta(days=date.weekday())).isoformat()
-        elif scale == "month":
-            date = date[:-3]
-
-        return {"date": date, "scale": scale}
-
-    def get_date(self):
-        if "year" in self.params:
-            return "{:>04}-{:>02}-{:>02}".format(
-                self.params["year"],
-                self.params.get("month", "01"),
-                self.params.get("day", "01"))
-        return None
-
-    def get_scale(self):
-        if self.scale and self.scale.startswith("by_"):
-            return self.scale[3:]
-        return self.scale
+    def posts(self):
+        params = {}
+        for params["id"] in util.advance(self.post_ids, self.page_start):
+            for post in self._api_request(params):
+                yield post.attrib
+
+
+EXTRACTORS = {
+    "rule34": {
+        "root": "https://rule34.xxx",
+        "test-tag": (
+            ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
+                "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
+                "pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
+                "count": 1,
+            }),
+        ),
+        "test-pool": (
+            ("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
+                "count": 3,
+            }),
+        ),
+        "test-post": (
+            ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
+                "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
+                "options": (("tags", True),),
+                "keyword": {
+                    "tags_artist": "danraku",
+                    "tags_character": "kashima_(kantai_collection)",
+                    "tags_copyright": "kantai_collection",
+                    "tags_general": str,
+                    "tags_metadata": str,
+                },
+            }),
+        ),
+    },
+    "safebooru": {
+        "root": "https://safebooru.org",
+        "test-tag": (
+            ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
+                "url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
+                "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
+            }),
+        ),
+        "test-pool": (
+            ("https://safebooru.org/index.php?page=pool&s=show&id=11", {
+                "count": 5,
+            }),
+        ),
+        "test-post": (
+            ("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
+                "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
+                "content": "93b293b27dabd198afafabbaf87c49863ac82f27",
+                "options": (("tags", True),),
+                "keyword": {
+                    "tags_artist": "kawanakajima",
+                    "tags_character": "heath_ledger ronald_mcdonald the_joker",
+                    "tags_copyright": "dc_comics mcdonald's the_dark_knight",
+                    "tags_general": str,
+                },
+            }),
+        ),
+    },
+    "realbooru": {
+        "root": "https://realbooru.com",
+        "test-tag": (
+            ("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
+                "count": ">= 64",
+            }),
+        ),
+        "test-pool": (
+            ("https://realbooru.com/index.php?page=pool&s=show&id=1", {
+                "count": 3,
+            }),
+        ),
+        "test-post": (
+            ("https://realbooru.com/index.php?page=post&s=view&id=668483", {
+                "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
+                "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
+            }),
+        ),
+    },
+}
+
+generate_extractors(EXTRACTORS, globals(), (
+    BooruTagExtractor,
+    BooruPoolExtractor,
+    BooruPostExtractor,
+))
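All three sites run the same Gelbooru 0.2 codebase, whose "dapi" endpoint returns posts as XML attributes; that is why _api_request() can hand ElementTree elements to _pagination(), which simply yields post.attrib. A minimal illustration of that data flow, with invented attribute values:

    # Illustrative Gelbooru-0.2 "dapi" response; attribute values are invented.
    from xml.etree import ElementTree

    sample = """\
    <posts count="1" offset="0">
      <post id="1995545" md5="0123456789abcdef0123456789abcdef"
            file_url="https://rule34.xxx/images/3190/0123456789abcdef.jpg"
            created_at="Wed Oct 21 07:28:00 +0000 2020"
            tags="danraku kashima_(kantai_collection)" score="10"/>
    </posts>"""

    root = ElementTree.fromstring(sample)
    for post in root:                # what _pagination() iterates over
        attrib = post.attrib         # a plain dict -> becomes the 'post' object
        print(attrib["id"], attrib["file_url"])
        # 'created_at' matches the "%a %b %d %H:%M:%S %z %Y" format
        # that _prepare_post() feeds to text.parse_datetime()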

gallery_dl/extractor/gelbooru.py

@@ -6,98 +6,27 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images from https://gelbooru.com/"""
+"""Extractors for https://gelbooru.com/"""
 
 from . import booru
-from .common import Message
-from .. import text
+from .. import text, exception
 
 
-class GelbooruExtractor(booru.XmlParserMixin,
-                        booru.GelbooruPageMixin,
-                        booru.BooruExtractor):
+class GelbooruBase():
     """Base class for gelbooru extractors"""
     category = "gelbooru"
-    api_url = "https://gelbooru.com/index.php"
-    post_url = "https://gelbooru.com/index.php?page=post&s=view&id={}"
-    pool_url = "https://gelbooru.com/index.php?page=pool&s=show&id={}"
+    root = "https://gelbooru.com"
 
-    def __init__(self, match):
-        super().__init__(match)
-        self.use_api = self.config("api", True)
-        if self.use_api:
-            self.params.update({"page": "dapi", "s": "post", "q": "index"})
-        else:
-            self.items = self.items_noapi
-            self.session.cookies["fringeBenefits"] = "yup"
-            self.per_page = 42
-
-    @staticmethod
-    def get_file_url(image):
-        url = image["file_url"]
+    def _prepare_post(self, post, extended_tags=False):
+        url = booru.BooruExtractor._prepare_post(self, post, extended_tags)
         if url.startswith("https://mp4.gelbooru.com/"):
-            ihash = image["md5"]
+            md5 = post["md5"]
             return "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
-                ihash[0:2], ihash[2:4], ihash)
+                md5[0:2], md5[2:4], md5)
         return url
 
-    def items_noapi(self):
-        yield Message.Version, 1
-        data = self.get_metadata()
-
-        for post in self.get_posts():
-            post = self.get_post_data(post)
-            url = post["file_url"]
-            post.update(data)
-            text.nameext_from_url(url, post)
-            yield Message.Directory, post
-            yield Message.Url, url, post
-
-    def get_posts(self):
-        """Return an iterable containing all relevant post objects"""
-        url = "https://gelbooru.com/index.php?page=post&s=list"
-        params = {
-            "tags": self.params["tags"],
-            "pid" : self.page_start * self.per_page
-        }
-
-        while True:
-            page = self.request(url, params=params).text
-            ids = list(text.extract_iter(page, '<span id="s', '"'))
-            yield from ids
-            if len(ids) < self.per_page:
-                return
-            params["pid"] += self.per_page
-
-    def get_post_data(self, post_id):
-        """Extract metadata of a single post"""
-        page = self.request(self.post_url.format(post_id)).text
-        data = text.extract_all(page, (
-            (None        , '<meta name="keywords"', ''),
-            ("tags"      , ' imageboard- ', '"'),
-            ("id"        , '<li>Id: ', '<'),
-            ("created_at", '<li>Posted: ', '<'),
-            ("width"     , '<li>Size: ', 'x'),
-            ("height"    , '', '<'),
-            ("source"    , '<li>Source: <a href="', '"'),
-            ("rating"    , '<li>Rating: ', '<'),
-            (None        , '<li>Score: ', ''),
-            ("score"     , '>', '<'),
-            ("file_url"  , '<li><a href="http', '"'),
-            ("change"    , ' id="lupdated" value="', '"'),
-        ))[0]
-        data["file_url"] = "http" + data["file_url"].replace("m//", "m/", 1)
-        data["md5"] = data["file_url"].rpartition("/")[2].partition(".")[0]
-        data["rating"] = (data["rating"] or "?")[0].lower()
-        data["tags"] = " ".join(
-            [tag.replace(" ", "_") for tag in data["tags"].split(", ")])
-        if self.extags:
-            self.extended_tags(data, page)
-        return data
-
-
-class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
+
+class GelbooruTagExtractor(GelbooruBase, booru.BooruTagExtractor):
     """Extractor for images from gelbooru.com based on search-tags"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
@@ -112,7 +41,7 @@ class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
     )
 
 
-class GelbooruPoolExtractor(booru.PoolMixin, GelbooruExtractor):
+class GelbooruPoolExtractor(GelbooruBase, booru.BooruPoolExtractor):
     """Extractor for image-pools from gelbooru.com"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=pool&s=show&id=(?P<pool>\d+)")
@@ -126,8 +55,23 @@ class GelbooruPoolExtractor(booru.PoolMixin, GelbooruExtractor):
         }),
     )
 
+    def metadata(self):
+        url = "{}/index.php?page=pool&s=show&id={}".format(
+            self.root, self.pool_id)
+        page = self.request(url).text
+        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
+        if not name:
+            raise exception.NotFoundError("pool")
+        self.post_ids = text.extract_iter(page, 'class="" id="p', '"', pos)
+        return {
+            "pool": text.parse_int(self.pool_id),
+            "pool_name": text.unescape(name),
+        }
 
-class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor):
+
+class GelbooruPostExtractor(GelbooruBase, booru.BooruPostExtractor):
     """Extractor for single images from gelbooru.com"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
               r"\?page=post&s=view&id=(?P<post>\d+)")
@@ -135,6 +79,3 @@ class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor):
         "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
         "count": 1,
     })
-
-    def get_posts(self):
-        return (self.post,)
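The one site-specific quirk kept in gelbooru.py is the video URL rewrite in GelbooruBase._prepare_post(): when the API reports a file on mp4.gelbooru.com, the extractor rebuilds the URL from the post's md5 under img2.gelbooru.com instead. The path scheme, shown with an invented hash:

    # Invented md5 value; only the two-level md5 path scheme matters here.
    md5 = "0123456789abcdef0123456789abcdef"
    url = "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
        md5[0:2], md5[2:4], md5)
    # -> https://img2.gelbooru.com/images/01/23/0123456789abcdef0123456789abcdef.webm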

gallery_dl/extractor/realbooru.py

@@ -1,59 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://realbooru.com/"""
-
-from . import booru
-
-
-class RealbooruExtractor(booru.XmlParserMixin,
-                         booru.GelbooruPageMixin,
-                         booru.BooruExtractor):
-    """Base class for realbooru extractors"""
-    category = "realbooru"
-    api_url = "https://realbooru.com/index.php"
-    post_url = "https://realbooru.com/index.php?page=post&s=view&id={}"
-    pool_url = "https://realbooru.com/index.php?page=pool&s=show&id={}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class RealbooruTagExtractor(booru.TagMixin, RealbooruExtractor):
-    """Extractor for images from realbooru.com based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
-        "count": ">= 64",
-    })
-
-
-class RealbooruPoolExtractor(booru.GelbooruPoolMixin, RealbooruExtractor):
-    """Extractor for image-pools from realbooru.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://realbooru.com/index.php?page=pool&s=show&id=1", {
-        "count": 3,
-    })
-
-
-class RealbooruPostExtractor(booru.PostMixin, RealbooruExtractor):
-    """Extractor for single images from realbooru.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://realbooru.com/index.php?page=post&s=view&id=668483", {
-        "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
-        "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
-        # "options": (("tags", True),),
-        # "keyword": {
-        #     "tags_general" : str,
-        #     "tags_metadata": str,
-        #     "tags_model"   : "jennifer_lawrence",
-        # },
-    })

gallery_dl/extractor/rule34.py

@@ -1,63 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2016-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract images from https://rule34.xxx/"""
-
-from . import booru
-
-
-class Rule34Extractor(booru.XmlParserMixin,
-                      booru.GelbooruPageMixin,
-                      booru.BooruExtractor):
-    """Base class for rule34 extractors"""
-    category = "rule34"
-    api_url = "https://rule34.xxx/index.php"
-    post_url = "https://rule34.xxx/index.php?page=post&s=view&id={}"
-    pool_url = "https://rule34.xxx/index.php?page=pool&s=show&id={}"
-    page_limit = 4000
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class Rule34TagExtractor(booru.TagMixin, Rule34Extractor):
-    """Extractor for images from rule34.xxx based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
-        "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
-        "pattern": r"https?://([^.]+\.)?rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
-        "count": 1,
-    })
-
-
-class Rule34PoolExtractor(booru.GelbooruPoolMixin, Rule34Extractor):
-    """Extractor for image-pools from rule34.xxx"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
-        "count": 3,
-    })
-
-
-class Rule34PostExtractor(booru.PostMixin, Rule34Extractor):
-    """Extractor for single images from rule34.xxx"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
-        "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
-        "options": (("tags", True),),
-        "keyword": {
-            "tags_artist": "danraku",
-            "tags_character": "kashima_(kantai_collection)",
-            "tags_copyright": "kantai_collection",
-            "tags_general": str,
-            "tags_metadata": str,
-        },
-    })

gallery_dl/extractor/safebooru.py

@@ -1,61 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2015-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract images from https://safebooru.org/"""
-
-from . import booru
-
-
-class SafebooruExtractor(booru.XmlParserMixin,
-                         booru.GelbooruPageMixin,
-                         booru.BooruExtractor):
-    """Base class for safebooru extractors"""
-    category = "safebooru"
-    api_url = "https://safebooru.org/index.php"
-    post_url = "https://safebooru.org/index.php?page=post&s=view&id={}"
-    pool_url = "https://safebooru.org/index.php?page=pool&s=show&id={}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class SafebooruTagExtractor(booru.TagMixin, SafebooruExtractor):
-    """Extractor for images from safebooru.org based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
-        "url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
-        "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
-    })
-
-
-class SafebooruPoolExtractor(booru.GelbooruPoolMixin, SafebooruExtractor):
-    """Extractor for image-pools from safebooru.org"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://safebooru.org/index.php?page=pool&s=show&id=11", {
-        "count": 5,
-    })
-
-
-class SafebooruPostExtractor(booru.PostMixin, SafebooruExtractor):
-    """Extractor for single images from safebooru.org"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
-        "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
-        "content": "93b293b27dabd198afafabbaf87c49863ac82f27",
-        "options": (("tags", True),),
-        "keyword": {
-            "tags_artist": "kawanakajima",
-            "tags_character": "heath_ledger ronald_mcdonald the_joker",
-            "tags_copyright": "dc_comics mcdonald's the_dark_knight",
-            "tags_general": str,
-        },
-    })
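With rule34.py, safebooru.py, and realbooru.py deleted, supporting another Gelbooru-0.2 site no longer requires a new module, only a new EXTRACTORS entry in booru.py. A hypothetical example (site name and URLs invented):

    # Hypothetical entry -- "examplebooru" is not a real site.
    EXTRACTORS["examplebooru"] = {
        "root": "https://booru.example.org",
        "test-tag": (
            ("https://booru.example.org/index.php?page=post&s=list"
             "&tags=example", {
                "count": ">= 1",
            }),
        ),
    }
    # generate_extractors() would then create ExamplebooruTagExtractor,
    # ExamplebooruPoolExtractor, and ExamplebooruPostExtractor automatically.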