[booru] add generalized extractors for *booru sites

similar to cc15fbe7
pull/1195/head
Mike Fährmann 4 years ago
parent 5f23441e12
commit a3a863fc13

gallery_dl/extractor/__init__.py
@@ -92,11 +92,8 @@ modules = [
     "pururin",
     "reactor",
     "readcomiconline",
-    "realbooru",
     "reddit",
     "redgifs",
-    "rule34",
-    "safebooru",
     "sankaku",
     "sankakucomplex",
     "seiga",
@@ -122,6 +119,7 @@ modules = [
     "xhamster",
     "xvideos",
     "yuki",
+    "booru",
     "moebooru",
     "foolfuuka",
     "foolslide",

gallery_dl/extractor/booru.py
@@ -1,247 +1,248 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2020 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Base classes for extractors for danbooru and co"""
+"""Extractors for *booru sites"""
 
-from .common import Extractor, Message
-from .. import text, exception
+from .common import Extractor, Message, generate_extractors
+from .. import text, util, exception
 from xml.etree import ElementTree
 import collections
-import datetime
-import operator
 import re
 
 
 class BooruExtractor(Extractor):
-    """Base class for all booru extractors"""
+    """Base class for *booru extractors"""
     basecategory = "booru"
     filename_fmt = "{category}_{id}_{md5}.{extension}"
-    api_url = ""
-    post_url = ""
-    per_page = 50
-    page_start = 1
-    page_limit = None
-    sort = False
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params = {}
-        self.extags = self.post_url and self.config("tags", False)
-
-    def skip(self, num):
-        pages = num // self.per_page
-        if self.page_limit and pages + self.page_start > self.page_limit:
-            pages = self.page_limit - self.page_start
-        self.page_start += pages
-        return pages * self.per_page
+    page_start = 0
+    per_page = 100
 
     def items(self):
-        yield Message.Version, 1
-        data = self.get_metadata()
-
-        self.reset_page()
-        while True:
-            images = self.parse_response(
-                self.request(self.api_url, params=self.params))
-
-            for image in images:
-                try:
-                    url = self.get_file_url(image)
-                except KeyError:
-                    continue
-                if url.startswith("/"):
-                    url = text.urljoin(self.api_url, url)
-                image.update(data)
-                text.nameext_from_url(url, image)
-                if self.extags:
-                    self.extended_tags(image)
-                yield Message.Directory, image
-                yield Message.Url, url, image
-
-            if len(images) < self.per_page:
-                return
-            self.update_page(image)
-
-    def reset_page(self):
-        """Initialize params to point to the first page"""
-        self.params["page"] = self.page_start
-
-    def update_page(self, data):
-        """Update params to point to the next page"""
-
-    def parse_response(self, response):
-        """Parse JSON API response"""
-        images = response.json()
-        if self.sort:
-            images.sort(key=operator.itemgetter("score", "id"),
-                        reverse=True)
-        return images
-
-    def get_metadata(self):
-        """Collect metadata for extractor-job"""
-        return {}
-
-    @staticmethod
-    def get_file_url(image):
-        return image["file_url"]
-
-    def extended_tags(self, image, page=None):
-        """Retrieve extended tag information"""
+        self.login()
+        extended_tags = self.config("tags", False)
+        data = self.metadata()
+        for post in self.posts():
+            try:
+                url = self._prepare_post(post, extended_tags)
+            except KeyError:
+                continue
+            post.update(data)
+            text.nameext_from_url(url, post)
+            yield Message.Directory, post
+            yield Message.Url, url, post
+
+    def skip(self, num):
+        pages = num // self.per_page
+        self.page_start += pages
+        return pages * self.per_page
+
+    def login(self):
+        """Login and set necessary cookies"""
+
+    def metadata(self):
+        """Return a dict with general metadata"""
+        return ()
+
+    def posts(self):
+        """Return an iterable with post objects"""
+        return ()
+
+    def _prepare_post(self, post, extended_tags=False):
+        url = post["file_url"]
+        if url[0] == "/":
+            url = self.root + url
+        if extended_tags:
+            self._fetch_extended_tags(post)
+        post["date"] = text.parse_datetime(
+            post["created_at"], "%a %b %d %H:%M:%S %z %Y")
+        return url
+
+    def _fetch_extended_tags(self, post, page=None):
         if not page:
-            url = self.post_url.format(image["id"])
+            url = "{}/index.php?page=post&s=view&id={}".format(
+                self.root, post["id"])
             page = self.request(url).text
-        tags = collections.defaultdict(list)
-        tags_html = text.extract(page, '<ul id="tag-', '</ul>')[0]
-        pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"']+)", re.S)
-        for tag_type, tag_name in pattern.findall(tags_html or ""):
-            tags[tag_type].append(text.unquote(tag_name))
-        for key, value in tags.items():
-            image["tags_" + key] = " ".join(value)
-
-
-class XmlParserMixin():
-    """Mixin for XML based API responses"""
-    def parse_response(self, response):
-        root = ElementTree.fromstring(response.text)
-        return [post.attrib for post in root]
-
-
-class MoebooruPageMixin():
-    """Pagination for Moebooru and Danbooru v1"""
-    def update_page(self, data):
-        if self.page_limit:
-            self.params["page"] = None
-            self.params["before_id"] = data["id"]
-        else:
-            self.params["page"] += 1
-
-
-class GelbooruPageMixin():
-    """Pagination for Gelbooru-like sites"""
-    page_start = 0
-
-    def reset_page(self):
-        self.params["pid"] = self.page_start
-
-    def update_page(self, data):
-        self.params["pid"] += 1
-
-
-class TagMixin():
-    """Extraction of images based on search-tags"""
-    subcategory = "tag"
-    directory_fmt = ("{category}", "{search_tags}")
-    archive_fmt = "t_{search_tags}_{id}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.tags = text.unquote(match.group("tags").replace("+", " "))
-        self.params["tags"] = self.tags
-        self.params["limit"] = self.per_page
-
-    def get_metadata(self):
-        return {"search_tags": self.tags}
-
-
-class PoolMixin():
-    """Extraction of image-pools"""
-    subcategory = "pool"
-    directory_fmt = ("{category}", "pool", "{pool}")
-    archive_fmt = "p_{pool}_{id}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.pool = match.group("pool")
-        self.params["tags"] = "pool:" + self.pool
-        self.params["limit"] = self.per_page
-
-    def get_metadata(self):
-        return {"pool": text.parse_int(self.pool)}
-
-
-class GelbooruPoolMixin(PoolMixin):
-    """Image-pool extraction for Gelbooru-like sites"""
-    per_page = 1
-
-    def get_metadata(self):
-        page = self.request(self.pool_url.format(self.pool)).text
-        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
-        if not name:
-            name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
-        if not name:
-            raise exception.NotFoundError("pool")
-        self.posts = list(text.extract_iter(
-            page, 'class="thumb" id="p', '"', pos))
-
-        return {
-            "pool": text.parse_int(self.pool),
-            "pool_name": text.unescape(name),
-            "count": len(self.posts),
-        }
-
-    def reset_page(self):
-        self.index = self.page_start
-        self.update_page(None)
-
-    def update_page(self, data):
-        try:
-            post = self.posts[self.index]
-            self.index += 1
-        except IndexError:
-            post = "0"
-        self.params["tags"] = "id:" + post
-
-
-class PostMixin():
-    """Extraction of a single image-post"""
-    subcategory = "post"
-    archive_fmt = "{id}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.post = match.group("post")
-        self.params["tags"] = "id:" + self.post
-
-
-class MoebooruPopularMixin():
-    """Extraction and metadata handling for Moebooru and Danbooru v1"""
-    subcategory = "popular"
-    directory_fmt = ("{category}", "popular", "{scale}", "{date}")
-    archive_fmt = "P_{scale[0]}_{date}_{id}"
-    page_start = None
-    sort = True
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update(text.parse_query(match.group("query")))
-        self.scale = match.group("scale")
-
-    def get_metadata(self, fmt="%Y-%m-%d"):
-        date = self.get_date() or datetime.date.today().isoformat()
-        scale = self.get_scale() or "day"
-
-        if scale == "week":
-            date = datetime.date.fromisoformat(date)
-            date = (date - datetime.timedelta(days=date.weekday())).isoformat()
-        elif scale == "month":
-            date = date[:-3]
-
-        return {"date": date, "scale": scale}
-
-    def get_date(self):
-        if "year" in self.params:
-            return "{:>04}-{:>02}-{:>02}".format(
-                self.params["year"],
-                self.params.get("month", "01"),
-                self.params.get("day", "01"))
-        return None
-
-    def get_scale(self):
-        if self.scale and self.scale.startswith("by_"):
-            return self.scale[3:]
-        return self.scale
+        html = text.extract(page, '<ul id="tag-', '</ul>')[0]
+        if html:
+            tags = collections.defaultdict(list)
+            pattern = re.compile(
+                r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)", re.S)
+            for tag_type, tag_name in pattern.findall(html):
+                tags[tag_type].append(text.unquote(tag_name))
+            for key, value in tags.items():
+                post["tags_" + key] = " ".join(value)
+
+    def _api_request(self, params):
+        url = self.root + "/index.php?page=dapi&s=post&q=index"
+        return ElementTree.fromstring(self.request(url, params=params).text)
+
+    def _pagination(self, params):
+        params["pid"] = self.page_start
+        params["limit"] = self.per_page
+
+        while True:
+            root = self._api_request(params)
+            for post in root:
+                yield post.attrib
+
+            if len(root) < self.per_page:
+                return
+            params["pid"] += 1
+
+
+class BooruPostExtractor(BooruExtractor):
+    subcategory = "post"
+    archive_fmt = "{id}"
+    pattern_fmt = r"/index\.php\?page=post&s=view&id=(\d+)"
+
+    def __init__(self, match):
+        BooruExtractor.__init__(self, match)
+        self.post_id = match.group(1)
+
+    def posts(self):
+        return self._pagination({"id": self.post_id})
+
+
+class BooruTagExtractor(BooruExtractor):
+    subcategory = "tag"
+    directory_fmt = ("{category}", "{search_tags}")
+    archive_fmt = "t_{search_tags}_{id}"
+    pattern_fmt = r"/index\.php\?page=post&s=list&tags=([^&#]+)"
+
+    def __init__(self, match):
+        BooruExtractor.__init__(self, match)
+        self.tags = text.unquote(match.group(1).replace("+", " "))
+
+    def metadata(self):
+        return {"search_tags": self.tags}
+
+    def posts(self):
+        return self._pagination({"tags" : self.tags})
+
+
+class BooruPoolExtractor(BooruExtractor):
+    subcategory = "pool"
+    directory_fmt = ("{category}", "pool", "{pool}")
+    archive_fmt = "p_{pool}_{id}"
+    pattern_fmt = r"/index\.php\?page=pool&s=show&id=(\d+)"
+
+    def __init__(self, match):
+        BooruExtractor.__init__(self, match)
+        self.pool_id = match.group(1)
+        self.post_ids = ()
+
+    def skip(self, num):
+        self.page_start += num
+        return num
+
+    def metadata(self):
+        url = "{}/index.php?page=pool&s=show&id={}".format(
+            self.root, self.pool_id)
+        page = self.request(url).text
+
+        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
+        if not name:
+            name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
+        if not name:
+            raise exception.NotFoundError("pool")
+        self.post_ids = text.extract_iter(
+            page, 'class="thumb" id="p', '"', pos)
+
+        return {
+            "pool": text.parse_int(self.pool_id),
+            "pool_name": text.unescape(name),
+        }
+
+    def posts(self):
+        params = {}
+        for params["id"] in util.advance(self.post_ids, self.page_start):
+            for post in self._api_request(params):
+                yield post.attrib
+
+
+EXTRACTORS = {
+    "rule34": {
+        "root": "https://rule34.xxx",
+        "test-tag": (
+            ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
+                "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
+                "pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
+                "count": 1,
+            }),
+        ),
+        "test-pool": (
+            ("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
+                "count": 3,
+            }),
+        ),
+        "test-post": (
+            ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
+                "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
+                "options": (("tags", True),),
+                "keyword": {
+                    "tags_artist": "danraku",
+                    "tags_character": "kashima_(kantai_collection)",
+                    "tags_copyright": "kantai_collection",
+                    "tags_general": str,
+                    "tags_metadata": str,
+                },
+            }),
+        ),
+    },
+    "safebooru": {
+        "root": "https://safebooru.org",
+        "test-tag": (
+            ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
+                "url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
+                "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
+            }),
+        ),
+        "test-pool": (
+            ("https://safebooru.org/index.php?page=pool&s=show&id=11", {
+                "count": 5,
+            }),
+        ),
+        "test-post": (
+            ("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
+                "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
+                "content": "93b293b27dabd198afafabbaf87c49863ac82f27",
+                "options": (("tags", True),),
+                "keyword": {
+                    "tags_artist": "kawanakajima",
+                    "tags_character": "heath_ledger ronald_mcdonald the_joker",
+                    "tags_copyright": "dc_comics mcdonald's the_dark_knight",
+                    "tags_general": str,
+                },
+            }),
+        ),
+    },
+    "realbooru": {
+        "root": "https://realbooru.com",
+        "test-tag": (
+            ("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
+                "count": ">= 64",
+            }),
+        ),
+        "test-pool": (
+            ("https://realbooru.com/index.php?page=pool&s=show&id=1", {
+                "count": 3,
+            }),
+        ),
+        "test-post": (
+            ("https://realbooru.com/index.php?page=post&s=view&id=668483", {
+                "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
+                "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
+            }),
+        ),
+    },
+}
+
+generate_extractors(EXTRACTORS, globals(), (
+    BooruTagExtractor,
+    BooruPoolExtractor,
+    BooruPostExtractor,
+))
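
The EXTRACTORS dict is now the entire per-site surface: a root URL plus optional test definitions, with URL patterns presumably built by generate_extractors() from the site's root and each base class's pattern_fmt. Adding one more Gelbooru-style site would, under this scheme, look like the sketch below (the "tbib" category, its root URL and the test URL are hypothetical, purely for illustration):

# Hypothetical entry, same shape as "rule34"/"safebooru"/"realbooru" above
EXTRACTORS["tbib"] = {
    "root": "https://tbib.org",  # assumed site root
    "test-tag": (
        ("https://tbib.org/index.php?page=post&s=list&tags=example", {
            "count": ">= 1",
        }),
    ),
}
# generate_extractors(...) would then synthesize TbibTagExtractor,
# TbibPoolExtractor and TbibPostExtractor from the three base classes.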

gallery_dl/extractor/gelbooru.py
@@ -6,98 +6,27 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images from https://gelbooru.com/"""
+"""Extractors for https://gelbooru.com/"""
 
 from . import booru
-from .common import Message
-from .. import text
+from .. import text, exception
 
 
-class GelbooruExtractor(booru.XmlParserMixin,
-                        booru.GelbooruPageMixin,
-                        booru.BooruExtractor):
+class GelbooruBase():
     """Base class for gelbooru extractors"""
     category = "gelbooru"
-    api_url = "https://gelbooru.com/index.php"
-    post_url = "https://gelbooru.com/index.php?page=post&s=view&id={}"
-    pool_url = "https://gelbooru.com/index.php?page=pool&s=show&id={}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.use_api = self.config("api", True)
-        if self.use_api:
-            self.params.update({"page": "dapi", "s": "post", "q": "index"})
-        else:
-            self.items = self.items_noapi
-            self.session.cookies["fringeBenefits"] = "yup"
-            self.per_page = 42
-
-    @staticmethod
-    def get_file_url(image):
-        url = image["file_url"]
+    root = "https://gelbooru.com"
+
+    def _prepare_post(self, post, extended_tags=False):
+        url = booru.BooruExtractor._prepare_post(self, post, extended_tags)
         if url.startswith("https://mp4.gelbooru.com/"):
-            ihash = image["md5"]
+            md5 = post["md5"]
             return "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
-                ihash[0:2], ihash[2:4], ihash)
+                md5[0:2], md5[2:4], md5)
         return url
 
-    def items_noapi(self):
-        yield Message.Version, 1
-        data = self.get_metadata()
-
-        for post in self.get_posts():
-            post = self.get_post_data(post)
-            url = post["file_url"]
-            post.update(data)
-            text.nameext_from_url(url, post)
-            yield Message.Directory, post
-            yield Message.Url, url, post
-
-    def get_posts(self):
-        """Return an iterable containing all relevant post objects"""
-        url = "https://gelbooru.com/index.php?page=post&s=list"
-        params = {
-            "tags": self.params["tags"],
-            "pid" : self.page_start * self.per_page
-        }
-
-        while True:
-            page = self.request(url, params=params).text
-            ids = list(text.extract_iter(page, '<span id="s', '"'))
-            yield from ids
-            if len(ids) < self.per_page:
-                return
-            params["pid"] += self.per_page
-
-    def get_post_data(self, post_id):
-        """Extract metadata of a single post"""
-        page = self.request(self.post_url.format(post_id)).text
-        data = text.extract_all(page, (
-            (None        , '<meta name="keywords"', ''),
-            ("tags"      , ' imageboard- ', '"'),
-            ("id"        , '<li>Id: ', '<'),
-            ("created_at", '<li>Posted: ', '<'),
-            ("width"     , '<li>Size: ', 'x'),
-            ("height"    , '', '<'),
-            ("source"    , '<li>Source: <a href="', '"'),
-            ("rating"    , '<li>Rating: ', '<'),
-            (None        , '<li>Score: ', ''),
-            ("score"     , '>', '<'),
-            ("file_url"  , '<li><a href="http', '"'),
-            ("change"    , ' id="lupdated" value="', '"'),
-        ))[0]
-        data["file_url"] = "http" + data["file_url"].replace("m//", "m/", 1)
-        data["md5"] = data["file_url"].rpartition("/")[2].partition(".")[0]
-        data["rating"] = (data["rating"] or "?")[0].lower()
-        data["tags"] = " ".join(
-            [tag.replace(" ", "_") for tag in data["tags"].split(", ")])
-        if self.extags:
-            self.extended_tags(data, page)
-        return data
-
-
-class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
+
+class GelbooruTagExtractor(GelbooruBase, booru.BooruTagExtractor):
     """Extractor for images from gelbooru.com based on search-tags"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
@@ -112,7 +41,7 @@ class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
     )
 
 
-class GelbooruPoolExtractor(booru.PoolMixin, GelbooruExtractor):
+class GelbooruPoolExtractor(GelbooruBase, booru.BooruPoolExtractor):
     """Extractor for image-pools from gelbooru.com"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=pool&s=show&id=(?P<pool>\d+)")
@@ -126,8 +55,23 @@ class GelbooruPoolExtractor(booru.PoolMixin, GelbooruExtractor):
         }),
     )
 
+    def metadata(self):
+        url = "{}/index.php?page=pool&s=show&id={}".format(
+            self.root, self.pool_id)
+        page = self.request(url).text
+
+        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
+        if not name:
+            raise exception.NotFoundError("pool")
+        self.post_ids = text.extract_iter(page, 'class="" id="p', '"', pos)
+
+        return {
+            "pool": text.parse_int(self.pool_id),
+            "pool_name": text.unescape(name),
+        }
+
 
-class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor):
+class GelbooruPostExtractor(GelbooruBase, booru.BooruPostExtractor):
    """Extractor for single images from gelbooru.com"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=post&s=view&id=(?P<post>\d+)")
@@ -135,6 +79,3 @@ class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor):
         "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
         "count": 1,
     })
-
-    def get_posts(self):
-        return (self.post,)
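
GelbooruBase._prepare_post works around mp4.gelbooru.com video URLs by rebuilding them from the post's md5, which also encodes the two-level directory sharding used by gelbooru's image servers. Worked through with an illustrative (not real) md5:

# illustrative md5, not taken from a real post
md5 = "d41d8cd98f00b204e9800998ecf8427e"
url = "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
    md5[0:2], md5[2:4], md5)
# url == "https://img2.gelbooru.com/images/d4/1d/d41d8cd98f00b204e9800998ecf8427e.webm"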

gallery_dl/extractor/realbooru.py
@@ -1,59 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://realbooru.com/"""
-
-from . import booru
-
-
-class RealbooruExtractor(booru.XmlParserMixin,
-                         booru.GelbooruPageMixin,
-                         booru.BooruExtractor):
-    """Base class for realbooru extractors"""
-    category = "realbooru"
-    api_url = "https://realbooru.com/index.php"
-    post_url = "https://realbooru.com/index.php?page=post&s=view&id={}"
-    pool_url = "https://realbooru.com/index.php?page=pool&s=show&id={}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class RealbooruTagExtractor(booru.TagMixin, RealbooruExtractor):
-    """Extractor for images from realbooru.com based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
-        "count": ">= 64",
-    })
-
-
-class RealbooruPoolExtractor(booru.GelbooruPoolMixin, RealbooruExtractor):
-    """Extractor for image-pools from realbooru.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://realbooru.com/index.php?page=pool&s=show&id=1", {
-        "count": 3,
-    })
-
-
-class RealbooruPostExtractor(booru.PostMixin, RealbooruExtractor):
-    """Extractor for single images from realbooru.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://realbooru.com/index.php?page=post&s=view&id=668483", {
-        "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
-        "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
-        # "options": (("tags", True),),
-        # "keyword": {
-        #     "tags_general" : str,
-        #     "tags_metadata": str,
-        #     "tags_model"   : "jennifer_lawrence",
-        # },
-    })

gallery_dl/extractor/rule34.py
@@ -1,63 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2016-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract images from https://rule34.xxx/"""
-
-from . import booru
-
-
-class Rule34Extractor(booru.XmlParserMixin,
-                      booru.GelbooruPageMixin,
-                      booru.BooruExtractor):
-    """Base class for rule34 extractors"""
-    category = "rule34"
-    api_url = "https://rule34.xxx/index.php"
-    post_url = "https://rule34.xxx/index.php?page=post&s=view&id={}"
-    pool_url = "https://rule34.xxx/index.php?page=pool&s=show&id={}"
-    page_limit = 4000
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class Rule34TagExtractor(booru.TagMixin, Rule34Extractor):
-    """Extractor for images from rule34.xxx based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
-        "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
-        "pattern": r"https?://([^.]+\.)?rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
-        "count": 1,
-    })
-
-
-class Rule34PoolExtractor(booru.GelbooruPoolMixin, Rule34Extractor):
-    """Extractor for image-pools from rule34.xxx"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
-        "count": 3,
-    })
-
-
-class Rule34PostExtractor(booru.PostMixin, Rule34Extractor):
-    """Extractor for single images from rule34.xxx"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
-        "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
-        "options": (("tags", True),),
-        "keyword": {
-            "tags_artist": "danraku",
-            "tags_character": "kashima_(kantai_collection)",
-            "tags_copyright": "kantai_collection",
-            "tags_general": str,
-            "tags_metadata": str,
-        },
-    })

gallery_dl/extractor/safebooru.py
@@ -1,61 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2015-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract images from https://safebooru.org/"""
-
-from . import booru
-
-
-class SafebooruExtractor(booru.XmlParserMixin,
-                         booru.GelbooruPageMixin,
-                         booru.BooruExtractor):
-    """Base class for safebooru extractors"""
-    category = "safebooru"
-    api_url = "https://safebooru.org/index.php"
-    post_url = "https://safebooru.org/index.php?page=post&s=view&id={}"
-    pool_url = "https://safebooru.org/index.php?page=pool&s=show&id={}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class SafebooruTagExtractor(booru.TagMixin, SafebooruExtractor):
-    """Extractor for images from safebooru.org based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
-        "url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
-        "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
-    })
-
-
-class SafebooruPoolExtractor(booru.GelbooruPoolMixin, SafebooruExtractor):
-    """Extractor for image-pools from safebooru.org"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://safebooru.org/index.php?page=pool&s=show&id=11", {
-        "count": 5,
-    })
-
-
-class SafebooruPostExtractor(booru.PostMixin, SafebooruExtractor):
-    """Extractor for single images from safebooru.org"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
-        "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
-        "content": "93b293b27dabd198afafabbaf87c49863ac82f27",
-        "options": (("tags", True),),
-        "keyword": {
-            "tags_artist": "kawanakajima",
-            "tags_character": "heath_ledger ronald_mcdonald the_joker",
-            "tags_copyright": "dc_comics mcdonald's the_dark_knight",
-            "tags_general": str,
-        },
-    })
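
The test URLs above double as usage examples. From Python, one of them can be run end to end roughly as follows — a minimal sketch assuming the DownloadJob entry point and the config.set() signature of this gallery-dl era; the "tags" option is what the tests enable via their "options" field:

from gallery_dl import config, job

# enable the tags_artist/tags_character/... categories produced by
# BooruExtractor._fetch_extended_tags()
config.set(("extractor",), "tags", True)

job.DownloadJob(
    "https://safebooru.org/index.php?page=post&s=view&id=1169132").run()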