[booru] add generalized extractors for *booru sites

similar to cc15fbe7
pull/1195/head
Mike Fährmann 4 years ago
parent 5f23441e12
commit a3a863fc13
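The interesting piece of this commit is generate_extractors() from common.py (introduced in cc15fbe7, which this commit is "similar to"): instead of one module per site, the new booru.py feeds a data dictionary to that helper and lets it build the per-site extractor classes. Below is a rough sketch of what the helper presumably does, inferred only from its call site and the class attributes (subcategory, pattern_fmt, the "test-*" keys) visible in this diff; the names and details are assumptions, not gallery-dl's actual implementation.

    # Sketch only: inferred from the generate_extractors(EXTRACTORS, ...) call
    # in the new booru.py; not the actual code in gallery_dl/extractor/common.py.
    import re

    def generate_extractors(extractor_data, symtable, classes):
        """Generate one subclass per (site, base class) pair and export it."""
        for category, info in extractor_data.items():
            root = info["root"]
            domain = re.escape(root.partition("://")[2])   # e.g. r"rule34\.xxx"
            for cls in classes:
                # "rule34" + "tag" -> "Rule34TagExtractor"
                name = (category.capitalize()
                        + cls.subcategory.capitalize() + "Extractor")
                symtable[name] = type(name, (cls,), {
                    "category": category,
                    "root"    : root,
                    # site prefix plus the per-subcategory URL suffix
                    "pattern" : r"(?:https?://)?(?:www\.)?"
                                + domain + cls.pattern_fmt,
                    "test"    : info.get("test-" + cls.subcategory),
                })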

gallery_dl/extractor/__init__.py

@@ -92,11 +92,8 @@ modules = [
     "pururin",
     "reactor",
     "readcomiconline",
-    "realbooru",
     "reddit",
     "redgifs",
-    "rule34",
-    "safebooru",
     "sankaku",
     "sankakucomplex",
     "seiga",
@@ -122,6 +119,7 @@ modules = [
     "xhamster",
     "xvideos",
     "yuki",
+    "booru",
     "moebooru",
     "foolfuuka",
     "foolslide",

gallery_dl/extractor/booru.py

@@ -1,247 +1,248 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2020 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Base classes for extractors for danbooru and co"""
+"""Extractors for *booru sites"""
 
-from .common import Extractor, Message
-from .. import text, exception
+from .common import Extractor, Message, generate_extractors
+from .. import text, util, exception
 from xml.etree import ElementTree
 import collections
-import datetime
-import operator
 import re
 
 
 class BooruExtractor(Extractor):
-    """Base class for all booru extractors"""
+    """Base class for *booru extractors"""
     basecategory = "booru"
     filename_fmt = "{category}_{id}_{md5}.{extension}"
-    api_url = ""
-    post_url = ""
-    per_page = 50
-    page_start = 1
-    page_limit = None
-    sort = False
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params = {}
-        self.extags = self.post_url and self.config("tags", False)
+    page_start = 0
+    per_page = 100
+
+    def items(self):
+        self.login()
+        extended_tags = self.config("tags", False)
+        data = self.metadata()
+        for post in self.posts():
+            try:
+                url = self._prepare_post(post, extended_tags)
+            except KeyError:
+                continue
+            post.update(data)
+            text.nameext_from_url(url, post)
+            yield Message.Directory, post
+            yield Message.Url, url, post
 
     def skip(self, num):
         pages = num // self.per_page
-        if self.page_limit and pages + self.page_start > self.page_limit:
-            pages = self.page_limit - self.page_start
         self.page_start += pages
         return pages * self.per_page
 
-    def items(self):
-        yield Message.Version, 1
-        data = self.get_metadata()
-
-        self.reset_page()
-        while True:
-            images = self.parse_response(
-                self.request(self.api_url, params=self.params))
-
-            for image in images:
-                try:
-                    url = self.get_file_url(image)
-                except KeyError:
-                    continue
-                if url.startswith("/"):
-                    url = text.urljoin(self.api_url, url)
-                image.update(data)
-                text.nameext_from_url(url, image)
-                if self.extags:
-                    self.extended_tags(image)
-                yield Message.Directory, image
-                yield Message.Url, url, image
-
-            if len(images) < self.per_page:
-                return
-            self.update_page(image)
-
-    def reset_page(self):
-        """Initialize params to point to the first page"""
-        self.params["page"] = self.page_start
-
-    def update_page(self, data):
-        """Update params to point to the next page"""
-
-    def parse_response(self, response):
-        """Parse JSON API response"""
-        images = response.json()
-        if self.sort:
-            images.sort(key=operator.itemgetter("score", "id"),
-                        reverse=True)
-        return images
-
-    def get_metadata(self):
-        """Collect metadata for extractor-job"""
-        return {}
-
-    @staticmethod
-    def get_file_url(image):
-        return image["file_url"]
-
-    def extended_tags(self, image, page=None):
-        """Retrieve extended tag information"""
-        if not page:
-            url = self.post_url.format(image["id"])
-            page = self.request(url).text
-        tags = collections.defaultdict(list)
-        tags_html = text.extract(page, '<ul id="tag-', '</ul>')[0]
-        pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"']+)", re.S)
-        for tag_type, tag_name in pattern.findall(tags_html or ""):
-            tags[tag_type].append(text.unquote(tag_name))
-        for key, value in tags.items():
-            image["tags_" + key] = " ".join(value)
-
-
-class XmlParserMixin():
-    """Mixin for XML based API responses"""
-    def parse_response(self, response):
-        root = ElementTree.fromstring(response.text)
-        return [post.attrib for post in root]
-
-
-class MoebooruPageMixin():
-    """Pagination for Moebooru and Danbooru v1"""
-    def update_page(self, data):
-        if self.page_limit:
-            self.params["page"] = None
-            self.params["before_id"] = data["id"]
-        else:
-            self.params["page"] += 1
-
-
-class GelbooruPageMixin():
-    """Pagination for Gelbooru-like sites"""
-    page_start = 0
-
-    def reset_page(self):
-        self.params["pid"] = self.page_start
-
-    def update_page(self, data):
-        self.params["pid"] += 1
-
-
-class TagMixin():
-    """Extraction of images based on search-tags"""
+    def login(self):
+        """Login and set necessary cookies"""
+
+    def metadata(self):
+        """Return a dict with general metadata"""
+        return ()
+
+    def posts(self):
+        """Return an iterable with post objects"""
+        return ()
+
+    def _prepare_post(self, post, extended_tags=False):
+        url = post["file_url"]
+        if url[0] == "/":
+            url = self.root + url
+        if extended_tags:
+            self._fetch_extended_tags(post)
+        post["date"] = text.parse_datetime(
+            post["created_at"], "%a %b %d %H:%M:%S %z %Y")
+        return url
+
+    def _fetch_extended_tags(self, post, page=None):
+        if not page:
+            url = "{}/index.php?page=post&s=view&id={}".format(
+                self.root, post["id"])
+            page = self.request(url).text
+        html = text.extract(page, '<ul id="tag-', '</ul>')[0]
+        if html:
+            tags = collections.defaultdict(list)
+            pattern = re.compile(
+                r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)", re.S)
+            for tag_type, tag_name in pattern.findall(html):
+                tags[tag_type].append(text.unquote(tag_name))
+            for key, value in tags.items():
+                post["tags_" + key] = " ".join(value)
+
+    def _api_request(self, params):
+        url = self.root + "/index.php?page=dapi&s=post&q=index"
+        return ElementTree.fromstring(self.request(url, params=params).text)
+
+    def _pagination(self, params):
+        params["pid"] = self.page_start
+        params["limit"] = self.per_page
+
+        while True:
+            root = self._api_request(params)
+            for post in root:
+                yield post.attrib
+            if len(root) < self.per_page:
+                return
+            params["pid"] += 1
+
+
+class BooruPostExtractor(BooruExtractor):
+    subcategory = "post"
+    archive_fmt = "{id}"
+    pattern_fmt = r"/index\.php\?page=post&s=view&id=(\d+)"
+
+    def __init__(self, match):
+        BooruExtractor.__init__(self, match)
+        self.post_id = match.group(1)
+
+    def posts(self):
+        return self._pagination({"id": self.post_id})
+
+
+class BooruTagExtractor(BooruExtractor):
     subcategory = "tag"
     directory_fmt = ("{category}", "{search_tags}")
     archive_fmt = "t_{search_tags}_{id}"
+    pattern_fmt = r"/index\.php\?page=post&s=list&tags=([^&#]+)"
 
     def __init__(self, match):
-        super().__init__(match)
-        self.tags = text.unquote(match.group("tags").replace("+", " "))
-        self.params["tags"] = self.tags
-        self.params["limit"] = self.per_page
+        BooruExtractor.__init__(self, match)
+        self.tags = text.unquote(match.group(1).replace("+", " "))
 
-    def get_metadata(self):
+    def metadata(self):
         return {"search_tags": self.tags}
 
+    def posts(self):
+        return self._pagination({"tags" : self.tags})
 
-class PoolMixin():
-    """Extraction of image-pools"""
+
+class BooruPoolExtractor(BooruExtractor):
     subcategory = "pool"
     directory_fmt = ("{category}", "pool", "{pool}")
     archive_fmt = "p_{pool}_{id}"
+    pattern_fmt = r"/index\.php\?page=pool&s=show&id=(\d+)"
 
     def __init__(self, match):
-        super().__init__(match)
-        self.pool = match.group("pool")
-        self.params["tags"] = "pool:" + self.pool
-        self.params["limit"] = self.per_page
-
-    def get_metadata(self):
-        return {"pool": text.parse_int(self.pool)}
-
-    def skip(self, num):
-        self.page_start += num
-        return num
-
-
-class GelbooruPoolMixin(PoolMixin):
-    """Image-pool extraction for Gelbooru-like sites"""
-    per_page = 1
-
-    def get_metadata(self):
-        page = self.request(self.pool_url.format(self.pool)).text
-        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
-        if not name:
-            name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
+        BooruExtractor.__init__(self, match)
+        self.pool_id = match.group(1)
+        self.post_ids = ()
+
+    def metadata(self):
+        url = "{}/index.php?page=pool&s=show&id={}".format(
+            self.root, self.pool_id)
+        page = self.request(url).text
+        name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
         if not name:
             raise exception.NotFoundError("pool")
-        self.posts = list(text.extract_iter(
-            page, 'class="thumb" id="p', '"', pos))
+        self.post_ids = text.extract_iter(
+            page, 'class="thumb" id="p', '"', pos)
         return {
-            "pool": text.parse_int(self.pool),
+            "pool": text.parse_int(self.pool_id),
             "pool_name": text.unescape(name),
-            "count": len(self.posts),
         }
 
-    def reset_page(self):
-        self.index = self.page_start
-        self.update_page(None)
-
-    def update_page(self, data):
-        try:
-            post = self.posts[self.index]
-            self.index += 1
-        except IndexError:
-            post = "0"
-        self.params["tags"] = "id:" + post
-
-
-class PostMixin():
-    """Extraction of a single image-post"""
-    subcategory = "post"
-    archive_fmt = "{id}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.post = match.group("post")
-        self.params["tags"] = "id:" + self.post
-
-
-class MoebooruPopularMixin():
-    """Extraction and metadata handling for Moebooru and Danbooru v1"""
-    subcategory = "popular"
-    directory_fmt = ("{category}", "popular", "{scale}", "{date}")
-    archive_fmt = "P_{scale[0]}_{date}_{id}"
-    page_start = None
-    sort = True
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update(text.parse_query(match.group("query")))
-        self.scale = match.group("scale")
-
-    def get_metadata(self, fmt="%Y-%m-%d"):
-        date = self.get_date() or datetime.date.today().isoformat()
-        scale = self.get_scale() or "day"
-
-        if scale == "week":
-            date = datetime.date.fromisoformat(date)
-            date = (date - datetime.timedelta(days=date.weekday())).isoformat()
-        elif scale == "month":
-            date = date[:-3]
-
-        return {"date": date, "scale": scale}
-
-    def get_date(self):
-        if "year" in self.params:
-            return "{:>04}-{:>02}-{:>02}".format(
-                self.params["year"],
-                self.params.get("month", "01"),
-                self.params.get("day", "01"))
-        return None
-
-    def get_scale(self):
-        if self.scale and self.scale.startswith("by_"):
-            return self.scale[3:]
-        return self.scale
+    def posts(self):
+        params = {}
+        for params["id"] in util.advance(self.post_ids, self.page_start):
+            for post in self._api_request(params):
+                yield post.attrib
+
+
+EXTRACTORS = {
+    "rule34": {
+        "root": "https://rule34.xxx",
+        "test-tag": (
+            ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
+                "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
+                "pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
+                "count": 1,
+            }),
+        ),
+        "test-pool": (
+            ("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
+                "count": 3,
+            }),
+        ),
+        "test-post": (
+            ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
+                "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
+                "options": (("tags", True),),
+                "keyword": {
+                    "tags_artist": "danraku",
+                    "tags_character": "kashima_(kantai_collection)",
+                    "tags_copyright": "kantai_collection",
+                    "tags_general": str,
+                    "tags_metadata": str,
+                },
+            }),
+        ),
+    },
+    "safebooru": {
+        "root": "https://safebooru.org",
+        "test-tag": (
+            ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
+                "url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
+                "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
+            }),
+        ),
+        "test-pool": (
+            ("https://safebooru.org/index.php?page=pool&s=show&id=11", {
+                "count": 5,
+            }),
+        ),
+        "test-post": (
+            ("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
+                "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
+                "content": "93b293b27dabd198afafabbaf87c49863ac82f27",
+                "options": (("tags", True),),
+                "keyword": {
+                    "tags_artist": "kawanakajima",
+                    "tags_character": "heath_ledger ronald_mcdonald the_joker",
+                    "tags_copyright": "dc_comics mcdonald's the_dark_knight",
+                    "tags_general": str,
+                },
+            }),
+        ),
+    },
+    "realbooru": {
+        "root": "https://realbooru.com",
+        "test-tag": (
+            ("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
+                "count": ">= 64",
+            }),
+        ),
+        "test-pool": (
+            ("https://realbooru.com/index.php?page=pool&s=show&id=1", {
+                "count": 3,
+            }),
+        ),
+        "test-post": (
+            ("https://realbooru.com/index.php?page=post&s=view&id=668483", {
+                "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
+                "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
+            }),
+        ),
+    },
+}
+
+generate_extractors(EXTRACTORS, globals(), (
+    BooruTagExtractor,
+    BooruPoolExtractor,
+    BooruPostExtractor,
+))
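All three sites run the same Gelbooru 0.2 codebase, whose "dapi" endpoint returns posts as XML attributes; that is why _api_request() can hand ElementTree elements to _pagination(), which simply yields post.attrib. A minimal illustration of that data flow, with invented attribute values:

    # Illustrative Gelbooru-0.2 "dapi" response; attribute values are invented.
    from xml.etree import ElementTree

    sample = """\
    <posts count="1" offset="0">
      <post id="1995545" md5="0123456789abcdef0123456789abcdef"
            file_url="https://rule34.xxx/images/3190/0123456789abcdef.jpg"
            created_at="Wed Oct 21 07:28:00 +0000 2020"
            tags="danraku kashima_(kantai_collection)" score="10"/>
    </posts>"""

    root = ElementTree.fromstring(sample)
    for post in root:                # what _pagination() iterates over
        attrib = post.attrib         # a plain dict -> becomes the 'post' object
        print(attrib["id"], attrib["file_url"])
        # 'created_at' matches the "%a %b %d %H:%M:%S %z %Y" format
        # that _prepare_post() feeds to text.parse_datetime()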

gallery_dl/extractor/gelbooru.py

@@ -6,98 +6,27 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images from https://gelbooru.com/"""
+"""Extractors for https://gelbooru.com/"""
 
 from . import booru
-from .common import Message
-from .. import text
+from .. import text, exception
 
 
-class GelbooruExtractor(booru.XmlParserMixin,
-                        booru.GelbooruPageMixin,
-                        booru.BooruExtractor):
+class GelbooruBase():
     """Base class for gelbooru extractors"""
     category = "gelbooru"
-    api_url = "https://gelbooru.com/index.php"
-    post_url = "https://gelbooru.com/index.php?page=post&s=view&id={}"
-    pool_url = "https://gelbooru.com/index.php?page=pool&s=show&id={}"
+    root = "https://gelbooru.com"
 
-    def __init__(self, match):
-        super().__init__(match)
-        self.use_api = self.config("api", True)
-        if self.use_api:
-            self.params.update({"page": "dapi", "s": "post", "q": "index"})
-        else:
-            self.items = self.items_noapi
-            self.session.cookies["fringeBenefits"] = "yup"
-            self.per_page = 42
-
-    @staticmethod
-    def get_file_url(image):
-        url = image["file_url"]
+    def _prepare_post(self, post, extended_tags=False):
+        url = booru.BooruExtractor._prepare_post(self, post, extended_tags)
         if url.startswith("https://mp4.gelbooru.com/"):
-            ihash = image["md5"]
+            md5 = post["md5"]
             return "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
-                ihash[0:2], ihash[2:4], ihash)
+                md5[0:2], md5[2:4], md5)
         return url
 
-    def items_noapi(self):
-        yield Message.Version, 1
-        data = self.get_metadata()
-
-        for post in self.get_posts():
-            post = self.get_post_data(post)
-            url = post["file_url"]
-            post.update(data)
-            text.nameext_from_url(url, post)
-            yield Message.Directory, post
-            yield Message.Url, url, post
-
-    def get_posts(self):
-        """Return an iterable containing all relevant post objects"""
-        url = "https://gelbooru.com/index.php?page=post&s=list"
-        params = {
-            "tags": self.params["tags"],
-            "pid" : self.page_start * self.per_page
-        }
-
-        while True:
-            page = self.request(url, params=params).text
-            ids = list(text.extract_iter(page, '<span id="s', '"'))
-            yield from ids
-            if len(ids) < self.per_page:
-                return
-            params["pid"] += self.per_page
-
-    def get_post_data(self, post_id):
-        """Extract metadata of a single post"""
-        page = self.request(self.post_url.format(post_id)).text
-        data = text.extract_all(page, (
-            (None        , '<meta name="keywords"', ''),
-            ("tags"      , ' imageboard- ', '"'),
-            ("id"        , '<li>Id: ', '<'),
-            ("created_at", '<li>Posted: ', '<'),
-            ("width"     , '<li>Size: ', 'x'),
-            ("height"    , '', '<'),
-            ("source"    , '<li>Source: <a href="', '"'),
-            ("rating"    , '<li>Rating: ', '<'),
-            (None        , '<li>Score: ', ''),
-            ("score"     , '>', '<'),
-            ("file_url"  , '<li><a href="http', '"'),
-            ("change"    , ' id="lupdated" value="', '"'),
-        ))[0]
-        data["file_url"] = "http" + data["file_url"].replace("m//", "m/", 1)
-        data["md5"] = data["file_url"].rpartition("/")[2].partition(".")[0]
-        data["rating"] = (data["rating"] or "?")[0].lower()
-        data["tags"] = " ".join(
-            [tag.replace(" ", "_") for tag in data["tags"].split(", ")])
-        if self.extags:
-            self.extended_tags(data, page)
-        return data
-
-
-class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
+
+class GelbooruTagExtractor(GelbooruBase, booru.BooruTagExtractor):
     """Extractor for images from gelbooru.com based on search-tags"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
@@ -112,7 +41,7 @@ class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
     )
 
 
-class GelbooruPoolExtractor(booru.PoolMixin, GelbooruExtractor):
+class GelbooruPoolExtractor(GelbooruBase, booru.BooruPoolExtractor):
     """Extractor for image-pools from gelbooru.com"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=pool&s=show&id=(?P<pool>\d+)")
@@ -126,8 +55,23 @@ class GelbooruPoolExtractor(booru.PoolMixin, GelbooruExtractor):
         }),
     )
 
+    def metadata(self):
+        url = "{}/index.php?page=pool&s=show&id={}".format(
+            self.root, self.pool_id)
+        page = self.request(url).text
+        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
+        if not name:
+            raise exception.NotFoundError("pool")
+        self.post_ids = text.extract_iter(page, 'class="" id="p', '"', pos)
+        return {
+            "pool": text.parse_int(self.pool_id),
+            "pool_name": text.unescape(name),
+        }
 
-class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor):
+
+class GelbooruPostExtractor(GelbooruBase, booru.BooruPostExtractor):
     """Extractor for single images from gelbooru.com"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
               r"\?page=post&s=view&id=(?P<post>\d+)")
@@ -135,6 +79,3 @@ class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor):
         "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
         "count": 1,
     })
-
-    def get_posts(self):
-        return (self.post,)
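The one site-specific quirk kept in gelbooru.py is the video URL rewrite in GelbooruBase._prepare_post(): when the API reports a file on mp4.gelbooru.com, the extractor rebuilds the URL from the post's md5 under img2.gelbooru.com instead. The path scheme, shown with an invented hash:

    # Invented md5 value; only the two-level md5 path scheme matters here.
    md5 = "0123456789abcdef0123456789abcdef"
    url = "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
        md5[0:2], md5[2:4], md5)
    # -> https://img2.gelbooru.com/images/01/23/0123456789abcdef0123456789abcdef.webm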

gallery_dl/extractor/realbooru.py

@@ -1,59 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://realbooru.com/"""
-
-from . import booru
-
-
-class RealbooruExtractor(booru.XmlParserMixin,
-                         booru.GelbooruPageMixin,
-                         booru.BooruExtractor):
-    """Base class for realbooru extractors"""
-    category = "realbooru"
-    api_url = "https://realbooru.com/index.php"
-    post_url = "https://realbooru.com/index.php?page=post&s=view&id={}"
-    pool_url = "https://realbooru.com/index.php?page=pool&s=show&id={}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class RealbooruTagExtractor(booru.TagMixin, RealbooruExtractor):
-    """Extractor for images from realbooru.com based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
-        "count": ">= 64",
-    })
-
-
-class RealbooruPoolExtractor(booru.GelbooruPoolMixin, RealbooruExtractor):
-    """Extractor for image-pools from realbooru.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://realbooru.com/index.php?page=pool&s=show&id=1", {
-        "count": 3,
-    })
-
-
-class RealbooruPostExtractor(booru.PostMixin, RealbooruExtractor):
-    """Extractor for single images from realbooru.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://realbooru.com/index.php?page=post&s=view&id=668483", {
-        "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
-        "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
-        # "options": (("tags", True),),
-        # "keyword": {
-        #     "tags_general" : str,
-        #     "tags_metadata": str,
-        #     "tags_model"   : "jennifer_lawrence",
-        # },
-    })

gallery_dl/extractor/rule34.py

@@ -1,63 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2016-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract images from https://rule34.xxx/"""
-
-from . import booru
-
-
-class Rule34Extractor(booru.XmlParserMixin,
-                      booru.GelbooruPageMixin,
-                      booru.BooruExtractor):
-    """Base class for rule34 extractors"""
-    category = "rule34"
-    api_url = "https://rule34.xxx/index.php"
-    post_url = "https://rule34.xxx/index.php?page=post&s=view&id={}"
-    pool_url = "https://rule34.xxx/index.php?page=pool&s=show&id={}"
-    page_limit = 4000
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class Rule34TagExtractor(booru.TagMixin, Rule34Extractor):
-    """Extractor for images from rule34.xxx based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
-        "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
-        "pattern": r"https?://([^.]+\.)?rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
-        "count": 1,
-    })
-
-
-class Rule34PoolExtractor(booru.GelbooruPoolMixin, Rule34Extractor):
-    """Extractor for image-pools from rule34.xxx"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
-        "count": 3,
-    })
-
-
-class Rule34PostExtractor(booru.PostMixin, Rule34Extractor):
-    """Extractor for single images from rule34.xxx"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
-        "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
-        "options": (("tags", True),),
-        "keyword": {
-            "tags_artist": "danraku",
-            "tags_character": "kashima_(kantai_collection)",
-            "tags_copyright": "kantai_collection",
-            "tags_general": str,
-            "tags_metadata": str,
-        },
-    })

gallery_dl/extractor/safebooru.py

@@ -1,61 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2015-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract images from https://safebooru.org/"""
-
-from . import booru
-
-
-class SafebooruExtractor(booru.XmlParserMixin,
-                         booru.GelbooruPageMixin,
-                         booru.BooruExtractor):
-    """Base class for safebooru extractors"""
-    category = "safebooru"
-    api_url = "https://safebooru.org/index.php"
-    post_url = "https://safebooru.org/index.php?page=post&s=view&id={}"
-    pool_url = "https://safebooru.org/index.php?page=pool&s=show&id={}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class SafebooruTagExtractor(booru.TagMixin, SafebooruExtractor):
-    """Extractor for images from safebooru.org based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
-        "url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
-        "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
-    })
-
-
-class SafebooruPoolExtractor(booru.GelbooruPoolMixin, SafebooruExtractor):
-    """Extractor for image-pools from safebooru.org"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://safebooru.org/index.php?page=pool&s=show&id=11", {
-        "count": 5,
-    })
-
-
-class SafebooruPostExtractor(booru.PostMixin, SafebooruExtractor):
-    """Extractor for single images from safebooru.org"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
-        "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
-        "content": "93b293b27dabd198afafabbaf87c49863ac82f27",
-        "options": (("tags", True),),
-        "keyword": {
-            "tags_artist": "kawanakajima",
-            "tags_character": "heath_ledger ronald_mcdonald the_joker",
-            "tags_copyright": "dc_comics mcdonald's the_dark_knight",
-            "tags_general": str,
-        },
-    })
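With rule34.py, safebooru.py, and realbooru.py deleted, supporting another Gelbooru-0.2 site no longer requires a new module, only a new EXTRACTORS entry in booru.py. A hypothetical example (site name and URLs invented):

    # Hypothetical entry -- "examplebooru" is not a real site.
    EXTRACTORS["examplebooru"] = {
        "root": "https://booru.example.org",
        "test-tag": (
            ("https://booru.example.org/index.php?page=post&s=list"
             "&tags=example", {
                "count": ">= 1",
            }),
        ),
    }
    # generate_extractors() would then create ExamplebooruTagExtractor,
    # ExamplebooruPoolExtractor, and ExamplebooruPostExtractor automatically.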