[booru] add generalized extractors for *booru sites

similar to cc15fbe7
pull/1195/head
Mike Fährmann 4 years ago
parent 5f23441e12
commit a3a863fc13

gallery_dl/extractor/__init__.py
@@ -92,11 +92,8 @@ modules = [
     "pururin",
     "reactor",
     "readcomiconline",
-    "realbooru",
     "reddit",
     "redgifs",
-    "rule34",
-    "safebooru",
     "sankaku",
     "sankakucomplex",
     "seiga",
@@ -122,6 +119,7 @@ modules = [
     "xhamster",
     "xvideos",
     "yuki",
+    "booru",
     "moebooru",
     "foolfuuka",
     "foolslide",

gallery_dl/extractor/booru.py
@@ -1,247 +1,248 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2020 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Base classes for extractors for danbooru and co"""
+"""Extractors for *booru sites"""
 
-from .common import Extractor, Message
-from .. import text, exception
+from .common import Extractor, Message, generate_extractors
+from .. import text, util, exception
 from xml.etree import ElementTree
 import collections
-import datetime
-import operator
 import re
 
 
 class BooruExtractor(Extractor):
-    """Base class for all booru extractors"""
+    """Base class for *booru extractors"""
     basecategory = "booru"
     filename_fmt = "{category}_{id}_{md5}.{extension}"
-    api_url = ""
-    post_url = ""
-    per_page = 50
-    page_start = 1
-    page_limit = None
-    sort = False
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params = {}
-        self.extags = self.post_url and self.config("tags", False)
-
-    def skip(self, num):
-        pages = num // self.per_page
-        if self.page_limit and pages + self.page_start > self.page_limit:
-            pages = self.page_limit - self.page_start
-        self.page_start += pages
-        return pages * self.per_page
+    page_start = 0
+    per_page = 100
 
     def items(self):
-        yield Message.Version, 1
-        data = self.get_metadata()
-
-        self.reset_page()
-        while True:
-            images = self.parse_response(
-                self.request(self.api_url, params=self.params))
-
-            for image in images:
-                try:
-                    url = self.get_file_url(image)
-                except KeyError:
-                    continue
-                if url.startswith("/"):
-                    url = text.urljoin(self.api_url, url)
-                image.update(data)
-                text.nameext_from_url(url, image)
-                if self.extags:
-                    self.extended_tags(image)
-                yield Message.Directory, image
-                yield Message.Url, url, image
-
-            if len(images) < self.per_page:
-                return
-            self.update_page(image)
-
-    def reset_page(self):
-        """Initialize params to point to the first page"""
-        self.params["page"] = self.page_start
-
-    def update_page(self, data):
-        """Update params to point to the next page"""
-
-    def parse_response(self, response):
-        """Parse JSON API response"""
-        images = response.json()
-        if self.sort:
-            images.sort(key=operator.itemgetter("score", "id"),
-                        reverse=True)
-        return images
-
-    def get_metadata(self):
-        """Collect metadata for extractor-job"""
-        return {}
-
-    @staticmethod
-    def get_file_url(image):
-        return image["file_url"]
-
-    def extended_tags(self, image, page=None):
-        """Retrieve extended tag information"""
+        self.login()
+        extended_tags = self.config("tags", False)
+        data = self.metadata()
+        for post in self.posts():
+            try:
+                url = self._prepare_post(post, extended_tags)
+            except KeyError:
+                continue
+            post.update(data)
+            text.nameext_from_url(url, post)
+            yield Message.Directory, post
+            yield Message.Url, url, post
+
+    def skip(self, num):
+        pages = num // self.per_page
+        self.page_start += pages
+        return pages * self.per_page
+
+    def login(self):
+        """Login and set necessary cookies"""
+
+    def metadata(self):
+        """Return a dict with general metadata"""
+        return ()
+
+    def posts(self):
+        """Return an iterable with post objects"""
+        return ()
+
+    def _prepare_post(self, post, extended_tags=False):
+        url = post["file_url"]
+        if url[0] == "/":
+            url = self.root + url
+        if extended_tags:
+            self._fetch_extended_tags(post)
+        post["date"] = text.parse_datetime(
+            post["created_at"], "%a %b %d %H:%M:%S %z %Y")
+        return url
+
+    def _fetch_extended_tags(self, post, page=None):
         if not page:
-            url = self.post_url.format(image["id"])
+            url = "{}/index.php?page=post&s=view&id={}".format(
+                self.root, post["id"])
             page = self.request(url).text
-        tags = collections.defaultdict(list)
-        tags_html = text.extract(page, '<ul id="tag-', '</ul>')[0]
-        pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"']+)", re.S)
-        for tag_type, tag_name in pattern.findall(tags_html or ""):
-            tags[tag_type].append(text.unquote(tag_name))
-        for key, value in tags.items():
-            image["tags_" + key] = " ".join(value)
-
-
-class XmlParserMixin():
-    """Mixin for XML based API responses"""
-    def parse_response(self, response):
-        root = ElementTree.fromstring(response.text)
-        return [post.attrib for post in root]
-
-
-class MoebooruPageMixin():
-    """Pagination for Moebooru and Danbooru v1"""
-    def update_page(self, data):
-        if self.page_limit:
-            self.params["page"] = None
-            self.params["before_id"] = data["id"]
-        else:
-            self.params["page"] += 1
-
-
-class GelbooruPageMixin():
-    """Pagination for Gelbooru-like sites"""
-    page_start = 0
-
-    def reset_page(self):
-        self.params["pid"] = self.page_start
-
-    def update_page(self, data):
-        self.params["pid"] += 1
-
-
-class TagMixin():
-    """Extraction of images based on search-tags"""
-    subcategory = "tag"
-    directory_fmt = ("{category}", "{search_tags}")
-    archive_fmt = "t_{search_tags}_{id}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.tags = text.unquote(match.group("tags").replace("+", " "))
-        self.params["tags"] = self.tags
-        self.params["limit"] = self.per_page
-
-    def get_metadata(self):
-        return {"search_tags": self.tags}
-
-
-class PoolMixin():
-    """Extraction of image-pools"""
-    subcategory = "pool"
-    directory_fmt = ("{category}", "pool", "{pool}")
-    archive_fmt = "p_{pool}_{id}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.pool = match.group("pool")
-        self.params["tags"] = "pool:" + self.pool
-        self.params["limit"] = self.per_page
-
-    def get_metadata(self):
-        return {"pool": text.parse_int(self.pool)}
-
-
-class GelbooruPoolMixin(PoolMixin):
-    """Image-pool extraction for Gelbooru-like sites"""
-    per_page = 1
-
-    def get_metadata(self):
-        page = self.request(self.pool_url.format(self.pool)).text
-        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
-        if not name:
-            name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
-        if not name:
-            raise exception.NotFoundError("pool")
-        self.posts = list(text.extract_iter(
-            page, 'class="thumb" id="p', '"', pos))
-
-        return {
-            "pool": text.parse_int(self.pool),
-            "pool_name": text.unescape(name),
-            "count": len(self.posts),
-        }
-
-    def reset_page(self):
-        self.index = self.page_start
-        self.update_page(None)
-
-    def update_page(self, data):
-        try:
-            post = self.posts[self.index]
-            self.index += 1
-        except IndexError:
-            post = "0"
-        self.params["tags"] = "id:" + post
-
-
-class PostMixin():
-    """Extraction of a single image-post"""
-    subcategory = "post"
-    archive_fmt = "{id}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.post = match.group("post")
-        self.params["tags"] = "id:" + self.post
-
-
-class MoebooruPopularMixin():
-    """Extraction and metadata handling for Moebooru and Danbooru v1"""
-    subcategory = "popular"
-    directory_fmt = ("{category}", "popular", "{scale}", "{date}")
-    archive_fmt = "P_{scale[0]}_{date}_{id}"
-    page_start = None
-    sort = True
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update(text.parse_query(match.group("query")))
-        self.scale = match.group("scale")
-
-    def get_metadata(self, fmt="%Y-%m-%d"):
-        date = self.get_date() or datetime.date.today().isoformat()
-        scale = self.get_scale() or "day"
-
-        if scale == "week":
-            date = datetime.date.fromisoformat(date)
-            date = (date - datetime.timedelta(days=date.weekday())).isoformat()
-        elif scale == "month":
-            date = date[:-3]
-
-        return {"date": date, "scale": scale}
-
-    def get_date(self):
-        if "year" in self.params:
-            return "{:>04}-{:>02}-{:>02}".format(
-                self.params["year"],
-                self.params.get("month", "01"),
-                self.params.get("day", "01"))
-        return None
-
-    def get_scale(self):
-        if self.scale and self.scale.startswith("by_"):
-            return self.scale[3:]
-        return self.scale
+        html = text.extract(page, '<ul id="tag-', '</ul>')[0]
+        if html:
+            tags = collections.defaultdict(list)
+            pattern = re.compile(
+                r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)", re.S)
+            for tag_type, tag_name in pattern.findall(html):
+                tags[tag_type].append(text.unquote(tag_name))
+            for key, value in tags.items():
+                post["tags_" + key] = " ".join(value)
+
+    def _api_request(self, params):
+        url = self.root + "/index.php?page=dapi&s=post&q=index"
+        return ElementTree.fromstring(self.request(url, params=params).text)
+
+    def _pagination(self, params):
+        params["pid"] = self.page_start
+        params["limit"] = self.per_page
+
+        while True:
+            root = self._api_request(params)
+            for post in root:
+                yield post.attrib
+
+            if len(root) < self.per_page:
+                return
+            params["pid"] += 1
+
+
+class BooruPostExtractor(BooruExtractor):
+    subcategory = "post"
+    archive_fmt = "{id}"
+    pattern_fmt = r"/index\.php\?page=post&s=view&id=(\d+)"
+
+    def __init__(self, match):
+        BooruExtractor.__init__(self, match)
+        self.post_id = match.group(1)
+
+    def posts(self):
+        return self._pagination({"id": self.post_id})
+
+
+class BooruTagExtractor(BooruExtractor):
+    subcategory = "tag"
+    directory_fmt = ("{category}", "{search_tags}")
+    archive_fmt = "t_{search_tags}_{id}"
+    pattern_fmt = r"/index\.php\?page=post&s=list&tags=([^&#]+)"
+
+    def __init__(self, match):
+        BooruExtractor.__init__(self, match)
+        self.tags = text.unquote(match.group(1).replace("+", " "))
+
+    def metadata(self):
+        return {"search_tags": self.tags}
+
+    def posts(self):
+        return self._pagination({"tags" : self.tags})
+
+
+class BooruPoolExtractor(BooruExtractor):
+    subcategory = "pool"
+    directory_fmt = ("{category}", "pool", "{pool}")
+    archive_fmt = "p_{pool}_{id}"
+    pattern_fmt = r"/index\.php\?page=pool&s=show&id=(\d+)"
+
+    def __init__(self, match):
+        BooruExtractor.__init__(self, match)
+        self.pool_id = match.group(1)
+        self.post_ids = ()
+
+    def skip(self, num):
+        self.page_start += num
+        return num
+
+    def metadata(self):
+        url = "{}/index.php?page=pool&s=show&id={}".format(
+            self.root, self.pool_id)
+        page = self.request(url).text
+
+        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
+        if not name:
+            name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
+        if not name:
+            raise exception.NotFoundError("pool")
+        self.post_ids = text.extract_iter(
+            page, 'class="thumb" id="p', '"', pos)
+
+        return {
+            "pool": text.parse_int(self.pool_id),
+            "pool_name": text.unescape(name),
+        }
+
+    def posts(self):
+        params = {}
+        for params["id"] in util.advance(self.post_ids, self.page_start):
+            for post in self._api_request(params):
+                yield post.attrib
+
+
+EXTRACTORS = {
+    "rule34": {
+        "root": "https://rule34.xxx",
+        "test-tag": (
+            ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
+                "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
+                "pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
+                "count": 1,
+            }),
+        ),
+        "test-pool": (
+            ("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
+                "count": 3,
+            }),
+        ),
+        "test-post": (
+            ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
+                "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
+                "options": (("tags", True),),
+                "keyword": {
+                    "tags_artist": "danraku",
+                    "tags_character": "kashima_(kantai_collection)",
+                    "tags_copyright": "kantai_collection",
+                    "tags_general": str,
+                    "tags_metadata": str,
+                },
+            }),
+        ),
+    },
+    "safebooru": {
+        "root": "https://safebooru.org",
+        "test-tag": (
+            ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
+                "url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
+                "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
+            }),
+        ),
+        "test-pool": (
+            ("https://safebooru.org/index.php?page=pool&s=show&id=11", {
+                "count": 5,
+            }),
+        ),
+        "test-post": (
+            ("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
+                "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
+                "content": "93b293b27dabd198afafabbaf87c49863ac82f27",
+                "options": (("tags", True),),
+                "keyword": {
+                    "tags_artist": "kawanakajima",
+                    "tags_character": "heath_ledger ronald_mcdonald the_joker",
+                    "tags_copyright": "dc_comics mcdonald's the_dark_knight",
+                    "tags_general": str,
+                },
+            }),
+        ),
+    },
+    "realbooru": {
+        "root": "https://realbooru.com",
+        "test-tag": (
+            ("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
+                "count": ">= 64",
+            }),
+        ),
+        "test-pool": (
+            ("https://realbooru.com/index.php?page=pool&s=show&id=1", {
+                "count": 3,
+            }),
+        ),
+        "test-post": (
+            ("https://realbooru.com/index.php?page=post&s=view&id=668483", {
+                "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
+                "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
+            }),
+        ),
+    },
+}
+
+generate_extractors(EXTRACTORS, globals(), (
+    BooruTagExtractor,
+    BooruPoolExtractor,
+    BooruPostExtractor,
+))
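
The EXTRACTORS dict is now the entire per-site surface: a root URL plus optional test definitions, with URL patterns presumably built by generate_extractors() from the site's root and each base class's pattern_fmt. Adding one more Gelbooru-style site would, under this scheme, look like the sketch below (the "tbib" category, its root URL and the test URL are hypothetical, purely for illustration):

# Hypothetical entry, same shape as "rule34"/"safebooru"/"realbooru" above
EXTRACTORS["tbib"] = {
    "root": "https://tbib.org",  # assumed site root
    "test-tag": (
        ("https://tbib.org/index.php?page=post&s=list&tags=example", {
            "count": ">= 1",
        }),
    ),
}
# generate_extractors(...) would then synthesize TbibTagExtractor,
# TbibPoolExtractor and TbibPostExtractor from the three base classes.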

gallery_dl/extractor/gelbooru.py
@@ -6,98 +6,27 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images from https://gelbooru.com/"""
+"""Extractors for https://gelbooru.com/"""
 
 from . import booru
-from .common import Message
-from .. import text
+from .. import text, exception
 
 
-class GelbooruExtractor(booru.XmlParserMixin,
-                        booru.GelbooruPageMixin,
-                        booru.BooruExtractor):
+class GelbooruBase():
     """Base class for gelbooru extractors"""
     category = "gelbooru"
-    api_url = "https://gelbooru.com/index.php"
-    post_url = "https://gelbooru.com/index.php?page=post&s=view&id={}"
-    pool_url = "https://gelbooru.com/index.php?page=pool&s=show&id={}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.use_api = self.config("api", True)
-        if self.use_api:
-            self.params.update({"page": "dapi", "s": "post", "q": "index"})
-        else:
-            self.items = self.items_noapi
-            self.session.cookies["fringeBenefits"] = "yup"
-            self.per_page = 42
-
-    @staticmethod
-    def get_file_url(image):
-        url = image["file_url"]
+    root = "https://gelbooru.com"
+
+    def _prepare_post(self, post, extended_tags=False):
+        url = booru.BooruExtractor._prepare_post(self, post, extended_tags)
         if url.startswith("https://mp4.gelbooru.com/"):
-            ihash = image["md5"]
+            md5 = post["md5"]
             return "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
-                ihash[0:2], ihash[2:4], ihash)
+                md5[0:2], md5[2:4], md5)
         return url
 
-    def items_noapi(self):
-        yield Message.Version, 1
-        data = self.get_metadata()
-
-        for post in self.get_posts():
-            post = self.get_post_data(post)
-            url = post["file_url"]
-            post.update(data)
-            text.nameext_from_url(url, post)
-            yield Message.Directory, post
-            yield Message.Url, url, post
-
-    def get_posts(self):
-        """Return an iterable containing all relevant post objects"""
-        url = "https://gelbooru.com/index.php?page=post&s=list"
-        params = {
-            "tags": self.params["tags"],
-            "pid" : self.page_start * self.per_page
-        }
-
-        while True:
-            page = self.request(url, params=params).text
-            ids = list(text.extract_iter(page, '<span id="s', '"'))
-            yield from ids
-            if len(ids) < self.per_page:
-                return
-            params["pid"] += self.per_page
-
-    def get_post_data(self, post_id):
-        """Extract metadata of a single post"""
-        page = self.request(self.post_url.format(post_id)).text
-        data = text.extract_all(page, (
-            (None        , '<meta name="keywords"', ''),
-            ("tags"      , ' imageboard- ', '"'),
-            ("id"        , '<li>Id: ', '<'),
-            ("created_at", '<li>Posted: ', '<'),
-            ("width"     , '<li>Size: ', 'x'),
-            ("height"    , '', '<'),
-            ("source"    , '<li>Source: <a href="', '"'),
-            ("rating"    , '<li>Rating: ', '<'),
-            (None        , '<li>Score: ', ''),
-            ("score"     , '>', '<'),
-            ("file_url"  , '<li><a href="http', '"'),
-            ("change"    , ' id="lupdated" value="', '"'),
-        ))[0]
-        data["file_url"] = "http" + data["file_url"].replace("m//", "m/", 1)
-        data["md5"] = data["file_url"].rpartition("/")[2].partition(".")[0]
-        data["rating"] = (data["rating"] or "?")[0].lower()
-        data["tags"] = " ".join(
-            [tag.replace(" ", "_") for tag in data["tags"].split(", ")])
-        if self.extags:
-            self.extended_tags(data, page)
-        return data
-
-
-class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
+
+class GelbooruTagExtractor(GelbooruBase, booru.BooruTagExtractor):
     """Extractor for images from gelbooru.com based on search-tags"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
@@ -112,7 +41,7 @@ class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
     )
 
 
-class GelbooruPoolExtractor(booru.PoolMixin, GelbooruExtractor):
+class GelbooruPoolExtractor(GelbooruBase, booru.BooruPoolExtractor):
     """Extractor for image-pools from gelbooru.com"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=pool&s=show&id=(?P<pool>\d+)")
@@ -126,8 +55,23 @@ class GelbooruPoolExtractor(booru.PoolMixin, GelbooruExtractor):
         }),
     )
 
+    def metadata(self):
+        url = "{}/index.php?page=pool&s=show&id={}".format(
+            self.root, self.pool_id)
+        page = self.request(url).text
+
+        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
+        if not name:
+            raise exception.NotFoundError("pool")
+        self.post_ids = text.extract_iter(page, 'class="" id="p', '"', pos)
+
+        return {
+            "pool": text.parse_int(self.pool_id),
+            "pool_name": text.unescape(name),
+        }
+
 
-class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor):
+class GelbooruPostExtractor(GelbooruBase, booru.BooruPostExtractor):
    """Extractor for single images from gelbooru.com"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=post&s=view&id=(?P<post>\d+)")
@@ -135,6 +79,3 @@ class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor):
         "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
         "count": 1,
     })
-
-    def get_posts(self):
-        return (self.post,)
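
GelbooruBase._prepare_post works around mp4.gelbooru.com video URLs by rebuilding them from the post's md5, which also encodes the two-level directory sharding used by gelbooru's image servers. Worked through with an illustrative (not real) md5:

# illustrative md5, not taken from a real post
md5 = "d41d8cd98f00b204e9800998ecf8427e"
url = "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
    md5[0:2], md5[2:4], md5)
# url == "https://img2.gelbooru.com/images/d4/1d/d41d8cd98f00b204e9800998ecf8427e.webm"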

gallery_dl/extractor/realbooru.py
@@ -1,59 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://realbooru.com/"""
-
-from . import booru
-
-
-class RealbooruExtractor(booru.XmlParserMixin,
-                         booru.GelbooruPageMixin,
-                         booru.BooruExtractor):
-    """Base class for realbooru extractors"""
-    category = "realbooru"
-    api_url = "https://realbooru.com/index.php"
-    post_url = "https://realbooru.com/index.php?page=post&s=view&id={}"
-    pool_url = "https://realbooru.com/index.php?page=pool&s=show&id={}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class RealbooruTagExtractor(booru.TagMixin, RealbooruExtractor):
-    """Extractor for images from realbooru.com based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
-        "count": ">= 64",
-    })
-
-
-class RealbooruPoolExtractor(booru.GelbooruPoolMixin, RealbooruExtractor):
-    """Extractor for image-pools from realbooru.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://realbooru.com/index.php?page=pool&s=show&id=1", {
-        "count": 3,
-    })
-
-
-class RealbooruPostExtractor(booru.PostMixin, RealbooruExtractor):
-    """Extractor for single images from realbooru.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://realbooru.com/index.php?page=post&s=view&id=668483", {
-        "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
-        "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
-        # "options": (("tags", True),),
-        # "keyword": {
-        #     "tags_general" : str,
-        #     "tags_metadata": str,
-        #     "tags_model"   : "jennifer_lawrence",
-        # },
-    })

gallery_dl/extractor/rule34.py
@@ -1,63 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2016-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract images from https://rule34.xxx/"""
-
-from . import booru
-
-
-class Rule34Extractor(booru.XmlParserMixin,
-                      booru.GelbooruPageMixin,
-                      booru.BooruExtractor):
-    """Base class for rule34 extractors"""
-    category = "rule34"
-    api_url = "https://rule34.xxx/index.php"
-    post_url = "https://rule34.xxx/index.php?page=post&s=view&id={}"
-    pool_url = "https://rule34.xxx/index.php?page=pool&s=show&id={}"
-    page_limit = 4000
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class Rule34TagExtractor(booru.TagMixin, Rule34Extractor):
-    """Extractor for images from rule34.xxx based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
-        "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
-        "pattern": r"https?://([^.]+\.)?rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
-        "count": 1,
-    })
-
-
-class Rule34PoolExtractor(booru.GelbooruPoolMixin, Rule34Extractor):
-    """Extractor for image-pools from rule34.xxx"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
-        "count": 3,
-    })
-
-
-class Rule34PostExtractor(booru.PostMixin, Rule34Extractor):
-    """Extractor for single images from rule34.xxx"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
-        "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
-        "options": (("tags", True),),
-        "keyword": {
-            "tags_artist": "danraku",
-            "tags_character": "kashima_(kantai_collection)",
-            "tags_copyright": "kantai_collection",
-            "tags_general": str,
-            "tags_metadata": str,
-        },
-    })

gallery_dl/extractor/safebooru.py
@@ -1,61 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2015-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract images from https://safebooru.org/"""
-
-from . import booru
-
-
-class SafebooruExtractor(booru.XmlParserMixin,
-                         booru.GelbooruPageMixin,
-                         booru.BooruExtractor):
-    """Base class for safebooru extractors"""
-    category = "safebooru"
-    api_url = "https://safebooru.org/index.php"
-    post_url = "https://safebooru.org/index.php?page=post&s=view&id={}"
-    pool_url = "https://safebooru.org/index.php?page=pool&s=show&id={}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class SafebooruTagExtractor(booru.TagMixin, SafebooruExtractor):
-    """Extractor for images from safebooru.org based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
-        "url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
-        "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
-    })
-
-
-class SafebooruPoolExtractor(booru.GelbooruPoolMixin, SafebooruExtractor):
-    """Extractor for image-pools from safebooru.org"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://safebooru.org/index.php?page=pool&s=show&id=11", {
-        "count": 5,
-    })
-
-
-class SafebooruPostExtractor(booru.PostMixin, SafebooruExtractor):
-    """Extractor for single images from safebooru.org"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
-        "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
-        "content": "93b293b27dabd198afafabbaf87c49863ac82f27",
-        "options": (("tags", True),),
-        "keyword": {
-            "tags_artist": "kawanakajima",
-            "tags_character": "heath_ledger ronald_mcdonald the_joker",
-            "tags_copyright": "dc_comics mcdonald's the_dark_knight",
-            "tags_general": str,
-        },
-    })
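
The test URLs above double as usage examples. From Python, one of them can be run end to end roughly as follows — a minimal sketch assuming the DownloadJob entry point and the config.set() signature of this gallery-dl era; the "tags" option is what the tests enable via their "options" field:

from gallery_dl import config, job

# enable the tags_artist/tags_character/... categories produced by
# BooruExtractor._fetch_extended_tags()
config.set(("extractor",), "tags", True)

job.DownloadJob(
    "https://safebooru.org/index.php?page=post&s=view&id=1169132").run()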