[booru] split '_prepare_post()'

pull/1229/head
Mike Fährmann 4 years ago
parent 53222445d5
commit e41e2be2f9
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -13,6 +13,7 @@ from .. import text, util, exception
from xml.etree import ElementTree from xml.etree import ElementTree
import collections import collections
import operator
import re import re
@ -25,19 +26,25 @@ class BooruExtractor(Extractor):
def items(self): def items(self):
self.login() self.login()
extended_tags = self.config("tags", False)
data = self.metadata() data = self.metadata()
tags = self.config("tags", False)
for post in self.posts(): for post in self.posts():
try: try:
url = self._prepare_post(post, extended_tags) url = self._file_url(post)
if url[0] == "/": if url[0] == "/":
url = self.root + url url = self.root + url
except (KeyError, TypeError): except (KeyError, TypeError):
self.log.debug("Unable to fetch download URL for post %s " self.log.debug("Unable to fetch download URL for post %s "
"(md5: %s)", post.get("id"), post.get("md5")) "(md5: %s)", post.get("id"), post.get("md5"))
continue continue
if tags:
self._extended_tags(post)
self._prepare(post)
post.update(data) post.update(data)
text.nameext_from_url(url, post) text.nameext_from_url(url, post)
yield Message.Directory, post yield Message.Directory, post
yield Message.Url, url, post yield Message.Url, url, post
@ -57,17 +64,14 @@ class BooruExtractor(Extractor):
"""Return an iterable with post objects""" """Return an iterable with post objects"""
return () return ()
def _prepare_post(self, post, extended_tags=False): _file_url = operator.itemgetter("file_url")
url = post["file_url"]
if url[0] == "/": @staticmethod
url = self.root + url def _prepare(post):
if extended_tags:
self._fetch_extended_tags(post)
post["date"] = text.parse_datetime( post["date"] = text.parse_datetime(
post["created_at"], "%a %b %d %H:%M:%S %z %Y") post["created_at"], "%a %b %d %H:%M:%S %z %Y")
return url
def _fetch_extended_tags(self, post, page=None): def _extended_tags(self, post, page=None):
if not page: if not page:
url = "{}/index.php?page=post&s=view&id={}".format( url = "{}/index.php?page=post&s=view&id={}".format(
self.root, post["id"]) self.root, post["id"])

@ -17,11 +17,12 @@ class GelbooruBase():
category = "gelbooru" category = "gelbooru"
root = "https://gelbooru.com" root = "https://gelbooru.com"
def _prepare_post(self, post, extended_tags=False): @staticmethod
url = booru.BooruExtractor._prepare_post(self, post, extended_tags) def _file_url(post):
if url.startswith("https://mp4.gelbooru.com/"): url = post["file_url"]
if url.startswith(("https://mp4.gelbooru.com/", "https://video-cdn")):
md5 = post["md5"] md5 = post["md5"]
return "https://img2.gelbooru.com/images/{}/{}/{}.webm".format( url = "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
md5[0:2], md5[2:4], md5) md5[0:2], md5[2:4], md5)
return url return url

@ -23,14 +23,11 @@ class MoebooruExtractor(BooruExtractor):
filename_fmt = "{category}_{id}_{md5}.{extension}" filename_fmt = "{category}_{id}_{md5}.{extension}"
page_start = 1 page_start = 1
def _prepare_post(self, post, extended_tags=False): @staticmethod
url = post["file_url"] def _prepare(post):
if extended_tags:
self._fetch_extended_tags(post)
post["date"] = text.parse_timestamp(post["created_at"]) post["date"] = text.parse_timestamp(post["created_at"])
return url
def _fetch_extended_tags(self, post): def _extended_tags(self, post):
url = "{}/post/show/{}".format(self.root, post["id"]) url = "{}/post/show/{}".format(self.root, post["id"])
page = self.request(url).text page = self.request(url).text
html = text.extract(page, '<ul id="tag-', '</ul>')[0] html = text.extract(page, '<ul id="tag-', '</ul>')[0]

@ -41,20 +41,21 @@ class SankakuExtractor(BooruExtractor):
def skip(self, num): def skip(self, num):
return 0 return 0
def _prepare_post(self, post, extended_tags=False): def _file_url(self, post):
url = post["file_url"] url = post["file_url"]
if not url and self._warning: if not url and self._warning:
self.log.warning( self.log.warning(
"Login required to download 'contentious_content' posts") "Login required to download 'contentious_content' posts")
SankakuExtractor._warning = False SankakuExtractor._warning = False
if extended_tags: return url
self._fetch_extended_tags(post)
@staticmethod
def _prepare(post):
post["created_at"] = post["created_at"]["s"] post["created_at"] = post["created_at"]["s"]
post["date"] = text.parse_timestamp(post["created_at"]) post["date"] = text.parse_timestamp(post["created_at"])
post["tags"] = [tag["name"] for tag in post["tags"]] post["tags"] = [tag["name"] for tag in post["tags"]]
return url
def _fetch_extended_tags(self, post): def _extended_tags(self, post):
tags = collections.defaultdict(list) tags = collections.defaultdict(list)
types = self.TAG_TYPES types = self.TAG_TYPES
for tag in post["tags"]: for tag in post["tags"]:

Loading…
Cancel
Save