From e41e2be2f9d7a9825d7269911ca89efbb4ccf403 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 24 Dec 2020 01:04:44 +0100 Subject: [PATCH] [booru] split '_prepare_post()' --- gallery_dl/extractor/booru.py | 24 ++++++++++++++---------- gallery_dl/extractor/gelbooru.py | 9 +++++---- gallery_dl/extractor/moebooru.py | 9 +++------ gallery_dl/extractor/sankaku.py | 11 ++++++----- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index 2cc984b7..64cde807 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -13,6 +13,7 @@ from .. import text, util, exception from xml.etree import ElementTree import collections +import operator import re @@ -25,19 +26,25 @@ class BooruExtractor(Extractor): def items(self): self.login() - extended_tags = self.config("tags", False) data = self.metadata() + tags = self.config("tags", False) + for post in self.posts(): try: - url = self._prepare_post(post, extended_tags) + url = self._file_url(post) if url[0] == "/": url = self.root + url except (KeyError, TypeError): self.log.debug("Unable to fetch download URL for post %s " "(md5: %s)", post.get("id"), post.get("md5")) continue + + if tags: + self._extended_tags(post) + self._prepare(post) post.update(data) text.nameext_from_url(url, post) + yield Message.Directory, post yield Message.Url, url, post @@ -57,17 +64,14 @@ class BooruExtractor(Extractor): """Return an iterable with post objects""" return () - def _prepare_post(self, post, extended_tags=False): - url = post["file_url"] - if url[0] == "/": - url = self.root + url - if extended_tags: - self._fetch_extended_tags(post) + _file_url = operator.itemgetter("file_url") + + @staticmethod + def _prepare(post): post["date"] = text.parse_datetime( post["created_at"], "%a %b %d %H:%M:%S %z %Y") - return url - def _fetch_extended_tags(self, post, page=None): + def _extended_tags(self, post, page=None): if not page: url = "{}/index.php?page=post&s=view&id={}".format( self.root, post["id"]) diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index b0614e20..7a28e9cd 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -17,11 +17,12 @@ class GelbooruBase(): category = "gelbooru" root = "https://gelbooru.com" - def _prepare_post(self, post, extended_tags=False): - url = booru.BooruExtractor._prepare_post(self, post, extended_tags) - if url.startswith("https://mp4.gelbooru.com/"): + @staticmethod + def _file_url(post): + url = post["file_url"] + if url.startswith(("https://mp4.gelbooru.com/", "https://video-cdn")): md5 = post["md5"] - return "https://img2.gelbooru.com/images/{}/{}/{}.webm".format( + url = "https://img2.gelbooru.com/images/{}/{}/{}.webm".format( md5[0:2], md5[2:4], md5) return url diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py index 273c067f..0ac55cd0 100644 --- a/gallery_dl/extractor/moebooru.py +++ b/gallery_dl/extractor/moebooru.py @@ -23,14 +23,11 @@ class MoebooruExtractor(BooruExtractor): filename_fmt = "{category}_{id}_{md5}.{extension}" page_start = 1 - def _prepare_post(self, post, extended_tags=False): - url = post["file_url"] - if extended_tags: - self._fetch_extended_tags(post) + @staticmethod + def _prepare(post): post["date"] = text.parse_timestamp(post["created_at"]) - return url - def _fetch_extended_tags(self, post): + def _extended_tags(self, post): url = "{}/post/show/{}".format(self.root, post["id"]) page = self.request(url).text html = text.extract(page, '