enable '--chapter-filter'

The same filter infrastructure that can be applied to image URLs now
also works for manga chapters and other delegated URLs.

TODO: actually provide metadata (currently only deviantart and
imagefap are supported).
pull/40/head
Mike Fährmann 7 years ago
parent 31cd5b1c1d
commit 0dedbe759c
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -153,7 +153,7 @@ def main():
prepare_range(args.image_range, "image") prepare_range(args.image_range, "image")
prepare_range(args.chapter_range, "chapter") prepare_range(args.chapter_range, "chapter")
prepare_filter(args.image_filter, "image") prepare_filter(args.image_filter, "image")
# prepare_filter(args.chapter_filter, "chapter") prepare_filter(args.chapter_filter, "chapter")
pformat = config.get(("output", "progress"), True) pformat = config.get(("output", "progress"), True)
if pformat and len(urls) > 1 and args.loglevel < logging.ERROR: if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:

@ -161,8 +161,12 @@ class MangaExtractor(Extractor):
chapters.reverse() chapters.reverse()
yield Message.Version, 1 yield Message.Version, 1
for chapter in chapters: try:
yield Message.Queue, chapter for chapter, data in chapters:
yield Message.Queue, chapter, data
except ValueError:
for chapter in chapters:
yield Message.Queue, chapter, {}
def login(self): def login(self):
"""Login and set necessary cookies""" """Login and set necessary cookies"""

@ -44,8 +44,9 @@ class DeviantartExtractor(Extractor):
def items(self): def items(self):
yield Message.Version, 1 yield Message.Version, 1
for deviation in self.deviations(): for deviation in self.deviations():
if isinstance(deviation, str): if isinstance(deviation, tuple):
yield Message.Queue, deviation url, data = deviation
yield Message.Queue, url, data
continue continue
self.prepare(deviation) self.prepare(deviation)
@ -159,7 +160,7 @@ class DeviantartExtractor(Extractor):
def _folder_urls(self, folders, category): def _folder_urls(self, folders, category):
url = "https://{}.deviantart.com/{}/0/".format(self.user, category) url = "https://{}.deviantart.com/{}/0/".format(self.user, category)
return [url + folder["name"] for folder in folders] return [(url + folder["name"], folder) for folder in folders]
class DeviantartGalleryExtractor(DeviantartExtractor): class DeviantartGalleryExtractor(DeviantartExtractor):

@ -157,10 +157,12 @@ class ImagefapUserExtractor(Extractor):
def items(self): def items(self):
yield Message.Version, 1 yield Message.Version, 1
for gallery in self.get_gallery_ids(): for gid, name in self.get_gallery_data():
yield Message.Queue, "http://www.imagefap.com/gallery/" + gallery url = "http://www.imagefap.com/gallery/" + gid
data = {"gallery_id": int(gid), "name": name}
yield Message.Queue, url, data
def get_gallery_ids(self): def get_gallery_data(self):
"""Yield all gallery_ids of a specific user""" """Yield all gallery_ids of a specific user"""
folders = self.get_gallery_folders() folders = self.get_gallery_folders()
url = "http://www.imagefap.com/ajax_usergallery_folder.php" url = "http://www.imagefap.com/ajax_usergallery_folder.php"
@ -168,7 +170,14 @@ class ImagefapUserExtractor(Extractor):
for folder_id in folders: for folder_id in folders:
params["id"] = folder_id params["id"] = folder_id
page = self.request(url, params=params).text page = self.request(url, params=params).text
yield from text.extract_iter(page, '<a href="/gallery/', '"')
pos = 0
while True:
gid, pos = text.extract(page, '<a href="/gallery/', '"', pos)
if not gid:
break
name, pos = text.extract(page, "<b>", "<", pos)
yield gid, name
def get_gallery_folders(self): def get_gallery_folders(self):
"""Create a list of all folder_ids of a specific user""" """Create a list of all folder_ids of a specific user"""

@ -129,7 +129,7 @@ class PinterestPinitExtractor(PinterestExtractor):
if not location or location in ("https://api.pinterest.com/None", if not location or location in ("https://api.pinterest.com/None",
"https://www.pinterest.com"): "https://www.pinterest.com"):
raise exception.NotFoundError("pin") raise exception.NotFoundError("pin")
yield Message.Queue, location yield Message.Queue, location, {}
class PinterestAPI(): class PinterestAPI():

@ -174,7 +174,7 @@ class PixivMeExtractor(PixivExtractor):
if response.status_code == 404: if response.status_code == 404:
raise exception.NotFoundError("user") raise exception.NotFoundError("user")
yield Message.Version, 1 yield Message.Version, 1
yield Message.Queue, response.headers["Location"] yield Message.Queue, response.headers["Location"], {}
class PixivWorkExtractor(PixivExtractor): class PixivWorkExtractor(PixivExtractor):

@ -33,4 +33,4 @@ class RecursiveExtractor(Extractor):
yield Message.Version, 1 yield Message.Version, 1
with extractor.blacklist(blist): with extractor.blacklist(blist):
for match in re.finditer(r"https?://[^\s\"']+", page): for match in re.finditer(r"https?://[^\s\"']+", page):
yield Message.Queue, match.group(0) yield Message.Queue, match.group(0), {}

@ -45,7 +45,7 @@ class RedditExtractor(Extractor):
if match: if match:
extra.append(match.group(1)) extra.append(match.group(1))
else: else:
yield Message.Queue, url yield Message.Queue, url, {}
if not extra or depth == self.max_depth: if not extra or depth == self.max_depth:
return return

@ -69,7 +69,7 @@ class TestExtractor(Extractor):
yield Message.Version, 1 yield Message.Version, 1
for test in tests: for test in tests:
yield Message.Queue, test[0] yield Message.Queue, test[0], {}
@staticmethod @staticmethod
def __contains__(_): def __contains__(_):

@ -96,8 +96,9 @@ class Job():
self.handle_directory(msg[1]) self.handle_directory(msg[1])
elif msg[0] == Message.Queue: elif msg[0] == Message.Queue:
if self.pred_queue(msg[1], None): _, url, kwds = msg
self.handle_queue(msg[1]) if self.pred_queue(url, kwds):
self.handle_queue(url, kwds)
elif msg[0] == Message.Version: elif msg[0] == Message.Version:
if msg[1] != 1: if msg[1] != 1:
@ -112,7 +113,7 @@ class Job():
def handle_directory(self, keywords): def handle_directory(self, keywords):
"""Handle Message.Directory""" """Handle Message.Directory"""
def handle_queue(self, url): def handle_queue(self, url, keywords):
"""Handle Message.Queue""" """Handle Message.Queue"""
def update_kwdict(self, kwdict): def update_kwdict(self, kwdict):
@ -147,7 +148,7 @@ class DownloadJob(Job):
"""Set and create the target directory for downloads""" """Set and create the target directory for downloads"""
self.pathfmt.set_directory(keywords) self.pathfmt.set_directory(keywords)
def handle_queue(self, url): def handle_queue(self, url, keywords):
try: try:
DownloadJob(url).run() DownloadJob(url).run()
except exception.NoExtractorError: except exception.NoExtractorError:
@ -181,10 +182,10 @@ class KeywordJob(Job):
print("-----------------------------") print("-----------------------------")
self.print_keywords(keywords) self.print_keywords(keywords)
def handle_queue(self, url): def handle_queue(self, url, keywords):
print("This extractor transfers work to other extractors and does not " print("Keywords for chapter filters:")
"provide any keywords on its own. Try " print("-----------------------------")
"'gallery-dl --list-keywords \"", url, "\"' instead.", sep="") self.print_keywords(keywords)
raise exception.StopExtraction() raise exception.StopExtraction()
@staticmethod @staticmethod
@ -218,13 +219,13 @@ class UrlJob(Job):
Job.__init__(self, url) Job.__init__(self, url)
self.depth = depth self.depth = depth
if depth == self.maxdepth: if depth == self.maxdepth:
self.handle_queue = print self.handle_queue = self.handle_url
@staticmethod @staticmethod
def handle_url(url, _): def handle_url(url, _):
print(url) print(url)
def handle_queue(self, url): def handle_queue(self, url, _):
try: try:
UrlJob(url, self.depth + 1).run() UrlJob(url, self.depth + 1).run()
except exception.NoExtractorError: except exception.NoExtractorError:
@ -277,8 +278,9 @@ class TestJob(DownloadJob):
def handle_directory(self, keywords): def handle_directory(self, keywords):
self.update_keyword(keywords) self.update_keyword(keywords)
def handle_queue(self, url): def handle_queue(self, url, keywords):
self.update_url(url) self.update_url(url)
self.update_keyword(keywords)
def update_url(self, url): def update_url(self, url):
"""Update the URL hash""" """Update the URL hash"""

@ -213,7 +213,7 @@ def build_parser():
selection.add_argument( selection.add_argument(
"--chapter-filter", "--chapter-filter",
metavar="EXPR", dest="chapter_filter", metavar="EXPR", dest="chapter_filter",
help="Same as '--filter' except for chapters (not yet implemented)", help="Same as '--filter' except for chapters",
) )
parser.add_argument( parser.add_argument(

Loading…
Cancel
Save