enable '--chapter-filter'

The same filter infrastructure that can be applied to image URLs now
also works for manga chapters and other delegated URLs.

TODO: actually provide metadata for more extractors (currently only
deviantart and imagefap supply any).
pull/40/head
Mike Fährmann 7 years ago
parent 31cd5b1c1d
commit 0dedbe759c
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -153,7 +153,7 @@ def main():
prepare_range(args.image_range, "image")
prepare_range(args.chapter_range, "chapter")
prepare_filter(args.image_filter, "image")
# prepare_filter(args.chapter_filter, "chapter")
prepare_filter(args.chapter_filter, "chapter")
pformat = config.get(("output", "progress"), True)
if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:

@ -161,8 +161,12 @@ class MangaExtractor(Extractor):
chapters.reverse()
yield Message.Version, 1
for chapter in chapters:
yield Message.Queue, chapter
try:
for chapter, data in chapters:
yield Message.Queue, chapter, data
except ValueError:
for chapter in chapters:
yield Message.Queue, chapter, {}
def login(self):
    """Login and set necessary cookies"""
    # NOTE(review): no-op in this fragment; presumably overridden by
    # site-specific extractor subclasses that need authentication --
    # confirm against the full class definition.

@ -44,8 +44,9 @@ class DeviantartExtractor(Extractor):
def items(self):
yield Message.Version, 1
for deviation in self.deviations():
if isinstance(deviation, str):
yield Message.Queue, deviation
if isinstance(deviation, tuple):
url, data = deviation
yield Message.Queue, url, data
continue
self.prepare(deviation)
@ -159,7 +160,7 @@ class DeviantartExtractor(Extractor):
def _folder_urls(self, folders, category):
url = "https://{}.deviantart.com/{}/0/".format(self.user, category)
return [url + folder["name"] for folder in folders]
return [(url + folder["name"], folder) for folder in folders]
class DeviantartGalleryExtractor(DeviantartExtractor):

@ -157,10 +157,12 @@ class ImagefapUserExtractor(Extractor):
def items(self):
    """Generate queue messages for every gallery of the user.

    The stripped diff merged the old loop (bare URL from
    get_gallery_ids) with the new one; this keeps only the new
    version, which attaches a metadata dict -- gallery id and name --
    to each queued URL so chapter filters can use it.
    """
    yield Message.Version, 1
    for gid, name in self.get_gallery_data():
        url = "http://www.imagefap.com/gallery/" + gid
        # gid arrives as a string scraped from the page; expose it as
        # an int in the metadata, keep the string form in the URL.
        data = {"gallery_id": int(gid), "name": name}
        yield Message.Queue, url, data
def get_gallery_ids(self):
def get_gallery_data(self):
"""Yield all gallery_ids of a specific user"""
folders = self.get_gallery_folders()
url = "http://www.imagefap.com/ajax_usergallery_folder.php"
@ -168,7 +170,14 @@ class ImagefapUserExtractor(Extractor):
for folder_id in folders:
params["id"] = folder_id
page = self.request(url, params=params).text
yield from text.extract_iter(page, '<a href="/gallery/', '"')
pos = 0
while True:
gid, pos = text.extract(page, '<a href="/gallery/', '"', pos)
if not gid:
break
name, pos = text.extract(page, "<b>", "<", pos)
yield gid, name
def get_gallery_folders(self):
"""Create a list of all folder_ids of a specific user"""

@ -129,7 +129,7 @@ class PinterestPinitExtractor(PinterestExtractor):
if not location or location in ("https://api.pinterest.com/None",
"https://www.pinterest.com"):
raise exception.NotFoundError("pin")
yield Message.Queue, location
yield Message.Queue, location, {}
class PinterestAPI():

@ -174,7 +174,7 @@ class PixivMeExtractor(PixivExtractor):
if response.status_code == 404:
raise exception.NotFoundError("user")
yield Message.Version, 1
yield Message.Queue, response.headers["Location"]
yield Message.Queue, response.headers["Location"], {}
class PixivWorkExtractor(PixivExtractor):

@ -33,4 +33,4 @@ class RecursiveExtractor(Extractor):
yield Message.Version, 1
with extractor.blacklist(blist):
for match in re.finditer(r"https?://[^\s\"']+", page):
yield Message.Queue, match.group(0)
yield Message.Queue, match.group(0), {}

@ -45,7 +45,7 @@ class RedditExtractor(Extractor):
if match:
extra.append(match.group(1))
else:
yield Message.Queue, url
yield Message.Queue, url, {}
if not extra or depth == self.max_depth:
return

@ -69,7 +69,7 @@ class TestExtractor(Extractor):
yield Message.Version, 1
for test in tests:
yield Message.Queue, test[0]
yield Message.Queue, test[0], {}
@staticmethod
def __contains__(_):

@ -96,8 +96,9 @@ class Job():
self.handle_directory(msg[1])
elif msg[0] == Message.Queue:
if self.pred_queue(msg[1], None):
self.handle_queue(msg[1])
_, url, kwds = msg
if self.pred_queue(url, kwds):
self.handle_queue(url, kwds)
elif msg[0] == Message.Version:
if msg[1] != 1:
@ -112,7 +113,7 @@ class Job():
def handle_directory(self, keywords):
"""Handle Message.Directory"""
def handle_queue(self, url):
def handle_queue(self, url, keywords):
    """Handle Message.Queue"""
    # Base-class no-op: concrete jobs (DownloadJob, KeywordJob, UrlJob,
    # TestJob) override this to process a delegated URL together with
    # the keyword dict now carried by Message.Queue.
def update_kwdict(self, kwdict):
@ -147,7 +148,7 @@ class DownloadJob(Job):
"""Set and create the target directory for downloads"""
self.pathfmt.set_directory(keywords)
def handle_queue(self, url):
def handle_queue(self, url, keywords):
try:
DownloadJob(url).run()
except exception.NoExtractorError:
@ -181,10 +182,10 @@ class KeywordJob(Job):
print("-----------------------------")
self.print_keywords(keywords)
def handle_queue(self, url):
print("This extractor transfers work to other extractors and does not "
"provide any keywords on its own. Try "
"'gallery-dl --list-keywords \"", url, "\"' instead.", sep="")
def handle_queue(self, url, keywords):
    """Print the keyword dict attached to a queued URL and stop.

    Shows the chapter-filter keywords under a header, then raises
    StopExtraction so no further messages are processed.
    """
    for line in ("Keywords for chapter filters:",
                 "-----------------------------"):
        print(line)
    self.print_keywords(keywords)
    raise exception.StopExtraction()
@staticmethod
@ -218,13 +219,13 @@ class UrlJob(Job):
Job.__init__(self, url)
self.depth = depth
if depth == self.maxdepth:
self.handle_queue = print
self.handle_queue = self.handle_url
@staticmethod
def handle_url(url, _):
    """Print a queued URL; the keyword-dict argument is ignored."""
    print(url)
def handle_queue(self, url):
def handle_queue(self, url, _):
try:
UrlJob(url, self.depth + 1).run()
except exception.NoExtractorError:
@ -277,8 +278,9 @@ class TestJob(DownloadJob):
def handle_directory(self, keywords):
    """Handle Message.Directory by recording its keyword dict."""
    # update_keyword presumably folds the dict into the test's keyword
    # hash (cf. update_url, documented as "Update the URL hash") --
    # confirm against the full class.
    self.update_keyword(keywords)
def handle_queue(self, url):
def handle_queue(self, url, keywords):
    """Handle Message.Queue by recording both the URL and its keywords."""
    # Order matters: the URL is hashed before the keyword dict, so the
    # two calls must not be swapped.
    self.update_url(url)
    self.update_keyword(keywords)
def update_url(self, url):
"""Update the URL hash"""

@ -213,7 +213,7 @@ def build_parser():
selection.add_argument(
"--chapter-filter",
metavar="EXPR", dest="chapter_filter",
help="Same as '--filter' except for chapters (not yet implemented)",
help="Same as '--filter' except for chapters",
)
parser.add_argument(

Loading…
Cancel
Save