diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 715abcb7..85dd8969 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -117,8 +117,8 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): def __init__(self, match): FoolfuukaExtractor.__init__(self, match) - self.board = match.group(match.lastindex-1) - self.thread = match.group(match.lastindex) + self.board = self.groups[-2] + self.thread = self.groups[-1] self.data = None def metadata(self): @@ -140,20 +140,22 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): class FoolfuukaBoardExtractor(FoolfuukaExtractor): """Base extractor for FoolFuuka based boards/archives""" subcategory = "board" - pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$" + pattern = BASE_PATTERN + r"/([^/?#]+)(?:/(?:page/)?(\d*))?$" example = "https://archived.moe/a/" def __init__(self, match): FoolfuukaExtractor.__init__(self, match) - self.board = match.group(match.lastindex) + self.board = self.groups[-2] + self.page = self.groups[-1] def items(self): index_base = "{}/_/api/chan/index/?board={}&page=".format( self.root, self.board) thread_base = "{}/{}/thread/".format(self.root, self.board) - for page in itertools.count(1): - with self.request(index_base + format(page)) as response: + page = self.page + for pnum in itertools.count(text.parse_int(page, 1)): + with self.request(index_base + format(pnum)) as response: try: threads = response.json() except ValueError: @@ -167,6 +169,9 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor): thread["_extractor"] = FoolfuukaThreadExtractor yield Message.Queue, thread["url"], thread + if page: + return + class FoolfuukaSearchExtractor(FoolfuukaExtractor): """Base extractor for search results on FoolFuuka based boards/archives""" @@ -179,17 +184,16 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor): def __init__(self, match): FoolfuukaExtractor.__init__(self, match) self.params = params = {} - args = match.group(match.lastindex).split("/") - key = None - for arg in args: + key = None + for arg in self.groups[-1].split("/"): if key: params[key] = text.unescape(arg) key = None else: key = arg - board = match.group(match.lastindex-1) + board = self.groups[-2] if board != "_": params["boards"] = board diff --git a/test/results/desuarchive.py b/test/results/desuarchive.py index 8e180652..43cd9689 100644 --- a/test/results/desuarchive.py +++ b/test/results/desuarchive.py @@ -15,12 +15,32 @@ __tests__ = ( "#sha1_url": "e7d624aded15a069194e38dc731ec23217a422fb", }, +{ + "#url" : "https://desuarchive.org/a", + "#category": ("foolfuuka", "desuarchive", "board"), + "#class" : foolfuuka.FoolfuukaBoardExtractor, +}, + { "#url" : "https://desuarchive.org/a/", "#category": ("foolfuuka", "desuarchive", "board"), "#class" : foolfuuka.FoolfuukaBoardExtractor, }, +{ + "#url" : "https://desuarchive.org/a/2", + "#category": ("foolfuuka", "desuarchive", "board"), + "#class" : foolfuuka.FoolfuukaBoardExtractor, +}, + +{ + "#url" : "https://desuarchive.org/a/page/2", + "#category": ("foolfuuka", "desuarchive", "board"), + "#class" : foolfuuka.FoolfuukaBoardExtractor, + "#pattern" : foolfuuka.FoolfuukaThreadExtractor.pattern, + "#count" : 10, +}, + { "#url" : "https://desuarchive.org/_/search/text/test/", "#category": ("foolfuuka", "desuarchive", "search"),