[foolfuuka] improve 'board' pattern & support pages (#5408)

pull/5433/head
Mike Fährmann 6 months ago
parent 0c17884673
commit 64948f2c09
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -117,8 +117,8 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
self.board = match.group(match.lastindex-1)
self.thread = match.group(match.lastindex)
self.board = self.groups[-2]
self.thread = self.groups[-1]
self.data = None
def metadata(self):
@ -140,20 +140,22 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
"""Base extractor for FoolFuuka based boards/archives"""
subcategory = "board"
pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/(?:page/)?(\d*))?$"
example = "https://archived.moe/a/"
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
self.board = match.group(match.lastindex)
self.board = self.groups[-2]
self.page = self.groups[-1]
def items(self):
index_base = "{}/_/api/chan/index/?board={}&page=".format(
self.root, self.board)
thread_base = "{}/{}/thread/".format(self.root, self.board)
for page in itertools.count(1):
with self.request(index_base + format(page)) as response:
page = self.page
for pnum in itertools.count(text.parse_int(page, 1)):
with self.request(index_base + format(pnum)) as response:
try:
threads = response.json()
except ValueError:
@ -167,6 +169,9 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
thread["_extractor"] = FoolfuukaThreadExtractor
yield Message.Queue, thread["url"], thread
if page:
return
class FoolfuukaSearchExtractor(FoolfuukaExtractor):
"""Base extractor for search results on FoolFuuka based boards/archives"""
@ -179,17 +184,16 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
self.params = params = {}
args = match.group(match.lastindex).split("/")
key = None
for arg in args:
key = None
for arg in self.groups[-1].split("/"):
if key:
params[key] = text.unescape(arg)
key = None
else:
key = arg
board = match.group(match.lastindex-1)
board = self.groups[-2]
if board != "_":
params["boards"] = board

@ -15,12 +15,32 @@ __tests__ = (
"#sha1_url": "e7d624aded15a069194e38dc731ec23217a422fb",
},
{
"#url" : "https://desuarchive.org/a",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
},
{
"#url" : "https://desuarchive.org/a/",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
},
{
"#url" : "https://desuarchive.org/a/2",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
},
{
"#url" : "https://desuarchive.org/a/page/2",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
"#pattern" : foolfuuka.FoolfuukaThreadExtractor.pattern,
"#count" : 10,
},
{
"#url" : "https://desuarchive.org/_/search/text/test/",
"#category": ("foolfuuka", "desuarchive", "search"),

Loading…
Cancel
Save