|
|
@ -117,8 +117,8 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, match):
|
|
|
|
def __init__(self, match):
|
|
|
|
FoolfuukaExtractor.__init__(self, match)
|
|
|
|
FoolfuukaExtractor.__init__(self, match)
|
|
|
|
self.board = match.group(match.lastindex-1)
|
|
|
|
self.board = self.groups[-2]
|
|
|
|
self.thread = match.group(match.lastindex)
|
|
|
|
self.thread = self.groups[-1]
|
|
|
|
self.data = None
|
|
|
|
self.data = None
|
|
|
|
|
|
|
|
|
|
|
|
def metadata(self):
|
|
|
|
def metadata(self):
|
|
|
@ -140,20 +140,22 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
|
|
|
|
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
|
|
|
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
|
|
|
"""Base extractor for FoolFuuka based boards/archives"""
|
|
|
|
"""Base extractor for FoolFuuka based boards/archives"""
|
|
|
|
subcategory = "board"
|
|
|
|
subcategory = "board"
|
|
|
|
pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
|
|
|
|
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/(?:page/)?(\d*))?$"
|
|
|
|
example = "https://archived.moe/a/"
|
|
|
|
example = "https://archived.moe/a/"
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, match):
|
|
|
|
def __init__(self, match):
|
|
|
|
FoolfuukaExtractor.__init__(self, match)
|
|
|
|
FoolfuukaExtractor.__init__(self, match)
|
|
|
|
self.board = match.group(match.lastindex)
|
|
|
|
self.board = self.groups[-2]
|
|
|
|
|
|
|
|
self.page = self.groups[-1]
|
|
|
|
|
|
|
|
|
|
|
|
def items(self):
|
|
|
|
def items(self):
|
|
|
|
index_base = "{}/_/api/chan/index/?board={}&page=".format(
|
|
|
|
index_base = "{}/_/api/chan/index/?board={}&page=".format(
|
|
|
|
self.root, self.board)
|
|
|
|
self.root, self.board)
|
|
|
|
thread_base = "{}/{}/thread/".format(self.root, self.board)
|
|
|
|
thread_base = "{}/{}/thread/".format(self.root, self.board)
|
|
|
|
|
|
|
|
|
|
|
|
for page in itertools.count(1):
|
|
|
|
page = self.page
|
|
|
|
with self.request(index_base + format(page)) as response:
|
|
|
|
for pnum in itertools.count(text.parse_int(page, 1)):
|
|
|
|
|
|
|
|
with self.request(index_base + format(pnum)) as response:
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
threads = response.json()
|
|
|
|
threads = response.json()
|
|
|
|
except ValueError:
|
|
|
|
except ValueError:
|
|
|
@ -167,6 +169,9 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
|
|
|
thread["_extractor"] = FoolfuukaThreadExtractor
|
|
|
|
thread["_extractor"] = FoolfuukaThreadExtractor
|
|
|
|
yield Message.Queue, thread["url"], thread
|
|
|
|
yield Message.Queue, thread["url"], thread
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if page:
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
|
|
|
class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
|
|
|
"""Base extractor for search results on FoolFuuka based boards/archives"""
|
|
|
|
"""Base extractor for search results on FoolFuuka based boards/archives"""
|
|
|
@ -179,17 +184,16 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
|
|
|
def __init__(self, match):
|
|
|
|
def __init__(self, match):
|
|
|
|
FoolfuukaExtractor.__init__(self, match)
|
|
|
|
FoolfuukaExtractor.__init__(self, match)
|
|
|
|
self.params = params = {}
|
|
|
|
self.params = params = {}
|
|
|
|
args = match.group(match.lastindex).split("/")
|
|
|
|
|
|
|
|
key = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for arg in args:
|
|
|
|
key = None
|
|
|
|
|
|
|
|
for arg in self.groups[-1].split("/"):
|
|
|
|
if key:
|
|
|
|
if key:
|
|
|
|
params[key] = text.unescape(arg)
|
|
|
|
params[key] = text.unescape(arg)
|
|
|
|
key = None
|
|
|
|
key = None
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
key = arg
|
|
|
|
key = arg
|
|
|
|
|
|
|
|
|
|
|
|
board = match.group(match.lastindex-1)
|
|
|
|
board = self.groups[-2]
|
|
|
|
if board != "_":
|
|
|
|
if board != "_":
|
|
|
|
params["boards"] = board
|
|
|
|
params["boards"] = board
|
|
|
|
|
|
|
|
|
|
|
|