|
|
@ -4,7 +4,8 @@ import re
|
|
|
|
|
|
|
|
|
|
|
|
class Extractor(BasicExtractor):
|
|
|
|
class Extractor(BasicExtractor):
|
|
|
|
|
|
|
|
|
|
|
|
thread_url_fmt = "https://www.8ch.net/{0}/res/{1}.html"
|
|
|
|
url_base = "https://8ch.net"
|
|
|
|
|
|
|
|
thread_url_fmt = url_base + "/{0}/res/{1}.html"
|
|
|
|
regex = r'>File: <a href="([^"]+)">([^<]+)\.[^<]+<.*?<span class="postfilename"( title="([^"]+)")?>([^<]+)<'
|
|
|
|
regex = r'>File: <a href="([^"]+)">([^<]+)\.[^<]+<.*?<span class="postfilename"( title="([^"]+)")?>([^<]+)<'
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, match, config):
|
|
|
|
def __init__(self, match, config):
|
|
|
@ -18,4 +19,6 @@ class Extractor(BasicExtractor):
|
|
|
|
text = self.request(url).text
|
|
|
|
text = self.request(url).text
|
|
|
|
for match in re.finditer(self.regex, text):
|
|
|
|
for match in re.finditer(self.regex, text):
|
|
|
|
url, prefix, fullname, name = match.group(1, 2, 4, 5)
|
|
|
|
url, prefix, fullname, name = match.group(1, 2, 4, 5)
|
|
|
|
|
|
|
|
if url.startswith("/"):
|
|
|
|
|
|
|
|
url = self.url_base + url
|
|
|
|
yield (url, prefix + "-" + unquote(fullname or name))
|
|
|
|
yield (url, prefix + "-" + unquote(fullname or name))
|
|
|
|