[8chan] fix image urls

pull/13/head
Mike Fährmann 10 years ago
parent 54d40b85fb
commit ac002cc4b3

@ -4,7 +4,8 @@ import re
class Extractor(BasicExtractor):
thread_url_fmt = "https://www.8ch.net/{0}/res/{1}.html"
url_base = "https://8ch.net"
thread_url_fmt = url_base + "/{0}/res/{1}.html"
regex = r'>File: <a href="([^"]+)">([^<]+)\.[^<]+<.*?<span class="postfilename"( title="([^"]+)")?>([^<]+)<'
def __init__(self, match, config):
@ -18,4 +19,6 @@ class Extractor(BasicExtractor):
text = self.request(url).text
for match in re.finditer(self.regex, text):
url, prefix, fullname, name = match.group(1, 2, 4, 5)
if url.startswith("/"):
url = self.url_base + url
yield (url, prefix + "-" + unquote(fullname or name))

Loading…
Cancel
Save