use consistent names for URL constants

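root := <scheme>://<host> (no trailing slash, e.g. "https://imgbox.com")
base_url := <root>/<common path> (e.g. "https://www.hentai-foundry.com/pictures/user/")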
pull/54/head
Mike Fährmann 7 years ago
parent 69cbc0619f
commit 92027f67f9
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -26,23 +26,23 @@ class DynastyscansChapterExtractor(Extractor):
test = [
(("http://dynasty-scans.com/chapters/"
"hitoribocchi_no_oo_seikatsu_ch33"), {
"url": "ff79ea9956522a8dafd261c1fbe3c74aa8470dc5",
"url": "dce64e8c504118f1ab4135c00245ea12413896cb",
"keyword": "fb2f470b995df5b301ccede31ed9829a010236db",
}),
(("http://dynasty-scans.com/chapters/"
"new_game_the_spinoff_special_13"), {
"url": "2cd5e04bd16f842dc884c145a44cf0c64ec27a21",
"url": "dbe5bbb74da2edcfb1832895a484e2a40bc8b538",
"keyword": "281bbe0fb74b812ced595619ca5876983490dc0e",
}),
]
url_base = "https://dynasty-scans.com/"
root = "https://dynasty-scans.com"
def __init__(self, match):
Extractor.__init__(self)
self.chaptername = match.group(1)
def items(self):
page = self.request(self.url_base + "chapters/" + self.chaptername,
page = self.request(self.root + "/chapters/" + self.chaptername,
encoding="utf-8").text
data = self.get_job_metadata(page)
imgs = self.get_image_data(page)
@ -50,7 +50,7 @@ class DynastyscansChapterExtractor(Extractor):
yield Message.Version, 1
yield Message.Directory, data
for data["page"], img in enumerate(imgs, 1):
url = self.url_base + img["image"]
url = self.root + img["image"]
text.nameext_from_url(url, data)
data["name"] = img["name"]
yield Message.Url, url, data

@ -29,7 +29,7 @@ class HentaifoundryUserExtractor(Extractor):
"exception": exception.NotFoundError,
}),
]
url_base = "https://www.hentai-foundry.com/pictures/user/"
base_url = "https://www.hentai-foundry.com/pictures/user/"
def __init__(self, match):
Extractor.__init__(self)
@ -50,18 +50,18 @@ class HentaifoundryUserExtractor(Extractor):
needle = 'thumbTitle"><a href="/pictures/user/'
for _ in range((count-1) // 25 + 1):
pos = 0
url = self.url_base + self.artist + "/page/" + str(num)
url = self.base_url + self.artist + "/page/" + str(num)
page = self.request(url).text
for _ in range(25):
part, pos = text.extract(page, needle, '"', pos)
if not part:
return
yield self.get_image_metadata(self.url_base + part)
yield self.get_image_metadata(self.base_url + part)
num += 1
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
url = self.url_base + self.artist + "?enterAgree=1"
url = self.base_url + self.artist + "?enterAgree=1"
response = self.request(url, fatal=False)
if response.status_code == 404:
raise exception.NotFoundError("user")
@ -73,15 +73,14 @@ class HentaifoundryUserExtractor(Extractor):
def get_image_metadata(self, url):
"""Collect metadata for an image"""
page = self.request(url).text
offset = len(self.url_base) + len(self.artist)
index = text.extract(url, '/', '/', offset)[0]
index = url.rsplit("/", 2)[1]
title, pos = text.extract(
page, 'Pictures</a> &raquo; <span>', '<')
url, pos = text.extract(
part, pos = text.extract(
page, '//pictures.hentai-foundry.com', '"', pos)
data = {"index": util.safe_int(index), "title": text.unescape(title)}
text.nameext_from_url(url, data)
return "https://pictures.hentai-foundry.com" + url, data
text.nameext_from_url(part, data)
return "https://pictures.hentai-foundry.com" + part, data
def set_filters(self, token):
"""Set site-internal filters to show all images"""

@ -25,7 +25,7 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
"keyword": "0ab7bef5cf995d9229dc900dc508311cefb32306",
"content": "596e6bfa157f2c7169805d50075c2986549973a8",
})]
url_base = "http://www.imagebam.com"
root = "http://www.imagebam.com"
def __init__(self, match):
AsynchronousExtractor.__init__(self)
@ -44,7 +44,7 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
url = self.url_base + "/gallery/" + self.gkey
url = self.root + "/gallery/" + self.gkey
page = self.request(url, encoding="utf-8").text
data, pos = text.extract_all(page, (
(None , "<img src='/img/icons/photos.png'", ""),
@ -59,7 +59,7 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
"""Yield all image-urls and -ids for a gallery"""
done = False
while not done:
page = self.request(self.url_base + url).text
page = self.request(self.root + url).text
pos = text.extract(
page, 'class="btn btn-default" title="Next">', ''
)[1]

@ -16,7 +16,7 @@ import re
class ImgboxExtractor(Extractor):
"""Base class for imgbox extractors"""
category = "imgbox"
url_base = "https://imgbox.com"
root = "https://imgbox.com"
def items(self):
data = self.get_job_metadata()
@ -24,7 +24,7 @@ class ImgboxExtractor(Extractor):
yield Message.Directory, data
for image_key in self.get_image_keys():
imgpage = self.request(self.url_base + "/" + image_key).text
imgpage = self.request(self.root + "/" + image_key).text
imgdata = self.get_image_metadata(imgpage)
if imgdata["filename"]:
imgdata.update(data)
@ -85,7 +85,7 @@ class ImgboxGalleryExtractor(AsynchronousExtractor, ImgboxExtractor):
self.image_keys = []
def get_job_metadata(self):
page = self.request(self.url_base + "/g/" + self.gallery_key).text
page = self.request(self.root + "/g/" + self.gallery_key).text
if "The specified gallery could not be found." in page:
raise exception.NotFoundError("gallery")
self.image_keys = re.findall(r'<a href="/([^"]+)"><img alt="', page)

@ -15,13 +15,13 @@ from .. import text
class ImgchiliExtractor(Extractor):
"""Base class for imgchili extractors"""
category = "imgchili"
url_base = "https://imgchili.net/"
root = "https://imgchili.net"
def __init__(self, match):
Extractor.__init__(self)
self.url = match.group(0)
self.match = match
self.session.headers["Referer"] = self.url_base
self.session.headers["Referer"] = self.root
def items(self):
page = self.request(self.url, encoding="utf-8").text

@ -21,22 +21,22 @@ class SenmangaChapterExtractor(Extractor):
pattern = [r"(?:https?://)?raw\.senmanga\.com/([^/]+/[^/]+)"]
test = [
("http://raw.senmanga.com/Bokura-wa-Minna-Kawaisou/37A/1", {
"url": "32d88382fcad66859d089cd9a61249f375492ec5",
"url": "5f95140ff511d8497e2ec08fa7267c6bb231faec",
"keyword": "705d941a150765edb33cd2707074bd703a93788c",
"content": "a791dda85ac0d37e3b36d754560cbb65b8dab5b9",
}),
("http://raw.senmanga.com/Love-Lab/2016-03/1", {
"url": "d4f37c7347e56a09f9679d63c1f24cd32621d0b8",
"url": "8347b9f00c14b864dd3c19a1f5ae52adb2ef00de",
"keyword": "4e72e4ade57671ad0af9c8d81feeff4259d5bbec",
}),
]
url_base = "http://raw.senmanga.com"
root = "https://raw.senmanga.com"
def __init__(self, match):
Extractor.__init__(self)
part = match.group(1)
self.chapter_url = "{}/{}/".format(self.url_base, part)
self.img_url = "{}/viewer/{}/".format(self.url_base, part)
self.chapter_url = "{}/{}/".format(self.root, part)
self.img_url = "{}/viewer/{}/".format(self.root, part)
self.session.headers["Referer"] = self.chapter_url
self.session.headers["User-Agent"] = "Mozilla 5.0"

Loading…
Cancel
Save