From 2ace5c7b3ce5e065a6624d619e643406fa60fbc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 19 Oct 2017 18:25:31 +0200 Subject: [PATCH] [senmanga] fix extraction and download --- gallery_dl/extractor/senmanga.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py index 5196680b..cb91f765 100644 --- a/gallery_dl/extractor/senmanga.py +++ b/gallery_dl/extractor/senmanga.py @@ -21,16 +21,16 @@ class SenmangaChapterExtractor(Extractor): pattern = [r"(?:https?://)?raw\.senmanga\.com/([^/]+/[^/]+)"] test = [ ("http://raw.senmanga.com/Bokura-wa-Minna-Kawaisou/37A/1", { - "url": "32d88382fcad66859d089cd9a61249f375492ec5", + "url": "5f95140ff511d8497e2ec08fa7267c6bb231faec", "keyword": "705d941a150765edb33cd2707074bd703a93788c", - "content": "a791dda85ac0d37e3b36d754560cbb65b8dab5b9", + "content": "0e37b1995708ffc175f2e175d91a518e6948c379", }), ("http://raw.senmanga.com/Love-Lab/2016-03/1", { - "url": "d4f37c7347e56a09f9679d63c1f24cd32621d0b8", - "keyword": "4e72e4ade57671ad0af9c8d81feeff4259d5bbec", + "url": "8347b9f00c14b864dd3c19a1f5ae52adb2ef00de", + "keyword": "0765e9d81b7430b3055b25a2627d6438f62de635", }), ] - url_base = "http://raw.senmanga.com" + url_base = "https://raw.senmanga.com" def __init__(self, match): Extractor.__init__(self) @@ -38,7 +38,7 @@ class SenmangaChapterExtractor(Extractor): self.chapter_url = "{}/{}/".format(self.url_base, part) self.img_url = "{}/viewer/{}/".format(self.url_base, part) self.session.headers["Referer"] = self.chapter_url - self.session.headers["User-Agent"] = "Mozilla 5.0" + self.session.headers["User-Agent"] = "Mozilla/5.0" def items(self): data = self.get_job_metadata() @@ -51,13 +51,13 @@ class SenmangaChapterExtractor(Extractor): def get_job_metadata(self): """Collect metadata for extractor-job""" page = self.request(self.chapter_url).text + self.session.cookies.clear() title, pos = text.extract(page, '', '') - count, pos = text.extract(page, ' of ', ' ', pos) - manga, pos = text.extract(title, '| Raw | ', ' | Chapter ') - chapter, pos = text.extract(title, '', ' | Page ', pos) + count, pos = text.extract(page, ' of ', '\n', pos) + manga, _, chapter = title.partition(" - Chapter ") return { - "manga": text.unescape(manga.replace("-", " ")), - "chapter_string": chapter, + "manga": text.unescape(manga), + "chapter_string": chapter.partition(" - Page ")[0], "count": util.safe_int(count), "lang": "jp", "language": "Japanese",