From 35e09869d171334fafd0fcf64f1cd0085d7dd43b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 12 Jan 2018 14:59:49 +0100 Subject: [PATCH] [mangapark] fix image URLs and use HTTPS --- CHANGELOG.md | 2 ++ docs/supportedsites.rst | 4 ++-- gallery_dl/extractor/mangapark.py | 28 ++++++++++++++-------------- scripts/build_supportedsites.py | 8 ++++++-- test/test_extractors.py | 3 +-- 5 files changed, 25 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7faecfe7..cc09c0b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ - `puremashiro` - http://reader.puremashiro.moe/ ([#66](https://github.com/mikf/gallery-dl/issues/66)) - `idolcomplex` - https://idol.sankakucomplex.com/ - Added an option to filter reblogs on `tumblr` ([#61](https://github.com/mikf/gallery-dl/issues/61)) +- Added OAuth user authentication for `tumblr` ([#65](https://github.com/mikf/gallery-dl/issues/65)) +- Added support for `slideshare` mobile URLs ([#67](https://github.com/mikf/gallery-dl/issues/67)) - Improved pagination for various …booru sites to work around page limits - Fixed chapter information parsing for certain manga on `kissmanga` ([#58](https://github.com/mikf/gallery-dl/issues/58)) and `batoto` ([#60](https://github.com/mikf/gallery-dl/issues/60)) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 3c5a5e8b..889b7eae 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -48,7 +48,7 @@ Manga Fox http://www.mangafox.me/ Chapters Manga Here http://www.mangahere.co/ Chapters, Manga Manga Stream https://mangastream.com/ Chapters Mangapanda http://www.mangapanda.com/ Chapters, Manga -MangaPark http://mangapark.me/ Chapters, Manga +MangaPark https://mangapark.me/ Chapters, Manga Mangareader http://www.mangareader.net/ Chapters, Manga nhentai https://nhentai.net/ Galleries Niconico Seiga http://seiga.nicovideo.jp Images from Users, individual Images Required @@ -71,7 +71,7 @@ Sense-Scans http://sensescans.com/ Chapters, Manga SlideShare https://www.slideshare.net/ Presentations Spectrum Nexus |http://www.thes-0| Chapters, Manga The /b/ Archive https://thebarchive.com/ Threads -Tumblr https://www.tumblr.com/ Images from Users, Posts, Tag-Searches Optional (OAuth) +Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth) Twitter https://twitter.com/ Tweets Warosu https://warosu.org/ Threads World Three http://www.slide.world-three.org/ Chapters, Manga diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index ca0c276c..ceedc604 100644 --- a/gallery_dl/extractor/mangapark.py +++ b/gallery_dl/extractor/mangapark.py @@ -1,21 +1,22 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2017 Mike Fährmann +# Copyright 2015-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract manga-chapters and entire manga from http://mangapark.me/""" +"""Extract manga-chapters and entire manga from https://mangapark.me/""" from .common import Extractor, MangaExtractor, Message from .. import text, util +from urllib.parse import urljoin class MangaparkExtractor(Extractor): """Base class for mangapark extractors""" category = "mangapark" - root = "http://mangapark.me" + root = "https://mangapark.me" @staticmethod def parse_chapter_path(path, data): @@ -38,9 +39,9 @@ class MangaparkExtractor(Extractor): class MangaparkMangaExtractor(MangaparkExtractor, MangaExtractor): """Extractor for manga from mangapark.me""" pattern = [r"(?:https?://)?(?:www\.)?(mangapark\.me/manga/[^/]+)/?$"] - test = [("http://mangapark.me/manga/aria", { - "url": "0c5a57e2b402c8cc9ceefba82619f6280346f984", - "keyword": "75a68497d1f305eaf9b9fec235bd1da6e499546f", + test = [("https://mangapark.me/manga/aria", { + "url": "4cb5606530b4eeacde7a4c9fd38296eb6ff46563", + "keyword": "e87ab8e7ad2571bbe587881e7fd422e8f582f818", })] def chapters(self, page): @@ -78,16 +79,16 @@ class MangaparkChapterExtractor(MangaparkExtractor): pattern = [(r"(?:https?://)?(?:www\.)?mangapark\.me(/manga/[^/]+" r"/s\d+(?:/v\d+)?/c\d+[^/]*(?:/e\d+)?)")] test = [ - ("http://mangapark.me/manga/gosu/s2/c55", { + ("https://mangapark.me/manga/gosu/s2/c55", { "count": 50, "keyword": "72ac1714b492b021a1fe26d9271ed132d51a930e", }), - (("http://mangapark.me/manga/" - "ad-astra-per-aspera-hata-kenjirou/s1/c1.2"), { + (("https://mangapark.me/manga/" + "ad-astra-per-aspera-hata-kenjirou/s5/c1.2"), { "count": 40, - "keyword": "0ac6a028f6479b2ecfe7b2d014074a0aea027e90", + "keyword": "fb5082bb60e19cae0a194b89f69f333888a9325d", }), - ("http://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", { + ("https://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", { "count": 15, "keyword": "dc9233cdd83d8659300f0a20ec3c493873f71741", }), @@ -124,8 +125,7 @@ class MangaparkChapterExtractor(MangaparkExtractor): data["count"] = util.safe_int(data["count"]) return data - @staticmethod - def get_images(page): + def get_images(self, page): """Collect image-urls, -widths and -heights""" pos = 0 num = 0 @@ -136,7 +136,7 @@ class MangaparkChapterExtractor(MangaparkExtractor): num += 1 width , pos = text.extract(page, ' width="', '"', pos) height, pos = text.extract(page, ' _heighth="', '"', pos) - yield url, { + yield urljoin(self.root, url), { "page": num, "width": width, "height": height, diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py index c6b0bc13..951f8010 100755 --- a/scripts/build_supportedsites.py +++ b/scripts/build_supportedsites.py @@ -154,6 +154,7 @@ class RstTable(): _subs = [] + def substitute(value, size): sub = "|{}-{}|".format(value[:15], len(_subs)) _subs.append((value, sub)) @@ -180,7 +181,7 @@ def build_list(): for extrlist in extractors: extrlist.sort(key=subcategory_key) for extr in extrlist: - extr.cat = map_category(extr.category) + extr.cat = map_category(extr.category) extr.subcat = map_subcategory(extr.subcategory) extractors.sort(key=category_key) @@ -206,7 +207,10 @@ def map_category(c): def map_subcategory(sc): - return SUBCATEGORY_MAP.get(sc, sc.capitalize() + "s") + if sc in SUBCATEGORY_MAP: + return SUBCATEGORY_MAP[sc] + sc = sc.capitalize() + return sc if sc.endswith("s") else sc + "s" def category_key(extrlist): diff --git a/test/test_extractors.py b/test/test_extractors.py index a5735af9..a16baf85 100644 --- a/test/test_extractors.py +++ b/test/test_extractors.py @@ -20,8 +20,7 @@ SKIP = { # temporary issues "batoto", # R.I.P. "imgyt", # server maintenance - "loveisover", - "mangapark", + "luscious", }