From e3926bf351c15a864f0ae04322aca89c68bc40dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 5 Oct 2016 09:20:03 +0200 Subject: [PATCH] [hentaihere] add manga- and chapter-extractors --- gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/hentaihere.py | 67 ++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 gallery_dl/extractor/hentaihere.py diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 9b6ecf99..5505181c 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -30,6 +30,7 @@ modules = [ "hentai2read", "hentaibox", "hentaifoundry", + "hentaihere", "hitomi", "imagebam", "imagefap", diff --git a/gallery_dl/extractor/hentaihere.py b/gallery_dl/extractor/hentaihere.py new file mode 100644 index 00000000..ea35afd5 --- /dev/null +++ b/gallery_dl/extractor/hentaihere.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- + +# Copyright 2016 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract hentaimanga from https://hentaihere.com/""" + +from .. import text +from . import hentaicdn +import re + +class HentaihereMangaExtractor(hentaicdn.HentaicdnMangaExtractor): + """Extractor for mangas from hentaihere.com""" + category = "hentaihere" + pattern = [r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/?$"] + test = [ + ("http://hentaihere.com/m/S13812", { + "url": "167ec26c73c7d01ad8ad0a2b88257a901aa8330e", + }), + ("http://hentaihere.com/m/S7608", { + "url": "17dd982270456ce51ec7189f9e37728ef9f894c8", + }), + ] + + def __init__(self, match): + hentaicdn.HentaicdnMangaExtractor.__init__(self) + self.gid = match.group(1) + + def get_chapters(self): + return text.extract_iter( + self.request("http://hentaihere.com/m/S" + self.gid).text, + '
  • \n", "")[0] + pattern = r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by (.+) at " + match = re.match(pattern, title) + return { + "gallery-id": self.gid, + "title": match.group(1), + "type": match.group(2), + "chapter": self.chapter, + "chapter-name": match.group(3), + "author": match.group(4), + "count": len(images), + "lang": "en", + "language": "English", + }