diff --git a/gallery_dl/extractor/imgth.py b/gallery_dl/extractor/imgth.py index 7e4cce4e..9ae22a91 100644 --- a/gallery_dl/extractor/imgth.py +++ b/gallery_dl/extractor/imgth.py @@ -1,60 +1,73 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://imgth.com/""" +"""Extractors for https://imgth.com/""" -from .common import Extractor, Message +from .common import GalleryExtractor from .. import text -class ImgthGalleryExtractor(Extractor): +class ImgthGalleryExtractor(GalleryExtractor): """Extractor for image galleries from imgth.com""" category = "imgth" - subcategory = "gallery" - directory_fmt = ("{category}", "{gallery_id} {title}") - filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}" - archive_fmt = "{gallery_id}_{num}" - pattern = r"(?:https?://)?imgth\.com/gallery/(\d+)" - test = ("http://imgth.com/gallery/37/wallpaper-anime", { - "url": "4ae1d281ca2b48952cf5cca57e9914402ad72748", - "keyword": "6f8c00d6849ea89d1a028764675ec1fe9dbd87e2", - }) + root = "https://imgth.com" + pattern = r"(?:https?://)?(?:www\.)?imgth\.com/gallery/(\d+)" + test = ( + ("https://imgth.com/gallery/37/wallpaper-anime", { + "url": "4ae1d281ca2b48952cf5cca57e9914402ad72748", + "pattern": r"https://imgth\.com/images/2009/11/25" + r"/wallpaper-anime_\w+\.jpg", + "keyword": { + "count": 12, + "date": "dt:2009-11-25 18:21:00", + "extension": "jpg", + "filename": r"re:wallpaper-anime_\w+", + "gallery_id": 37, + "num": int, + "title": "Wallpaper anime", + "user": "celebrities", + }, + }), + ("https://www.imgth.com/gallery/37/wallpaper-anime"), + ) def __init__(self, match): - Extractor.__init__(self, match) - self.gid = match.group(1) - self.url_base = "https://imgth.com/gallery/" + self.gid + "/g/page/" + self.gallery_id = gid = match.group(1) + url = "{}/gallery/{}/g/".format(self.root, gid) + GalleryExtractor.__init__(self, match, url) - def items(self): - page = self.request(self.url_base + "0").text - data = self.metadata(page) - yield Message.Directory, data - for data["num"], url in enumerate(self.images(page), 1): - yield Message.Url, url, text.nameext_from_url(url, data) + def metadata(self, page): + extr = text.extract_from(page) + return { + "gallery_id": text.parse_int(self.gallery_id), + "title": text.unescape(extr("