From ffec41f35244b4b910eaad659b24ef8d15552fc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Sun, 12 Apr 2015 15:59:52 +0200
Subject: [PATCH] [imgchili] update to new extractor interface

---
NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Line breaks, blank lines and indentation were re-derived from the hunk header
(-1,24 +1,65) and the diffstat (61 insertions, 20 deletions). The two marker
strings passed to self.extract() in the removed get_title() and the added
get_job_metadata() were lost in that copy and are ASSUMED to be "<h1>" and
"</h1>"; the author's e-mail address is likewise missing from the From:
header. Confirm both against the upstream commit before applying.

 gallery_dl/extractor/imgchili.py | 81 ++++++++++++++++++++++++--------
 1 file changed, 61 insertions(+), 20 deletions(-)

diff --git a/gallery_dl/extractor/imgchili.py b/gallery_dl/extractor/imgchili.py
index afb9c0da..40932912 100644
--- a/gallery_dl/extractor/imgchili.py
+++ b/gallery_dl/extractor/imgchili.py
@@ -1,24 +1,65 @@
-from .common import BasicExtractor
-from ..util import filename_from_url
+# -*- coding: utf-8 -*-
+
+# Copyright 2014, 2015 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images from albums at http://imgchili.net/"""
+
+from .common import SequentialExtractor
+from .common import Message
+from .common import filename_from_url
 import re
 
-class Extractor(BasicExtractor):
+info = {
+    "category": "imgchili",
+    "extractor": "ImgchiliExtractor",
+    "directory": ["{category}", "{title} - {key}"],
+    "filename": "{num:>03}-{name}",
+    "pattern": [
+        r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)",
+    ],
+}
+
+class ImgchiliExtractor(SequentialExtractor):
 
     def __init__(self, match, config):
-        BasicExtractor.__init__(self, config)
-        self.url = match.group(0)
-        self.page = self.request(self.url).text;
-        self.category = "imgchili"
-
-        title = self.get_title()
-        pos = self.url.rindex("/")
-        self.directory = title + " - " + self.url[pos+1:]
-
-    def images(self):
-        pattern = r' src="http://t(\d+\.imgchili.net/[^"]+)"'
-        for match in re.finditer(pattern, self.page):
-            url = "http://i" + match.group(1)
-            yield url, filename_from_url(url)
-
-    def get_title(self):
-        return self.extract(self.page, "<h1>", "</h1>")[0]
+        SequentialExtractor.__init__(self, config)
+        self.match = match
+        self.num = 0
+
+    def items(self):
+        page = self.request(self.match.string).text
+        yield Message.Version, 1
+        yield Message.Headers, {"Referer": "http://imgchili.net/"}
+        yield Message.Directory, self.get_job_metadata(page)
+
+        pattern = r' src="http://t(\d+\.imgchili\.net/(\d+)/(\d+)_([^/"]+))"'
+        for match in re.finditer(pattern, page):
+            yield Message.Url, self.get_file_url(match), self.get_file_metadata(match)
+
+    def get_job_metadata(self, page):
+        """Collect metadata for extractor-job"""
+        title = self.extract(page, "<h1>", "</h1>")[0]
+        return {
+            "category": info["category"],
+            "title": title,
+            "key": self.match.group(1),
+        }
+
+    def get_file_metadata(self, match):
+        """Collect metadata for a downloadable file"""
+        self.num += 1
+        return {
+            "album-id": match.group(2),
+            "image-id": match.group(3),
+            "name": match.group(4),
+            "num": self.num,
+        }
+
+    @staticmethod
+    def get_file_url(match):
+        """Extract download-url from 'match'"""
+        return "http://i" + match.group(1)