# -*- coding: utf-8 -*-

# Copyright 2014, 2015 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extract images from albums at http://imgchili.net/"""

import re

from .common import SequentialExtractor
from .common import Message
from .common import filename_from_url

# Static description of this extractor module: its category name, the name of
# the extractor class defined below, the format templates for the target
# directory and file names, and the URL patterns it handles.
# NOTE(review): how these keys are consumed is decided outside this file --
# confirm against the loader before renaming any of them.
info = {
    "category": "imgchili",
    "extractor": "ImgchiliExtractor",
    "directory": ["{category}", "{title} - {key}"],
    "filename": "{num:>03}-{name}",
    "pattern": [
        # The single capture group is the path segment following "/album/".
        r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)",
    ],
}


class ImgchiliExtractor(SequentialExtractor):
    """Extractor yielding the images of one imgchili.net album."""

    def __init__(self, match, config):
        """Initialise the extractor.

        Args:
            match: Match object for the album URL; ``items()`` reads its
                ``string`` attribute. Presumably produced by one of the
                patterns in ``info["pattern"]`` -- verify against the caller.
            config: Passed through unchanged to ``SequentialExtractor``.
        """
        SequentialExtractor.__init__(self, config)
        self.match = match
        # Counter starting at zero; not used by the code visible here.
        # NOTE(review): presumably the running image number -- confirm.
        self.num = 0

    def items(self):
        """Yield the message tuples for this album.

        Requests ``self.match.string``, then yields in order: the message
        version, the request headers (a fixed ``Referer``), the directory
        metadata for the job, and one ``Message.Url`` tuple for every
        ``src="http://t..."`` attribute found in the page text.
        """
        page = self.request(self.match.string).text
        yield Message.Version, 1
        yield Message.Headers, {"Referer": "http://imgchili.net/"}
        yield Message.Directory, self.get_job_metadata(page)
        # Group 1 is everything after the leading "t" of the host name;
        # groups 2-4 are the two numeric path parts and the trailing name.
        pattern = r' src="http://t(\d+\.imgchili\.net/(\d+)/(\d+)_([^/"]+))"'
        for match in re.finditer(pattern, page):
            yield (
                Message.Url,
                self.get_file_url(match),
                self.get_file_metadata(match),
            )

    # NOTE(review): the visible source is cut off inside get_job_metadata().
    # The fragment below is preserved verbatim and must be restored from the
    # complete file. items() also relies on get_file_url() and
    # get_file_metadata(), which are not visible here either.
    #
    # def get_job_metadata(self, page):
    #     """Collect metadata for extractor-job"""
    #     title = self.extract(page, "