code adjustments according to PEP 8, part 2

pull/13/head
Mike Fährmann 8 years ago
parent f1b7d41608
commit 94e10f249a

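Most hunks below exist only to satisfy PEP 8's 79-character line limit (E501). The most common technique is implicit string-literal concatenation: Python joins adjacent literals at compile time, so a long regex pattern can be split across lines inside its existing brackets without changing its value. A minimal sketch of the idea, using a made-up URL pattern rather than one from this commit:

import re

# Two adjacent raw-string literals form one logical string inside the list:
pattern = [r"(?:https?://)?(?:www\.)?example\.org/post"
           r"(?:/(?:index)?)?\?tags=([^&]+)"]

assert re.match(pattern[0], "https://example.org/post?tags=sample")
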
@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann
# Copyright 2015-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -10,6 +10,7 @@
from . import booru
class ThreedeebooruExtractor(booru.JSONBooruExtractor):
"""Base class for 3dbooru extractors"""
category = "3dbooru"
@ -19,16 +20,21 @@ class ThreedeebooruExtractor(booru.JSONBooruExtractor):
"User-Agent": "Mozilla/5.0",
}
class ThreedeebooruTagExtractor(ThreedeebooruExtractor, booru.BooruTagExtractor):
class ThreedeebooruTagExtractor(ThreedeebooruExtractor,
booru.BooruTagExtractor):
"""Extractor for images from behoimi.org based on search-tags"""
subcategory = "tag"
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post(?:/(?:index)?)?\?tags=([^&]+)"]
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post"
r"(?:/(?:index)?)?\?tags=([^&]+)"]
test = [("http://behoimi.org/post?tags=himekawa_azuru dress", {
"url": "ecb30c6aaaf8a6ff8f55255737a9840832a483c1",
"content": "11cbda40c287e026c1ce4ca430810f761f2d0b2a",
})]
class ThreedeebooruPoolExtractor(ThreedeebooruExtractor, booru.BooruPoolExtractor):
class ThreedeebooruPoolExtractor(ThreedeebooruExtractor,
booru.BooruPoolExtractor):
"""Extractor for image-pools from behoimi.org"""
subcategory = "pool"
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(\d+)"]
@ -37,7 +43,9 @@ class ThreedeebooruPoolExtractor(ThreedeebooruExtractor, booru.BooruPoolExtracto
"content": "fd5b37c5c6c2de4b4d6f1facffdefa1e28176554",
})]
class ThreedeebooruPostExtractor(ThreedeebooruExtractor, booru.BooruPostExtractor):
class ThreedeebooruPostExtractor(ThreedeebooruExtractor,
booru.BooruPostExtractor):
"""Extractor for single images from behoimi.org"""
subcategory = "post"
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(\d+)"]

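The hunks above also show how class statements with two bases get wrapped: break after the first base and align the second under the opening parenthesis. A self-contained sketch with placeholder class names standing in for the real booru bases:

class Base:
    """Placeholder for an extractor base class."""

class TagMixin:
    """Placeholder for booru.BooruTagExtractor."""

class ExampleTagExtractor(Base,
                          TagMixin):
    """Both bases still apply; only the layout changed."""
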
@ -10,6 +10,7 @@
from . import chan
class FourchanThreadExtractor(chan.ChanThreadExtractor):
"""Extractor for images from threads from 4chan.org"""
category = "4chan"

@ -10,6 +10,7 @@
from . import chan
class InfinitychanThreadExtractor(chan.ChanThreadExtractor):
"""Extractor for images from threads from 8ch.net"""
category = "8chan"

@ -8,7 +8,6 @@
import re
import importlib
from .. import config
modules = [
"pixiv",
@ -75,6 +74,7 @@ modules = [
"test",
]
def find(url):
"""Find suitable extractor for the given url"""
for pattern, klass in _list_patterns():
@ -83,6 +83,7 @@ def find(url):
return klass(match)
return None
def extractors():
"""Yield all available extractor classes"""
return sorted(
@ -90,12 +91,14 @@ def extractors():
key=lambda x: x.__name__
)
# --------------------------------------------------------------------
# internals
_cache = []
_module_iter = iter(modules)
def _list_patterns():
"""Yield all available (pattern, class) tuples"""
yield from _cache
@ -110,6 +113,7 @@ def _list_patterns():
_cache.extend(tuples)
yield from tuples
def _get_classes(module):
"""Return a list of all extractor classes in a module"""
return [

@ -26,7 +26,8 @@ class BatotoExtractor(Extractor):
if username and password:
cookies = self._login_impl(username, password)
for key, value in cookies.items():
self.session.cookies.set(key, value, domain=".bato.to", path="/")
self.session.cookies.set(
key, value, domain=".bato.to", path="/")
@cache(maxage=360*24*60*60, keyarg=1)
def _login_impl(self, username, password):

@ -14,6 +14,7 @@ import xml.etree.ElementTree as ET
import json
import urllib.parse
class BooruExtractor(Extractor):
"""Base class for all booru extractors"""
info = {}
@ -33,7 +34,9 @@ class BooruExtractor(Extractor):
yield Message.Headers, self.headers
for data in self.items_impl():
try:
yield Message.Url, self.get_file_url(data), self.get_file_metadata(data)
url = self.get_file_url(data)
data = self.get_file_metadata(data)
yield Message.Url, url, data
except KeyError:
continue
@ -75,7 +78,7 @@ class JSONBooruExtractor(BooruExtractor):
self.update_page(reset=True)
while True:
images = json.loads(
self.request(self.api_url, verify=True, params=self.params,
self.request(self.api_url, params=self.params,
headers=self.headers).text
)
for data in images:
@ -91,7 +94,7 @@ class XMLBooruExtractor(BooruExtractor):
self.update_page(reset=True)
while True:
root = ET.fromstring(
self.request(self.api_url, verify=True, params=self.params).text
self.request(self.api_url, params=self.params).text
)
for item in root:
yield item.attrib

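The booru.py hunk above replaces one over-long yield with two named temporaries. Besides fitting the line limit, this keeps the except KeyError scoped to the two lookups that can actually raise. A hedged re-creation, with dummy dict access standing in for the real get_file_url()/get_file_metadata() calls:

def items_sketch(posts):
    for post in posts:
        try:
            url = post["file_url"]     # may raise KeyError
            data = {"id": post["id"]}  # may raise KeyError
        except KeyError:
            continue                   # skip incomplete posts
        yield url, data

result = list(items_sketch([{"file_url": "/a.jpg", "id": 1}, {"id": 2}]))
assert result == [("/a.jpg", {"id": 1})]
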
@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text
class ChanThreadExtractor(Extractor):
"""Base class for extractors for Futaba Channel boards"""
category = "chan"

@ -10,11 +10,13 @@
from . import booru
class DanbooruExtractor(booru.JSONBooruExtractor):
"""Base class for danbooru extractors"""
category = "danbooru"
api_url = "https://danbooru.donmai.us/posts.json"
class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
"""Extractor for images from danbooru based on search-tags"""
subcategory = "tag"
@ -25,6 +27,7 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
"content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
})]
class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor):
"""Extractor for image-pools from danbooru"""
subcategory = "pool"
@ -34,6 +37,7 @@ class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor):
"content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
})]
class DanbooruPostExtractor(DanbooruExtractor, booru.BooruPostExtractor):
"""Extractor for single images from danbooru"""
subcategory = "post"

@ -49,7 +49,8 @@ class DeviantartImageExtractor(Extractor):
directory_fmt = ["{category}", "{artist}"]
filename_fmt = "{category}_{index}_{title}.{extension}"
pattern = [r"(?:https?://)?([^\.]+\.deviantart\.com/art/.+-(\d+))"]
test = [("http://shimoda7.deviantart.com/art/For-the-sake-of-a-memory-10073852", {
test = [(("http://shimoda7.deviantart.com/art/"
"For-the-sake-of-a-memory-10073852"), {
"url": "71345ce3bef5b19bd2a56d7b96e6b5ddba747c2e",
"keyword": "ccac27b8f740fc943afca9460608e02c6cbcdf96",
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
@ -66,11 +67,12 @@ class DeviantartImageExtractor(Extractor):
data = self.get_data(page)
data.update(self.get_image(page))
tlen = len(data["title"])
text.nameext_from_url(data["image"], data)
data["title"] = text.unescape(data["title"])
data["description"] = text.unescape(text.unescape(data["description"]))
data["artist"] = text.extract(data["url"], "//", ".")[0]
data["date"] = text.extract(data["date"], ", ", " in ", len(data["title"]))[0]
data["date"] = text.extract(data["date"], ", ", " in ", tlen)[0]
yield Message.Version, 1
yield Message.Directory, data

@ -10,12 +10,14 @@
from .foolslide import FoolslideChapterExtractor
class DokireaderChapterExtractor(FoolslideChapterExtractor):
"""Extractor for manga-chapters from kobato.hologfx.com"""
category = "dokireader"
pattern = [(r"(?:https?://)?(kobato\.hologfx\.com/reader/read/"
r"[^/]+/([a-z]{2})/\d+/\d+)")]
test = [("https://kobato.hologfx.com/reader/read/hitoribocchi_no_oo_seikatsu/en/3/34", {
test = [(("https://kobato.hologfx.com/reader/read/"
"hitoribocchi_no_oo_seikatsu/en/3/34"), {
"keyword": "4ee981ae14c6643f6a03a14c9f2c0d4898202671",
})]

@ -13,6 +13,7 @@ from .. import text
import re
import json
class DynastyscansChapterExtractor(Extractor):
"""Extractor for manga-chapters from dynasty-scans.com"""
category = "dynastyscans"
@ -21,11 +22,13 @@ class DynastyscansChapterExtractor(Extractor):
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
pattern = [r"(?:https?://)?(?:www\.)?dynasty-scans\.com/chapters/([^/]+)"]
test = [
("http://dynasty-scans.com/chapters/hitoribocchi_no_oo_seikatsu_ch33", {
(("http://dynasty-scans.com/chapters/"
"hitoribocchi_no_oo_seikatsu_ch33"), {
"url": "63950fa1dfdef58ab842c1b9b854c5c1d650cfa0",
"keyword": "81bfda5b98b34ac2a7324bd9e2abad3df9cc7673",
}),
("http://dynasty-scans.com/chapters/new_game_the_spinoff_special_13", {
(("http://dynasty-scans.com/chapters/"
"new_game_the_spinoff_special_13"), {
"url": "6b28c733481ac498da341e85a9eb155864491731",
"keyword": "93b75d0c0aaeb849c99f2225a4b97f466bc3ace9",
}),
@ -55,7 +58,7 @@ class DynastyscansChapterExtractor(Extractor):
"""Collect metadata for extractor-job"""
info , pos = text.extract(page, "<h3 id='chapter-title'><b>", "</b>")
author, pos = text.extract(page, " by ", "</a>", pos)
date , pos = text.extract(page, '<i class="icon-calendar"></i> ', '<', pos)
date , pos = text.extract(page, '"icon-calendar"></i> ', '<', pos)
match = re.match(
r"(?:<a [^>]+>)?([^<]+)(?:</a>)?(?: ch(\d+))?(?:: (.+))?",
info

@ -10,11 +10,13 @@
from . import booru
class E621Extractor(booru.JSONBooruExtractor):
"""Base class for e621 extractors"""
category = "e621"
api_url = "https://e621.net/post/index.json"
class E621TagExtractor(E621Extractor, booru.BooruTagExtractor):
"""Extractor for images from e621.net based on search-tags"""
subcategory = "tag"
@ -27,6 +29,7 @@ class E621TagExtractor(E621Extractor, booru.BooruTagExtractor):
"content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
})]
class E621PoolExtractor(E621Extractor, booru.BooruPoolExtractor):
"""Extractor for image-pools from e621.net"""
subcategory = "pool"
@ -36,6 +39,7 @@ class E621PoolExtractor(E621Extractor, booru.BooruPoolExtractor):
"content": "c2c87b7a9150509496cddc75ccab08109922876a",
})]
class E621PostExtractor(E621Extractor, booru.BooruPostExtractor):
"""Extractor for single images from e621.net"""
subcategory = "post"

@ -15,6 +15,7 @@ import time
import random
import requests
class ExhentaiGalleryExtractor(Extractor):
"""Extractor for image-galleries from exhentai.org"""
category = "exhentai"
@ -41,9 +42,12 @@ class ExhentaiGalleryExtractor(Extractor):
self.key = {}
self.count = 0
self.gid, self.token = match.groups()
self.original = config.interpolate(("extractor", "exhentai", "download-original"), True)
self.wait_min = config.interpolate(("extractor", "exhentai", "wait-min"), 3)
self.wait_max = config.interpolate(("extractor", "exhentai", "wait-max"), 6)
self.original = config.interpolate(
("extractor", "exhentai", "download-original"), True)
self.wait_min = config.interpolate(
("extractor", "exhentai", "wait-min"), 3)
self.wait_max = config.interpolate(
("extractor", "exhentai", "wait-max"), 6)
if self.wait_max < self.wait_min:
self.wait_max = self.wait_min
@ -75,7 +79,8 @@ class ExhentaiGalleryExtractor(Extractor):
"""Initialize headers"""
self.session.headers.update({
"User-Agent": "Mozilla/5.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept": "text/html,application/xhtml+xml,"
"application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Referer": "https://exhentai.org/",
})
@ -105,8 +110,8 @@ class ExhentaiGalleryExtractor(Extractor):
def get_images(self, page):
"""Collect url and metadata for all images in this gallery"""
url = "https://exhentai.org/s/" + text.extract(page, 'hentai.org/s/', '"')[0]
yield self.image_from_page(url)
part = text.extract(page, 'hentai.org/s/', '"')[0]
yield self.image_from_page("https://exhentai.org/s/" + part)
yield from self.images_from_api()
def image_from_page(self, url):
@ -122,10 +127,15 @@ class ExhentaiGalleryExtractor(Extractor):
("showkey" , 'var showkey="', '";'),
))[0]
self.key["start"] = data["startkey"]
self.key["show" ] = data["showkey"]
self.key["next" ] = data["nextkey"]
url = ("https://exhentai.org/fullimg.php" + text.unescape(data["origurl"])
if self.original and data["origurl"] else data["url"])
self.key["show"] = data["showkey"]
self.key["next"] = data["nextkey"]
if self.original and data["origurl"]:
part = text.unescape(data["origurl"])
url = "https://exhentai.org/fullimg.php" + part
else:
url = data["url"]
return url, text.nameext_from_url(data["url"], {
"num": 1,
"image-token": data["startkey"],
@ -133,14 +143,14 @@ class ExhentaiGalleryExtractor(Extractor):
def images_from_api(self):
"""Get image url and data from api calls"""
nextkey = self.key["next" ]
nextkey = self.key["next"]
request = {
"method" : "showpage",
"gid" : int(self.gid),
"imgkey" : nextkey,
"showkey": self.key["show"],
}
for request["page"] in range(2, self.count+1):
for request["page"] in range(2, self.count + 1):
while True:
try:
self.wait()
@ -150,9 +160,14 @@ class ExhentaiGalleryExtractor(Extractor):
pass
imgkey = nextkey
nextkey, pos = text.extract(page["i3"], "'", "'")
imgurl , pos = text.extract(page["i3"], '<img id="img" src="', '"', pos)
imgurl , pos = text.extract(page["i3"], 'id="img" src="', '"', pos)
origurl, pos = text.extract(page["i7"], '<a href="', '"')
url = text.unescape(origurl) if self.original and origurl else imgurl
if self.original and origurl:
url = text.unescape(origurl)
else:
url = imgurl
yield url, text.nameext_from_url(imgurl, {
"num": request["page"],
"image-token": imgkey
@ -173,7 +188,8 @@ class ExhentaiGalleryExtractor(Extractor):
password = config.interpolate(("extractor", "exhentai", "password"))
cookies = self._login_impl(username, password)
for key, value in cookies.items():
self.session.cookies.set(key, value, domain=".exhentai.org", path="/")
self.session.cookies.set(
key, value, domain=".exhentai.org", path="/")
@cache(maxage=360*24*60*60, keyarg=1)
def _login_impl(self, username, password):
@ -196,7 +212,8 @@ class ExhentaiGalleryExtractor(Extractor):
"PassWord": password,
"ipb_login_submit": "Login!",
}
self.session.headers["Referer"] = "http://e-hentai.org/bounce_login.php?b=d&bt=1-1"
referer = "http://e-hentai.org/bounce_login.php?b=d&bt=1-1"
self.session.headers["Referer"] = referer
response = self.session.post(url, data=params)
if "You are now logged in as:" not in response.text:

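Two of the exhentai hunks swap a wrapped conditional expression for a plain if/else; when both branches carry long expressions, the statement form stays under 79 columns and is easier to scan. A small stand-in with an invented base URL and field names:

def pick_url(data, original=True):
    # Statement form of: url = long_expr if cond else data["url"]
    if original and data.get("origurl"):
        url = "https://example.org/fullimg.php" + data["origurl"]
    else:
        url = data["url"]
    return url

assert pick_url({"url": "/small.jpg"}) == "/small.jpg"
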
@ -13,6 +13,7 @@ from .. import text, iso639_1
import json
import re
class FoolslideChapterExtractor(Extractor):
"""Base class for chapter extractors on foolslide based sites"""
subcategory = "chapter"

@ -11,13 +11,14 @@
from . import booru
from .. import config
class GelbooruExtractor(booru.XMLBooruExtractor):
"""Base class for gelbooru extractors"""
category = "gelbooru"
api_url = "http://gelbooru.com/"
def setup(self):
self.params.update({"page":"dapi", "s":"post", "q":"index"})
self.params.update({"page": "dapi", "s": "post", "q": "index"})
try:
cookies = config.get(("extractor", self.category, "cookies"))
self.session.cookies.update({
@ -32,26 +33,30 @@ class GelbooruExtractor(booru.XMLBooruExtractor):
else:
self.params["pid"] = 0
class GelbooruTagExtractor(GelbooruExtractor, booru.BooruTagExtractor):
"""Extractor for images from gelbooru.com based on search-tags"""
subcategory = "tag"
pattern = [(r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
r"\?page=post&s=list&tags=([^&]+)")]
pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
r"\?page=post&s=list&tags=([^&]+)"]
test = [("http://gelbooru.com/index.php?page=post&s=list&tags=bonocho", {
"content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
})]
# TODO: find out how to access pools via gelbooru-api
# class GelbooruPoolExtractor(GelbooruExtractor, booru.BooruPoolExtractor):
# """Extractor for image-pools from gelbooru.com"""
# subcategory = "pool"
# pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=pool&s=show&id=(\d+)"]
# pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
# r"\?page=pool&s=show&id=(\d+)"]
class GelbooruPostExtractor(GelbooruExtractor, booru.BooruPostExtractor):
"""Extractor for single images from gelbooru.com"""
subcategory = "post"
pattern = [(r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
r"\?page=post&s=view&id=(\d+)")]
pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
r"\?page=post&s=view&id=(\d+)"]
test = [("http://gelbooru.com/index.php?page=post&s=view&id=313638", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
})]

@ -10,6 +10,7 @@
from .foolslide import FoolslideChapterExtractor
class GomangaChapterExtractor(FoolslideChapterExtractor):
"""Extractor for manga-chapters from gomanga.co"""
category = "gomanga"

@ -12,6 +12,7 @@ from .common import Extractor, Message
from .. import text
import json
class HbrowseMangaExtractor(Extractor):
"""Extractor for mangas from hbrowse.com"""
category = "hbrowse"
@ -42,7 +43,8 @@ class HbrowseChapterExtractor(Extractor):
category = "hbrowse"
subcategory = "chapter"
directory_fmt = ["{category}", "{gallery-id} {title}", "c{chapter:>05}"]
filename_fmt = "{category}_{gallery-id}_{chapter:>05}_{num:>03}.{extension}"
filename_fmt = ("{category}_{gallery-id}_{chapter:>05}_"
"{num:>03}.{extension}")
pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)"]
test = [("http://www.hbrowse.com/10363/c00000", {
"url": "634f4800858913f097bc3b62a8fedaf74b5254bd",

@ -12,6 +12,7 @@ from .. import text
from . import hentaicdn
import re
class Hentai2readMangaExtractor(hentaicdn.HentaicdnMangaExtractor):
"""Extractor for mangas from hentai2read.com"""
category = "hentai2read"
@ -49,7 +50,9 @@ class Hentai2readChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
def __init__(self, match):
hentaicdn.HentaicdnChapterExtractor.__init__(self)
self.url_title, self.chapter = match.groups()
self.url = "http://hentai2read.com/{}/{}/".format(self.url_title, self.chapter)
self.url = "http://hentai2read.com/{}/{}/".format(
self.url_title, self.chapter
)
def get_job_metadata(self, page, images):
title = text.extract(page, "<title>", "</title>")[0]

@ -11,21 +11,24 @@
from .common import Extractor, Message
from .. import text, iso639_1
class HentaiboxChapterExtractor(Extractor):
"""Extractor for a single manga chapter from hentaibox.net"""
category = "hentaibox"
subcategory = "chapter"
directory_fmt = ["{category}", "{series}", "{title}"]
filename_fmt = "{num:>03}.{extension}"
pattern = [r"(?:https?://)?(?:www\.)?hentaibox\.net/[^/]+/(\d+)_\d+_([^/&]+)"]
test = [("http://www.hentaibox.net/hentai-manga/16_18_Original_Amazon-No-Hiyaku-Amazon-Elixir-Decensored", {
pattern = [r"(?:https?://)?(?:www\.)?hentaibox\.net/"
r"[^/]+/(\d+)_\d+_([^/&]+)"]
test = [(("http://www.hentaibox.net/hentai-manga/"
"16_18_Original_Amazon-No-Hiyaku-Amazon-Elixir-Decensored"), {
"url": "d1a50a9b289d284f178971e01cf312791888e057",
"keyword": "b4b100f800b716e573e072f01b5d604d9b436b70",
})]
def __init__(self, match):
Extractor.__init__(self)
self.url = match.group(0)
self.url = match.group(0)
self.count = match.group(1)
def items(self):
@ -51,4 +54,6 @@ class HentaiboxChapterExtractor(Extractor):
@staticmethod
def get_image_urls(page):
"""Extract and return a list of all image-urls"""
yield from text.extract_iter(page, '<span class="slideshow_path">', '</span>')
yield from text.extract_iter(
page, '<span class="slideshow_path">', '</span>'
)

@ -12,6 +12,7 @@ from .common import Extractor, Message
from .. import text
import json
class HentaicdnMangaExtractor(Extractor):
"""Base class for extractors for mangas"""
subcategory = "manga"
@ -29,7 +30,8 @@ class HentaicdnChapterExtractor(Extractor):
"""Base class for extractors for a single manga chapter"""
subcategory = "chapter"
directory_fmt = ["{category}", "{gallery-id} {title}"]
filename_fmt = "{category}_{gallery-id}_{chapter:>02}_{num:>03}.{extension}"
filename_fmt = ("{category}_{gallery-id}_{chapter:>02}_"
"{num:>03}.{extension}")
url = ""
def items(self):

@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text, exception
class HentaifoundryUserExtractor(Extractor):
"""Extractor for all images of a hentai-foundry-user"""
category = "hentaifoundry"
@ -18,8 +19,10 @@ class HentaifoundryUserExtractor(Extractor):
directory_fmt = ["{category}", "{artist}"]
filename_fmt = "{category}_{index}_{title}.{extension}"
pattern = [
r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)/?$",
r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile",
(r"(?:https?://)?(?:www\.)?hentai-foundry\.com/"
r"pictures/user/([^/]+)/?$"),
(r"(?:https?://)?(?:www\.)?hentai-foundry\.com/"
r"user/([^/]+)/profile"),
]
test = [
("http://www.hentai-foundry.com/pictures/user/Tenpura", {
@ -62,7 +65,8 @@ class HentaifoundryUserExtractor(Extractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
response = self.session.get(self.url_base + self.artist + "?enterAgree=1")
url = self.url_base + self.artist + "?enterAgree=1"
response = self.session.get(url)
if response.status_code == 404:
raise exception.NotFoundError("user")
page = response.text
@ -73,9 +77,12 @@ class HentaifoundryUserExtractor(Extractor):
def get_image_metadata(self, url):
"""Collect metadata for an image"""
page = self.request(url).text
index = text.extract(url, '/', '/', len(self.url_base) + len(self.artist))[0]
title, pos = text.extract(page, 'Pictures</a> &raquo; <span>', '<')
url , pos = text.extract(page, '//pictures.hentai-foundry.com', '"', pos)
offset = len(self.url_base) + len(self.artist)
index = text.extract(url, '/', '/', offset)[0]
title, pos = text.extract(
page, 'Pictures</a> &raquo; <span>', '<')
url, pos = text.extract(
page, '//pictures.hentai-foundry.com', '"', pos)
data = {"index": index, "title": text.unescape(title)}
text.nameext_from_url(url, data)
return "http://pictures.hentai-foundry.com" + url, data
@ -118,7 +125,8 @@ class HentaifoundryImageExtractor(Extractor):
r"(?:pictures/user/([^/]+)/(\d+)"
r"|[^/]/([^/]+)/(\d+))")]
test = [
("http://www.hentai-foundry.com/pictures/user/Tenpura/340854/notitle", {
(("http://www.hentai-foundry.com/"
"pictures/user/Tenpura/340854/notitle"), {
"url": "f3c0739bf86543697deabbed4bf99eb95a04582b",
"keyword": "96217c5becc1369c36dafa201c3c208518de8f1f",
"content": "5c14cd10deaad79a5152f9de45c9203cf76165a0",
@ -146,10 +154,11 @@ class HentaifoundryImageExtractor(Extractor):
response = self.session.get(url + "?enterAgree=1")
if response.status_code == 404:
raise exception.NotFoundError("image")
extr = text.extract
page = response.text
artist, pos = text.extract(page, '<a href="/pictures/user/', '"')
title , pos = text.extract(page, 'Pictures</a> &raquo; <span>', '<', pos)
url , pos = text.extract(page, '//pictures.hentai-foundry.com', '"', pos)
artist, pos = extr(page, '<a href="/pictures/user/', '"')
title , pos = extr(page, 'Pictures</a> &raquo; <span>', '<', pos)
url , pos = extr(page, '//pictures.hentai-foundry.com', '"', pos)
data = {
"artist": artist,
"index": self.index,

@ -12,6 +12,7 @@ from .. import text
from . import hentaicdn
import re
class HentaihereMangaExtractor(hentaicdn.HentaicdnMangaExtractor):
"""Extractor for mangas from hentaihere.com"""
category = "hentaihere"
@ -32,7 +33,7 @@ class HentaihereMangaExtractor(hentaicdn.HentaicdnMangaExtractor):
def get_chapters(self):
return text.extract_iter(
self.request("http://hentaihere.com/m/S" + self.gid).text,
'<li class="sub-chp clearfix">\n<a href="','"'
'<li class="sub-chp clearfix">\n<a href="', '"'
)
@ -48,7 +49,9 @@ class HentaihereChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
def __init__(self, match):
hentaicdn.HentaicdnChapterExtractor.__init__(self)
self.gid, self.chapter = match.groups()
self.url = "http://hentaihere.com/m/S{}/{}/1".format(self.gid, self.chapter)
self.url = "http://hentaihere.com/m/S{}/{}/1".format(
self.gid, self.chapter
)
def get_job_metadata(self, page, images):
title = text.extract(page, "<title>", "</title>")[0]

@ -12,6 +12,7 @@ from .common import Extractor, Message
from .. import text, iso639_1
import string
class HitomiGalleryExtractor(Extractor):
"""Extractor for image galleries from hitomi.la"""
category = "hitomi"
@ -29,7 +30,8 @@ class HitomiGalleryExtractor(Extractor):
self.gid = match.group(1)
def items(self):
page = self.request("https://hitomi.la/galleries/" + self.gid + ".html").text
url = "https://hitomi.la/galleries/" + self.gid + ".html"
page = self.request(url).text
data = self.get_job_metadata(page)
images = self.get_image_urls(page)
data["count"] = len(images)
@ -41,8 +43,8 @@ class HitomiGalleryExtractor(Extractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
group = ""
gtype = ""
group = ""
gtype = ""
series = ""
_ , pos = text.extract(page, '<h1><a href="/reader/', '')
title , pos = text.extract(page, '.html">', "</a>", pos)

@ -11,6 +11,7 @@
from .common import Extractor, AsynchronousExtractor, Message
from .. import text
class ImagebamGalleryExtractor(AsynchronousExtractor):
"""Extractor for image galleries from imagebam.com"""
category = "imagebam"
@ -18,7 +19,8 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
directory_fmt = ["{category}", "{title} - {gallery-key}"]
filename_fmt = "{num:>03}-{filename}"
pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*"]
test = [("http://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
test = [(("http://www.imagebam.com/"
"gallery/adz2y0f9574bjpmonaismyrhtjgvey4o"), {
"url": "d7a4483b6d5ebba81950a349aad58ae034c60eda",
"keyword": "e4a9395dbd06d4af3172a6a61c90601bc47ee18c",
"content": "596e6bfa157f2c7169805d50075c2986549973a8",
@ -56,13 +58,15 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
done = False
while not done:
page = self.request(self.url_base + url).text
_ , pos = text.extract(page, 'class="btn btn-default" title="Next">', '')
pos = text.extract(
page, 'class="btn btn-default" title="Next">', ''
)[1]
if pos == 0:
done = True
else:
url, pos = text.extract(page, ' href="', '"', pos-70)
image_id , pos = text.extract(page, '<img class="image" id="', '"', pos)
image_url, pos = text.extract(page, ' src="', '"', pos)
image_id , pos = text.extract(page, 'class="image" id="', '"', pos)
image_url, pos = text.extract(page, 'src="', '"', pos)
yield image_url, image_id
@ -85,8 +89,8 @@ class ImagebamImageExtractor(Extractor):
def items(self):
page = self.request("http://www.imagebam.com/image/" + self.token).text
url = text.extract(page, 'property="og:image" content="', '"')[0]
data = text.nameext_from_url(url, {"token": self.token})
iurl = text.extract(page, 'property="og:image" content="', '"')[0]
data = text.nameext_from_url(iurl, {"token": self.token})
yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, url, data
yield Message.Url, iurl, data

@ -12,6 +12,7 @@ from .common import Extractor, Message
from .. import text
import json
class ImagefapGalleryExtractor(Extractor):
"""Extractor for image galleries from imagefap.com"""
category = "imagefap"
@ -32,7 +33,7 @@ class ImagefapGalleryExtractor(Extractor):
self.image_id = ""
def items(self):
url = "http://www.imagefap.com/pictures/" + self.gid + "/"
url = "http://www.imagefap.com/pictures/" + self.gid + "/"
page = self.request(url).text
data = self.get_job_metadata(page)
yield Message.Version, 1
@ -72,7 +73,6 @@ class ImagefapGalleryExtractor(Extractor):
params["idx"] += 24
class ImagefapImageExtractor(Extractor):
"""Extractor for single images from imagefap.com"""
category = "imagefap"
@ -113,25 +113,29 @@ class ImagefapImageExtractor(Extractor):
def load_json(self):
"""Load the JSON dictionary associated with the image"""
url = "http://www.imagefap.com/photo/" + self.image_id + "/"
url = "http://www.imagefap.com/photo/" + self.image_id + "/"
page = self.request(url).text
section , pos = text.extract(page, '<meta name="description" content="', '"')
json_data, pos = text.extract(page,
'<script type="application/ld+json">', '</script>', pos)
section , pos = text.extract(
page, '<meta name="description" content="', '"'
)
json_data, pos = text.extract(
page, '<script type="application/ld+json">', '</script>', pos
)
json_dict = json.loads(json_data)
json_dict["section"] = section
return json_dict
class ImagefapUserExtractor(Extractor):
"""Extractor for all galleries from a user at imagefap.com"""
category = "imagefap"
subcategory = "user"
directory_fmt = ["{category}", "{gallery-id} {title}"]
filename_fmt = "{category}_{gallery-id}_{name}.{extension}"
pattern = [r"(?:https?://)?(?:www\.)?imagefap\.com/profile(?:\.php\?user=|/)([^/]+)",
r"(?:https?://)?(?:www\.)?imagefap\.com/usergallery\.php\?userid=(\d+)"]
pattern = [(r"(?:https?://)?(?:www\.)?imagefap\.com/"
r"profile(?:\.php\?user=|/)([^/]+)"),
(r"(?:https?://)?(?:www\.)?imagefap\.com/"
r"usergallery\.php\?userid=(\d+)")]
test = [("http://www.imagefap.com/profile/Mr Bad Example/galleries", {
"url": "145e98a8648c7695c150800ff8fd578ab26c28c1",
})]
@ -165,8 +169,9 @@ class ImagefapUserExtractor(Extractor):
if self.user:
url = "http://www.imagefap.com/profile/" + self.user + "/galleries"
else:
url = "http://www.imagefap.com/usergallery.php?userid=" + str(self.user_id)
url = ("http://www.imagefap.com/usergallery.php?userid=" +
str(self.user_id))
page = self.request(url).text
self.user_id, pos = text.extract(page, '?userid=', '"')
folders , pos = text.extract(page, ' id="tgl_all" value="', '"', pos)
folders, pos = text.extract(page, ' id="tgl_all" value="', '"', pos)
return folders.split("|")[:-1]

@ -13,6 +13,7 @@ from .. import text
from os.path import splitext
from urllib.parse import urljoin
class ImagehostImageExtractor(Extractor):
"""Base class for single-image extractors for various imagehosts"""
subcategory = "image"
@ -76,13 +77,16 @@ class ImgytImageExtractor(ImagehostImageExtractor):
filename += splitext(url)[1] if filename else url
return url, filename
class RapidimgImageExtractor(ImgytImageExtractor):
"""Extractor for single images from rapidimg.net"""
category = "rapidimg"
pattern = [r"(?:https?://)?((?:www\.)?rapidimg\.net/img-([a-z0-9]+)\.html)"]
pattern = [r"(?:https?://)?((?:www\.)?rapidimg\.net/"
r"img-([a-z0-9]+)\.html)"]
test = []
https = False
class FapatImageExtractor(ImgytImageExtractor):
"""Extractor for single images from fapat.me"""
category = "fapat"
@ -108,6 +112,7 @@ class ChronosImageExtractor(ImagehostImageExtractor):
filename, pos = text.extract(page, ' alt="', '"', pos)
return url, filename
class CoreimgImageExtractor(ChronosImageExtractor):
"""Extractor for single images from coreimg.net"""
category = "coreimg"
@ -118,6 +123,7 @@ class CoreimgImageExtractor(ChronosImageExtractor):
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})]
class ImgmaidImageExtractor(ChronosImageExtractor):
"""Extractor for single images from imgmaid.net"""
category = "imgmaid"
@ -125,6 +131,7 @@ class ImgmaidImageExtractor(ChronosImageExtractor):
test = []
https = True
class PicmaniacImageExtractor(ChronosImageExtractor):
"""Extractor for single images from pic-maniac.com"""
category = "picmaniac"
@ -149,6 +156,7 @@ class HosturimageImageExtractor(ImagehostImageExtractor):
url, pos = text.extract(page, "src='", "'", pos)
return url, url
class ImageontimeImageExtractor(HosturimageImageExtractor):
"""Extractor for single images from imageontime.org"""
category = "imageontime"
@ -157,6 +165,7 @@ class ImageontimeImageExtractor(HosturimageImageExtractor):
test = []
https = False
class Img4everImageExtractor(HosturimageImageExtractor):
"""Extractor for single images from img4ever.net"""
category = "img4ever"
@ -165,6 +174,7 @@ class Img4everImageExtractor(HosturimageImageExtractor):
test = []
https = True
class ImguploadImageExtractor(HosturimageImageExtractor):
"""Extractor for single images from imgupload.yt"""
category = "imgupload"
@ -184,10 +194,12 @@ class ImgspotImageExtractor(ImagehostImageExtractor):
url = text.extract(page, "<img class='centred_resized' src='", "'")[0]
return url, url
class ImgtrialImageExtractor(ImgspotImageExtractor):
"""Extractor for single images from imgtrial.com"""
category = "imgtrial"
pattern = [r"(?:https?://)?((?:www\.)?imgtrial\.com/img-([a-z0-9]+)\.html)"]
pattern = [r"(?:https?://)?((?:www\.)?imgtrial\.com"
r"/img-([a-z0-9]+)\.html)"]
class ImagevenueImageExtractor(ImagehostImageExtractor):

@ -12,6 +12,7 @@ from .common import Extractor, AsynchronousExtractor, Message
from .. import text, exception
import re
class ImgboxGalleryExtractor(AsynchronousExtractor):
"""Extractor for image galleries from imgbox.com"""
category = "imgbox"

@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text
class ImgchiliExtractor(Extractor):
"""Base class for imgchili extractors"""
category = "imgchili"
@ -47,16 +48,17 @@ class ImgchiliImageExtractor(ImgchiliExtractor):
subcategory = "image"
filename_fmt = "{filename}"
pattern = [r"(?:https?://)?(?:www\.)?imgchili\.net/show/\d+/(\d+)_[^/]+"]
test = [("http://imgchili.net/show/89427/89427136_test___quot;___gt;.png", {
test = [(("http://imgchili.net/show/89427/"
"89427136_test___quot;___gt;.png"), {
"url": "b93d92a6b58eb30a7ff6f9729cb748d25fea0c86",
"keyword": "376c4584dfae7d7d2e88687d4ee9618bbfd0a35c",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})]
def get_job_metadata(self, page):
name1 , pos = text.extract(page, 'name="description" content="', '. An ')
name2 , pos = text.extract(page, 'image called ', '" />\n', pos)
_ , pos = text.extract(page, '<link rel="image_src"', '', pos)
name1, pos = text.extract(page, '="description" content="', '. An ')
name2, pos = text.extract(page, 'image called ', '" />\n', pos)
_ , pos = text.extract(page, '<link rel="image_src"', '', pos)
self.imgurl, pos = text.extract(page, ' href="', '"', pos)
parts = name2.split("in the gallery ")
name = parts[0] if not parts[0].endswith("...") else name1

@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text
class ImgthGalleryExtractor(Extractor):
"""Extractor for image galleries from imgth.com"""
category = "imgth"

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015, 2016 Mike Fährmann
# Copyright 2015-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -11,13 +11,15 @@
from .common import Extractor, Message
from .. import text, exception
class ImgurAlbumExtractor(Extractor):
"""Extractor for image albums from imgur.com"""
category = "imgur"
subcategory = "album"
directory_fmt = ["{category}", "{album-key} - {title}"]
filename_fmt = "{category}_{album-key}_{num:>03}_{hash}{ext}"
pattern = [r"(?:https?://)?(?:m\.|www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)"]
pattern = [r"(?:https?://)?(?:m\.|www\.)?imgur\.com/"
r"(?:a|gallery)/([^/?&#]+)"]
test = [
("https://imgur.com/a/TcBmP", {
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
@ -56,8 +58,8 @@ class ImgurAlbumExtractor(Extractor):
def get_images(self):
"""Return a list of all images in this album"""
url = ("https://imgur.com/ajaxalbums/getimages/"
+ self.album + "/hit.json")
url = ("https://imgur.com/ajaxalbums/getimages/" +
self.album + "/hit.json")
data = self.request(url).json()["data"]
if not data:
raise exception.NotFoundError("album")

@ -10,6 +10,7 @@
from .foolslide import FoolslideChapterExtractor
class JaiminisboxChapterExtractor(FoolslideChapterExtractor):
"""Extractor for manga-chapters from jaiminisbox.com"""
category = "jaiminisbox"

@ -11,14 +11,17 @@
from .common import AsynchronousExtractor, Message
from .. import text, exception
class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
"""Extractor for soundtracks from khinsider.com"""
category = "khinsider"
subcategory = "soundtrack"
directory_fmt = ["{category}", "{album}"]
filename_fmt = "{filename}"
pattern = [r"(?:https?://)?downloads\.khinsider\.com/game-soundtracks/album/(.+)"]
test = [("http://downloads.khinsider.com/game-soundtracks/album/horizon-riders-wii-", {
pattern = [r"(?:https?://)?downloads\.khinsider\.com/"
r"game-soundtracks/album/(.+)"]
test = [(("http://downloads.khinsider.com/game-soundtracks/"
"album/horizon-riders-wii-"), {
"url": "732639e9e72e169f8ec36a71609471aaf67451e0",
"keyword": "d91cf3edee6713b536eaf3995743f0be7dc72f68",
})]
@ -28,7 +31,8 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
self.album = match.group(1)
def items(self):
url = "http://downloads.khinsider.com/game-soundtracks/album/" + self.album
url = ("http://downloads.khinsider.com/game-soundtracks/album/" +
self.album)
page = self.request(url, encoding="utf-8").text
data = self.get_job_metadata(page)
yield Message.Version, 1
@ -55,9 +59,12 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
if pos == -1:
raise exception.NotFoundError("soundtrack")
num = 0
for url in text.extract_iter(page, '<tr>\r\n\t\t<td><a href="', '"', pos):
for url in text.extract_iter(page, '<tr>\r\n\t\t<td><a href="',
'"', pos):
page = self.request(url, encoding="utf-8").text
name, pos = text.extract(page, "Song name: <b>", "</b>")
url , pos = text.extract(page, '<p><a style="color: #21363f;" href="', '"', pos)
url , pos = text.extract(
page, '<p><a style="color: #21363f;" href="', '"', pos
)
num += 1
yield url, text.nameext_from_url(name, {"num": num})

@ -11,6 +11,7 @@
from . import kissmanga
from .. import text
class KisscomicExtractor(kissmanga.KissmangaExtractor):
"""Base class for kisscomic extractors"""
category = "kisscomic"
@ -41,7 +42,8 @@ class KisscomicIssueExtractor(KisscomicExtractor,
kissmanga.KissmangaChapterExtractor):
"""Extractor for comic-issues from kisscomic.us"""
subcategory = "issue"
pattern = [r"(?:https?://)?(?:www\.)?kisscomic\.us/chapters/.+-chapter-\d+\.html"]
pattern = [r"(?:https?://)?(?:www\.)?kisscomic\.us/"
r"chapters/.+-chapter-\d+\.html"]
test = [("http://kisscomic.us/chapters/47-ronin-chapter-4.html", {
"url": "7f8e40bf04c4b36f14a60a8e45692068a9a1f88e",
"keyword": "a685f92b6989eebf57f8981b1edd6d3de9148ad6",

@ -12,11 +12,14 @@ from .common import Extractor, Message
from .. import text, cloudflare
import re
class KissmangaExtractor(Extractor):
"""Base class for kissmanga extractors"""
category = "kissmanga"
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"]
filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
directory_fmt = ["{category}", "{manga}",
"c{chapter:>03}{chapter-minor} - {title}"]
filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_"
"{page:>03}.{extension}")
root = "http://kissmanga.com"
def __init__(self, match):
@ -77,8 +80,8 @@ class KissmangaChapterExtractor(KissmangaExtractor):
"""Collect metadata for extractor-job"""
manga, pos = text.extract(page, "Read manga\n", "\n")
cinfo, pos = text.extract(page, "", "\n", pos)
match = re.match(
r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)(?:\.0*(\d+))?(?:: (.+))?", cinfo)
match = re.match((r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)"
r"(?:\.0*(\d+))?(?:: (.+))?"), cinfo)
chminor = match.group(3)
return {
"manga": manga,

@ -10,11 +10,13 @@
from . import booru
class KonachanExtractor(booru.JSONBooruExtractor):
"""Base class for konachan extractors"""
category = "konachan"
api_url = "https://konachan.com/post.json"
class KonachanTagExtractor(KonachanExtractor, booru.BooruTagExtractor):
"""Extractor for images from konachan.com based on search-tags"""
subcategory = "tag"
@ -23,6 +25,7 @@ class KonachanTagExtractor(KonachanExtractor, booru.BooruTagExtractor):
"content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
})]
class KonachanPoolExtractor(KonachanExtractor, booru.BooruPoolExtractor):
"""Extractor for image-pools from konachan.com"""
subcategory = "pool"
@ -31,6 +34,7 @@ class KonachanPoolExtractor(KonachanExtractor, booru.BooruPoolExtractor):
"content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
})]
class KonachanPostExtractor(KonachanExtractor, booru.BooruPostExtractor):
"""Extractor for single images from konachan.com"""
subcategory = "post"

@ -12,6 +12,7 @@ from .common import Extractor, Message
from .. import text, iso639_1
import re
class LusciousAlbumExtractor(Extractor):
"""Extractor for image albums from luscious.net"""
category = "luscious"
@ -21,7 +22,8 @@ class LusciousAlbumExtractor(Extractor):
pattern = [(r"(?:https?://)?(?:www\.)?luscious\.net/"
r"(?:c/[^/]+/)?(?:pictures/album|albums)/([^/]+_(\d+))")]
test = [
("https://luscious.net/c/hentai_manga/albums/okinami-no-koigokoro_277031/view/", {
(("https://luscious.net/c/hentai_manga/albums/"
"okinami-no-koigokoro_277031/view/"), {
"url": "7e4984a271a1072ac6483e4228a045895aff86f3",
"keyword": "3b3d36b355fa6a1a6c24be374ae16e6e9b0c729e",
"content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
@ -74,7 +76,7 @@ class LusciousAlbumExtractor(Extractor):
while True:
data = self.request(apiurl.format(pnum)).json()
page = data["html"]
pos = 0
pos = 0
while True:
imgid, pos = text.extract(page, 'container" id="', '"', pos)
if not imgid:

@ -18,10 +18,12 @@ class MangafoxChapterExtractor(AsynchronousExtractor):
category = "mangafox"
subcategory = "chapter"
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor}"]
filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_"
"{page:>03}.{extension}")
pattern = [(r"(?:https?://)?(?:www\.)?(mangafox\.me/manga/"
r"[^/]+/(v\d+/)?c\d+[^/]*)")]
test = [("http://mangafox.me/manga/kidou_keisatsu_patlabor/v05/c006.2/1.html", {
test = [(("http://mangafox.me/manga/kidou_keisatsu_patlabor/"
"v05/c006.2/1.html"), {
"keyword": "3bae0396e96868f5f24dff5e547a6bbfcbed7282",
"content": "5c50c252dcf12ffecf68801f4db8a2167265f66c",
})]

@ -12,6 +12,7 @@ from .common import Extractor, AsynchronousExtractor, Message
from .. import text
import re
class MangahereMangaExtractor(Extractor):
"""Extractor for mangas from mangahere.co"""
category = "mangahere"
@ -33,10 +34,10 @@ class MangahereMangaExtractor(Extractor):
def get_chapters(self):
"""Return a list of all chapter urls"""
page = self.request(self.url).text
return reversed(list(
text.extract_iter(page, '<a class="color_0077" href="', '"',
page.index('<div class="detail_list">'))
))
return reversed(list(text.extract_iter(
page, '<a class="color_0077" href="', '"',
page.index('<div class="detail_list">')
)))
class MangahereChapterExtractor(AsynchronousExtractor):
@ -44,7 +45,8 @@ class MangahereChapterExtractor(AsynchronousExtractor):
category = "mangahere"
subcategory = "chapter"
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor}"]
filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_"
"{page:>03}.{extension}")
pattern = [(r"(?:https?://)?(?:www\.)?mangahere\.co/manga/"
r"([^/]+(?:/v0*(\d+))?/c0*(\d+)(\.\d+)?)")]
test = [("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/", {
@ -60,10 +62,13 @@ class MangahereChapterExtractor(AsynchronousExtractor):
def items(self):
page = self.request(self.url_fmt.format(self.part, 1)).text
data = self.get_job_metadata(page)
urls = zip(
range(1, int(data["count"])+1),
self.get_image_urls(page),
)
yield Message.Version, 1
yield Message.Directory, data.copy()
for i, url in zip(range(int(data["count"])), self.get_image_urls(page)):
data["page"] = i+1
for data["page"], url in urls:
text.nameext_from_url(url, data)
yield Message.Url, url, data.copy()
@ -74,7 +79,8 @@ class MangahereChapterExtractor(AsynchronousExtractor):
_ , pos = text.extract(page, '<select class="wid60"', '', pos)
_ , pos = text.extract(page, '</select>', '', pos)
count, pos = text.extract(page, '>', '<', pos-30)
manga = re.match(r"(.+) \d+(\.\d+)? - Read .+ Chapter \d+(\.\d+)? Online", manga).group(1)
manga = re.match((r"(.+) \d+(\.\d+)? - Read .+ Chapter "
r"\d+(\.\d+)? Online"), manga).group(1)
return {
"manga": text.unescape(manga),
# "title": TODO,

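The mangahere hunk above also reworks the paging loop: building urls = zip(range(1, count + 1), ...) up front lets the for-target assign data["page"] directly, dropping the i + 1 bookkeeping in the body. Assigning to a dict item in a loop target is ordinary Python, as this toy version shows:

count = 3
image_urls = ["/p1.jpg", "/p2.png", "/p3.jpg"]  # stand-in for get_image_urls()
data = {}

for data["page"], url in zip(range(1, count + 1), image_urls):
    print(data["page"], url)  # 1 /p1.jpg ... 3 /p3.jpg
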
@ -12,11 +12,13 @@ from .common import Extractor, Message
from .. import text, exception
import re
class MangamintExtractor(Extractor):
"""Base class for mangamint extractors"""
category = "mangamint"
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor}"]
filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_"
"{page:>03}.{extension}")
url_base = "https://www.mangamint.com"
def __init__(self, match):
@ -52,7 +54,8 @@ class MangamintMangaExtractor(MangamintExtractor):
if response.status_code == 404:
raise exception.NotFoundError("manga")
page = response.text
table, pos = text.extract(page, '<table class="sticky-enabled">', '</table>')
table, pos = text.extract(
page, '<table class="sticky-enabled">', '</table>')
chapters.extend(text.extract_iter(table, '<a href="', '"'))
if page.find("pager-last", pos) == -1:
break
@ -115,11 +118,12 @@ class MangamintChapterExtractor(MangamintExtractor):
"manga_page": 0,
"form_id": "select_similar_node_widget",
}
params["select_node"] , pos = text.extract(page, r'"identifier":"node\/', '"')
_ , pos = text.extract(page, '>All pages<', '', pos)
params["howmany"] , pos = text.extract(page, 'value="', '"', pos-25)
_ , pos = text.extract(page, 'name="form_build_id"', '', pos)
params["form_build_id"], pos = text.extract(page, 'value="', '"', pos)
e = text.extract
params["select_node"] , pos = e(page, r'"identifier":"node\/', '"')
_ , pos = e(page, '>All pages<', '', pos)
params["howmany"] , pos = e(page, 'value="', '"', pos-25)
_ , pos = e(page, 'name="form_build_id"', '', pos)
params["form_build_id"], pos = e(page, 'value="', '"', pos)
url = self.url_base + "/many/callback"
page = self.request(url, method="post", data=params).json()["data"]
return list(text.extract_iter(page, r'<img src ="', r'"'))

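In the mangamint hunk above, binding text.extract to the one-letter alias e is what brings the five aligned call sites under the limit; the alias changes nothing else. A self-contained imitation built around a toy extract():

def extract(txt, begin, end, pos=0):
    """Toy stand-in for gallery-dl's text.extract()."""
    first = txt.index(begin, pos) + len(begin)
    last = txt.index(end, first)
    return txt[first:last], last

e = extract  # short alias keeps aligned call sites narrow
value, pos = e('<input value="42">', 'value="', '"')
assert value == "42"
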
@ -10,6 +10,7 @@
from .mangareader import MangareaderMangaExtractor, MangareaderChapterExtractor
class MangapandaBase():
"""Base class for mangapanda extractors"""
category = "mangapanda"
@ -24,12 +25,14 @@ class MangapandaMangaExtractor(MangapandaBase, MangareaderMangaExtractor):
"url": "50a1ba730b85426b904da256c80f68ba6a8a2566",
})]
class MangapandaChapterExtractor(MangapandaBase, MangareaderChapterExtractor):
"""Extractor for manga-chapters from mangapanda.com"""
subcategory = "chapter"
pattern = [
r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))",
r"(?:https?://)?(?:www\.)?mangapanda\.com(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
(r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))"),
(r"(?:https?://)?(?:www\.)?mangapanda\.com"
r"(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)"),
]
test = [("http://www.mangapanda.com/red-storm/2", {
"url": "4bf4ddf6c50105ec8a37675495ab80c46608275d",

@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text
class MangaparkMangaExtractor(Extractor):
"""Extractor for mangas from mangapark.me"""
category = "mangapark"
@ -44,8 +45,10 @@ class MangaparkChapterExtractor(Extractor):
"""Extractor for manga-chapters from mangapark.me"""
category = "mangapark"
subcategory = "chapter"
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"]
filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
directory_fmt = ["{category}", "{manga}",
"c{chapter:>03}{chapter-minor} - {title}"]
filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_"
"{page:>03}.{extension}")
pattern = [(r"(?:https?://)?(?:www\.)?mangapark\.me/manga/"
r"([^/]+/s(\d+)(?:/v([^/]+))?/c(\d+)(?:([^/]+)|/e(\d+))?)")]
test = [
@ -53,7 +56,8 @@ class MangaparkChapterExtractor(Extractor):
"url": "fefe84492d9118de5962563fbecb9362051c52d5",
"keyword": "652b38c40bdfb5592456b6e7524a3acfdef9fae6",
}),
("http://mangapark.me/manga/ad-astra-per-aspera-hata-kenjirou/s1/c1.2", {
(("http://mangapark.me/manga/"
"ad-astra-per-aspera-hata-kenjirou/s1/c1.2"), {
"url": "64b47f9837d50c3e57793ff6703d840ef7808c52",
"keyword": "f28eb26b4966bebda0e761f241c2dd49e505ce13",
}),
@ -65,9 +69,9 @@ class MangaparkChapterExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self)
self.part = match.group(1)
self.part = match.group(1)
self.version = match.group(2)
self.volume = match.group(3)
self.volume = match.group(3)
self.chapter = match.group(4)
try:
self.chminor = match.group(5) or "v" + match.group(6)
@ -75,8 +79,8 @@ class MangaparkChapterExtractor(Extractor):
self.chminor = ""
def items(self):
page = self.request("http://mangapark.me/manga/" + self.part
+ "?zoom=2").text
page = self.request("http://mangapark.me/manga/" + self.part +
"?zoom=2").text
data = self.get_job_metadata(page)
yield Message.Version, 1
yield Message.Directory, data
@ -114,7 +118,7 @@ class MangaparkChapterExtractor(Extractor):
pos = 0
num = 0
while True:
url , pos = text.extract(page, ' target="_blank" href="', '"', pos)
url, pos = text.extract(page, ' target="_blank" href="', '"', pos)
if not url:
return
num += 1

@ -11,6 +11,7 @@
from .common import AsynchronousExtractor, Extractor, Message
from .. import text
class MangareaderBase():
"""Base class for mangareader extractors"""
category = "mangareader"
@ -45,10 +46,12 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
"""Extractor for manga-chapters from mangareader.net"""
subcategory = "chapter"
pattern = [
r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))",
r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
(r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))"),
(r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/"
r"chapter-(\d+).html)"),
]
test = [("http://www.mangareader.net/karate-shoukoushi-kohinata-minoru/11", {
test = [(("http://www.mangareader.net/"
"karate-shoukoushi-kohinata-minoru/11"), {
"url": "84ffaab4c027ef9022695c53163c3aeabd07ca58",
"keyword": "09b4ad57a082eb371dec027ccfc8ed1157c6eac6",
})]

@ -11,6 +11,7 @@
from .common import Extractor, AsynchronousExtractor, Message
from .. import text
class MangashareMangaExtractor(Extractor):
"""Extractor for mangas from mangashare.com"""
category = "mangashare"
@ -57,10 +58,13 @@ class MangashareChapterExtractor(AsynchronousExtractor):
def items(self):
page = self.request(self.url_fmt.format(self.part, 1)).text
data = self.get_job_metadata(page)
urls = zip(
range(1, int(data["count"])+1),
self.get_image_urls(page),
)
yield Message.Version, 1
yield Message.Directory, data.copy()
for i, url in zip(range(int(data["count"])), (self.get_image_urls(page))):
data["page"] = i+1
for data["page"], url in urls:
text.nameext_from_url(url, data)
yield Message.Url, url, data.copy()

@ -11,6 +11,7 @@
from .common import AsynchronousExtractor, Message
from .. import text
class MangastreamChapterExtractor(AsynchronousExtractor):
"""Extractor for manga-chapters from mangastream.com"""
category = "mangastream"
@ -41,7 +42,9 @@ class MangastreamChapterExtractor(AsynchronousExtractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
manga, pos = text.extract(page, '<span class="hidden-xs hidden-sm">', "<")
manga, pos = text.extract(
page, '<span class="hidden-xs hidden-sm">', "<"
)
pos = page.find(self.part, pos)
title, pos = text.extract(page, ' - ', '<', pos)
count, pos = text.extract(page, 'Last Page (', ')', pos)

@ -6,6 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
class Message():
Version = 1

@ -12,6 +12,7 @@ from .common import Extractor, Message
from .. import text
import json
class NhentaiGalleryExtractor(Extractor):
"""Extractor for image-galleries from nhentai.net"""
category = "nhentai"
@ -31,7 +32,8 @@ class NhentaiGalleryExtractor(Extractor):
def items(self):
ginfo = self.get_gallery_info()
data = self.get_job_metadata(ginfo)
urlfmt = ginfo["media_url"] + "galleries/" + data["media-id"] + "/{}.{}"
urlfmt = "{}galleries/{}/{{}}.{{}}".format(
ginfo["media_url"], data["media-id"])
extdict = {"j": "jpg", "p": "png", "g": "gif"}
yield Message.Version, 1
yield Message.Directory, data
@ -45,8 +47,10 @@ class NhentaiGalleryExtractor(Extractor):
def get_gallery_info(self):
"""Extract and return gallery-info"""
page = self.request("http://nhentai.net/g/" + self.gid + "/1/").text
media_url, pos = text.extract(page, ".reader({\n\t\t\tmedia_url: '", "'")
json_data, pos = text.extract(page, "gallery: ", ",\n", pos)
media_url, pos = text.extract(
page, ".reader({\n\t\t\tmedia_url: '", "'")
json_data, pos = text.extract(
page, "gallery: ", ",\n", pos)
json_dict = json.loads(json_data)
json_dict["media_url"] = media_url
return json_dict

@ -12,6 +12,7 @@ from .common import AsynchronousExtractor, Message
from .. import config, text, exception
from ..cache import cache
class NijieExtractor(AsynchronousExtractor):
"""Base class for nijie extractors"""
category = "nijie"
@ -94,8 +95,8 @@ class NijieUserExtractor(NijieExtractor):
def __init__(self, match):
NijieExtractor.__init__(self)
self.artist_id = match.group(1)
self.artist_url = ("https://nijie.info/members_illust.php?id="
+ self.artist_id)
self.artist_url = ("https://nijie.info/members_illust.php?id=" +
self.artist_id)
def get_image_ids(self):
response = self.session.get(self.artist_url)

@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text, exception
class PinterestExtractor(Extractor):
"""Base class for pinterest extractors"""
category = "pinterest"
@ -65,7 +66,8 @@ class PinterestBoardExtractor(PinterestExtractor):
"""Extractor for images from a board from pinterest.com"""
subcategory = "board"
directory_fmt = ["{category}", "{user}", "{board}"]
pattern = [r"(?:https?://)?(?:[^./]+\.)?pinterest\.com/(?!pin/)([^/]+)/([^/]+)"]
pattern = [r"(?:https?://)?(?:[^./]+\.)?pinterest\.com/"
r"(?!pin/)([^/]+)/([^/]+)"]
test = [
("https://www.pinterest.com/g1952849/test-/", {
"url": "705ee521630a5d613b0449d694a5345e684572a9",
@ -133,7 +135,8 @@ class PinterestPinitExtractor(PinterestExtractor):
class PinterestAPI():
"""Minimal interface for the pinterest API"""
def __init__(self, session, access_token="AV2U9Oe6dyC2vfPugUnBvJ7Duxg9FHCJPXPZIvRDXv9hvwBALwAAAAA"):
def __init__(self, session, access_token="AV2U9Oe6dyC2vfPugUnBvJ7Duxg9"
"FHCJPXPZIvRDXv9hvwBALwAAAAA"):
self.session = session
self.session.params["access_token"] = access_token

@ -14,13 +14,15 @@ from ..cache import cache
import re
import json
class PixivUserExtractor(Extractor):
"""Extractor for works of a pixiv-user"""
category = "pixiv"
subcategory = "user"
directory_fmt = ["{category}", "{artist-id}-{artist-nick}"]
filename_fmt = "{category}_{artist-id}_{id}{num}.{extension}"
pattern = [r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?pixiv\.net/"
r"member(?:_illust)?\.php\?id=(\d+)"]
test = [
("http://www.pixiv.net/member_illust.php?id=173530", {
"url": "8f2fc0437e2095ab750c4340a4eba33ec6269477",
@ -37,7 +39,9 @@ class PixivUserExtractor(Extractor):
self.artist_id = match.group(1)
self.api = PixivAPI(self.session)
self.api_call = self.api.user_works
self.load_ugoira = config.interpolate(("extractor", "pixiv", "ugoira"), True)
self.load_ugoira = config.interpolate(
("extractor", "pixiv", "ugoira"), True
)
def items(self):
metadata = self.get_job_metadata()
@ -78,7 +82,9 @@ class PixivUserExtractor(Extractor):
big = ""
for i in range(work["page_count"]):
work["num"] = "_p{:02}".format(i)
url = "{}{}_p{}.{}{}".format(url[:off], big, i, ext, timestamp)
url = "{}{}_p{}.{}{}".format(
url[:off], big, i, ext, timestamp
)
yield Message.Url, url, work
def get_works(self):
@ -147,16 +153,19 @@ class PixivWorkExtractor(PixivUserExtractor):
r"\?(?:[^&]+&)*illust_id=(\d+)"),
(r"(?:https?://)?i\d+\.pixiv\.net(?:/.*)?/img-[^/]+/img"
r"/\d{4}(?:/\d\d){5}/(\d+)"),
r"(?:https?://)?img\d+\.pixiv\.net/img/[^/]+/(\d+)"]
(r"(?:https?://)?img\d+\.pixiv\.net/img/[^/]+/(\d+)")]
test = [
("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=966412", {
(("http://www.pixiv.net/member_illust.php"
"?mode=medium&illust_id=966412"), {
"url": "efb622f065b0871e92195e7bee0b4d75bd687d8d",
"content": "69a8edfb717400d1c2e146ab2b30d2c235440c5a",
}),
("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=966411", {
(("http://www.pixiv.net/member_illust.php"
"?mode=medium&illust_id=966411"), {
"exception": exception.NotFoundError,
}),
("http://i1.pixiv.net/c/600x600/img-master/img/2008/06/13/00/29/13/966412_p0_master1200.jpg", {
(("http://i1.pixiv.net/c/600x600/img-master/"
"img/2008/06/13/00/29/13/966412_p0_master1200.jpg"), {
"url": "efb622f065b0871e92195e7bee0b4d75bd687d8d",
}),
]
@ -212,6 +221,7 @@ def require_login(func):
return func(self, *args)
return wrap
class PixivAPI():
"""Minimal interface for the Pixiv Public-API for mobile devices
@ -312,6 +322,8 @@ class PixivAPI():
def _parse(response, empty=[None]):
"""Parse a Pixiv Public-API response"""
data = json.loads(response.text)
if data.get("status") == "failure" or data.get("response", empty) == empty:
status = data.get("status")
response = data.get("response", empty)
if status == "failure" or response == empty:
raise exception.NotFoundError()
return data

@ -10,6 +10,8 @@
from .foolslide import FoolslideChapterExtractor
from .. import text
import re
class PowermangaChapterExtractor(FoolslideChapterExtractor):
"""Extractor for manga-chapters from powermanga.org"""
@ -26,7 +28,8 @@ class PowermangaChapterExtractor(FoolslideChapterExtractor):
def __init__(self, match):
if match.group(1) == "p":
page = self.request("https://powermanga.org/" + match.group(2)).text
url = "https://powermanga.org/" + match.group(2)
page = self.request(url).text
pos = page.index("class='small-button smallblack'>Download</a>")
url = text.extract(page, "<a href='", "'", pos)[0]
match = re.match(self.pattern[0], url)

@ -12,6 +12,7 @@ from . import kissmanga
from .. import text
import re


class ReadcomiconlineExtractor(kissmanga.KissmangaExtractor):
"""Base class for readcomiconline extractors"""
category = "readcomiconline"
@ -41,7 +42,8 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineExtractor,
kissmanga.KissmangaChapterExtractor):
"""Extractor for comic-issues from readcomiconline.to"""
subcategory = "issue"
pattern = [r"(?:https?://)?(?:www\.)?readcomiconline\.to/Comic/.+/.+\?id=\d+"]
pattern = [r"(?:https?://)?(?:www\.)?readcomiconline\.to/"
r"Comic/.+/.+\?id=\d+"]
test = [("http://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
"url": "dd1659d9eb5f6ebb421e66316c98d71682a44c2d",
"keyword": "bc2f937893c1204ba40e0293e86f0a8943be1304",

@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text


class ReadcomicsComicExtractor(Extractor):
"""Extractor for comics from readcomics.tv"""
category = "readcomics"

@ -12,6 +12,7 @@ import re
from .common import Extractor, Message
from .. import adapter


class RecursiveExtractor(Extractor):
category = "recursive"

@ -10,13 +10,14 @@
from . import booru


class Rule34Extractor(booru.XMLBooruExtractor):
"""Base class for rule34 extractors"""
category = "rule34"
api_url = "https://rule34.xxx/index.php"

def setup(self):
self.params.update({"page":"dapi", "s":"post", "q":"index"})
self.params.update({"page": "dapi", "s": "post", "q": "index"})
def update_page(self, reset=False):
if reset is False:
@ -24,6 +25,7 @@ class Rule34Extractor(booru.XMLBooruExtractor):
else:
self.params["pid"] = 0
class Rule34TagExtractor(Rule34Extractor, booru.BooruTagExtractor):
"""Extractor for images from rule34.xxx based on search-tags"""
subcategory = "tag"
@ -34,6 +36,7 @@ class Rule34TagExtractor(Rule34Extractor, booru.BooruTagExtractor):
"content": "a01768c6f86f32eb7ebbdeb87c30b0d9968d7f97",
})]


class Rule34PostExtractor(Rule34Extractor, booru.BooruPostExtractor):
"""Extractor for single images from rule34.xxx"""
subcategory = "post"

@ -10,13 +10,14 @@
from . import booru


class SafebooruExtractor(booru.XMLBooruExtractor):
"""Base class for safebooru extractors"""
category = "safebooru"
api_url = "http://safebooru.org/index.php"

def setup(self):
self.params.update({"page":"dapi", "s":"post", "q":"index"})
self.params.update({"page": "dapi", "s": "post", "q": "index"})
def update_page(self, reset=False):
if reset is False:
@ -24,6 +25,7 @@ class SafebooruExtractor(booru.XMLBooruExtractor):
else:
self.params["pid"] = 0
class SafebooruTagExtractor(SafebooruExtractor, booru.BooruTagExtractor):
"""Extractor for images from safebooru.org based on search-tags"""
subcategory = "tag"
@ -34,6 +36,7 @@ class SafebooruTagExtractor(SafebooruExtractor, booru.BooruTagExtractor):
"content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
})]


class SafebooruPostExtractor(SafebooruExtractor, booru.BooruPostExtractor):
"""Extractor for single images from safebooru.org"""
subcategory = "post"

@ -11,8 +11,9 @@
from .common import AsynchronousExtractor, Message
from .. import text


class SankakuTagExtractor(AsynchronousExtractor):
"""Extractor for images from chan.sankakucomplex.com based on search-tags"""
"""Extractor for images from chan.sankakucomplex.com by search-tags"""
category = "sankaku"
subcategory = "tag"
directory_fmt = ["{category}", "{tags}"]
@ -54,8 +55,9 @@ class SankakuTagExtractor(AsynchronousExtractor):
page = self.request(self.url, params=params).text
pos = text.extract(page, '<div id=more-popular-posts-link>', '')[1]
while True:
image_id, pos = text.extract(page,
'<span class="thumb blacklisted" id=p', '>', pos)
image_id, pos = text.extract(
page, '<span class="thumb blacklisted" id=p', '>', pos
)
if not image_id:
break
image = self.get_image_metadata(image_id)

@ -13,6 +13,7 @@ from .. import config, exception
from ..cache import cache
from xml.etree import ElementTree


class SeigaExtractor(Extractor):
"""Base class for seiga extractors"""
category = "seiga"
@ -88,7 +89,7 @@ class SeigaUserExtractor(SeigaExtractor):
def get_images(self):
keymap = {0: "image-id", 2: "title", 3: "description",
7: "summary", 8: "genre", 18: "date"}
7: "summary", 8: "genre", 18: "date"}
url = "http://seiga.nicovideo.jp/api/user/data?id=" + self.user_id
response = self.request(url)
root = ElementTree.fromstring(response.text)

@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text


class SenmangaChapterExtractor(Extractor):
"""Extractor for manga-chapters from raw.senmanga.com"""
category = "senmanga"

@ -10,12 +10,14 @@
from .foolslide import FoolslideChapterExtractor


class SensescansChapterExtractor(FoolslideChapterExtractor):
"""Extractor for manga-chapters from reader.sensescans.com"""
category = "sensescans"
pattern = [(r"(?:https?://)?(reader\.sensescans\.com/read/"
r"[^/]+/([a-z]{2})/\d+/\d+)")]
test = [("http://reader.sensescans.com/read/magi__labyrinth_of_magic/en/33/319/page/1", {
test = [(("http://reader.sensescans.com/read/"
"magi__labyrinth_of_magic/en/33/319/page/1"), {
"url": "fff6186aca61017aeb5c743cdfccac4b9b1f2557",
"keyword": "77f906f04bf49d3bd636e8c92d85dc25aa361754",
})]

@ -11,6 +11,7 @@
from .common import Extractor, AsynchronousExtractor, Message
from .. import text
class SpectrumnexusMangaExtractor(Extractor):
"""Extractor for mangas from thespectrum.net"""
category = "spectrumnexus"
@ -33,7 +34,9 @@ class SpectrumnexusMangaExtractor(Extractor):
def get_chapters(self):
"""Return a list of all chapter identifiers"""
page = self.request(self.url).text
page = text.extract(page, '<select class="selectchapter"', '</select>')[0]
page = text.extract(
page, '<select class="selectchapter"', '</select>'
)[0]
return text.extract_iter(page, '<option value="', '"')
@ -44,11 +47,13 @@ class SpectrumnexusChapterExtractor(AsynchronousExtractor):
directory_fmt = ["{category}", "{manga}", "{identifier}"]
filename_fmt = "{manga} {identifier} {page:>03}.{extension}"
pattern = [
(r"(?:https?://)?(view\.thespectrum\.net/series/[^\.]+\.html)"
r"\?ch=(Chapter\+(\d+)|Volume\+(\d+))"),
(r"(?:https?://)?(view\.thespectrum\.net/series/[^/]+-chapter-(\d+)\.html)"),
(r"(?:https?://)?(view\.thespectrum\.net/series/"
r"[^\.]+\.html)\?ch=(Chapter\+(\d+)|Volume\+(\d+))"),
(r"(?:https?://)?(view\.thespectrum\.net/series/"
r"[^/]+-chapter-(\d+)\.html)"),
]
test = [("http://view.thespectrum.net/series/toriko.html?ch=Chapter+343&page=1", {
test = [(("http://view.thespectrum.net/series/"
"toriko.html?ch=Chapter+343&page=1"), {
"url": "c0fc7dc594841217cc622a67edd79f06e9900333",
"keyword": "8499166b62db0c87e7109cc5f9aa837b4815dd9c",
})]

@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import extractor, exception


class TestExtractor(Extractor):
"""Extractor to select and run the test URLs of other extractors

@ -12,6 +12,7 @@ from .common import Extractor, Message
from .. import text
import json


class TumblrUserExtractor(Extractor):
"""Extractor for all images from a tumblr-user"""
category = "tumblr"

@ -11,13 +11,15 @@
from .common import Extractor, Message
from .. import text


class TwitterTweetExtractor(Extractor):
"""Extractor for images from tweets on twitter.com"""
category = "twitter"
subcategory = "tweet"
directory_fmt = ["{category}", "{user}"]
filename_fmt = "{tweet-id}_{num}.{extension}"
pattern = [r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/(([^/]+)/status/(\d+))"]
pattern = [r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/"
r"(([^/]+)/status/(\d+))"]
test = [("https://twitter.com/PicturesEarth/status/672897688871018500", {
"url": "d9e68d41301d2fe382eb27711dea28366be03b1a",
"keyword": "3cd8e27026a2112008985b1b53f5e4baf4616177",

@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text


class WhentaiUserExtractor(Extractor):
"""Extractor for images of a whentai-user"""
category = "whentai"
@ -91,14 +92,20 @@ class WhentaiImageExtractor(Extractor):
def items(self):
data = self.get_image_metadata()
url = self.get_image_url(data["user"])
yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, url, data

def get_image_url(self, user):
data = {"type": "image", "cnt": "1", "paid": "0", "post": "1",
"from": str(int(self.imageid) + 1), "author": user.replace("_", " ")}
data = {
"type": "image",
"cnt": "1",
"paid": "0",
"post": "1",
"from": str(int(self.imageid) + 1),
"author": user.replace("_", " ")
}
page = self.request("http://whentai.com/ajax/getuploadslist",
method="POST", data=data).text
return text.extract(page, 'src="', '"')[0].replace("/t2", "/")

@ -10,12 +10,14 @@
from .foolslide import FoolslideChapterExtractor


class WorldthreeChapterExtractor(FoolslideChapterExtractor):
"""Extractor for manga-chapters from slide.world-three.org"""
category = "worldthree"
pattern = [(r"(?:https?://)?(?:www\.)?(slide\.world-three\.org/read/"
r"[^/]+/([a-z]{2})/\d+/\d+)")]
test = [("http://www.slide.world-three.org/read/black_bullet/en/2/7/page/1", {
test = [(("http://www.slide.world-three.org/"
"read/black_bullet/en/2/7/page/1"), {
"url": "be2f04f6e2d311b35188094cfd3e768583271584",
"keyword": "6d77d9fc806d76d881491a52ccd8dfd875c47d05",
})]

@ -10,11 +10,13 @@
from . import booru


class YandereExtractor(booru.JSONBooruExtractor):
"""Base class for yandere extractors"""
category = "yandere"
api_url = "https://yande.re/post.json"


class YandereTagExtractor(YandereExtractor, booru.BooruTagExtractor):
"""Extractor for images from yande.re based on search-tags"""
subcategory = "tag"
@ -23,6 +25,7 @@ class YandereTagExtractor(YandereExtractor, booru.BooruTagExtractor):
"content": "59201811c728096b2d95ce6896fd0009235fe683",
})]


class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor):
"""Extractor for image-pools from yande.re"""
subcategory = "pool"
@ -31,6 +34,7 @@ class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor):
"content": "2a35b9d6edecce11cc2918c6dce4de2198342b68",
})]


class YanderePostExtractor(YandereExtractor, booru.BooruPostExtractor):
"""Extractor for single images from yande.re"""
subcategory = "post"

@ -10,6 +10,7 @@
from .foolslide import FoolslideChapterExtractor


class YomangaChapterExtractor(FoolslideChapterExtractor):
"""Extractor for manga-chapters from yomanga.co"""
category = "yomanga"

@ -10,6 +10,7 @@
from .foolslide import FoolslideChapterExtractor


class YonkouprodChapterExtractor(FoolslideChapterExtractor):
"""Extractor for manga-chapters from yonkouprod.com"""
category = "yonkouprod"
