commit
fbf4ef4f3a
@ -0,0 +1,91 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://2ch.hk/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util
|
||||
|
||||
|
||||
class _2chThreadExtractor(Extractor):
    """Extractor for 2ch threads"""
    category = "2ch"
    subcategory = "thread"
    root = "https://2ch.hk"
    directory_fmt = ("{category}", "{board}", "{thread} {title}")
    filename_fmt = "{tim}{filename:? //}.{extension}"
    archive_fmt = "{board}_{thread}_{tim}"
    pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/res/(\d+)"
    example = "https://2ch.hk/a/res/12345.html"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # board name and numeric thread ID captured from the URL
        self.board, self.thread = match.groups()

    def items(self):
        """Yield a Directory message followed by one Url per file
        attached to any post in the thread."""
        url = "{}/{}/res/{}.json".format(self.root, self.board, self.thread)
        posts = self.request(url).json()["threads"][0]["posts"]

        op = posts[0]
        # prefer the OP's subject; fall back to its comment text
        # with HTML tags stripped
        title = op.get("subject") or text.remove_html(op["comment"])

        thread = {
            "board" : self.board,
            "thread": self.thread,
            # truncate to 50 characters to keep directory names sane
            "title" : text.unescape(title)[:50],
        }

        yield Message.Directory, thread
        for post in posts:
            files = post.get("files")
            if files:
                # rename/remove post keys that would clash with
                # per-file metadata of the same name
                post["post_name"] = post["name"]
                post["date"] = text.parse_timestamp(post["timestamp"])
                del post["files"]
                del post["name"]

                for file in files:
                    # merge thread- and post-level metadata into each file
                    file.update(thread)
                    file.update(post)

                    # "fullname" is the original upload name;
                    # "name" is the server-side timestamped name
                    file["filename"] = file["fullname"].rpartition(".")[0]
                    file["tim"], _, file["extension"] = \
                        file["name"].rpartition(".")

                    yield Message.Url, self.root + file["path"], file
|
||||
|
||||
|
||||
class _2chBoardExtractor(Extractor):
    """Extractor for 2ch boards"""
    category = "2ch"
    subcategory = "board"
    root = "https://2ch.hk"
    pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/?$"
    example = "https://2ch.hk/a/"

    def __init__(self, match):
        Extractor.__init__(self, match)
        self.board = match.group(1)

    def items(self):
        """Queue every thread of the board for _2chThreadExtractor"""
        # index page (page 0)
        url = "{}/{}/index.json".format(self.root, self.board)
        index = self.request(url).json()
        index["_extractor"] = _2chThreadExtractor
        for thread in index["threads"]:
            url = "{}/{}/res/{}.html".format(
                self.root, self.board, thread["thread_num"])
            yield Message.Queue, url, index

        # pages 1..n ; skip the first entry of "pages",
        # which is the index page handled above
        for n in util.advance(index["pages"], 1):
            url = "{}/{}/{}.json".format(self.root, self.board, n)
            page = self.request(url).json()
            page["_extractor"] = _2chThreadExtractor
            for thread in page["threads"]:
                url = "{}/{}/res/{}.html".format(
                    self.root, self.board, thread["thread_num"])
                yield Message.Queue, url, page
|
@ -0,0 +1,123 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://bato.to/"""
|
||||
|
||||
from .common import Extractor, ChapterExtractor, MangaExtractor
|
||||
from .. import text, exception
|
||||
import re
|
||||
|
||||
# all known bato.to domains and mirror hostnames
BASE_PATTERN = (r"(?:https?://)?(?:"
                r"(?:ba|d|h|m|w)to\.to|"
                r"(?:(?:manga|read)toto|batocomic|[xz]bato)\.(?:com|net|org)|"
                r"comiko\.(?:net|org)|"
                r"bat(?:otoo|o?two)\.com)")
|
||||
|
||||
|
||||
class BatotoBase():
    """Base class for batoto extractors"""
    category = "batoto"
    root = "https://bato.to"

    def request(self, url, **kwargs):
        # force UTF-8 decoding; the server does not always
        # declare its response encoding
        kwargs["encoding"] = "utf-8"
        return Extractor.request(self, url, **kwargs)
|
||||
|
||||
|
||||
class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
    """Extractor for bato.to manga chapters"""
    pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)"
    example = "https://bato.to/title/12345-MANGA/54321"

    def __init__(self, match):
        self.root = text.root_from_url(match.group(0))
        self.chapter_id = match.group(1)
        # the title slug is ignored by the site;
        # '0' resolves to the correct title for any chapter ID
        url = "{}/title/0/{}".format(self.root, self.chapter_id)
        ChapterExtractor.__init__(self, match, url)

    def metadata(self, page):
        """Extract manga/chapter metadata from a chapter page"""
        extr = text.extract_from(page)
        # NOTE(review): rsplit(" - ", 3) can yield up to 4 fields but
        # only 3 names are bound here; assumes the <title> always
        # contains exactly two " - " separators — confirm
        manga, info, _ = extr("<title>", "<").rsplit(" - ", 3)
        manga_id = text.extr(
            extr('rel="canonical" href="', '"'), "/title/", "/")

        # 'info' looks like "[Volume N ]Chapter M[.x]"
        match = re.match(
            r"(?:Volume\s+(\d+) )?"
            r"\w+\s+(\d+)(.*)", info)
        if match:
            volume, chapter, minor = match.groups()
            title = text.remove_html(extr(
                "selected>", "</option")).partition(" : ")[2]
        else:
            # unparsable chapter info: treat the whole string as title
            volume = chapter = 0
            minor = ""
            title = info

        return {
            "manga"        : text.unescape(manga),
            "manga_id"     : text.parse_int(manga_id),
            "title"        : text.unescape(title),
            "volume"       : text.parse_int(volume),
            "chapter"      : text.parse_int(chapter),
            "chapter_minor": minor,
            "chapter_id"   : text.parse_int(self.chapter_id),
            # trailing [:-3] drops milliseconds from the epoch value
            "date"         : text.parse_timestamp(extr(' time="', '"')[:-3]),
        }

    def images(self, page):
        """Return a list of (image URL, None) tuples for the chapter"""
        # image URLs are embedded as an escaped JSON array
        # inside the 'pageOpts' script variable
        images_container = text.extr(page, 'pageOpts', ':[0,0]}"')
        images_container = text.unescape(images_container)
        return [
            (url, None)
            for url in text.extract_iter(images_container, r"\"", r"\"")
        ]
|
||||
|
||||
|
||||
class BatotoMangaExtractor(BatotoBase, MangaExtractor):
    """Extractor for bato.to manga"""
    # chapters are listed newest-first; do not reverse them
    reverse = False
    chapterclass = BatotoChapterExtractor
    pattern = (BASE_PATTERN +
               r"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$")
    example = "https://bato.to/title/12345-MANGA/"

    def __init__(self, match):
        self.root = text.root_from_url(match.group(0))
        # ID comes from either the /title/ or the /series/ alternative
        self.manga_id = match.group(1) or match.group(2)
        url = "{}/title/{}".format(self.root, self.manga_id)
        MangaExtractor.__init__(self, match, url)

    def chapters(self, page):
        """Return a list of (chapter URL, metadata) tuples"""
        extr = text.extract_from(page)

        # e.g. a deleted-series notice; abort with the message shown
        warning = extr(' class="alert alert-warning">', "</div><")
        if warning:
            raise exception.StopExtraction("'%s'", text.remove_html(warning))

        data = {
            "manga_id": text.parse_int(self.manga_id),
            "manga" : text.unescape(extr(
                "<title>", "<").rpartition(" - ")[0]),
        }

        # skip ahead to the chapter list container
        extr('<div data-hk="0-0-0-0"', "")
        results = []
        while True:
            href = extr('<a href="/title/', '"')
            if not href:
                break

            # chapter number is encoded in the URL slug after "-ch_"
            chapter = href.rpartition("-ch_")[2]
            chapter, sep, minor = chapter.partition(".")

            data["chapter"] = text.parse_int(chapter)
            data["chapter_minor"] = sep + minor
            data["date"] = text.parse_datetime(
                extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ")

            url = "{}/title/{}".format(self.root, href)
            # copy() so later iterations don't mutate queued metadata
            results.append((url, data.copy()))
        return results
|
@ -0,0 +1,167 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://hatenablog.com"""
|
||||
|
||||
import re
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
|
||||
|
||||
# blog host: either an explicit "hatenablog:" prefixed custom domain,
# or a subdomain of one of the Hatena blog services
BASE_PATTERN = (
    r"(?:hatenablog:https?://([^/?#]+)|(?:https?://)?"
    r"([\w-]+\.(?:hatenablog\.(?:com|jp)"
    r"|hatenadiary\.com|hateblo\.jp)))"
)
# optional query string (captured) and fragment at the end of a URL
QUERY_RE = r"(?:\?([^#]*))?(?:#.*)?$"
|
||||
|
||||
|
||||
class HatenablogExtractor(Extractor):
    """Base class for HatenaBlog extractors"""
    category = "hatenablog"
    directory_fmt = ("{category}", "{domain}")
    filename_fmt = "{category}_{domain}_{entry}_{num:>02}.{extension}"
    # NOTE(review): "(unknown)" looks like a placeholder value
    # for the archive format — confirm against project conventions
    archive_fmt = "(unknown)"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # blog domain from either alternative of BASE_PATTERN
        self.domain = match.group(1) or match.group(2)

    def _init(self):
        self._find_img = re.compile(r'<img +([^>]+)').finditer

    def _handle_article(self, article: str):
        """Yield Directory/Url messages for all images of one article"""
        extr = text.extract_from(article)
        date = text.parse_datetime(extr('<time datetime="', '"'))
        entry_link = text.unescape(extr('<a href="', '"'))
        entry = entry_link.partition("/entry/")[2]
        title = text.unescape(extr('>', '<'))
        content = extr(
            '<div class="entry-content hatenablog-entry">', '</div>')

        images = []
        for i in self._find_img(content):
            attributes = i.group(1)
            # only keep images hosted on Hatena Fotolife
            if 'class="hatena-fotolife"' not in attributes:
                continue
            image = text.unescape(text.extr(attributes, 'src="', '"'))
            images.append(image)

        data = {
            "domain": self.domain,
            "date": date,
            "entry": entry,
            "title": title,
            "count": len(images),
        }
        yield Message.Directory, data
        for data["num"], url in enumerate(images, 1):
            yield Message.Url, url, text.nameext_from_url(url, data)
|
||||
|
||||
|
||||
class HatenablogEntriesExtractor(HatenablogExtractor):
    """Base class for a list of entries"""
    # query parameters (besides 'page') a subclass forwards to the site
    allowed_parameters = ()

    def __init__(self, match):
        HatenablogExtractor.__init__(self, match)
        self.path = match.group(3)
        # keep only whitelisted query parameters
        self.query = {key: value for key, value in text.parse_query(
            match.group(4)).items() if self._acceptable_query(key)}

    def _init(self):
        HatenablogExtractor._init(self)
        self._find_pager_url = re.compile(
            r' class="pager-next">\s*<a href="([^"]+)').search

    def items(self):
        """Walk the listing pages, following the 'next' pager link"""
        url = "https://" + self.domain + self.path
        query = self.query

        while url:
            page = self.request(url, params=query).text

            extr = text.extract_from(page)
            attributes = extr('<body ', '>')
            if "page-archive" in attributes:
                # archive pages only contain entry previews
                yield from self._handle_partial_articles(extr)
            else:
                yield from self._handle_full_articles(extr)

            match = self._find_pager_url(page)
            url = text.unescape(match.group(1)) if match else None
            # pager URLs already embed their query string
            query = None

    def _handle_partial_articles(self, extr):
        """Queue each previewed entry for HatenablogEntryExtractor"""
        while True:
            section = extr('<section class="archive-entry', '</section>')
            if not section:
                break

            url = "hatenablog:" + text.unescape(text.extr(
                section, '<a class="entry-title-link" href="', '"'))
            data = {"_extractor": HatenablogEntryExtractor}
            yield Message.Queue, url, data

    def _handle_full_articles(self, extr):
        """Extract images directly from fully rendered articles"""
        while True:
            attributes = extr('<article ', '>')
            if not attributes:
                break
            # skip placeholder articles
            if "no-entry" in attributes:
                continue

            article = extr('', '</article>')
            yield from self._handle_article(article)

    def _acceptable_query(self, key):
        # 'page' is always allowed; everything else must be whitelisted
        return key == "page" or key in self.allowed_parameters
|
||||
|
||||
|
||||
class HatenablogEntryExtractor(HatenablogExtractor):
    """Extractor for a single entry URL"""
    subcategory = "entry"
    pattern = BASE_PATTERN + r"/entry/([^?#]+)" + QUERY_RE
    example = "https://BLOG.hatenablog.com/entry/PATH"

    def __init__(self, match):
        HatenablogExtractor.__init__(self, match)
        self.path = match.group(3)

    def items(self):
        url = "https://" + self.domain + "/entry/" + self.path
        page = self.request(url).text

        extr = text.extract_from(page)
        while True:
            # skip 'no-entry' placeholder articles until the real one
            attributes = extr('<article ', '>')
            if "no-entry" in attributes:
                continue
            article = extr('', '</article>')
            # _handle_article is a generator; returning it makes
            # items() yield its messages
            return self._handle_article(article)
|
||||
|
||||
|
||||
class HatenablogHomeExtractor(HatenablogEntriesExtractor):
    """Extractor for a blog's home page"""
    subcategory = "home"
    pattern = BASE_PATTERN + r"(/?)" + QUERY_RE
    example = "https://BLOG.hatenablog.com"
|
||||
|
||||
|
||||
class HatenablogArchiveExtractor(HatenablogEntriesExtractor):
    """Extractor for a blog's archive page"""
    subcategory = "archive"
    # matches /archive, /archive/YYYY[/MM[/DD]], and /archive/category/...
    pattern = (BASE_PATTERN + r"(/archive(?:/\d+(?:/\d+(?:/\d+)?)?"
               r"|/category/[^?#]+)?)" + QUERY_RE)
    example = "https://BLOG.hatenablog.com/archive/2024"
|
||||
|
||||
|
||||
class HatenablogSearchExtractor(HatenablogEntriesExtractor):
    """Extractor for a blog's search results"""
    subcategory = "search"
    pattern = BASE_PATTERN + r"(/search)" + QUERY_RE
    example = "https://BLOG.hatenablog.com/search?q=QUERY"
    # forward the search term to the site
    allowed_parameters = ("q",)
|
@ -1,87 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://nudecollect.com/"""
|
||||
|
||||
from .common import GalleryExtractor
|
||||
from .. import text
|
||||
|
||||
|
||||
class NudecollectExtractor(GalleryExtractor):
    """Base class for Nudecollect extractors"""
    category = "nudecollect"
    directory_fmt = ("{category}", "{title}")
    filename_fmt = "{slug}_{num:>03}.{extension}"
    archive_fmt = "{slug}_{num}"
    root = "https://www.nudecollect.com"

    def request(self, url, **kwargs):
        # never follow redirects on this site
        kwargs["allow_redirects"] = False
        return GalleryExtractor.request(self, url, **kwargs)

    @staticmethod
    def get_title(page):
        """Return the gallery title taken from the page's <title> tag"""
        raw_title = text.extr(page, "<title>", "</title>")
        # drop the constant 31-character site prefix
        return text.unescape(raw_title)[31:]

    @staticmethod
    def get_image(page):
        """Return the first embedded image URL of 'page'"""
        return text.extr(page, '<img src="', '"')
|
||||
|
||||
|
||||
class NudecollectImageExtractor(NudecollectExtractor):
    """Extractor for individual images from nudecollect.com"""
    subcategory = "image"
    pattern = (r"(?:https?://)?(?:www\.)?nudecollect\.com"
               r"(/content/([^/?#]+)/image-(\d+)-pics-(\d+)"
               r"-mirror-(\d+)\.html)")
    example = ("https://www.nudecollect.com/content/12345_TITLE"
               "/image-1-pics-108-mirror-1.html")

    def __init__(self, match):
        NudecollectExtractor.__init__(self, match)
        # first group (full path) is unused; keep slug, image number,
        # total count, and mirror number
        _, self.slug, self.num, self.count, self.mirror = match.groups()

    def metadata(self, page):
        return {
            "slug"  : self.slug,
            "title" : self.get_title(page),
            "count" : text.parse_int(self.count),
            "mirror": text.parse_int(self.mirror),
        }

    def images(self, page):
        # a single image with its 1-based position in the album
        return ((self.get_image(page), {"num": text.parse_int(self.num)}),)
|
||||
|
||||
|
||||
class NudecollectAlbumExtractor(NudecollectExtractor):
    """Extractor for image albums on nudecollect.com"""
    subcategory = "album"
    pattern = (r"(?:https?://)?(?:www\.)?nudecollect\.com"
               r"/content/([^/?#]+)/(?:index-mirror-(\d+)-(\d+)"
               r"|page-\d+-pics-(\d+)-mirror-(\d+))\.html")
    example = ("https://www.nudecollect.com/content/12345_TITLE"
               "/index-mirror-01-123.html")

    def __init__(self, match):
        # mirror/count come from either the index- or the page- URL form
        self.slug = match.group(1)
        self.mirror = match.group(2) or match.group(5)
        self.count = text.parse_int(match.group(3) or match.group(4))
        url = "{}/content/{}/image-1-pics-{}-mirror-{}.html".format(
            self.root, self.slug, self.count, self.mirror)
        NudecollectExtractor.__init__(self, match, url)

    def metadata(self, page):
        return {
            "slug"  : self.slug,
            "title" : self.get_title(page),
            "mirror": text.parse_int(self.mirror),
        }

    def images(self, page):
        # derive a URL template from the first image and substitute
        # the zero-padded image number for every file in the album
        url = self.get_image(page)
        p1, _, p2 = url.partition("/image0")
        ufmt = p1 + "/image{:>05}" + p2[4:]
        return [(ufmt.format(num), None) for num in range(1, self.count + 1)]
|
@ -0,0 +1,138 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for http://www.poringa.net/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
from ..cache import cache
|
||||
import itertools
|
||||
|
||||
# poringa.net host, with optional scheme and "www." prefix
BASE_PATTERN = r"(?:https?://)?(?:www\.)?poringa\.net"
|
||||
|
||||
|
||||
class PoringaExtractor(Extractor):
    """Base class for poringa extractors"""
    category = "poringa"
    directory_fmt = ("{category}", "{user}", "{post_id}")
    # NOTE(review): "(unknown)" looks like a placeholder segment
    # in the filename format — confirm the intended field
    filename_fmt = "{post_id}_{title}_{num:>03}_(unknown).{extension}"
    archive_fmt = "{post_id}_{num}"
    root = "http://www.poringa.net"

    def __init__(self, match):
        Extractor.__init__(self, match)
        self.item = match.group(1)
        # flag: load cached cookies before the first request only
        self.__cookies = True

    def items(self):
        """Yield Directory/Url messages for each post from self.posts()"""
        for post_id in self.posts():
            url = "{}/posts/imagenes/{}".format(self.root, post_id)

            try:
                response = self.request(url)
            except exception.HttpError as exc:
                # best effort: skip unfetchable posts instead of aborting
                self.log.warning(
                    "Unable to fetch posts for '%s' (%s)", post_id, exc)
                continue

            # private posts redirect to the login page
            if "/registro-login?" in response.url:
                self.log.warning("Private post '%s'", post_id)
                continue

            page = response.text
            title, pos = text.extract(
                page, 'property="og:title" content="', '"')

            try:
                # page.index() raises ValueError if the marker is missing
                pos = page.index('<div class="main-info', pos)
                user, pos = text.extract(
                    page, 'href="http://www.poringa.net/', '"', pos)
            except ValueError:
                user = None

            if not user:
                # fallback author name for anonymous/unparsable posts
                user = "poringa"

            data = {
                "post_id"      : post_id,
                "title"        : text.unescape(title),
                "user"         : text.unquote(user),
                # image servers require the post page as Referer
                "_http_headers": {"Referer": url},
            }

            main_post = text.extr(
                page, 'property="dc:content" role="main">', '</div>')
            urls = list(text.extract_iter(
                main_post, '<img class="imagen" border="0" src="', '"'))
            data["count"] = len(urls)

            yield Message.Directory, data
            for data["num"], url in enumerate(urls, 1):
                yield Message.Url, url, text.nameext_from_url(url, data)

    def posts(self):
        """Return an iterable of post IDs; overridden by subclasses"""
        return ()

    def request(self, url, **kwargs):
        # on the very first request, send the shared cached cookies
        if self.__cookies:
            self.__cookies = False
            self.cookies_update(_cookie_cache())

        # retry while the anti-bot interstitial page is served
        for _ in range(5):
            response = Extractor.request(self, url, **kwargs)
            if response.cookies:
                # persist any newly issued cookies for later runs
                _cookie_cache.update("", response.cookies)
            # only scan the first 600 bytes for the interstitial title
            if response.content.find(
                    b"<title>Please wait a few moments</title>", 0, 600) < 0:
                return response
            self.sleep(5.0, "check")

    def _pagination(self, url, params):
        """Yield post IDs from successive result pages"""
        for params["p"] in itertools.count(1):
            page = self.request(url, params=params).text

            posts_ids = PoringaPostExtractor.pattern.findall(page)
            # de-duplicate while preserving order
            posts_ids = list(dict.fromkeys(posts_ids))
            yield from posts_ids

            # fewer than a full page of results -> this was the last page
            if len(posts_ids) < 19:
                return
|
||||
|
||||
|
||||
class PoringaPostExtractor(PoringaExtractor):
    """Extractor for posts on poringa.net"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/posts/imagenes/(\d+)"
    example = "http://www.poringa.net/posts/imagenes/12345/TITLE.html"

    def posts(self):
        # the single post ID matched from the URL
        return (self.item,)
|
||||
|
||||
|
||||
class PoringaUserExtractor(PoringaExtractor):
    """Extractor for all posts of a poringa user"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/(\w+)$"
    example = "http://www.poringa.net/USER"

    def posts(self):
        # NOTE(review): user posts are located via the site search
        # with the user name as query — confirm this is intentional
        url = self.root + "/buscar/"
        params = {"q": self.item}
        return self._pagination(url, params)
|
||||
|
||||
|
||||
class PoringaSearchExtractor(PoringaExtractor):
    """Extractor for search results on poringa.net"""
    subcategory = "search"
    pattern = BASE_PATTERN + r"/buscar/\?&?q=([^&#]+)"
    example = "http://www.poringa.net/buscar/?q=QUERY"

    def posts(self):
        url = self.root + "/buscar/"
        params = {"q": self.item}
        return self._pagination(url, params)
|
||||
|
||||
|
||||
@cache()
def _cookie_cache():
    """Initial (empty) value for the shared poringa cookie cache"""
    return ()
|
@ -0,0 +1,211 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://www.steamgriddb.com"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?steamgriddb\.com"
# ISO 639-1 two-letter language codes accepted by the API
LANGUAGE_CODES = (
    "aa", "ab", "ae", "af", "ak", "am", "an", "ar", "as", "av", "ay", "az",
    "ba", "be", "bg", "bh", "bi", "bm", "bn", "bo", "br", "bs", "ca", "ce",
    "ch", "co", "cr", "cs", "cu", "cv", "cy", "da", "de", "dv", "dz", "ee",
    "el", "en", "eo", "es", "et", "eu", "fa", "ff", "fi", "fj", "fo", "fr",
    "fy", "ga", "gd", "gl", "gn", "gu", "gv", "ha", "he", "hi", "ho", "hr",
    "ht", "hu", "hy", "hz", "ia", "id", "ie", "ig", "ii", "ik", "io", "is",
    "it", "iu", "ja", "jv", "ka", "kg", "ki", "kj", "kk", "kl", "km", "kn",
    "ko", "kr", "ks", "ku", "kv", "kw", "ky", "la", "lb", "lg", "li", "ln",
    "lo", "lt", "lu", "lv", "mg", "mh", "mi", "mk", "ml", "mn", "mr", "ms",
    "mt", "my", "na", "nb", "nd", "ne", "ng", "nl", "nn", "no", "nr", "nv",
    "ny", "oc", "oj", "om", "or", "os", "pa", "pi", "pl", "ps", "pt", "qu",
    "rm", "rn", "ro", "ru", "rw", "sa", "sc", "sd", "se", "sg", "si", "sk",
    "sl", "sm", "sn", "so", "sq", "sr", "ss", "st", "su", "sv", "sw", "ta",
    "te", "tg", "th", "ti", "tk", "tl", "tn", "to", "tr", "ts", "tt", "tw",
    "ty", "ug", "uk", "ur", "uz", "ve", "vi", "vo", "wa", "wo", "xh", "yi",
    "yo", "za", "zh", "zu",
)
# mapping of user-supplied file extensions to API MIME type values
FILE_EXT_TO_MIME = {
    "png": "image/png",
    "jpeg": "image/jpeg",
    "jpg": "image/jpeg",
    "webp": "image/webp",
    "ico": "image/vnd.microsoft.icon",
    "all": "all",
}
|
||||
|
||||
|
||||
class SteamgriddbExtractor(Extractor):
    """Base class for SteamGridDB"""
    category = "steamgriddb"
    directory_fmt = ("{category}", "{subcategory}", "{game[id]}")
    filename_fmt = "{game[id]}_{id}_{num:>02}.{extension}"
    # NOTE(review): "(unknown)" looks like a placeholder value
    # for the archive format — confirm against project conventions
    archive_fmt = "(unknown)"
    root = "https://www.steamgriddb.com"

    def _init(self):
        # URL-encoded {"adult": false} user-preference cookie
        self.cookies_update({
            "userprefs": "%7B%22adult%22%3Afalse%7D",
        })

    def items(self):
        """Yield Directory/Url messages for all assets from self.assets()"""
        download_fake_png = self.config("download-fake-png", True)

        for asset in self.assets():
            # 'fake_png' is an alternate PNG-wrapped variant of the asset
            if download_fake_png and asset.get("fake_png"):
                urls = (asset["url"], asset["fake_png"])
            else:
                urls = (asset["url"],)

            asset["count"] = len(urls)
            yield Message.Directory, asset
            for asset["num"], url in enumerate(urls, 1):
                yield Message.Url, url, text.nameext_from_url(url, asset)

    def _call(self, endpoint, **kwargs):
        """Call an API endpoint and return its 'data' payload;
        abort extraction on an unsuccessful response."""
        data = self.request(self.root + endpoint, **kwargs).json()
        if not data["success"]:
            raise exception.StopExtraction(data["error"])
        return data["data"]
|
||||
|
||||
|
||||
class SteamgriddbAssetsExtractor(SteamgriddbExtractor):
    """Base class for extracting a list of assets"""

    def __init__(self, match):
        SteamgriddbExtractor.__init__(self, match)
        list_type = match.group(1)
        # renamed from 'id' to avoid shadowing the builtin
        item_id = int(match.group(2))
        self.game_id = item_id if list_type == "game" else None
        self.collection_id = item_id if list_type == "collection" else None
        # optional 1-based page number from the URL; default: first page
        self.page = int(match.group(3) or 1)

    def assets(self):
        """Yield all assets of a game or collection.

        Paginates through the public search API starting at the page
        given in the URL and applies the user's filter configuration.
        Raises exception.StopExtraction for invalid option values.
        """
        limit = 48
        # convert the 1-based URL page number to the API's 0-based
        # index; max() (the original min() always produced 0 and
        # silently ignored page numbers > 1), clamped at 0
        page = max(self.page - 1, 0)

        sort = self.config("sort", "score_desc")
        if sort not in ("score_desc", "score_asc", "score_old_desc",
                        "score_old_asc", "age_desc", "age_asc"):
            # fixed typo: 'exception.StopExtractor' does not exist;
            # StopExtraction is what the rest of this module raises
            raise exception.StopExtraction("Invalid sort '%s'", sort)

        json = {
            "static"  : self.config("static", True),
            "animated": self.config("animated", True),
            "humor"   : self.config("humor", True),
            "nsfw"    : self.config("nsfw", True),
            "epilepsy": self.config("epilepsy", True),
            "untagged": self.config("untagged", True),

            "asset_type": self.asset_type,
            "limit": limit,
            "order": sort,
        }
        if self.valid_dimensions:
            json["dimensions"] = self.config_list(
                "dimensions", "dimension", self.valid_dimensions)
        json["styles"] = self.config_list("styles", "style", self.valid_styles)
        json["languages"] = self.config_list(
            "languages", "language", LANGUAGE_CODES)
        file_types = self.config_list(
            "file-types", "file type", self.valid_file_types)
        json["mime"] = [FILE_EXT_TO_MIME[i] for i in file_types]

        if self.game_id:
            json["game_id"] = [self.game_id]
        else:
            json["collection_id"] = self.collection_id

        while True:
            json["page"] = page

            data = self._call(
                "/api/public/search/assets", method="POST", json=json)
            for asset in data["assets"]:
                if not asset.get("game"):
                    # collection results carry game info at the top level
                    asset["game"] = data["game"]
                yield asset

            # after fetching 0-based page 'page', limit * (page + 1)
            # results have been consumed; stop once that covers 'total'
            # (the previous 'limit * page' check always issued one
            # extra, empty request)
            if data["total"] <= limit * (page + 1):
                break
            page += 1

    def config_list(self, key, type_name, valid_values):
        """Return a validated list option for 'key'.

        Accepts a list or a comma-separated string; returns ["all"]
        when the option is unset or contains "all". Raises
        exception.StopExtraction on any value not in 'valid_values'.
        """
        value = self.config(key)
        if isinstance(value, str):
            value = value.split(",")

        if value is None or "all" in value:
            return ["all"]

        for i in value:
            if i not in valid_values:
                raise exception.StopExtraction("Invalid %s '%s'", type_name, i)

        return value
|
||||
|
||||
|
||||
class SteamgriddbAssetExtractor(SteamgriddbExtractor):
    """Extractor for a single asset"""
    subcategory = "asset"
    pattern = BASE_PATTERN + r"/(grid|hero|logo|icon)/(\d+)"
    example = "https://www.steamgriddb.com/grid/1234"

    def __init__(self, match):
        SteamgriddbExtractor.__init__(self, match)
        # asset type ("grid"/"hero"/"logo"/"icon") and its numeric ID
        self.asset_type, self.asset_id = match.groups()

    def assets(self):
        """Return a 1-tuple containing the requested asset"""
        endpoint = "/api/public/asset/{}/{}".format(
            self.asset_type, self.asset_id)
        return (self._call(endpoint)["asset"],)
|
||||
|
||||
|
||||
class SteamgriddbGridsExtractor(SteamgriddbAssetsExtractor):
    """Extractor for grid assets of a game or collection"""
    subcategory = "grids"
    asset_type = "grid"
    pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/grids(?:/(\d+))?"
    example = "https://www.steamgriddb.com/game/1234/grids"
    valid_dimensions = ("460x215", "920x430", "600x900", "342x482", "660x930",
                        "512x512", "1024x1024")
    valid_styles = ("alternate", "blurred", "no_logo", "material",
                    "white_logo")
    valid_file_types = ("png", "jpeg", "jpg", "webp")
|
||||
|
||||
|
||||
class SteamgriddbHeroesExtractor(SteamgriddbAssetsExtractor):
    """Extractor for hero assets of a game or collection"""
    subcategory = "heroes"
    asset_type = "hero"
    pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/heroes(?:/(\d+))?"
    example = "https://www.steamgriddb.com/game/1234/heroes"
    valid_dimensions = ("1920x620", "3840x1240", "1600x650")
    valid_styles = ("alternate", "blurred", "material")
    valid_file_types = ("png", "jpeg", "jpg", "webp")
|
||||
|
||||
|
||||
class SteamgriddbLogosExtractor(SteamgriddbAssetsExtractor):
    """Extractor for logo assets of a game or collection"""
    subcategory = "logos"
    asset_type = "logo"
    pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/logos(?:/(\d+))?"
    example = "https://www.steamgriddb.com/game/1234/logos"
    # logos have no fixed dimensions
    valid_dimensions = None
    valid_styles = ("official", "white", "black", "custom")
    valid_file_types = ("png", "webp")
|
||||
|
||||
|
||||
class SteamgriddbIconsExtractor(SteamgriddbAssetsExtractor):
    """Extractor for icon assets of a game or collection"""
    subcategory = "icons"
    asset_type = "icon"
    pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/icons(?:/(\d+))?"
    example = "https://www.steamgriddb.com/game/1234/icons"
    # icons are square; generate "NxN" strings for all supported sizes
    valid_dimensions = ["{0}x{0}".format(i) for i in (8, 10, 14, 16, 20, 24,
                        28, 32, 35, 40, 48, 54, 56, 57, 60, 64, 72, 76, 80, 90,
                        96, 100, 114, 120, 128, 144, 150, 152, 160, 180, 192,
                        194, 256, 310, 512, 768, 1024)]
    valid_styles = ("official", "custom")
    valid_file_types = ("png", "ico")
|
@ -0,0 +1,144 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2022 Ailothaen
|
||||
# Copyright 2024 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for Wikimedia and Wikipedia"""
|
||||
|
||||
from .common import BaseExtractor, Message
|
||||
from .. import text
|
||||
|
||||
|
||||
class WikimediaExtractor(BaseExtractor):
    """Base class for wikimedia extractors"""
    basecategory = "wikimedia"
    directory_fmt = ("{category}", "{page}")
    archive_fmt = "{sha1}"
    request_interval = (1.0, 2.0)

    def __init__(self, match):
        BaseExtractor.__init__(self, match)
        # the page/category title is always the last capture group
        self.title = match.group(match.lastindex)

    def items(self):
        """Yield Directory/Url messages for each image returned
        by the API query in self.params."""
        for info in self._pagination(self.params):
            image = info["imageinfo"][0]

            # flatten the API's [{"name": ..., "value": ...}, ...]
            # metadata lists into plain dicts
            image["metadata"] = {
                m["name"]: m["value"]
                for m in image["metadata"]}
            image["commonmetadata"] = {
                m["name"]: m["value"]
                for m in image["commonmetadata"]}

            filename = image["canonicaltitle"]
            # strip the "File:" namespace prefix, then split off
            # the file extension
            image["filename"], _, image["extension"] = \
                filename.partition(":")[2].rpartition(".")
            image["date"] = text.parse_datetime(
                image["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
            image["page"] = self.title

            yield Message.Directory, image
            yield Message.Url, image["url"], image

    def _pagination(self, params):
        """
        https://www.mediawiki.org/wiki/API:Query
        https://opendata.stackexchange.com/questions/13381
        """

        url = self.root + "/w/api.php"
        params["action"] = "query"
        params["format"] = "json"

        while True:
            data = self.request(url, params=params).json()

            try:
                pages = data["query"]["pages"]
            except KeyError:
                # a continuation response may contain no "query" part
                pass
            else:
                yield from pages.values()

            try:
                continuation = data["continue"]
            except KeyError:
                # no "continue" object means this was the last batch
                break
            params.update(continuation)
|
||||
|
||||
|
||||
# build BASE_PATTERN and register one entry per supported wiki family;
# a 'root' of None derives the root URL from the matched hostname
BASE_PATTERN = WikimediaExtractor.update({
    "wikipedia": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wikipedia\.org",
    },
    "wiktionary": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wiktionary\.org",
    },
    "wikiquote": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wikiquote\.org",
    },
    "wikibooks": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wikibooks\.org",
    },
    "wikisource": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wikisource\.org",
    },
    "wikinews": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wikinews\.org",
    },
    "wikiversity": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wikiversity\.org",
    },
    "wikispecies": {
        "root": "https://species.wikimedia.org",
        "pattern": r"species\.wikimedia\.org",
    },
    "wikimediacommons": {
        "root": "https://commons.wikimedia.org",
        "pattern": r"commons\.wikimedia\.org",
    },
})
|
||||
|
||||
|
||||
class WikimediaArticleExtractor(WikimediaExtractor):
    """Extractor for the media files embedded in a single wiki article"""
    subcategory = "article"
    pattern = BASE_PATTERN + r"/wiki/(?!Category:)([^/?#]+)"
    example = "https://en.wikipedia.org/wiki/TITLE"

    def _init(self):
        # "generator=images" enumerates every file used by the article;
        # "iiprop" requests the full set of imageinfo properties.
        iiprop = ("timestamp|user|userid|comment|canonicaltitle|url|size|"
                  "sha1|mime|metadata|commonmetadata|extmetadata|bitdepth")
        self.params = {
            "generator": "images",
            "titles": self.title,
            "prop": "imageinfo",
            "iiprop": iiprop,
        }
|
||||
|
||||
|
||||
class WikimediaCategoryExtractor(WikimediaExtractor):
    """Extractor for the file members of a wiki category"""
    subcategory = "category"
    pattern = BASE_PATTERN + r"/wiki/(Category:[^/?#]+)"
    example = "https://commons.wikimedia.org/wiki/Category:NAME"

    def _init(self):
        # "generator=categorymembers" with "gcmtype=file" lists only the
        # files in the category; imageinfo properties match the article
        # extractor's request.
        iiprop = ("timestamp|user|userid|comment|canonicaltitle|url|size|"
                  "sha1|mime|metadata|commonmetadata|extmetadata|bitdepth")
        self.params = {
            "generator": "categorymembers",
            "gcmtitle": self.title,
            "gcmtype": "file",
            "prop": "imageinfo",
            "iiprop": iiprop,
        }
|
@ -0,0 +1,40 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from .common import GalleryExtractor
|
||||
from .. import text
|
||||
|
||||
|
||||
class ZzupGalleryExtractor(GalleryExtractor):
    """Extractor for image galleries from zzup.com"""
    category = "zzup"
    directory_fmt = ("{category}", "{title}")
    filename_fmt = "{slug}_{num:>03}.{extension}"
    archive_fmt = "{slug}_{num}"
    root = "https://zzup.com"
    # Group 1: the "/content/<token>/<slug>/<token>" path prefix,
    # group 2: the gallery slug.  Matches both index and paginated URLs.
    pattern = (r"(?:https?://)?(?:www\.)?zzup\.com(/content"
               r"/[\w=]+/([^/?#]+)/[\w=]+)/(?:index|page-\d+)\.html")
    example = "https://zzup.com/content/xyz=/12345_TITLE/123=/index.html"

    def __init__(self, match):
        # Canonicalize to the gallery's index page regardless of which
        # page-N URL was given.  self.root is a class attribute, so it is
        # available before the base __init__ runs.
        url = "{}/{}/index.html".format(self.root, match.group(1))
        GalleryExtractor.__init__(self, match, url)
        self.slug = match.group(2)

    def metadata(self, page):
        """Return gallery metadata extracted from the index page."""
        # [:-11] drops a fixed-length site suffix from the <title> text
        # (presumably something like " - zzup.com"; 11 chars -- TODO confirm).
        return {
            "slug" : self.slug,
            "title": text.unescape(text.extr(
                page, "<title>", "</title>"))[:-11],
        }

    def images(self, page):
        """Return a list of (image URL, metadata) tuples for the gallery."""
        # The first picture-box link encodes the image count in its path
        # as "...-pics-<count>-mirror...".
        path = text.extr(page, 'class="picbox"><a target="_blank" href="', '"')
        count = text.parse_int(text.extr(path, "-pics-", "-mirror"))
        # Follow that link to the image-viewer page and take its first
        # plain anchor, which points at image number 1.
        page = self.request(self.root + path).text
        url = self.root + text.extr(page, '\n<a href="', '"')
        # The URL contains a 5-digit index ("/image0NNNN..."); split it out
        # and rebuild with a zero-padded placeholder.  p2[4:] skips the
        # remaining 4 digits of the original index.
        # NOTE(review): assumes the URL always contains "/image0" -- if it
        # does not, p2 is empty and the template silently degrades; confirm.
        p1, _, p2 = url.partition("/image0")
        ufmt = p1 + "/image{:>05}" + p2[4:]
        return [(ufmt.format(num), None) for num in range(1, count + 1)]
|
@ -1,7 +1,8 @@
|
||||
[flake8]
|
||||
exclude = .git,__pycache__,build,dist,archive
|
||||
ignore = E203,E226,W504
|
||||
per-file-ignores =
|
||||
setup.py: E501
|
||||
gallery_dl/extractor/500px.py: E501
|
||||
gallery_dl/extractor/mangapark.py: E501
|
||||
test/results/*.py: E122,E241,E402,E501
|
||||
|
@ -0,0 +1,64 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
# The module is named "2ch", which starts with a digit, so a plain
# "import gallery_dl.extractor.2ch" would be a SyntaxError.  __import__
# imports the submodule and returns the top-level package; getattr then
# fetches the "2ch" attribute from the extractor subpackage.
gallery_dl = __import__("gallery_dl.extractor.2ch")
_2ch = getattr(gallery_dl.extractor, "2ch")


# Test fixtures: "#"-prefixed keys are test-runner directives (URL,
# expected extractor class, URL pattern, result count); all other keys
# assert values of the extracted metadata (a type means "instance of",
# an "re:" string means "matches regex", a range means "within range").
__tests__ = (
{
    "#url"     : "https://2ch.hk/a/res/6202876.html",
    "#category": ("", "2ch", "thread"),
    "#class"   : _2ch._2chThreadExtractor,
    "#pattern" : r"https://2ch\.hk/a/src/6202876/\d+\.\w+",
    "#count"   : range(450, 1000),

    "banned"   : 0,
    "board"    : "a",
    "closed"   : 0,
    "comment"  : str,
    "date"     : "type:datetime",
    "displayname": str,
    "email"    : "",
    "endless"  : 1,
    "extension": str,
    "filename" : str,
    "fullname" : str,
    "height"   : int,
    "lasthit"  : 1705273977,
    "md5"      : r"re:[0-9a-f]{32}",
    "name"     : r"re:\d+\.\w+",
    "num"      : int,
    "number"   : range(1, 1000),
    "op"       : 0,
    "parent"   : int,
    "path"     : r"re:/a/src/6202876/\d+\.\w+",
    "post_name": "Аноним",
    "size"     : int,
    "sticky"   : 0,
    "subject"  : str,
    "thread"   : "6202876",
    "thumbnail": str,
    "tim"      : r"re:\d+",
    "timestamp": int,
    "title"    : "MP4/WEBM",
    "tn_height": int,
    "tn_width" : int,
    "trip"     : "",
    "type"     : int,
    "views"    : int,
    "width"    : int,
},

{
    "#url"     : "https://2ch.hk/a/",
    "#category": ("", "2ch", "board"),
    "#class"   : _2ch._2chBoardExtractor,
    "#pattern" : _2ch._2chThreadExtractor.pattern,
    "#count"   : range(200, 300),
},

)
|
@ -0,0 +1,144 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import hatenablog
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://cosmiclatte.hatenablog.com/entry/2020/05/28/003227",
|
||||
"#category": ("", "hatenablog", "entry"),
|
||||
"#class" : hatenablog.HatenablogEntryExtractor,
|
||||
"#count" : 20,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://moko0908.hatenablog.jp/entry/2023/12/31/083846",
|
||||
"#category": ("", "hatenablog", "entry"),
|
||||
"#class" : hatenablog.HatenablogEntryExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://p-shirokuma.hatenadiary.com/entry/20231227/1703685600",
|
||||
"#category": ("", "hatenablog", "entry"),
|
||||
"#class" : hatenablog.HatenablogEntryExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://urakatahero.hateblo.jp/entry/2ndlife",
|
||||
"#category": ("", "hatenablog", "entry"),
|
||||
"#class" : hatenablog.HatenablogEntryExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "hatenablog:https://blog.hyouhon.com/entry/2023/12/22/133549",
|
||||
"#category": ("", "hatenablog", "entry"),
|
||||
"#class" : hatenablog.HatenablogEntryExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://cetriolo.hatenablog.com",
|
||||
"#category": ("", "hatenablog", "home"),
|
||||
"#class" : hatenablog.HatenablogHomeExtractor,
|
||||
"#range" : "1-7",
|
||||
"#count" : 7,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://moko0908.hatenablog.jp/",
|
||||
"#category": ("", "hatenablog", "home"),
|
||||
"#class" : hatenablog.HatenablogHomeExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://p-shirokuma.hatenadiary.com/",
|
||||
"#category": ("", "hatenablog", "home"),
|
||||
"#class" : hatenablog.HatenablogHomeExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://urakatahero.hateblo.jp/",
|
||||
"#category": ("", "hatenablog", "home"),
|
||||
"#class" : hatenablog.HatenablogHomeExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "hatenablog:https://blog.hyouhon.com/",
|
||||
"#category": ("", "hatenablog", "home"),
|
||||
"#class" : hatenablog.HatenablogHomeExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : ("https://8saki.hatenablog.com/archive/category/%E3%82%BB%E3"
|
||||
"%83%AB%E3%83%95%E3%82%B8%E3%82%A7%E3%83%AB%E3%83%8D%E3%82"
|
||||
"%A4%E3%83%AB"),
|
||||
"#category": ("", "hatenablog", "archive"),
|
||||
"#class" : hatenablog.HatenablogArchiveExtractor,
|
||||
"#range" : "1-30",
|
||||
"#count" : 30,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://moko0908.hatenablog.jp/archive/2023",
|
||||
"#category": ("", "hatenablog", "archive"),
|
||||
"#class" : hatenablog.HatenablogArchiveExtractor,
|
||||
"#count" : 13,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://p-shirokuma.hatenadiary.com/archive/2023/01",
|
||||
"#category": ("", "hatenablog", "archive"),
|
||||
"#class" : hatenablog.HatenablogArchiveExtractor,
|
||||
"#count" : 5,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://urakatahero.hateblo.jp/archive",
|
||||
"#category": ("", "hatenablog", "archive"),
|
||||
"#class" : hatenablog.HatenablogArchiveExtractor,
|
||||
"#range" : "1-30",
|
||||
"#count" : 30,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "hatenablog:https://blog.hyouhon.com/archive/2024/01/01",
|
||||
"#category": ("", "hatenablog", "archive"),
|
||||
"#class" : hatenablog.HatenablogArchiveExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "hatenablog:https://blog.hyouhon.com/search?q=a",
|
||||
"#category": ("", "hatenablog", "search"),
|
||||
"#class" : hatenablog.HatenablogSearchExtractor,
|
||||
"#range" : "1-30",
|
||||
"#count" : 30,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://cosmiclatte.hatenablog.com/search?q=a",
|
||||
"#category": ("", "hatenablog", "search"),
|
||||
"#class" : hatenablog.HatenablogSearchExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://moko0908.hatenablog.jp/search?q=a",
|
||||
"#category": ("", "hatenablog", "search"),
|
||||
"#class" : hatenablog.HatenablogSearchExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://p-shirokuma.hatenadiary.com/search?q=a",
|
||||
"#category": ("", "hatenablog", "search"),
|
||||
"#class" : hatenablog.HatenablogSearchExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://urakatahero.hateblo.jp/search?q=a",
|
||||
"#category": ("", "hatenablog", "search"),
|
||||
"#class" : hatenablog.HatenablogSearchExtractor,
|
||||
},
|
||||
|
||||
)
|
@ -1,56 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import nudecollect
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.nudecollect.com/content/20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust/image-4-pics-108-mirror-43.html",
|
||||
"#category": ("", "nudecollect", "image"),
|
||||
"#class" : nudecollect.NudecollectImageExtractor,
|
||||
"#pattern" : r"https://mirror\d+\.nudecollect\.com/showimage/nudecollect-8769086487/image00004-5896498214-43-9689595623/20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust/9879560327/nudecollect\.com\.jpg",
|
||||
|
||||
"slug" : "20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust",
|
||||
"title" : "20201220 Teenpornstorage Patritcy Vanessa Lesbian Lust",
|
||||
"num" : 4,
|
||||
"count" : 108,
|
||||
"mirror": 43,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.nudecollect.com/content/20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust/image-10-pics-108-mirror-43.html",
|
||||
"#category": ("", "nudecollect", "image"),
|
||||
"#class" : nudecollect.NudecollectImageExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.nudecollect.com/content/20170219_TheWhiteBoxxx_Caprice_Tracy_Loves_Hot_ass_fingering_and_sensual_lesbian_sex_with_alluring_Czech_babes_x125_1080px/index-mirror-67-125.html",
|
||||
"#category": ("", "nudecollect", "album"),
|
||||
"#class" : nudecollect.NudecollectAlbumExtractor,
|
||||
"#pattern" : r"https://mirror\d+\.nudecollect\.com/showimage/nudecollect-8769086487/image00\d\d\d-5896498214-67-9689595623/20170219_TheWhiteBoxxx_Caprice_Tracy_Loves_Hot_ass_fingering_and_sensual_lesbian_sex_with_alluring_Czech_babes_x125_1080px/9879560327/nudecollect\.com\.jpg",
|
||||
"#count" : 125,
|
||||
|
||||
"slug" : "20170219_TheWhiteBoxxx_Caprice_Tracy_Loves_Hot_ass_fingering_and_sensual_lesbian_sex_with_alluring_Czech_babes_x125_1080px",
|
||||
"title" : "20170219 TheWhiteBoxxx Caprice Tracy Loves Hot ass fingering and sensual lesbian sex with alluring Czech babes x125 1080px",
|
||||
"num" : int,
|
||||
"mirror": 67,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.nudecollect.com/content/20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust/page-1-pics-108-mirror-43.html",
|
||||
"#category": ("", "nudecollect", "album"),
|
||||
"#class" : nudecollect.NudecollectAlbumExtractor,
|
||||
"#pattern" : r"https://mirror\d+\.nudecollect\.com/showimage/nudecollect-8769086487/image00\d\d\d-5896498214-43-9689595623/20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust/9879560327/nudecollect\.com\.jpg",
|
||||
"#count" : 108,
|
||||
|
||||
"slug" : "20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust",
|
||||
"title" : "20201220 Teenpornstorage Patritcy Vanessa Lesbian Lust",
|
||||
"num" : int,
|
||||
"mirror": 43,
|
||||
},
|
||||
|
||||
)
|
@ -0,0 +1,54 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import poringa
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "http://www.poringa.net/posts/imagenes/3051081/Turrita-alto-ojete.html",
|
||||
"#category": ("", "poringa", "post"),
|
||||
"#class" : poringa.PoringaPostExtractor,
|
||||
"#count" : 26,
|
||||
|
||||
"count" : 26,
|
||||
"num" : range(1, 26),
|
||||
"post_id" : "3051081",
|
||||
"title" : "turrita alto ojete...",
|
||||
"user" : "vipower1top",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "http://www.poringa.net/posts/imagenes/3095554/Otra-culona-de-instagram.html",
|
||||
"#category": ("", "poringa", "post"),
|
||||
"#class" : poringa.PoringaPostExtractor,
|
||||
"#count" : 15,
|
||||
|
||||
"count" : 15,
|
||||
"num" : range(1, 15),
|
||||
"post_id" : "3095554",
|
||||
"title" : "Otra culona de instagram",
|
||||
"user" : "Expectro007",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "http://www.poringa.net/Expectro007",
|
||||
"#category": ("", "poringa", "user"),
|
||||
"#class" : poringa.PoringaUserExtractor,
|
||||
"#pattern" : r"https?://img-\d+\.poringa\.net/poringa/img/././././././Expectro007/\w{3}\.(jpg|png|gif)",
|
||||
"#count" : range(500, 600),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "http://www.poringa.net/buscar/?&q=yuslopez",
|
||||
"#category": ("", "poringa", "search"),
|
||||
"#class" : poringa.PoringaSearchExtractor,
|
||||
"#pattern" : r"https?://img-\d+\.poringa\.net/poringa/img/././././././\w+/\w{3}\.(jpg|png|gif)",
|
||||
"#range" : "1-50",
|
||||
"#count" : 50,
|
||||
},
|
||||
|
||||
)
|
@ -0,0 +1,79 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import szurubooru
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://snootbooru.com/posts/query=sport",
|
||||
"#category": ("szurubooru", "snootbooru", "tag"),
|
||||
"#class" : szurubooru.SzurubooruTagExtractor,
|
||||
"#pattern" : r"https://snootbooru\.com/data/posts/\d+_[0-9a-f]{16}\.\w+",
|
||||
"#count" : range(35, 50),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://snootbooru.com/post/14511",
|
||||
"#category": ("szurubooru", "snootbooru", "post"),
|
||||
"#class" : szurubooru.SzurubooruPostExtractor,
|
||||
"#urls" : "https://snootbooru.com/data/posts/14511_e753313112755da6.png",
|
||||
"#sha1_content": "e69e61e61c5372514808480aae3a8e355c9cd6fb",
|
||||
|
||||
"canvasHeight" : 1000,
|
||||
"canvasWidth" : 1414,
|
||||
"checksum" : "e69e61e61c5372514808480aae3a8e355c9cd6fb",
|
||||
"checksumMD5" : "f4f4ddfcbdf367f466ede0980acb3d7d",
|
||||
"commentCount" : int,
|
||||
"comments" : list,
|
||||
"contentUrl" : "data/posts/14511_e753313112755da6.png",
|
||||
"creationTime" : "2023-12-02T01:11:01.433664Z",
|
||||
"date" : "dt:2023-12-02 01:11:01",
|
||||
"extension" : "png",
|
||||
"favoriteCount": int,
|
||||
"favoritedBy" : list,
|
||||
"featureCount" : int,
|
||||
"fileSize" : 270639,
|
||||
"filename" : "14511_e753313112755da6",
|
||||
"flags" : [],
|
||||
"hasCustomThumbnail": False,
|
||||
"id" : 14511,
|
||||
"lastEditTime" : "2023-12-02T01:12:09.500217Z",
|
||||
"lastFeatureTime": None,
|
||||
"mimeType" : "image/png",
|
||||
"noteCount" : 0,
|
||||
"notes" : [],
|
||||
"ownFavorite" : False,
|
||||
"ownScore" : 0,
|
||||
"pools" : [],
|
||||
"relationCount": 0,
|
||||
"relations" : [],
|
||||
"safety" : "safe",
|
||||
"score" : 0,
|
||||
"source" : None,
|
||||
"tagCount" : 3,
|
||||
"tags" : [
|
||||
"transparent",
|
||||
"sport",
|
||||
"text",
|
||||
],
|
||||
"tags_default" : [
|
||||
"sport",
|
||||
"text"
|
||||
],
|
||||
"tags_type" : [
|
||||
"transparent"
|
||||
],
|
||||
"thumbnailUrl" : "data/generated-thumbnails/14511_e753313112755da6.jpg",
|
||||
"type" : "image",
|
||||
"user" : {
|
||||
"avatarUrl": "data/avatars/komp.png",
|
||||
"name": "komp"
|
||||
},
|
||||
"version" : 2,
|
||||
},
|
||||
|
||||
)
|
@ -0,0 +1,124 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import steamgriddb
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/grid/368023",
|
||||
"#category": ("", "steamgriddb", "asset"),
|
||||
"#class" : steamgriddb.SteamgriddbAssetExtractor,
|
||||
"#urls" : ("https://cdn2.steamgriddb.com/grid/"
|
||||
"82fee171d62c044898d99ba0fddeb203.png"),
|
||||
"#count" : 1,
|
||||
"#sha1_content": "0bffaccae6f35f9fab529684a5b158d1cec4186b",
|
||||
|
||||
"game": {
|
||||
"id" : 5259324,
|
||||
"name": "Helltaker",
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/grid/132605",
|
||||
"#category": ("", "steamgriddb", "asset"),
|
||||
"#class" : steamgriddb.SteamgriddbAssetExtractor,
|
||||
"#count" : 2,
|
||||
"#sha1_url" : "4ff9158c008a1f01921d7553bcabf5e6204cdc79",
|
||||
"#sha1_content": "bc16c5eebf71463abdb33cfbf4b45a2fe092a2b2",
|
||||
|
||||
"game": {
|
||||
"id" : 5247997,
|
||||
"name": "OMORI",
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/grid/132605",
|
||||
"#category": ("", "steamgriddb", "asset"),
|
||||
"#class" : steamgriddb.SteamgriddbAssetExtractor,
|
||||
"#options" : {"download-fake-png": False},
|
||||
"#count" : 1,
|
||||
"#sha1_url" : "f6819c593ff65f15864796fb89581f05d21adddb",
|
||||
"#sha1_content": "0d9e6114dd8bb9699182fbb7c6bd9064d8b0b6cd",
|
||||
|
||||
"game": {
|
||||
"id" : 5247997,
|
||||
"name": "OMORI",
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/hero/61104",
|
||||
"#category": ("", "steamgriddb", "asset"),
|
||||
"#class" : steamgriddb.SteamgriddbAssetExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/logo/9610",
|
||||
"#category": ("", "steamgriddb", "asset"),
|
||||
"#class" : steamgriddb.SteamgriddbAssetExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/icon/173",
|
||||
"#category": ("", "steamgriddb", "asset"),
|
||||
"#class" : steamgriddb.SteamgriddbAssetExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/game/5259324/grids",
|
||||
"#category": ("", "steamgriddb", "grids"),
|
||||
"#class" : steamgriddb.SteamgriddbGridsExtractor,
|
||||
"#range" : "1-10",
|
||||
"#count" : 10,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/game/5259324/grids",
|
||||
"#category": ("", "steamgriddb", "grids"),
|
||||
"#class" : steamgriddb.SteamgriddbGridsExtractor,
|
||||
"#options" : {"humor": False, "epilepsy": False, "untagged": False},
|
||||
"#range" : "1-33",
|
||||
"#count" : 33,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/game/5331605/heroes",
|
||||
"#category": ("", "steamgriddb", "heroes"),
|
||||
"#class" : steamgriddb.SteamgriddbHeroesExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/game/5255394/logos",
|
||||
"#category": ("", "steamgriddb", "logos"),
|
||||
"#class" : steamgriddb.SteamgriddbLogosExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/game/5279790/icons",
|
||||
"#category": ("", "steamgriddb", "icons"),
|
||||
"#class" : steamgriddb.SteamgriddbIconsExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/collection/332/grids",
|
||||
"#category": ("", "steamgriddb", "grids"),
|
||||
"#class" : steamgriddb.SteamgriddbGridsExtractor,
|
||||
"#range" : "1-10",
|
||||
"#count" : 10,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/collection/332/heroes",
|
||||
"#category": ("", "steamgriddb", "heroes"),
|
||||
"#class" : steamgriddb.SteamgriddbHeroesExtractor,
|
||||
"#options" : {"animated": False},
|
||||
"#count" : 0,
|
||||
},
|
||||
|
||||
)
|
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.wikibooks.org/wiki/Title",
|
||||
"#category": ("wikimedia", "wikibooks", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikibooks.org/wiki/Category:Title",
|
||||
"#category": ("wikimedia", "wikibooks", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://commons.wikimedia.org/wiki/File:Starr-050516-1367-Pimenta_dioica-flowers-Maunaloa-Molokai_(24762757525).jpg",
|
||||
"#category": ("wikimedia", "wikimediacommons", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://commons.wikimedia.org/wiki/Category:Network_maps_of_the_Paris_Metro",
|
||||
"#category": ("wikimedia", "wikimediacommons", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.wikinews.org/wiki/Title",
|
||||
"#category": ("wikimedia", "wikinews", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikinews.org/wiki/Category:Title",
|
||||
"#category": ("wikimedia", "wikinews", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
@ -0,0 +1,53 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.wikipedia.org/wiki/Title",
|
||||
"#category": ("wikimedia", "wikipedia", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikipedia.org/wiki/Athena",
|
||||
"#category": ("wikimedia", "wikipedia", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
"#pattern" : r"https://upload.wikimedia.org/wikipedia/.+",
|
||||
"#count" : range(50, 100),
|
||||
|
||||
"bitdepth" : int,
|
||||
"canonicaltitle": str,
|
||||
"comment" : str,
|
||||
"commonmetadata": dict,
|
||||
"date" : "type:datetime",
|
||||
"descriptionshorturl": str,
|
||||
"descriptionurl": str,
|
||||
"extension" : str,
|
||||
"extmetadata" : dict,
|
||||
"filename" : str,
|
||||
"height" : int,
|
||||
"metadata" : dict,
|
||||
"mime" : r"re:image/\w+",
|
||||
"page" : "Athena",
|
||||
"sha1" : r"re:^[0-9a-f]{40}$",
|
||||
"size" : int,
|
||||
"timestamp" : str,
|
||||
"url" : str,
|
||||
"user" : str,
|
||||
"userid" : int,
|
||||
"width" : int,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikipedia.org/wiki/Category:Physics",
|
||||
"#category": ("wikimedia", "wikipedia", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.wikiquote.org/wiki/Title",
|
||||
"#category": ("wikimedia", "wikiquote", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikiquote.org/wiki/Category:Title",
|
||||
"#category": ("wikimedia", "wikiquote", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.wikisource.org/wiki/Title",
|
||||
"#category": ("wikimedia", "wikisource", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikisource.org/wiki/Category:Title",
|
||||
"#category": ("wikimedia", "wikisource", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
@ -0,0 +1,25 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://species.wikimedia.org/wiki/Geranospiza",
|
||||
"#category": ("wikimedia", "wikispecies", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
"#urls" : "https://upload.wikimedia.org/wikipedia/commons/0/01/Geranospiza_caerulescens.jpg",
|
||||
"#sha1_content": "3a17c14b15489928e4154f826af1c42afb5a523e",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://species.wikimedia.org/wiki/Category:Names",
|
||||
"#category": ("wikimedia", "wikispecies", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.wikiversity.org/wiki/Title",
|
||||
"#category": ("wikimedia", "wikiversity", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikiversity.org/wiki/Category:Title",
|
||||
"#category": ("wikimedia", "wikiversity", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.wiktionary.org/wiki/Word",
|
||||
"#category": ("wikimedia", "wiktionary", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wiktionary.org/wiki/Category:Words",
|
||||
"#category": ("wikimedia", "wiktionary", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
@ -0,0 +1,30 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import zzup
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://zzup.com/content/NjM=/MetArt_20080206_viki_c_sensazioni_by_ingret/OTE=/index.html",
|
||||
"#category": ("", "zzup", "gallery"),
|
||||
"#class" : zzup.ZzupGalleryExtractor,
|
||||
"#pattern" : r"https://zzup\.com/MjAxNjc3OTIyMjE5Nzk=/showimage/zzup-8769086487/image00\d\d\d-5896498214-1-9689595623/MetArt-20080206_viki_c_sensazioni_by_ingret/9879560327/zzup.com.jpg",
|
||||
|
||||
"slug" : "MetArt_20080206_viki_c_sensazioni_by_ingret",
|
||||
"title" : "MetArt 20080206 viki c sensazioni by ingret",
|
||||
"num" : int,
|
||||
"count" : 135,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://zzup.com/content/MTc2MDYxMw==/Courtesan/NDA=/page-1.html",
|
||||
"#category": ("", "zzup", "gallery"),
|
||||
"#class" : zzup.ZzupGalleryExtractor,
|
||||
"#pattern" : r"https://zzup.com/MjAxNjc3OTIyMjE5Nzk=/showimage/zzup-8769086487/image000\d\d-5896498214-40-9689595623/Courtesan/9879560327/zzup.com.jpg",
|
||||
},
|
||||
|
||||
)
|