Merge branch '1.17.0'

pull/1331/head
Mike Fährmann 4 years ago
commit fbfcbcbf57

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Mike Fährmann
# Copyright 2015-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -8,7 +8,7 @@
"""Extractors for *booru sites"""
from .common import Extractor, Message, generate_extractors
from .common import BaseExtractor, Message
from .. import text, util, exception
from xml.etree import ElementTree
@ -17,7 +17,7 @@ import operator
import re
class BooruExtractor(Extractor):
class BooruExtractor(BaseExtractor):
"""Base class for *booru extractors"""
basecategory = "booru"
filename_fmt = "{category}_{id}_{md5}.{extension}"
@ -104,14 +104,55 @@ class BooruExtractor(Extractor):
params["pid"] += 1
BASE_PATTERN = BooruExtractor.update({
"rule34": {
"root": "https://rule34.xxx",
},
"safebooru": {
"root": "https://safebooru.org",
},
"realbooru": {
"root": "https://realbooru.com",
},
})
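The per-site "()" capture groups that update() appends (see the new BaseExtractor.update() in common.py further down) are what make match.lastindex point at an extractor's own path groups. A minimal sketch of the combined pattern's behavior, with the alternation hand-written here for illustration:

import re

# roughly what BooruExtractor.update() builds from the three roots above:
# each escaped domain is followed by an empty group "()" marking the instance
BASE = (r"(?:https?://)?(?:rule34\.xxx()"
        r"|safebooru\.org()|realbooru\.com())")
pattern = re.compile(BASE + r"/index\.php\?page=post&s=view&id=(\d+)")

m = pattern.match("https://safebooru.org/index.php?page=post&s=view&id=1169132")
print(m.groups())            # (None, '', None, '1169132')
print(m.group(m.lastindex))  # '1169132' -- hence match.group(match.lastindex)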
class BooruPostExtractor(BooruExtractor):
subcategory = "post"
archive_fmt = "{id}"
pattern_fmt = r"/index\.php\?page=post&s=view&id=(\d+)"
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
test = (
("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
"content": "97e4bbf86c3860be18de384d02d544251afe1d45",
"options": (("tags", True),),
"keyword": {
"tags_artist": "danraku",
"tags_character": "kashima_(kantai_collection)",
"tags_copyright": "kantai_collection",
"tags_general": str,
"tags_metadata": str,
},
}),
("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
"url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
"content": "93b293b27dabd198afafabbaf87c49863ac82f27",
"options": (("tags", True),),
"keyword": {
"tags_artist": "kawanakajima",
"tags_character": "heath_ledger ronald_mcdonald the_joker",
"tags_copyright": "dc_comics mcdonald's the_dark_knight",
"tags_general": str,
},
}),
("https://realbooru.com/index.php?page=post&s=view&id=668483", {
"url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
"content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
}),
)
def __init__(self, match):
BooruExtractor.__init__(self, match)
self.post_id = match.group(1)
self.post_id = match.group(match.lastindex)
def posts(self):
return self._pagination({"id": self.post_id})
@ -121,11 +162,26 @@ class BooruTagExtractor(BooruExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern_fmt = r"/index\.php\?page=post&s=list&tags=([^&#]+)"
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
test = (
("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
"content": "97e4bbf86c3860be18de384d02d544251afe1d45",
"pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
"count": 1,
}),
("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
"url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
"content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
}),
("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
"count": ">= 64",
}),
)
def __init__(self, match):
BooruExtractor.__init__(self, match)
self.tags = text.unquote(match.group(1).replace("+", " "))
tags = match.group(match.lastindex)
self.tags = text.unquote(tags.replace("+", " "))
def metadata(self):
return {"search_tags": self.tags}
@ -138,11 +194,22 @@ class BooruPoolExtractor(BooruExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
pattern_fmt = r"/index\.php\?page=pool&s=show&id=(\d+)"
pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)"
test = (
("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
"count": 3,
}),
("https://safebooru.org/index.php?page=pool&s=show&id=11", {
"count": 5,
}),
("https://realbooru.com/index.php?page=pool&s=show&id=1", {
"count": 3,
}),
)
def __init__(self, match):
BooruExtractor.__init__(self, match)
self.pool_id = match.group(1)
self.pool_id = match.group(match.lastindex)
self.post_ids = ()
def skip(self, num):
@ -170,87 +237,3 @@ class BooruPoolExtractor(BooruExtractor):
for params["id"] in util.advance(self.post_ids, self.page_start):
for post in self._api_request(params):
yield post.attrib
EXTRACTORS = {
"rule34": {
"root": "https://rule34.xxx",
"test-tag": (
("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
"content": "97e4bbf86c3860be18de384d02d544251afe1d45",
"pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
"count": 1,
}),
),
"test-pool": (
("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
"count": 3,
}),
),
"test-post": (
("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
"content": "97e4bbf86c3860be18de384d02d544251afe1d45",
"options": (("tags", True),),
"keyword": {
"tags_artist": "danraku",
"tags_character": "kashima_(kantai_collection)",
"tags_copyright": "kantai_collection",
"tags_general": str,
"tags_metadata": str,
},
}),
),
},
"safebooru": {
"root": "https://safebooru.org",
"test-tag": (
("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
"url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
"content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
}),
),
"test-pool": (
("https://safebooru.org/index.php?page=pool&s=show&id=11", {
"count": 5,
}),
),
"test-post": (
("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
"url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
"content": "93b293b27dabd198afafabbaf87c49863ac82f27",
"options": (("tags", True),),
"keyword": {
"tags_artist": "kawanakajima",
"tags_character": "heath_ledger ronald_mcdonald the_joker",
"tags_copyright": "dc_comics mcdonald's the_dark_knight",
"tags_general": str,
},
}),
),
},
"realbooru": {
"root": "https://realbooru.com",
"test-tag": (
("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
"count": ">= 64",
}),
),
"test-pool": (
("https://realbooru.com/index.php?page=pool&s=show&id=1", {
"count": 3,
}),
),
"test-post": (
("https://realbooru.com/index.php?page=post&s=view&id=668483", {
"url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
"content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
}),
),
},
}
generate_extractors(EXTRACTORS, globals(), (
BooruTagExtractor,
BooruPoolExtractor,
BooruPostExtractor,
))

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2020 Mike Fährmann
# Copyright 2014-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -527,46 +527,37 @@ class AsynchronousMixin():
messages.put(None)
def generate_extractors(extractor_data, symtable, classes):
"""Dynamically generate Extractor classes"""
extractors = config.get(("extractor",), classes[0].basecategory)
ckey = extractor_data.get("_ckey")
prev = None
class BaseExtractor(Extractor):
instances = None
if extractors:
extractor_data.update(extractors)
for category, info in extractor_data.items():
if not isinstance(info, dict) or "root" not in info:
continue
def __init__(self, match):
if not self.category:
for index, group in enumerate(match.groups()):
if group is not None:
self.category, self.root = self.instances[index]
break
Extractor.__init__(self, match)
@classmethod
def update(cls, instances):
extra_instances = config.get(("extractor",), cls.basecategory)
if extra_instances:
for category, info in extra_instances.items():
if isinstance(info, dict) and "root" in info:
instances[category] = info
pattern_list = []
instance_list = cls.instances = []
for category, info in instances.items():
root = info["root"]
domain = root[root.index(":") + 3:]
pattern = info.get("pattern") or re.escape(domain)
name = (info.get("name") or category).capitalize()
instance_list.append((category, root))
for cls in classes:
pattern = info.get("pattern")
if not pattern:
pattern = re.escape(root[root.index(":") + 3:])
pattern_list.append(pattern + "()")
class Extr(cls):
pass
Extr.__module__ = cls.__module__
Extr.__name__ = Extr.__qualname__ = \
name + cls.subcategory.capitalize() + "Extractor"
Extr.__doc__ = \
"Extractor for " + cls.subcategory + "s from " + domain
Extr.category = category
Extr.pattern = r"(?:https?://)?" + pattern + cls.pattern_fmt
Extr.test = info.get("test-" + cls.subcategory)
Extr.root = root
if "extra" in info:
for key, value in info["extra"].items():
setattr(Extr, key, value)
if prev and ckey:
setattr(Extr, ckey, prev)
symtable[Extr.__name__] = prev = Extr
return r"(?:https?://)?(?:" + "|".join(pattern_list) + r")"
# Undo automatic pyOpenSSL injection by requests

@ -8,21 +8,21 @@
"""Extractors for 4chan archives based on FoolFuuka"""
from .common import Extractor, Message, generate_extractors
from .common import BaseExtractor, Message
from .. import text
import itertools
class FoolfuukaExtractor(Extractor):
class FoolfuukaExtractor(BaseExtractor):
"""Base extractor for FoolFuuka based boards/archives"""
basecategory = "foolfuuka"
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
external = "default"
def __init__(self, match):
Extractor.__init__(self, match)
BaseExtractor.__init__(self, match)
self.session.headers["Referer"] = self.root
if self.external == "direct":
if self.category == "b4k":
self.remote = self._remote_direct
def items(self):
@ -43,7 +43,7 @@ class FoolfuukaExtractor(Extractor):
yield Message.Url, url, post
def metadata(self):
""" """
"""Return general metadata"""
def posts(self):
"""Return an iterable with all relevant posts"""
@ -59,16 +59,90 @@ class FoolfuukaExtractor(Extractor):
return media["remote_media_link"]
BASE_PATTERN = FoolfuukaExtractor.update({
"4plebs": {
"root": "https://archive.4plebs.org",
"pattern": r"(?:archive\.)?4plebs\.org",
},
"archivedmoe": {
"root": "https://archived.moe",
},
"archiveofsins": {
"root": "https://archiveofsins.com",
"pattern": r"(?:www\.)?archiveofsins\.com",
},
"b4k": {
"root": "https://arch.b4k.co",
},
"desuarchive": {
"root": "https://desuarchive.org",
},
"fireden": {
"root": "https://boards.fireden.net",
},
"nyafuu": {
"root": "https://archive.nyafuu.org",
"pattern": r"(?:archive\.)?nyafuu\.org",
},
"rbt": {
"root": "https://rbt.asia",
"pattern": r"(?:rbt\.asia|(?:archive\.)?rebeccablacktech\.com)",
},
"thebarchive": {
"root": "https://thebarchive.com",
"pattern": r"thebarchive\.com",
},
})
class FoolfuukaThreadExtractor(FoolfuukaExtractor):
"""Base extractor for threads on FoolFuuka based boards/archives"""
subcategory = "thread"
directory_fmt = ("{category}", "{board[shortname]}",
"{thread_num}{title:? - //}")
pattern_fmt = r"/([^/?#]+)/thread/(\d+)"
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)"
test = (
("https://archive.4plebs.org/tg/thread/54059290", {
"url": "07452944164b602502b02b24521f8cee5c484d2a",
}),
("https://archived.moe/gd/thread/309639/", {
"url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
"content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573",
}),
("https://archived.moe/a/thread/159767162/", {
"url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
}),
("https://archiveofsins.com/h/thread/4668813/", {
"url": "f612d287087e10a228ef69517cf811539db9a102",
"content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
}),
("https://arch.b4k.co/meta/thread/196/", {
"url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
}),
("https://desuarchive.org/a/thread/159542679/", {
"url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406",
}),
("https://boards.fireden.net/sci/thread/11264294/", {
"url": "3adfe181ee86a8c23021c705f623b3657a9b0a43",
}),
("https://archive.nyafuu.org/c/thread/2849220/", {
"url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
}),
("https://rbt.asia/g/thread/61487650/", {
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
}),
("https://archive.rebeccablacktech.com/g/thread/61487650/", {
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
}),
("https://thebarchive.com/b/thread/739772332/", {
"url": "e8b18001307d130d67db31740ce57c8561b5d80c",
}),
)
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
self.board, self.thread = match.groups()
self.board = match.group(match.lastindex-1)
self.thread = match.group(match.lastindex)
self.data = None
def metadata(self):
@ -78,23 +152,34 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
return self.data["op"]
def posts(self):
op = (self.data["op"],)
posts = self.data.get("posts")
if posts:
posts = list(posts.values())
posts.sort(key=lambda p: p["timestamp"])
else:
posts = ()
return itertools.chain((self.data["op"],), posts)
return itertools.chain(op, posts)
return op
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
"""Base extractor for FoolFuuka based boards/archives"""
subcategory = "board"
pattern_fmt = r"/([^/?#]+)/\d*$"
pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
test = (
("https://archive.4plebs.org/tg/"),
("https://archived.moe/gd/"),
("https://archiveofsins.com/h/"),
("https://arch.b4k.co/meta/"),
("https://desuarchive.org/a/"),
("https://boards.fireden.net/sci/"),
("https://archive.nyafuu.org/c/"),
("https://rbt.asia/g/"),
("https://thebarchive.com/b/"),
)
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
self.board = match.group(1)
self.board = match.group(match.lastindex)
def items(self):
index_base = "{}/_/api/chan/index/?board={}&page=".format(
@ -113,7 +198,7 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
for num, thread in threads.items():
thread["url"] = thread_base + format(num)
thread["_extractor"] = self.childclass
thread["_extractor"] = FoolfuukaThreadExtractor
yield Message.Queue, thread["url"], thread
@ -121,15 +206,24 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
"""Base extractor for search results on FoolFuuka based boards/archives"""
subcategory = "search"
directory_fmt = ("{category}", "search", "{search}")
pattern_fmt = r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
request_interval = 1.0
test = (
("https://archive.4plebs.org/_/search/text/test/"),
("https://archived.moe/_/search/text/test/"),
("https://archiveofsins.com/_/search/text/test/"),
("https://archiveofsins.com/_/search/text/test/"),
("https://desuarchive.org/_/search/text/test/"),
("https://boards.fireden.net/_/search/text/test/"),
("https://archive.nyafuu.org/_/search/text/test/"),
("https://rbt.asia/_/search/text/test/"),
("https://thebarchive.com/_/search/text/test/"),
)
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
board, search = match.groups()
self.params = params = {}
args = search.split("/")
args = match.group(match.lastindex).split("/")
key = None
for arg in args:
@ -138,6 +232,8 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
key = None
else:
key = arg
board = match.group(match.lastindex-1)
if board != "_":
params["boards"] = board
@ -170,105 +266,3 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
if len(posts) <= 3:
return
params["page"] += 1
EXTRACTORS = {
"4plebs": {
"name": "_4plebs",
"root": "https://archive.4plebs.org",
"pattern": r"(?:archive\.)?4plebs\.org",
"test-thread": ("https://archive.4plebs.org/tg/thread/54059290", {
"url": "07452944164b602502b02b24521f8cee5c484d2a",
}),
"test-board": ("https://archive.4plebs.org/tg/",),
"test-search": ("https://archive.4plebs.org/_/search/text/test/",),
},
"archivedmoe": {
"root": "https://archived.moe",
"test-thread": (
("https://archived.moe/gd/thread/309639/", {
"url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
"content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573",
}),
("https://archived.moe/a/thread/159767162/", {
"url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
}),
),
"test-board": ("https://archived.moe/gd/",),
"test-search": ("https://archived.moe/_/search/text/test/",),
},
"archiveofsins": {
"root": "https://archiveofsins.com",
"pattern": r"(?:www\.)?archiveofsins\.com",
"test-thread": ("https://archiveofsins.com/h/thread/4668813/", {
"url": "f612d287087e10a228ef69517cf811539db9a102",
"content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
}),
"test-board": ("https://archiveofsins.com/h/",),
"test-search": ("https://archiveofsins.com/_/search/text/test/",),
},
"b4k": {
"root": "https://arch.b4k.co",
"extra": {"external": "direct"},
"test-thread": ("https://arch.b4k.co/meta/thread/196/", {
"url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
}),
"test-board": ("https://arch.b4k.co/meta/",),
"test-search": ("https://arch.b4k.co/_/search/text/test/",),
},
"desuarchive": {
"root": "https://desuarchive.org",
"test-thread": ("https://desuarchive.org/a/thread/159542679/", {
"url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406",
}),
"test-board": ("https://desuarchive.org/a/",),
"test-search": ("https://desuarchive.org/_/search/text/test/",),
},
"fireden": {
"root": "https://boards.fireden.net",
"test-thread": ("https://boards.fireden.net/sci/thread/11264294/", {
"url": "3adfe181ee86a8c23021c705f623b3657a9b0a43",
}),
"test-board": ("https://boards.fireden.net/sci/",),
"test-search": ("https://boards.fireden.net/_/search/text/test/",),
},
"nyafuu": {
"root": "https://archive.nyafuu.org",
"pattern": r"(?:archive\.)?nyafuu\.org",
"test-thread": ("https://archive.nyafuu.org/c/thread/2849220/", {
"url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
}),
"test-board": ("https://archive.nyafuu.org/c/",),
"test-search": ("https://archive.nyafuu.org/_/search/text/test/",),
},
"rbt": {
"root": "https://rbt.asia",
"pattern": r"(?:rbt\.asia|(?:archive\.)?rebeccablacktech\.com)",
"test-thread": (
("https://rbt.asia/g/thread/61487650/", {
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
}),
("https://archive.rebeccablacktech.com/g/thread/61487650/", {
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
}),
),
"test-board": ("https://rbt.asia/g/",),
"test-search": ("https://rbt.asia/_/search/text/test/",),
},
"thebarchive": {
"root": "https://thebarchive.com",
"pattern": r"thebarchive\.com",
"test-thread": ("https://thebarchive.com/b/thread/739772332/", {
"url": "e8b18001307d130d67db31740ce57c8561b5d80c",
}),
"test-board": ("https://thebarchive.com/b/",),
"test-search": ("https://thebarchive.com/_/search/text/test/",),
},
"_ckey": "childclass",
}
generate_extractors(EXTRACTORS, globals(), (
FoolfuukaThreadExtractor,
FoolfuukaBoardExtractor,
FoolfuukaSearchExtractor,
))

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2020 Mike Fährmann
# Copyright 2016-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -8,23 +8,21 @@
"""Extractors for FoOlSlide based sites"""
from .common import (
Extractor,
ChapterExtractor,
MangaExtractor,
Message,
generate_extractors,
)
from .common import BaseExtractor, Message
from .. import text, util
import json
class FoolslideBase():
class FoolslideExtractor(BaseExtractor):
"""Base class for FoOlSlide extractors"""
basecategory = "foolslide"
def __init__(self, match):
BaseExtractor.__init__(self, match)
self.gallery_url = self.root + match.group(match.lastindex)
def request(self, url):
return Extractor.request(
return BaseExtractor.request(
self, url, encoding="utf-8", method="POST", data={"adult": "true"})
@staticmethod
@ -40,12 +38,53 @@ class FoolslideBase():
return data
class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
BASE_PATTERN = FoolslideExtractor.update({
"dokireader": {
"root": "https://kobato.hologfx.com/reader",
},
"kireicake": {
"root": "https://reader.kireicake.com",
},
"powermanga": {
"root": "https://read.powermanga.org",
"pattern": r"read(?:er)?\.powermanga\.org",
},
"sensescans": {
"root": "https://sensescans.com/reader",
"pattern": r"(?:(?:www\.)?sensescans\.com/reader"
r"|reader\.sensescans\.com)",
},
})
class FoolslideChapterExtractor(FoolslideExtractor):
"""Base class for chapter extractors for FoOlSlide based sites"""
subcategory = "chapter"
directory_fmt = ("{category}", "{manga}", "{chapter_string}")
filename_fmt = (
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
archive_fmt = "{id}"
pattern_fmt = r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
decode = "default"
pattern = BASE_PATTERN + r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
test = (
(("https://kobato.hologfx.com/reader/read/"
"hitoribocchi_no_oo_seikatsu/en/3/34"), {
"keyword": "6e719ac86f0c6dab89390dd7e507e678459e0dbc",
}),
("https://reader.kireicake.com/read/wonderland/en/1/1/", {
"url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e",
"keyword": "9f80947920a325e33aea7f5cd69ea669171903b6",
}),
(("https://read.powermanga.org"
"/read/one_piece_digital_colour_comics/en/0/75/"), {
"url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
"keyword": "a60c42f2634b7387899299d411ff494ed0ad6dbe",
}),
("https://sensescans.com/reader/read/ao_no_orchestra/en/0/26/", {
"url": "bbd428dc578f5055e9f86ad635b510386cd317cd",
"keyword": "083ef6f8831c84127fe4096fa340a249be9d1424",
}),
("https://reader.sensescans.com/read/ao_no_orchestra/en/0/26/"),
)
def items(self):
page = self.request(self.gallery_url).text
@ -83,66 +122,21 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
return json.loads(text.extract(page, "var pages = ", ";")[0])
class FoolslideMangaExtractor(FoolslideBase, MangaExtractor):
class FoolslideMangaExtractor(FoolslideExtractor):
"""Base class for manga extractors for FoOlSlide based sites"""
pattern_fmt = r"(/series/[^/?#]+)"
def chapters(self, page):
extr = text.extract_from(page)
manga = text.unescape(extr('<h1 class="title">', '</h1>')).strip()
author = extr('<b>Author</b>: ', '<br')
artist = extr('<b>Artist</b>: ', '<br')
results = []
while True:
url = extr('<div class="title"><a href="', '"')
if not url:
return results
results.append((url, self.parse_chapter_url(url, {
"manga": manga, "author": author, "artist": artist,
"chapter_string": extr('title="', '"'),
"group" : extr('title="', '"'),
})))
EXTRACTORS = {
"dokireader": {
"root": "https://kobato.hologfx.com/reader",
"test-chapter":
(("https://kobato.hologfx.com/reader/read/"
"hitoribocchi_no_oo_seikatsu/en/3/34"), {
"keyword": "6e719ac86f0c6dab89390dd7e507e678459e0dbc",
}),
"test-manga":
subcategory = "manga"
categorytransfer = True
pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
test = (
(("https://kobato.hologfx.com/reader/series/"
"boku_ha_ohimesama_ni_narenai/"), {
"url": "1c1f5a7258ce4f631f5fc32be548d78a6a57990d",
"keyword": "614d89a6045b85c822cbd3e67578ea7577dfc995",
}),
},
"kireicake": {
"root": "https://reader.kireicake.com",
"test-chapter":
("https://reader.kireicake.com/read/wonderland/en/1/1/", {
"url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e",
"keyword": "9f80947920a325e33aea7f5cd69ea669171903b6",
}),
"test-manga":
("https://reader.kireicake.com/series/wonderland/", {
"url": "d067b649af1cc88fa8c8b698fde04a10909fd169",
"keyword": "268f43772fb239888ca5c5f6a4f65f99ffb3eefb",
}),
},
"powermanga": {
"root": "https://read.powermanga.org",
"pattern": r"read(?:er)?\.powermanga\.org",
"test-chapter":
(("https://read.powermanga.org"
"/read/one_piece_digital_colour_comics/en/0/75/"), {
"url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
"keyword": "a60c42f2634b7387899299d411ff494ed0ad6dbe",
}),
"test-manga":
(("https://read.powermanga.org"
"/series/one_piece_digital_colour_comics/"), {
"count": ">= 1",
@ -158,27 +152,35 @@ EXTRACTORS = {
"volume": int,
},
}),
},
"sensescans": {
"root": "https://sensescans.com/reader",
"pattern": r"(?:(?:www\.)?sensescans\.com/reader"
r"|reader\.sensescans\.com)",
"test-chapter": (
("https://sensescans.com/reader/read/ao_no_orchestra/en/0/26/", {
"url": "bbd428dc578f5055e9f86ad635b510386cd317cd",
"keyword": "083ef6f8831c84127fe4096fa340a249be9d1424",
}),
("https://reader.sensescans.com/read/ao_no_orchestra/en/0/26/"),
),
"test-manga":
("https://sensescans.com/reader/series/yotsubato/", {
"count": ">= 3",
}),
},
"_ckey": "chapterclass",
}
)
def items(self):
page = self.request(self.gallery_url).text
chapters = self.chapters(page)
if not self.config("chapter-reverse", False):
chapters.reverse()
for chapter, data in chapters:
data["_extractor"] = FoolslideChapterExtractor
yield Message.Queue, chapter, data
def chapters(self, page):
extr = text.extract_from(page)
manga = text.unescape(extr('<h1 class="title">', '</h1>')).strip()
author = extr('<b>Author</b>: ', '<br')
artist = extr('<b>Artist</b>: ', '<br')
generate_extractors(EXTRACTORS, globals(), (
FoolslideChapterExtractor,
FoolslideMangaExtractor,
))
results = []
while True:
url = extr('<div class="title"><a href="', '"')
if not url:
return results
results.append((url, self.parse_chapter_url(url, {
"manga": manga, "author": author, "artist": artist,
"chapter_string": extr('title="', '"'),
"group" : extr('title="', '"'),
})))

@ -366,13 +366,6 @@ class InstagramUserExtractor(InstagramExtractor):
)
def items(self):
if self.config("highlights"):
self.log.warning("'highlights' is deprecated, "
"use '\"include\": \"…,highlights\"' instead")
default = ("highlights", "posts")
else:
default = ("posts",)
base = "{}/{}/".format(self.root, self.item)
stories = "{}/stories/{}/".format(self.root, self.item)
return self._dispatch_extractors((
@ -380,7 +373,7 @@ class InstagramUserExtractor(InstagramExtractor):
(InstagramHighlightsExtractor, base + "highlights/"),
(InstagramPostsExtractor , base + "posts/"),
(InstagramChannelExtractor , base + "channel/"),
), default)
), ("posts",))
class InstagramPostsExtractor(InstagramExtractor):

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2020 Mike Fährmann
# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -8,35 +8,25 @@
"""Extractors for mastodon instances"""
from .common import Extractor, Message
from .. import text, util, config, exception
import re
from .common import BaseExtractor, Message
from .. import text, exception
from ..cache import cache
class MastodonExtractor(Extractor):
class MastodonExtractor(BaseExtractor):
"""Base class for mastodon extractors"""
basecategory = "mastodon"
directory_fmt = ("mastodon", "{instance}", "{account[username]}")
filename_fmt = "{category}_{id}_{media[id]}.{extension}"
archive_fmt = "{media[id]}"
cookiedomain = None
instance = None
root = None
def __init__(self, match):
Extractor.__init__(self, match)
self.api = MastodonAPI(self)
def config(self, key, default=None):
return config.interpolate_common(
("extractor",), (
(self.category, self.subcategory),
(self.basecategory, self.instance, self.subcategory),
), key, default,
)
BaseExtractor.__init__(self, match)
self.instance = self.root.partition("://")[2]
self.item = match.group(match.lastindex)
def items(self):
yield Message.Version, 1
for status in self.statuses():
attachments = status["media_attachments"]
if attachments:
@ -60,34 +50,81 @@ class MastodonExtractor(Extractor):
status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
INSTANCES = {
"mastodon.social": {
"root" : "https://mastodon.social",
"access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48",
"client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo",
"client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI",
},
"pawoo": {
"root" : "https://pawoo.net",
"access-token" : "c12c9d275050bce0dc92169a28db09d7"
"0d62d0a75a8525953098c167eacd3668",
"client-id" : "978a25f843ec01e53d09be2c290cd75c"
"782bc3b7fdbd7ea4164b9f3c3780c8ff",
"client-secret": "9208e3d4a7997032cf4f1b0e12e5df38"
"8428ef1fadb446dcfeb4f5ed6872d97b",
},
"baraag": {
"root" : "https://baraag.net",
"access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0",
"client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
"client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
}
}
BASE_PATTERN = MastodonExtractor.update(INSTANCES)
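Because update() also merges entries from the extractor.mastodon config section (see common.py above), additional instances need no code change; only "root" is required. A sketch with a hypothetical instance, set programmatically here; in practice this belongs in the config file, which is loaded before the extractor modules (and hence update()) run at import time:

from gallery_dl import config

config.set(("extractor",), "mastodon", {
    "wanderingshop": {                     # hypothetical instance category
        "root": "https://wandering.shop",  # required; pattern defaults to domain
    },
})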
class MastodonUserExtractor(MastodonExtractor):
"""Extractor for all images of an account/user"""
subcategory = "user"
def __init__(self, match):
MastodonExtractor.__init__(self, match)
self.account_name = match.group(1)
pattern = BASE_PATTERN + r"/@([^/?#]+)(?:/media)?/?$"
test = (
("https://mastodon.social/@jk", {
"pattern": r"https://files.mastodon.social/media_attachments"
r"/files/(\d+/){3,}original/\w+",
"range": "1-60",
"count": 60,
}),
("https://pawoo.net/@yoru_nine/", {
"range": "1-60",
"count": 60,
}),
("https://baraag.net/@pumpkinnsfw"),
)
def statuses(self):
handle = "@{}@{}".format(self.account_name, self.instance)
for account in self.api.account_search(handle, 1):
if account["username"] == self.account_name:
api = MastodonAPI(self)
username = self.item
handle = "@{}@{}".format(username, self.instance)
for account in api.account_search(handle, 1):
if account["username"] == username:
break
else:
raise exception.NotFoundError("account")
return self.api.account_statuses(account["id"])
return api.account_statuses(account["id"])
class MastodonStatusExtractor(MastodonExtractor):
"""Extractor for images from a status"""
subcategory = "status"
def __init__(self, match):
MastodonExtractor.__init__(self, match)
self.status_id = match.group(1)
pattern = BASE_PATTERN + r"/@[^/?#]+/(\d+)"
test = (
("https://mastodon.social/@jk/103794036899778366", {
"count": 4,
}),
("https://pawoo.net/@yoru_nine/105038878897832922", {
"content": "b52e807f8ab548d6f896b09218ece01eba83987a",
}),
("https://baraag.net/@pumpkinnsfw/104364170556898443", {
"content": "67748c1b828c58ad60d0fe5729b59fb29c872244",
}),
)
def statuses(self):
return (self.api.status(self.status_id),)
return (MastodonAPI(self).status(self.item),)
class MastodonAPI():
@ -97,35 +134,46 @@ class MastodonAPI():
https://github.com/tootsuite/documentation/blob/master/Using-the-API/API.md
"""
def __init__(self, extractor, access_token=None):
def __init__(self, extractor):
self.root = extractor.root
self.extractor = extractor
access_token = extractor.config("access-token")
if access_token is None or access_token == "cache":
access_token = _access_token_cache(extractor.instance)
if not access_token:
access_token = extractor.config(
"access-token", extractor.access_token)
self.headers = {"Authorization": "Bearer {}".format(access_token)}
try:
access_token = INSTANCES[extractor.category]["access-token"]
except (KeyError, TypeError):
raise exception.StopExtraction(
"Missing access token.\n"
"Run 'gallery-dl oauth:mastodon:%s' to obtain one.",
extractor.instance)
self.headers = {"Authorization": "Bearer " + access_token}
def account_search(self, query, limit=40):
"""Search for content"""
endpoint = "/v1/accounts/search"
params = {"q": query, "limit": limit}
return self._call("accounts/search", params).json()
return self._call(endpoint, params).json()
def account_statuses(self, account_id):
"""Get an account's statuses"""
endpoint = "accounts/{}/statuses".format(account_id)
endpoint = "/v1/accounts/{}/statuses".format(account_id)
params = {"only_media": "1"}
return self._pagination(endpoint, params)
def status(self, status_id):
"""Fetch a Status"""
return self._call("statuses/" + status_id).json()
"""Fetch a status"""
endpoint = "/v1/statuses/" + status_id
return self._call(endpoint).json()
def _call(self, endpoint, params=None):
if endpoint.startswith("http"):
url = endpoint
else:
url = "{}/api/v1/{}".format(self.root, endpoint)
url = self.root + "/api" + endpoint
while True:
response = self.extractor.request(
@ -145,7 +193,7 @@ class MastodonAPI():
raise exception.StopExtraction(response.json().get("error"))
def _pagination(self, endpoint, params):
url = "{}/api/v1/{}".format(self.root, endpoint)
url = endpoint
while url:
response = self._call(url, params)
yield from response.json()
@ -156,86 +204,6 @@ class MastodonAPI():
url = url["url"]
def generate_extractors():
"""Dynamically generate Extractor classes for Mastodon instances"""
symtable = globals()
extractors = config.get(("extractor",), "mastodon")
if extractors:
util.combine_dict(EXTRACTORS, extractors)
config.set(("extractor",), "mastodon", EXTRACTORS)
for instance, info in EXTRACTORS.items():
if not isinstance(info, dict):
continue
category = info.get("category") or instance.replace(".", "")
root = info.get("root") or "https://" + instance
name = (info.get("name") or category).capitalize()
token = info.get("access-token")
pattern = info.get("pattern") or re.escape(instance)
class Extr(MastodonUserExtractor):
pass
Extr.__name__ = Extr.__qualname__ = name + "UserExtractor"
Extr.__doc__ = "Extractor for all images of a user on " + instance
Extr.category = category
Extr.instance = instance
Extr.pattern = (r"(?:https?://)?" + pattern +
r"/@([^/?#]+)(?:/media)?/?$")
Extr.test = info.get("test-user")
Extr.root = root
Extr.access_token = token
symtable[Extr.__name__] = Extr
class Extr(MastodonStatusExtractor):
pass
Extr.__name__ = Extr.__qualname__ = name + "StatusExtractor"
Extr.__doc__ = "Extractor for images from a status on " + instance
Extr.category = category
Extr.instance = instance
Extr.pattern = r"(?:https?://)?" + pattern + r"/@[^/?#]+/(\d+)"
Extr.test = info.get("test-status")
Extr.root = root
Extr.access_token = token
symtable[Extr.__name__] = Extr
EXTRACTORS = {
"mastodon.social": {
"category" : "mastodon.social",
"access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48",
"client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo",
"client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI",
"test-user" : ("https://mastodon.social/@jk", {
"pattern": r"https://files.mastodon.social/media_attachments"
r"/files/(\d+/){3,}original/\w+",
"range": "1-60",
"count": 60,
}),
"test-status" : ("https://mastodon.social/@jk/103794036899778366", {
"count": 4,
}),
},
"pawoo.net": {
"category" : "pawoo",
"access-token" : "c12c9d275050bce0dc92169a28db09d7"
"0d62d0a75a8525953098c167eacd3668",
"client-id" : "978a25f843ec01e53d09be2c290cd75c"
"782bc3b7fdbd7ea4164b9f3c3780c8ff",
"client-secret": "9208e3d4a7997032cf4f1b0e12e5df38"
"8428ef1fadb446dcfeb4f5ed6872d97b",
},
"baraag.net": {
"category" : "baraag",
"access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0",
"client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
"client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
},
}
generate_extractors()
@cache(maxage=100*365*24*3600, keyarg=0)
def _access_token_cache(instance):
return None
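This cached function doubles as the token store behind the new instance= plumbing in oauth.py: it returns None until 'gallery-dl oauth:mastodon:<instance>' seeds it through cache.update(). A minimal sketch of the idiom, using a throwaway key:

from gallery_dl.cache import cache

@cache(maxage=365*24*3600, keyarg=0)
def _token(instance):
    return None                                # nothing stored yet

_token.update("mastodon.example", "abc123")    # what the OAuth flow does
print(_token("mastodon.example"))              # 'abc123'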

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -52,4 +52,4 @@ class Message():
# Cookies = 5
Queue = 6
# Urllist = 7
Metadata = 8
# Metadata = 8

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Mike Fährmann
# Copyright 2020-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -8,7 +8,6 @@
"""Extractors for Moebooru based sites"""
from .common import generate_extractors
from .booru import BooruExtractor
from .. import text
@ -52,15 +51,93 @@ class MoebooruExtractor(BooruExtractor):
params["page"] += 1
BASE_PATTERN = MoebooruExtractor.update({
"yandere": {
"root": "https://yande.re",
},
"konachan": {
"root": "https://konachan.com",
"pattern": r"konachan\.(?:com|net)",
},
"hypnohub": {
"root": "https://hypnohub.net",
},
"sakugabooru": {
"root": "https://www.sakugabooru.com",
"pattern": r"(?:www\.)?sakugabooru\.com",
},
"lolibooru": {
"root": "https://lolibooru.moe",
},
})
class MoebooruPostExtractor(MoebooruExtractor):
subcategory = "post"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/post/show/(\d+)"
test = (
("https://yande.re/post/show/51824", {
"content": "59201811c728096b2d95ce6896fd0009235fe683",
"options": (("tags", True),),
"keyword": {
"tags_artist": "sasaki_tamaru",
"tags_circle": "softhouse_chara",
"tags_copyright": "ouzoku",
"tags_general": str,
},
}),
("https://konachan.com/post/show/205189", {
"content": "674e75a753df82f5ad80803f575818b8e46e4b65",
"options": (("tags", True),),
"keyword": {
"tags_artist": "patata",
"tags_character": "clownpiece",
"tags_copyright": "touhou",
"tags_general": str,
},
}),
("https://konachan.net/post/show/205189"),
("https://hypnohub.net/post/show/73964", {
"content": "02d5f5a8396b621a6efc04c5f8ef1b7225dfc6ee",
}),
("https://www.sakugabooru.com/post/show/125570"),
("https://lolibooru.moe/post/show/287835"),
)
def __init__(self, match):
MoebooruExtractor.__init__(self, match)
self.post_id = match.group(match.lastindex)
def posts(self):
params = {"tags": "id:" + self.post_id}
return self.request(self.root + "/post.json", params=params).json()
class MoebooruTagExtractor(MoebooruExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern_fmt = r"/post\?(?:[^&#]*&)*tags=([^&#]+)"
pattern = BASE_PATTERN + r"/post\?(?:[^&#]*&)*tags=([^&#]+)"
test = (
("https://yande.re/post?tags=ouzoku+armor", {
"content": "59201811c728096b2d95ce6896fd0009235fe683",
}),
("https://konachan.com/post?tags=patata", {
"content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
}),
("https://konachan.net/post?tags=patata"),
("https://hypnohub.net/post?tags=gonoike_biwa", {
"url": "072330c34a1e773d0cafd00e64b8060d34b078b6",
}),
("https://www.sakugabooru.com/post?tags=nichijou"),
("https://lolibooru.moe/post?tags=ruu_%28tksymkw%29"),
)
def __init__(self, match):
MoebooruExtractor.__init__(self, match)
self.tags = text.unquote(match.group(1).replace("+", " "))
tags = match.group(match.lastindex)
self.tags = text.unquote(tags.replace("+", " "))
def metadata(self):
return {"search_tags": self.tags}
@ -74,11 +151,25 @@ class MoebooruPoolExtractor(MoebooruExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
pattern_fmt = r"/pool/show/(\d+)"
pattern = BASE_PATTERN + r"/pool/show/(\d+)"
test = (
("https://yande.re/pool/show/318", {
"content": "2a35b9d6edecce11cc2918c6dce4de2198342b68",
}),
("https://konachan.com/pool/show/95", {
"content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
}),
("https://konachan.net/pool/show/95"),
("https://hypnohub.net/pool/show/61", {
"url": "fd74991c8729e77acd3c35eb6ddc4128ff445adf",
}),
("https://www.sakugabooru.com/pool/show/54"),
("https://lolibooru.moe/pool/show/239"),
)
def __init__(self, match):
MoebooruExtractor.__init__(self, match)
self.pool_id = match.group(1)
self.pool_id = match.group(match.lastindex)
def metadata(self):
return {"pool": text.parse_int(self.pool_id)}
@ -88,29 +179,34 @@ class MoebooruPoolExtractor(MoebooruExtractor):
return self._pagination(self.root + "/post.json", params)
class MoebooruPostExtractor(MoebooruExtractor):
subcategory = "post"
archive_fmt = "{id}"
pattern_fmt = r"/post/show/(\d+)"
def __init__(self, match):
MoebooruExtractor.__init__(self, match)
self.post_id = match.group(1)
def posts(self):
params = {"tags": "id:" + self.post_id}
return self.request(self.root + "/post.json", params=params).json()
class MoebooruPopularExtractor(MoebooruExtractor):
subcategory = "popular"
directory_fmt = ("{category}", "popular", "{scale}", "{date}")
archive_fmt = "P_{scale[0]}_{date}_{id}"
pattern_fmt = r"/post/popular_(by_(?:day|week|month)|recent)(?:\?([^#]*))?"
pattern = BASE_PATTERN + \
r"/post/popular_(by_(?:day|week|month)|recent)(?:\?([^#]*))?"
test = (
("https://yande.re/post/popular_by_month?month=6&year=2014", {
"count": 40,
}),
("https://yande.re/post/popular_recent"),
("https://konachan.com/post/popular_by_month?month=11&year=2010", {
"count": 20,
}),
("https://konachan.com/post/popular_recent"),
("https://konachan.net/post/popular_recent"),
("https://hypnohub.net/post/popular_by_month?month=6&year=2014", {
"count": 20,
}),
("https://hypnohub.net/post/popular_recent"),
("https://www.sakugabooru.com/post/popular_recent"),
("https://lolibooru.moe/post/popular_recent"),
)
def __init__(self, match):
MoebooruExtractor.__init__(self, match)
self.scale, self.query = match.groups()
self.scale = match.group(match.lastindex-1)
self.query = match.group(match.lastindex)
def metadata(self):
self.params = params = text.parse_query(self.query)
@ -138,108 +234,3 @@ class MoebooruPopularExtractor(MoebooruExtractor):
def posts(self):
url = "{}/post/popular_{}.json".format(self.root, self.scale)
return self.request(url, params=self.params).json()
EXTRACTORS = {
"yandere": {
"root": "https://yande.re",
"test-tag": ("https://yande.re/post?tags=ouzoku+armor", {
"content": "59201811c728096b2d95ce6896fd0009235fe683",
}),
"test-pool": ("https://yande.re/pool/show/318", {
"content": "2a35b9d6edecce11cc2918c6dce4de2198342b68",
}),
"test-post": ("https://yande.re/post/show/51824", {
"content": "59201811c728096b2d95ce6896fd0009235fe683",
"options": (("tags", True),),
"keyword": {
"tags_artist": "sasaki_tamaru",
"tags_circle": "softhouse_chara",
"tags_copyright": "ouzoku",
"tags_general": str,
},
}),
"test-popular": (
("https://yande.re/post/popular_by_month?month=6&year=2014", {
"count": 40,
}),
("https://yande.re/post/popular_recent"),
),
},
"konachan": {
"root": "https://konachan.com",
"pattern": r"konachan\.(?:com|net)",
"test-tag": (
("https://konachan.com/post?tags=patata", {
"content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
}),
("https://konachan.net/post?tags=patata"),
),
"test-pool": (
("https://konachan.com/pool/show/95", {
"content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
}),
("https://konachan.net/pool/show/95"),
),
"test-post": (
("https://konachan.com/post/show/205189", {
"content": "674e75a753df82f5ad80803f575818b8e46e4b65",
"options": (("tags", True),),
"keyword": {
"tags_artist": "patata",
"tags_character": "clownpiece",
"tags_copyright": "touhou",
"tags_general": str,
},
}),
("https://konachan.net/post/show/205189"),
),
"test-popular": (
("https://konachan.com/post/popular_by_month?month=11&year=2010", {
"count": 20,
}),
("https://konachan.com/post/popular_recent"),
("https://konachan.net/post/popular_recent"),
),
},
"hypnohub": {
"root": "https://hypnohub.net",
"test-tag": ("https://hypnohub.net/post?tags=gonoike_biwa", {
"url": "072330c34a1e773d0cafd00e64b8060d34b078b6",
}),
"test-pool": ("https://hypnohub.net/pool/show/61", {
"url": "fd74991c8729e77acd3c35eb6ddc4128ff445adf",
}),
"test-post": ("https://hypnohub.net/post/show/73964", {
"content": "02d5f5a8396b621a6efc04c5f8ef1b7225dfc6ee",
}),
"test-popular": (
("https://hypnohub.net/post/popular_by_month?month=6&year=2014", {
"count": 20,
}),
("https://hypnohub.net/post/popular_recent"),
),
},
"lolibooru": {
"root": "https://lolibooru.moe",
"test-tag" : ("https://lolibooru.moe/post?tags=ruu_%28tksymkw%29",),
"test-pool" : ("https://lolibooru.moe/pool/show/239",),
"test-post" : ("https://lolibooru.moe/post/show/287835",),
"test-popular": ("https://lolibooru.moe/post/popular_recent",),
},
"sakugabooru": {
"root": "https://www.sakugabooru.com",
"pattern": r"(?:www\.)?sakugabooru\.com",
"test-tag" : ("https://www.sakugabooru.com/post?tags=nichijou",),
"test-pool" : ("https://www.sakugabooru.com/pool/show/54",),
"test-post" : ("https://www.sakugabooru.com/post/show/125570",),
"test-popular": ("https://www.sakugabooru.com/post/popular_recent",),
},
}
generate_extractors(EXTRACTORS, globals(), (
MoebooruTagExtractor,
MoebooruPoolExtractor,
MoebooruPostExtractor,
MoebooruPopularExtractor,
))

@ -9,7 +9,7 @@
"""Utility classes to setup OAuth and link accounts to gallery-dl"""
from .common import Extractor, Message
from . import deviantart, flickr, pixiv, reddit, smugmug, tumblr
from . import deviantart, flickr, mastodon, pixiv, reddit, smugmug, tumblr
from .. import text, oauth, util, config, exception
from ..cache import cache
import urllib.parse
@ -106,9 +106,9 @@ class OAuthBase(Extractor):
))
def _oauth2_authorization_code_grant(
self, client_id, client_secret, auth_url, token_url,
self, client_id, client_secret, auth_url, token_url, *,
scope="read", key="refresh_token", auth=True,
message_template=None, cache=None):
cache=None, instance=None):
"""Perform an OAuth2 authorization code grant"""
state = "gallery-dl_{}_{}".format(
@ -159,27 +159,18 @@ class OAuthBase(Extractor):
self.send(data["error"])
return
token = data[key]
token_name = key.replace("_", "-")
# write to cache
if self.cache and cache:
cache.update("#" + str(client_id), data[key])
self.log.info("Writing 'refresh-token' to cache")
cache.update(instance or ("#" + str(client_id)), token)
self.log.info("Writing '%s' to cache", token_name)
# display token
if message_template:
msg = message_template.format(
category=self.subcategory,
key=key.partition("_")[0],
token=data[key],
instance=getattr(self, "instance", ""),
client_id=client_id,
client_secret=client_secret,
)
else:
msg = self._generate_message(
("refresh-token",),
(data[key],),
)
self.send(msg)
self.send(self._generate_message(
(token_name,), (token,),
))
def _generate_message(self, names, values):
_vh, _va, _is, _it = (
@ -326,8 +317,10 @@ class OAuthMastodon(OAuthBase):
def items(self):
yield Message.Version, 1
application = self.oauth_config(self.instance)
if not application:
for application in mastodon.INSTANCES.values():
if self.instance == application["root"].partition("://")[2]:
break
else:
application = self._register(self.instance)
self._oauth2_authorization_code_grant(
@ -335,8 +328,9 @@ class OAuthMastodon(OAuthBase):
application["client-secret"],
"https://{}/oauth/authorize".format(self.instance),
"https://{}/oauth/token".format(self.instance),
instance=self.instance,
key="access_token",
message_template=MASTODON_MSG_TEMPLATE,
cache=mastodon._access_token_cache,
)
@cache(maxage=10*365*24*3600, keyarg=1)
@ -425,29 +419,3 @@ class OAuthPixiv(OAuthBase):
""")
code = input("code: ")
return code.rpartition("=")[2].strip()
MASTODON_MSG_TEMPLATE = """
Your 'access-token' is
{token}
Put this value into your configuration file as
'extractor.mastodon.{instance}.{key}-token'.
You can also add your 'client-id' and 'client-secret' values
if you want to register another account in the future.
Example:
{{
"extractor": {{
"mastodon": {{
"{instance}": {{
"{key}-token": "{token}",
"client-id": "{client_id}",
"client-secret": "{client_secret}"
}}
}}
}}
}}
"""

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2020 Mike Fährmann
# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -42,8 +42,6 @@ class PatreonExtractor(Extractor):
hashes = set()
yield Message.Directory, post
yield Message.Metadata, post
for kind, url, name in itertools.chain(
self._images(post),
self._attachments(post),

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2020 Mike Fährmann
# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -8,28 +8,23 @@
"""Extractors for Shopify instances"""
from .common import Extractor, Message, generate_extractors
from .common import BaseExtractor, Message
from .. import text
import re
class ShopifyExtractor(Extractor):
class ShopifyExtractor(BaseExtractor):
"""Base class for Shopify extractors"""
basecategory = "shopify"
filename_fmt = "{product[title]}_{num:>02}_{id}.{extension}"
archive_fmt = "{id}"
def __init__(self, match):
Extractor.__init__(self, match)
self.item_url = self.root + match.group(1)
def request(self, url, **kwargs):
kwargs["retries"] = float("inf")
return Extractor.request(self, url, **kwargs)
BaseExtractor.__init__(self, match)
self.item_url = self.root + match.group(match.lastindex)
def items(self):
data = self.metadata()
yield Message.Version, 1
yield Message.Directory, data
headers = {"X-Requested-With": "XMLHttpRequest"}
@ -58,22 +53,34 @@ class ShopifyExtractor(Extractor):
"""Return an iterable with all relevant product URLs"""
BASE_PATTERN = ShopifyExtractor.update({
"fashionnova": {
"root": "https://www.fashionnova.com",
"pattern": r"(?:www\.)?fashionnova\.com",
},
})
class ShopifyCollectionExtractor(ShopifyExtractor):
"""Base class for collection extractors for Shopify based sites"""
subcategory = "collection"
directory_fmt = ("{category}", "{collection[title]}")
pattern_fmt = r"(/collections/[\w-]+)/?(?:\?([^#]+))?(?:$|#)"
def __init__(self, match):
ShopifyExtractor.__init__(self, match)
self.params = match.group(2)
pattern = BASE_PATTERN + r"(/collections/[\w-]+)/?(?:$|[?#])"
test = (
("https://www.fashionnova.com/collections/mini-dresses", {
"range": "1-20",
"count": 20,
"archive": False,
}),
("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
("https://www.fashionnova.com/collections/mini-dresses#1"),
)
def metadata(self):
return self.request(self.item_url + ".json").json()
def products(self):
params = text.parse_query(self.params)
params["page"] = text.parse_int(params.get("page"), 1)
params = {"page": 1}
fetch = True
last = None
@ -107,36 +114,14 @@ class ShopifyProductExtractor(ShopifyExtractor):
"""Base class for product extractors for Shopify based sites"""
subcategory = "product"
directory_fmt = ("{category}", "Products")
pattern_fmt = r"((?:/collections/[\w-]+)?/products/[\w-]+)"
def products(self):
return (self.item_url,)
EXTRACTORS = {
"fashionnova": {
"root": "https://www.fashionnova.com",
"pattern": r"(?:www\.)?fashionnova\.com",
"test-product": (
pattern = BASE_PATTERN + r"((?:/collections/[\w-]+)?/products/[\w-]+)"
test = (
("https://www.fashionnova.com/products/essential-slide-red", {
"pattern": r"https?://cdn\d*\.shopify.com/",
"count": 3,
}),
("https://www.fashionnova.com/collections/flats/products/name"),
),
"test-collection": (
("https://www.fashionnova.com/collections/mini-dresses", {
"range": "1-20",
"count": 20,
"archive": False,
}),
("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
("https://www.fashionnova.com/collections/mini-dresses#1"),
),
},
}
)
generate_extractors(EXTRACTORS, globals(), (
ShopifyProductExtractor,
ShopifyCollectionExtractor,
))
def products(self):
return (self.item_url,)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2020 Mike Fährmann
# Copyright 2015-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -111,10 +111,6 @@ class Job():
if self.pred_queue(url, kwds):
self.handle_queue(url, kwds)
elif msg[0] == Message.Metadata:
self.update_kwdict(msg[1])
self.handle_metadata(msg[1])
elif msg[0] == Message.Version:
if msg[1] != 1:
raise "unsupported message-version ({}, {})".format(
@ -128,9 +124,6 @@ class Job():
def handle_directory(self, kwdict):
"""Handle Message.Directory"""
def handle_metadata(self, kwdict):
"""Handle Message.Metadata"""
def handle_queue(self, url, kwdict):
"""Handle Message.Queue"""
@ -280,15 +273,6 @@ class DownloadJob(Job):
for callback in self.hooks["post"]:
callback(self.pathfmt)
def handle_metadata(self, kwdict):
"""Run postprocessors with metadata from 'kwdict'"""
if "metadata" in self.hooks:
kwdict["extension"] = "metadata"
pathfmt = self.pathfmt
pathfmt.set_filename(kwdict)
for callback in self.hooks["metadata"]:
callback(pathfmt)
def handle_queue(self, url, kwdict):
if url in self.visited:
return
@ -624,8 +608,5 @@ class DataJob(Job):
def handle_directory(self, kwdict):
self.data.append((Message.Directory, self.filter(kwdict)))
def handle_metadata(self, kwdict):
self.data.append((Message.Metadata, self.filter(kwdict)))
def handle_queue(self, url, kwdict):
self.data.append((Message.Queue, url, self.filter(kwdict)))

@ -136,9 +136,9 @@ def build_parser():
help="Print URLs instead of downloading",
)
output.add_argument(
"-G",
"-G", "--resolve-urls",
dest="list_urls", action="store_const", const=128,
help=argparse.SUPPRESS,
help="Print URLs instead of downloading; resolve intermediary URLs",
)
output.add_argument(
"-j", "--dump-json",

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018-2020 Mike Fährmann
# Copyright 2018-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -39,10 +39,6 @@ class ExecPP(PostProcessor):
events = options.get("event")
if events is None:
events = ("after",)
if options.get("final"):
self.log.warning("'final' is deprecated, "
"use '\"event\": \"finalize\"' instead")
events = ("finalize",)
elif isinstance(events, str):
events = events.split(",")
for event in events:

@ -55,10 +55,6 @@ class MetadataPP(PostProcessor):
events = options.get("event")
if events is None:
events = ("file",)
if options.get("bypost"):
self.log.warning("'bypost' is deprecated, use '\"event\": "
"\"post\"' and 'filename' instead")
events = ("metadata",)
elif isinstance(events, str):
events = events.split(",")
for event in events:

@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
__version__ = "1.16.5"
__version__ = "1.17.0-dev"

@ -219,10 +219,6 @@ class TestExtractorWait(unittest.TestCase):
class TextExtractorOAuth(unittest.TestCase):
@classmethod
def setUpClass(cls):
mastodon.generate_extractors()
def test_oauth1(self):
for category in ("flickr", "smugmug", "tumblr"):
extr = extractor.find("oauth:" + category)
