|
|
@ -1,34 +1,40 @@
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
|
|
|
|
# Copyright 2018-2019 Mike Fährmann
|
|
|
|
# Copyright 2019 Mike Fährmann
|
|
|
|
#
|
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
|
|
|
|
|
|
|
"""Extractors for http://joyreactor.cc/"""
|
|
|
|
"""Generic extractors for *reactor sites"""
|
|
|
|
|
|
|
|
|
|
|
|
from .common import Extractor, Message
|
|
|
|
from .common import SharedConfigExtractor, Message
|
|
|
|
from .. import text
|
|
|
|
from .. import text
|
|
|
|
|
|
|
|
import urllib.parse
|
|
|
|
import json
|
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?(joyreactor\.c(?:c|om))"
|
|
|
|
BASE_PATTERN = r"(?:https?://)?([^/.]+\.reactor\.cc)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class JoyreactorExtractor(Extractor):
|
|
|
|
class ReactorExtractor(SharedConfigExtractor):
|
|
|
|
"""Base class for joyreactor extractors"""
|
|
|
|
"""Base class for *reactor.cc extractors"""
|
|
|
|
category = "joyreactor"
|
|
|
|
basecategory = "reactor"
|
|
|
|
directory_fmt = ["{category}"]
|
|
|
|
directory_fmt = ["{category}"]
|
|
|
|
filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}"
|
|
|
|
filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}"
|
|
|
|
archive_fmt = "{post_id}_{num}"
|
|
|
|
archive_fmt = "{post_id}_{num}"
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, match):
|
|
|
|
def __init__(self, match):
|
|
|
|
Extractor.__init__(self)
|
|
|
|
SharedConfigExtractor.__init__(self)
|
|
|
|
self.url = match.group(0)
|
|
|
|
self.url = match.group(0)
|
|
|
|
self.root = "http://" + match.group(1)
|
|
|
|
self.root = "http://" + match.group(1)
|
|
|
|
self.session.headers["Referer"] = self.root
|
|
|
|
self.session.headers["Referer"] = self.root
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if not self.category:
|
|
|
|
|
|
|
|
# set category based on domain name
|
|
|
|
|
|
|
|
netloc = urllib.parse.urlsplit(self.root).netloc
|
|
|
|
|
|
|
|
self.category = netloc.rpartition(".")[0]
|
|
|
|
|
|
|
|
|
|
|
|
def items(self):
|
|
|
|
def items(self):
|
|
|
|
data = self.metadata()
|
|
|
|
data = self.metadata()
|
|
|
|
yield Message.Version, 1
|
|
|
|
yield Message.Version, 1
|
|
|
@ -70,6 +76,7 @@ class JoyreactorExtractor(Extractor):
|
|
|
|
data = json.loads(script)
|
|
|
|
data = json.loads(script)
|
|
|
|
except ValueError:
|
|
|
|
except ValueError:
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
|
|
|
|
# remove control characters and escape backslashes
|
|
|
|
mapping = dict.fromkeys(range(32))
|
|
|
|
mapping = dict.fromkeys(range(32))
|
|
|
|
script = script.translate(mapping).replace("\\", "\\\\")
|
|
|
|
script = script.translate(mapping).replace("\\", "\\\\")
|
|
|
|
data = json.loads(script)
|
|
|
|
data = json.loads(script)
|
|
|
@ -115,36 +122,92 @@ class JoyreactorExtractor(Extractor):
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class JoyreactorTagExtractor(JoyreactorExtractor):
|
|
|
|
class ReactorTagExtractor(ReactorExtractor):
|
|
|
|
"""Extractor for tag searches on joyreactor.cc"""
|
|
|
|
"""Extractor for tag searches on *reactor.cc sites"""
|
|
|
|
subcategory = "tag"
|
|
|
|
subcategory = "tag"
|
|
|
|
directory_fmt = ["{category}", "{search_tags}"]
|
|
|
|
directory_fmt = ["{category}", "{search_tags}"]
|
|
|
|
archive_fmt = "{search_tags}_{post_id}_{num}"
|
|
|
|
archive_fmt = "{search_tags}_{post_id}_{num}"
|
|
|
|
pattern = [BASE_PATTERN + r"/tag/([^/?&#]+)"]
|
|
|
|
pattern = [BASE_PATTERN + r"/tag/([^/?&#]+)"]
|
|
|
|
test = [
|
|
|
|
test = [("http://anime.reactor.cc/tag/Anime+Art", None)]
|
|
|
|
("http://joyreactor.com/tag/Cirno", {
|
|
|
|
|
|
|
|
"url": "a81382a3146da50b647c475f87427a6ca1d737df",
|
|
|
|
|
|
|
|
"keyword": "dcd3b101cae0a93fbb91281235de1410faf88455",
|
|
|
|
|
|
|
|
}),
|
|
|
|
|
|
|
|
("http://joyreactor.cc/tag/Advent+Cirno", {
|
|
|
|
|
|
|
|
"count": ">= 17",
|
|
|
|
|
|
|
|
}),
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, match):
|
|
|
|
def __init__(self, match):
|
|
|
|
JoyreactorExtractor.__init__(self, match)
|
|
|
|
ReactorExtractor.__init__(self, match)
|
|
|
|
self.tag = match.group(2)
|
|
|
|
self.tag = match.group(2)
|
|
|
|
|
|
|
|
|
|
|
|
def metadata(self):
|
|
|
|
def metadata(self):
|
|
|
|
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
|
|
|
|
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class JoyreactorSearchExtractor(JoyreactorTagExtractor):
|
|
|
|
class ReactorSearchExtractor(ReactorTagExtractor):
|
|
|
|
"""Extractor for search results on joyreactor.cc"""
|
|
|
|
"""Extractor for search results on *reactor.cc sites"""
|
|
|
|
subcategory = "search"
|
|
|
|
subcategory = "search"
|
|
|
|
directory_fmt = ["{category}", "search", "{search_tags}"]
|
|
|
|
directory_fmt = ["{category}", "search", "{search_tags}"]
|
|
|
|
archive_fmt = "s_{search_tags}_{post_id}_{num}"
|
|
|
|
archive_fmt = "s_{search_tags}_{post_id}_{num}"
|
|
|
|
pattern = [BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
|
|
|
|
pattern = [BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
|
|
|
|
|
|
|
|
test = [("http://anime.reactor.cc/search?q=Art", None)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ReactorUserExtractor(ReactorExtractor):
|
|
|
|
|
|
|
|
"""Extractor for all posts of a user on *reactor.cc sites"""
|
|
|
|
|
|
|
|
subcategory = "user"
|
|
|
|
|
|
|
|
directory_fmt = ["{category}", "user", "{user}"]
|
|
|
|
|
|
|
|
pattern = [BASE_PATTERN + r"/user/([^/?&#]+)"]
|
|
|
|
|
|
|
|
test = [("http://anime.reactor.cc/user/Shuster", None)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, match):
|
|
|
|
|
|
|
|
ReactorExtractor.__init__(self, match)
|
|
|
|
|
|
|
|
self.user = match.group(2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def metadata(self):
|
|
|
|
|
|
|
|
return {"user": text.unescape(self.user).replace("+", " ")}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ReactorPostExtractor(ReactorExtractor):
|
|
|
|
|
|
|
|
"""Extractor for single posts on *reactor.cc sites"""
|
|
|
|
|
|
|
|
subcategory = "post"
|
|
|
|
|
|
|
|
pattern = [BASE_PATTERN + r"/post/(\d+)"]
|
|
|
|
|
|
|
|
test = [("http://anime.reactor.cc/post/3576250", None)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, match):
|
|
|
|
|
|
|
|
ReactorExtractor.__init__(self, match)
|
|
|
|
|
|
|
|
self.post_id = match.group(2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def items(self):
|
|
|
|
|
|
|
|
yield Message.Version, 1
|
|
|
|
|
|
|
|
post = self.request(self.url).text
|
|
|
|
|
|
|
|
pos = post.find('class="uhead">')
|
|
|
|
|
|
|
|
for image in self._parse_post(post[pos:]):
|
|
|
|
|
|
|
|
if image["num"] == 1:
|
|
|
|
|
|
|
|
yield Message.Directory, image
|
|
|
|
|
|
|
|
url = image["file_url"]
|
|
|
|
|
|
|
|
yield Message.Url, url, text.nameext_from_url(url, image)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --------------------------------------------------------------------
|
|
|
|
|
|
|
|
# JoyReactor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
JR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(joyreactor\.c(?:c|om))"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class JoyreactorTagExtractor(ReactorTagExtractor):
|
|
|
|
|
|
|
|
"""Extractor for tag searches on joyreactor.cc"""
|
|
|
|
|
|
|
|
category = "joyreactor"
|
|
|
|
|
|
|
|
pattern = [JR_BASE_PATTERN + r"/tag/([^/?&#]+)"]
|
|
|
|
|
|
|
|
test = [
|
|
|
|
|
|
|
|
("http://joyreactor.com/tag/Cirno", {
|
|
|
|
|
|
|
|
"url": "a81382a3146da50b647c475f87427a6ca1d737df",
|
|
|
|
|
|
|
|
"keyword": "dcd3b101cae0a93fbb91281235de1410faf88455",
|
|
|
|
|
|
|
|
}),
|
|
|
|
|
|
|
|
("http://joyreactor.cc/tag/Advent+Cirno", {
|
|
|
|
|
|
|
|
"count": ">= 17",
|
|
|
|
|
|
|
|
}),
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class JoyreactorSearchExtractor(ReactorSearchExtractor):
|
|
|
|
|
|
|
|
"""Extractor for search results on joyreactor.cc"""
|
|
|
|
|
|
|
|
category = "joyreactor"
|
|
|
|
|
|
|
|
pattern = [JR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
|
|
|
|
test = [
|
|
|
|
test = [
|
|
|
|
("http://joyreactor.com/search?q=Cirno+Gifs", {
|
|
|
|
("http://joyreactor.com/search?q=Cirno+Gifs", {
|
|
|
|
"count": 0, # no search results on joyreactor.com
|
|
|
|
"count": 0, # no search results on joyreactor.com
|
|
|
@ -156,11 +219,10 @@ class JoyreactorSearchExtractor(JoyreactorTagExtractor):
|
|
|
|
]
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class JoyreactorUserExtractor(JoyreactorExtractor):
|
|
|
|
class JoyreactorUserExtractor(ReactorUserExtractor):
|
|
|
|
"""Extractor for all posts of a user on joyreactor.cc"""
|
|
|
|
"""Extractor for all posts of a user on joyreactor.cc"""
|
|
|
|
subcategory = "user"
|
|
|
|
category = "joyreactor"
|
|
|
|
directory_fmt = ["{category}", "user", "{user}"]
|
|
|
|
pattern = [JR_BASE_PATTERN + r"/user/([^/?&#]+)"]
|
|
|
|
pattern = [BASE_PATTERN + r"/user/([^/?&#]+)"]
|
|
|
|
|
|
|
|
test = [
|
|
|
|
test = [
|
|
|
|
("http://joyreactor.com/user/Tacoman123", {
|
|
|
|
("http://joyreactor.com/user/Tacoman123", {
|
|
|
|
"url": "0444158f17c22f08515ad4e7abf69ad2f3a63b35",
|
|
|
|
"url": "0444158f17c22f08515ad4e7abf69ad2f3a63b35",
|
|
|
@ -169,18 +231,11 @@ class JoyreactorUserExtractor(JoyreactorExtractor):
|
|
|
|
("http://joyreactor.cc/user/hemantic", None),
|
|
|
|
("http://joyreactor.cc/user/hemantic", None),
|
|
|
|
]
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, match):
|
|
|
|
|
|
|
|
JoyreactorExtractor.__init__(self, match)
|
|
|
|
|
|
|
|
self.user = match.group(2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def metadata(self):
|
|
|
|
|
|
|
|
return {"user": text.unescape(self.user).replace("+", " ")}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class JoyreactorPostExtractor(JoyreactorExtractor):
|
|
|
|
class JoyreactorPostExtractor(ReactorPostExtractor):
|
|
|
|
"""Extractor for single posts on joyreactor.cc"""
|
|
|
|
"""Extractor for single posts on joyreactor.cc"""
|
|
|
|
subcategory = "post"
|
|
|
|
category = "joyreactor"
|
|
|
|
pattern = [BASE_PATTERN + r"/post/(\d+)"]
|
|
|
|
pattern = [JR_BASE_PATTERN + r"/post/(\d+)"]
|
|
|
|
test = [
|
|
|
|
test = [
|
|
|
|
("http://joyreactor.com/post/3721876", { # single image
|
|
|
|
("http://joyreactor.com/post/3721876", { # single image
|
|
|
|
"url": "904779f6571436f3d5adbce30c2c272f6401e14a",
|
|
|
|
"url": "904779f6571436f3d5adbce30c2c272f6401e14a",
|
|
|
@ -204,16 +259,64 @@ class JoyreactorPostExtractor(JoyreactorExtractor):
|
|
|
|
}),
|
|
|
|
}),
|
|
|
|
]
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, match):
|
|
|
|
|
|
|
|
JoyreactorExtractor.__init__(self, match)
|
|
|
|
|
|
|
|
self.post_id = match.group(2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def items(self):
|
|
|
|
# --------------------------------------------------------------------
|
|
|
|
yield Message.Version, 1
|
|
|
|
# PornReactor
|
|
|
|
post = self.request(self.url).text
|
|
|
|
|
|
|
|
pos = post.find('class="uhead">')
|
|
|
|
PR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(pornreactor\.cc|fapreactor.com)"
|
|
|
|
for image in self._parse_post(post[pos:]):
|
|
|
|
|
|
|
|
if image["num"] == 1:
|
|
|
|
|
|
|
|
yield Message.Directory, image
|
|
|
|
class PornreactorTagExtractor(ReactorTagExtractor):
|
|
|
|
url = image["file_url"]
|
|
|
|
"""Extractor for tag searches on pornreactor.cc"""
|
|
|
|
yield Message.Url, url, text.nameext_from_url(url, image)
|
|
|
|
category = "pornreactor"
|
|
|
|
|
|
|
|
pattern = [PR_BASE_PATTERN + r"/tag/([^/?&#]+)"]
|
|
|
|
|
|
|
|
test = [
|
|
|
|
|
|
|
|
("http://pornreactor.cc/tag/RiceGnat", {
|
|
|
|
|
|
|
|
"count": ">= 120",
|
|
|
|
|
|
|
|
}),
|
|
|
|
|
|
|
|
("http://fapreactor.com/tag/RiceGnat", None),
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class PornreactorSearchExtractor(ReactorSearchExtractor):
|
|
|
|
|
|
|
|
"""Extractor for search results on pornreactor.cc"""
|
|
|
|
|
|
|
|
category = "pornreactor"
|
|
|
|
|
|
|
|
pattern = [PR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
|
|
|
|
|
|
|
|
test = [
|
|
|
|
|
|
|
|
("http://pornreactor.cc/search?q=ecchi+hentai", {
|
|
|
|
|
|
|
|
"range": "1-25",
|
|
|
|
|
|
|
|
"count": ">= 20",
|
|
|
|
|
|
|
|
}),
|
|
|
|
|
|
|
|
("http://fapreactor.com/search/ecchi+hentai", None),
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class PornreactorUserExtractor(ReactorUserExtractor):
|
|
|
|
|
|
|
|
"""Extractor for all posts of a user on pornreactor.cc"""
|
|
|
|
|
|
|
|
category = "pornreactor"
|
|
|
|
|
|
|
|
pattern = [PR_BASE_PATTERN + r"/user/([^/?&#]+)"]
|
|
|
|
|
|
|
|
test = [
|
|
|
|
|
|
|
|
("http://pornreactor.cc/user/Disillusion", {
|
|
|
|
|
|
|
|
"url": "7e06f87f8dcce3fc7851b6d13aa55712ab45fb04",
|
|
|
|
|
|
|
|
"keyword": "edfefb54ea4863e3731c508ae6caeb4140be0d31",
|
|
|
|
|
|
|
|
}),
|
|
|
|
|
|
|
|
("http://fapreactor.com/user/Disillusion", None),
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class PornreactorPostExtractor(ReactorPostExtractor):
|
|
|
|
|
|
|
|
"""Extractor for single posts on pornreactor.cc"""
|
|
|
|
|
|
|
|
category = "pornreactor"
|
|
|
|
|
|
|
|
subcategory = "post"
|
|
|
|
|
|
|
|
pattern = [PR_BASE_PATTERN + r"/post/(\d+)"]
|
|
|
|
|
|
|
|
test = [
|
|
|
|
|
|
|
|
("http://pornreactor.cc/post/863166", {
|
|
|
|
|
|
|
|
"url": "9e5f7b374605cbbd413f4f4babb9d1af6f95b843",
|
|
|
|
|
|
|
|
"keyword": "6e9e4bd4e2d4f3f2c7936340ec71f8693129f809",
|
|
|
|
|
|
|
|
"content": "3e2a09f8b5e5ed7722f51c5f423ff4c9260fb23e",
|
|
|
|
|
|
|
|
}),
|
|
|
|
|
|
|
|
("http://fapreactor.com/post/863166", {
|
|
|
|
|
|
|
|
"url": "83ff7c87741c05bcf1de6825e2b4739afeb87ed5",
|
|
|
|
|
|
|
|
"keyword": "cf8159224fde59c1dab86677514b4aedeb533d66",
|
|
|
|
|
|
|
|
}),
|
|
|
|
|
|
|
|
]
|