add 'config_instance' method

to allow for more streamlined access to BaseExtractor instance options
pull/5081/head
Mike Fährmann 8 months ago
parent 34a7afdbc1
commit 89066844f4
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -102,6 +102,9 @@ class Extractor():
# Return the result of config.accumulate() for 'key', evaluated against
# this extractor's config path (self._cfgpath).
def config_accumulate(self, key):
return config.accumulate(self._cfgpath, key)
# Look up an instance-specific option named 'key'.
# This base implementation has no instance data to consult, so it always
# returns 'default' unchanged. BaseExtractor replaces this attribute on
# a matched instance with the .get method of that instance's info dict
# ('self.config_instance = info.get'), keeping the same call signature.
def config_instance(self, key, default=None):
return default
# Return the result of config.interpolate_common() for 'key', called with
# the common path ("extractor",), this extractor's config path
# (self._cfgpath), and 'default' passed through as given.
# NOTE(review): presumably 'default' is what comes back when no value is
# configured -- confirm against config.interpolate_common().
def _config_shared(self, key, default=None):
return config.interpolate_common(
("extractor",), self._cfgpath, key, default)
@ -735,9 +738,10 @@ class BaseExtractor(Extractor):
for index, group in enumerate(match.groups()):
if group is not None:
if index:
self.category, self.root = self.instances[index-1]
self.category, self.root, info = self.instances[index-1]
if not self.root:
self.root = text.root_from_url(match.group(0))
self.config_instance = info.get
else:
self.root = group
self.category = group.partition("://")[2]
@ -757,7 +761,7 @@ class BaseExtractor(Extractor):
root = info["root"]
if root:
root = root.rstrip("/")
instance_list.append((category, root))
instance_list.append((category, root, info))
pattern = info.get("pattern")
if not pattern:

@ -22,11 +22,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
def _init(self):
self.api_key = self.config("api-key")
self.user_id = self.config("user-id")
try:
self.api_root = INSTANCES[self.category]["api_root"]
except KeyError:
self.api_root = self.root
self.api_root = self.config_instance("api_root") or self.root
if self.category == "realbooru":
self.items = self._items_realbooru
@ -161,7 +157,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
post["tags_" + key] = " ".join(value)
INSTANCES = {
BASE_PATTERN = GelbooruV02Extractor.update({
"realbooru": {
"root": "https://realbooru.com",
"pattern": r"realbooru\.com",
@ -187,9 +183,7 @@ INSTANCES = {
"root": "https://xbooru.com",
"pattern": r"xbooru\.com",
},
}
BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES)
})
class GelbooruV02TagExtractor(GelbooruV02Extractor):

@ -75,7 +75,7 @@ class MastodonExtractor(BaseExtractor):
account["acct"], account["moved"]["acct"])
INSTANCES = {
BASE_PATTERN = MastodonExtractor.update({
"mastodon.social": {
"root" : "https://mastodon.social",
"pattern" : r"mastodon\.social",
@ -100,9 +100,7 @@ INSTANCES = {
"client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
"client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
}
}
BASE_PATTERN = MastodonExtractor.update(INSTANCES) + "(?:/web)?"
}) + "(?:/web)?"
class MastodonUserExtractor(MastodonExtractor):
@ -174,10 +172,8 @@ class MastodonAPI():
if access_token is None or access_token == "cache":
access_token = _access_token_cache(extractor.instance)
if not access_token:
try:
access_token = INSTANCES[extractor.category]["access-token"]
except (KeyError, TypeError):
pass
access_token = extractor.config_instance("access-token")
if access_token:
self.headers = {"Authorization": "Bearer " + access_token}
else:

@ -358,8 +358,8 @@ class OAuthMastodon(OAuthBase):
yield Message.Version, 1
from . import mastodon
for application in mastodon.INSTANCES.values():
if self.instance == application["root"].partition("://")[2]:
for _, root, application in mastodon.MastodonExtractor.instances:
if self.instance == root.partition("://")[2]:
break
else:
application = self._register(self.instance)

@ -32,7 +32,7 @@ class PhilomenaExtractor(BooruExtractor):
post["date"] = text.parse_datetime(post["created_at"])
INSTANCES = {
BASE_PATTERN = PhilomenaExtractor.update({
"derpibooru": {
"root": "https://derpibooru.org",
"pattern": r"(?:www\.)?derpibooru\.org",
@ -48,9 +48,7 @@ INSTANCES = {
"pattern": r"furbooru\.org",
"filter_id": "2",
},
}
BASE_PATTERN = PhilomenaExtractor.update(INSTANCES)
})
class PhilomenaPostExtractor(PhilomenaExtractor):
@ -176,10 +174,7 @@ class PhilomenaAPI():
if filter_id:
params["filter_id"] = filter_id
elif not api_key:
try:
params["filter_id"] = INSTANCES[extr.category]["filter_id"]
except (KeyError, TypeError):
params["filter_id"] = "2"
params["filter_id"] = extr.config_instance("filter_id") or "2"
params["page"] = extr.page_start
params["per_page"] = extr.per_page

@ -19,17 +19,12 @@ class Shimmie2Extractor(BaseExtractor):
archive_fmt = "{id}"
def _init(self):
try:
instance = INSTANCES[self.category]
except KeyError:
return
cookies = instance.get("cookies")
cookies = self.config_instance("cookies")
if cookies:
domain = self.root.rpartition("/")[2]
self.cookies_update_dict(cookies, domain=domain)
file_url = instance.get("file_url")
file_url = self.config_instance("file_url")
if file_url:
self.file_url_fmt = file_url
@ -73,7 +68,7 @@ class Shimmie2Extractor(BaseExtractor):
return "'"
INSTANCES = {
BASE_PATTERN = Shimmie2Extractor.update({
"loudbooru": {
"root": "https://loudbooru.com",
"pattern": r"loudbooru\.com",
@ -97,9 +92,7 @@ INSTANCES = {
"root": "https://rule34hentai.net",
"pattern": r"rule34hentai\.net",
},
}
BASE_PATTERN = Shimmie2Extractor.update(INSTANCES) + r"/(?:index\.php\?q=/?)?"
}) + r"/(?:index\.php\?q=/?)?"
class Shimmie2TagExtractor(Shimmie2Extractor):

@ -15,7 +15,7 @@ class UrlshortenerExtractor(BaseExtractor):
basecategory = "urlshortener"
INSTANCES = {
BASE_PATTERN = UrlshortenerExtractor.update({
"bitly": {
"root": "https://bit.ly",
"pattern": r"bit\.ly",
@ -26,9 +26,7 @@ INSTANCES = {
"root": "https://t.co",
"pattern": r"t\.co",
},
}
BASE_PATTERN = UrlshortenerExtractor.update(INSTANCES)
})
class UrlshortenerLinkExtractor(UrlshortenerExtractor):
@ -42,10 +40,7 @@ class UrlshortenerLinkExtractor(UrlshortenerExtractor):
self.id = match.group(match.lastindex)
def _init(self):
try:
self.headers = INSTANCES[self.category]["headers"]
except Exception:
self.headers = None
self.headers = self.config_instance("headers")
def items(self):
response = self.request(

@ -457,7 +457,7 @@ def build_extractor_list():
domains[category] = domain(extr)
else:
base = categories[extr.basecategory]
for category, root in extr.instances:
for category, root, info in extr.instances:
base[category].append(extr.subcategory)
if category not in domains:
if not root:

Loading…
Cancel
Save