match user profile handling on deviantart and newgrounds

pull/511/head
Mike Fährmann 5 years ago
parent ea80dadd09
commit d45fabb79d
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -133,7 +133,7 @@ extractor.*.path-remove
-----------------------
=========== =====
Type ``string``
Default ``"\\u0000-\\u001f\\u007f"`` (ASCII control characters)
Default ``"\u0000-\u001f\u007f"`` (ASCII control characters)
Description Set of characters to remove from generated path names.
Note: In a set with 2 or more characters, ``[]^-\`` need to be
@ -551,16 +551,17 @@ Description Provide a ``folders`` metadata field that contains the names of all
extractor.deviantart.include
----------------------------
=========== =====
Type ``list`` of ``strings`` or ``string``
Default ``["gallery"]``
Description Selects the subcategories to include when processing a user profile.
Type ``string`` or ``list`` of ``strings``
Default ``"gallery"``
Example * ``"favorite,journal,scraps"``
* ``["favorite", "journal", "scraps"]``
Description A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are ``"gallery"``, ``"scraps"``, ``"journal"``,
``"favorite"``.
Possible values are
``"gallery"``, ``"scraps"``, ``"journal"``, ``"favorite"``.
It is also possible to use a string with the initial character of
each subcategory, i.e. ``"gsj"`` for
``["gallery", "scraps", "journal"]``
You can use ``"all"`` instead of listing all values separately.
=========== =====
@ -782,6 +783,23 @@ Description Controls how to handle redirects to CAPTCHA pages.
=========== =====
extractor.newgrounds.include
----------------------------
=========== =====
Type ``string`` or ``list`` of ``strings``
Default ``"art"``
Example * ``"movies,audio"``
* ``["movies", "audio"]``
Description A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are
``"art"``, ``"audio"``, ``"movies"``.
You can use ``"all"`` instead of listing all values separately.
=========== =====
extractor.oauth.browser
-----------------------
=========== =====

@ -254,6 +254,26 @@ class Extractor():
fmt = self.config("date-format", "%Y-%m-%dT%H:%M:%S")
return get("date-min", dmin), get("date-max", dmax)
def _dispatch_extractors(self, extractor_data, default=()):
    """Queue the sub-extractors selected by the "include" option.

    'extractor_data' is an iterable of (extractor class, URL) pairs;
    each pair is keyed by the class's 'subcategory' attribute.
    'default' is passed to self.config() as the fallback value for
    the "include" option.

    The option value may be
      * the string "all": every entry, in 'extractor_data' order
      * a comma-separated string of subcategory names
      * an iterable of subcategory names
    Names that match no entry are skipped without an error.

    Returns an iterator yielding (Message.Version, 1) followed by one
    (Message.Queue, url, {"_extractor": class}) tuple per selected
    name, in the order the names were requested.
    """
    extractors = {
        data[0].subcategory: data
        for data in extractor_data
    }

    # a falsy option value (None, "", []) selects nothing
    include = self.config("include", default) or ()
    if include == "all":
        # iterating the dict yields its keys in insertion order
        include = extractors
    elif isinstance(include, str):
        # tolerate whitespace around the separators ("movies, audio");
        # untrimmed names would silently fail the lookup below
        include = [name.strip() for name in include.split(",")]

    result = [(Message.Version, 1)]
    for category in include:
        if category in extractors:
            extr, url = extractors[category]
            result.append((Message.Queue, url, {"_extractor": extr}))
    return iter(result)
@classmethod
def _get_tests(cls):
"""Yield an extractor's test cases as (URL, RESULTS) tuples"""

@ -265,46 +265,30 @@ class DeviantartExtractor(Extractor):
content.update(download)
class DeviantartUserExtractor(Extractor):
class DeviantartUserExtractor(DeviantartExtractor):
# NOTE(review): two consecutive class headers with different bases
# (Extractor, then DeviantartExtractor) and no indentation -- this span
# looks like a flattened diff hunk in which lines of the old and the new
# revision are interleaved. Confirm against the repository before
# treating it as runnable code.
"""Extractor for an artist's user profile"""
category = "deviantart"
subcategory = "user"
pattern = BASE_PATTERN + r"/?$"
test = (
("https://www.deviantart.com/shimoda7", {
# NOTE(review): "pattern" occurs twice in this dict; presumably the
# "options" line and the first "pattern" belong to the old revision and
# only the second "pattern" to the new one -- verify.
"options": (("include", "gsjf"),),
"pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)",
"pattern": r"/shimoda7/gallery$",
}),
# same profile URL with "include" set to "all"
("https://www.deviantart.com/shimoda7", {
"options": (("include", "all"),),
"pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)$",
"count": 4,
}),
("https://shimoda7.deviantart.com/"),
)
# NOTE(review): this __init__ and the letter-based part of items() below
# look like the implementation being replaced -- verify.
def __init__(self, match):
Extractor.__init__(self, match)
# the user name is taken from whichever of the two groups matched
self.user = match.group(1) or match.group(2)
# "include" falls back to "g"; a list value is reduced to the first
# character of each non-empty item, and the result is lower-cased
incl = self.config("include") or "g"
if isinstance(incl, list):
incl = "".join(item[0] for item in incl if item)
self.include = incl.lower()
def items(self):
# Letter-based variant: one Message.Queue per letter found in
# self.include (g: gallery, s: gallery/scraps, j: posts, f: favourites).
# The same 'data' dict object is yielded every time, with its
# "_extractor" entry overwritten before each yield.
base = "https://www.deviantart.com/{}/".format(self.user)
incl = self.include
data = {}
if "g" in incl:
data["_extractor"] = DeviantartGalleryExtractor
yield Message.Queue, base + "gallery", data
if "s" in incl:
data["_extractor"] = DeviantartScrapsExtractor
yield Message.Queue, base + "gallery/scraps", data
if "j" in incl:
data["_extractor"] = DeviantartJournalExtractor
yield Message.Queue, base + "posts", data
if "f" in incl:
data["_extractor"] = DeviantartFavoriteExtractor
yield Message.Queue, base + "favourites", data
# Dispatch-based variant: builds the profile base URL from self.root and
# self.user and passes (extractor class, URL) pairs to
# self._dispatch_extractors(), with ("gallery",) as the default selection.
base = "{}/{}/".format(self.root, self.user)
return self._dispatch_extractors((
(DeviantartGalleryExtractor , base + "gallery"),
(DeviantartScrapsExtractor , base + "gallery/scraps"),
(DeviantartJournalExtractor , base + "posts"),
(DeviantartFavoriteExtractor, base + "favourites"),
), ("gallery",))
class DeviantartGalleryExtractor(DeviantartExtractor):

@ -319,7 +319,6 @@ class NewgroundsUserExtractor(NewgroundsExtractor):
test = (
("https://tomfulp.newgrounds.com", {
"pattern": "https://tomfulp.newgrounds.com/art$",
"count": 1,
}),
("https://tomfulp.newgrounds.com", {
"options": (("include", "all"),),
@ -329,22 +328,9 @@ class NewgroundsUserExtractor(NewgroundsExtractor):
)
def items(self):
# NOTE(review): this span has no indentation and contains two complete
# bodies back to back -- it looks like a flattened diff hunk in which the
# generator-based body is the removed code and the
# self._dispatch_extractors() call is its replacement. Verify against
# the repository.
#
# Generator-based variant: maps subcategory names to extractor classes,
# reads the "include" option (fallback ("art",)), and yields
# Message.Version followed by one Message.Queue per recognised name.
data = {}
extr_map = {
"art": NewgroundsArtExtractor,
"audio": NewgroundsAudioExtractor,
"movies": NewgroundsMoviesExtractor,
}
# a falsy option value selects nothing
include = self.config("include", ("art",)) or ()
if include == "all":
include = extr_map.keys()
elif isinstance(include, str):
include = include.split(",")
yield Message.Version, 1
for category in include:
# names without an entry in extr_map are skipped
if category in extr_map:
url = self.user_root + "/" + category
# the same 'data' dict object is reused for every yield
data["_extractor"] = extr_map[category]
yield Message.Queue, url, data
# Dispatch-based variant: passes (extractor class, URL) pairs built from
# self.user_root to self._dispatch_extractors(), with ("art",) as the
# default selection.
base = self.user_root + "/"
return self._dispatch_extractors((
(NewgroundsArtExtractor , base + "art"),
(NewgroundsAudioExtractor , base + "audio"),
(NewgroundsMoviesExtractor, base + "movies"),
), ("art",))

Loading…
Cancel
Save