[deviantart] add 'external' option (#302)

If a deviation's description is available, extract the URLs from the
description text and queue each one so that a matching extractor can handle it.
pull/359/head
Mike Fährmann 5 years ago
parent f85e42cffc
commit 2fb85178da
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -395,6 +395,18 @@ Description Try to follow external URLs of embedded players.
=========== =====
extractor.deviantart.external
-----------------------------
=========== =====
Type ``bool``
Default ``false``
Description Follow external URLs found in a deviation's description.
Note: deviantart.metadata_ must be enabled; otherwise descriptions are
not available and no external URLs are followed.
=========== =====
extractor.deviantart.flat
-------------------------
=========== =====
@ -1586,6 +1598,7 @@ Description An object with the ``name`` of a post-processor and its options.
.. _skipped: `extractor.*.skip`_
.. _`date-min and date-max`: `extractor.reddit.date-min & .date-max`_
.. _date-format: extractor.reddit.date-format_
.. _deviantart.metadata: extractor.deviantart.metadata_
.. _.netrc: https://stackoverflow.com/tags/.netrc/info
.. _tempfile.gettempdir(): https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir

@ -22,6 +22,7 @@
"deviantart":
{
"refresh-token": null,
"external": false,
"flat": true,
"folders": false,
"journals": "html",

@ -39,6 +39,7 @@ class DeviantartExtractor(Extractor):
self.offset = 0
self.flat = self.config("flat", True)
self.original = self.config("original", True)
self.external = self.config("external", False)
self.user = match.group(1) or match.group(2)
self.group = False
@ -95,6 +96,13 @@ class DeviantartExtractor(Extractor):
journal = self.api.deviation_content(deviation["deviationid"])
yield self.commit_journal(deviation, journal)
if self.external:
for url in text.extract_iter(
deviation.get("description", ""), 'href="', '"'):
if "deviantart.com/users/outgoing?" in url:
url = text.unquote(url.partition("?")[2])
yield Message.Queue, url, deviation
def deviations(self):
"""Return an iterable containing all relevant Deviation-objects"""
return []
@ -361,6 +369,14 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
"pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
r"/f/[^/]+/[^.]+\.gif\?token="),
}),
# external URLs from description (#302)
(("https://www.deviantart.com/uotapo/art/"
"INANAKI-Memorial-Humane7-590297498"), {
"options": (("external", 1), ("metadata", 1), ("original", 0)),
"pattern": r"https?://(sta\.sh|youtu\.be)/\w+$",
"range": "2-",
"count": 6,
}),
# old-style URLs
("https://shimoda7.deviantart.com"
"/art/For-the-sake-of-a-memory-10073852"),

Loading…
Cancel
Save