[deviantart] allow selecting source for 'extra' (#1356)

Setting 'extra' to "stash" or "deviations" will only download embedded
sta.sh content or embedded deviations, respectively. 'true' still downloads both.
pull/1374/head
Mike Fährmann 4 years ago
parent a677123abb
commit 5c32a7bf58
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -745,14 +745,18 @@ Description
extractor.deviantart.extra
--------------------------
Type
``bool``
``bool`` or ``string``
Default
``false``
Description
Download embedded Deviations and Sta.sh resources from
description texts and journals.
Note: Enabling this option also enables deviantart.metadata_.
Set this option to ``"stash"`` or ``"deviations"``
to select only one of them as a source.
Note: Enabling this option also enables
`deviantart.metadata <extractor.deviantart.metadata_>`_.
extractor.deviantart.flat
@ -2760,7 +2764,6 @@ Description
.. _base-directory: `extractor.*.base-directory`_
.. _date-format: `extractor.*.date-format`_
.. _deviantart.metadata: `extractor.deviantart.metadata`_
.. _postprocessors: `extractor.*.postprocessors`_
.. _download archive: `extractor.*.archive`_

@ -78,9 +78,18 @@ class DeviantartExtractor(Extractor):
else:
self.user = profile["user"]["username"]
if self.extra:
finditer_stash = DeviantartStashExtractor.pattern.finditer
finditer_deviation = DeviantartDeviationExtractor.pattern.finditer
extra = self.extra
if extra:
if extra == "stash":
extra_stash = DeviantartStashExtractor.pattern.finditer
extra_deviation = None
elif extra == "deviations":
extra_deviation = DeviantartDeviationExtractor.pattern.finditer
extra_stash = None
else:
extra_stash = DeviantartStashExtractor.pattern.finditer
extra_deviation = DeviantartDeviationExtractor.pattern.finditer
extra = True
yield Message.Version, 1
for deviation in self.deviations():
@ -131,21 +140,23 @@ class DeviantartExtractor(Extractor):
if "excerpt" in deviation and self.commit_journal:
journal = self.api.deviation_content(deviation["deviationid"])
if self.extra:
if extra:
deviation["_journal"] = journal["html"]
yield self.commit_journal(deviation, journal)
if self.extra:
if extra:
txt = (deviation.get("description", "") +
deviation.get("_journal", ""))
for match in finditer_stash(txt):
url = text.ensure_http_scheme(match.group(0))
deviation["_extractor"] = DeviantartStashExtractor
yield Message.Queue, url, deviation
for match in finditer_deviation(txt):
url = text.ensure_http_scheme(match.group(0))
deviation["_extractor"] = DeviantartDeviationExtractor
yield Message.Queue, url, deviation
if extra_stash:
for match in extra_stash(txt):
url = text.ensure_http_scheme(match.group(0))
deviation["_extractor"] = DeviantartStashExtractor
yield Message.Queue, url, deviation
if extra_deviation:
for match in extra_deviation(txt):
url = text.ensure_http_scheme(match.group(0))
deviation["_extractor"] = DeviantartDeviationExtractor
yield Message.Queue, url, deviation
def deviations(self):
"""Return an iterable containing all relevant Deviation-objects"""

Loading…
Cancel
Save