diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 4553f8c3..2882ddf8 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -13,11 +13,11 @@ from .. import text import re class DeviantArtExtractor(AsynchronousExtractor): - + """Extract all works of an artist on deviantart""" category = "deviantart" directory_fmt = ["{category}", "{artist}"] filename_fmt = "{category}_{index}_{title}.{extension}" - pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*"] + pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com(?:/gallery)?/?$"] def __init__(self, match): AsynchronousExtractor.__init__(self) @@ -57,39 +57,36 @@ class DeviantArtExtractor(AsynchronousExtractor): def get_image_metadata(self, image): """Collect metadata for an image""" - match = self.extract_data(image, 'title', + tmatch = self.extract_data(image, 'title', r'(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in') - if image.startswith(" ismature"): - # adult image - url, _ = text.extract(image, 'href="', '"') - page = self.request(url).text - _ , pos = text.extract(page, ' class="dev-content-normal "', '') - url , pos = text.extract(page, ' src="', '"', pos) - index , pos = text.extract(page, ' data-embed-id="', '"', pos) - width , pos = text.extract(page, ' width="', '"', pos) - height, pos = text.extract(page, ' height="', '"', pos) + hmatch = self.extract_data(image, 'href', r'[^"]+-(\d+)') + + url, pos = text.extract(image, ' data-super-full-img="', '"', tmatch.end()) + if url: + width , pos = text.extract(image, ' data-super-full-width="', '"', pos) + height, pos = text.extract(image, ' data-super-full-height="', '"', pos) else: - # normal image - index = self.extract_data(image, 'href', r'[^"]+-(\d+)').group(1) - url, pos = text.extract(image, ' data-super-full-img="', '"', match.end()) + url, pos = text.extract(image, ' data-super-img="', '"', pos) if url: - width , pos = text.extract(image, ' data-super-full-width="', '"', pos) - height, pos = text.extract(image, ' data-super-full-height="', '"', pos) - else: - url , pos = text.extract(image, ' data-super-img="', '"', pos) width , pos = text.extract(image, ' data-super-width="', '"', pos) height, pos = text.extract(image, ' data-super-height="', '"', pos) - data = { - "index": index, - "title": match.group(1), - "artist": match.group(2), - "date": match.group(3), + else: + page = self.request(hmatch.group(0)).text + _ , pos = text.extract(page, ' class="dev-content-normal "', '') + url , pos = text.extract(page, ' src="', '"', pos) + width , pos = text.extract(page, ' width="', '"', pos) + height, pos = text.extract(page, ' height="', '"', pos) + return url, text.nameext_from_url(url, { + "index": hmatch.group(1), + "title": text.unescape(tmatch.group(1)), + "artist": tmatch.group(2), + "date": tmatch.group(3), "width": width, "height": height, - } - return url, text.nameext_from_url(url, data) + }) @staticmethod def extract_data(txt, attr, pattern): + """Extract a HTML attribute and apply a regex to it""" txt, _ = text.extract(txt, ' %s="' % attr, '"') return re.match(pattern, txt)