[deviantart] fix issue with small images

9 years ago · e4a661fd6b
parent 3ebd126b35
commit e4a661fd6b
1 changed files with 23 additions and 26 deletions
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -13,11 +13,11 @@ from .. import text
 import re
 class DeviantArtExtractor(AsynchronousExtractor):
-
+    """Extract all works of an artist on deviantart"""
    category = "deviantart"
    directory_fmt = ["{category}", "{artist}"]
    filename_fmt = "{category}_{index}_{title}.{extension}"
-    pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*"]
+    pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com(?:/gallery)?/?$"]
    def __init__(self, match):
        AsynchronousExtractor.__init__(self)
@@ -57,39 +57,36 @@ class DeviantArtExtractor(AsynchronousExtractor):
    def get_image_metadata(self, image):
        """Collect metadata for an image"""
-        match = self.extract_data(image, 'title',
+        tmatch = self.extract_data(image, 'title',
            r'(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in')
-        if image.startswith(" ismature"):
+        hmatch = self.extract_data(image, 'href', r'[^"]+-(\d+)')
-            # adult image
+
-            url, _ = text.extract(image, 'href="', '"')
+        url, pos = text.extract(image, ' data-super-full-img="', '"', tmatch.end())
            page = self.request(url).text
            _     , pos = text.extract(page, ' class="dev-content-normal "', '')
            url   , pos = text.extract(page, ' src="', '"', pos)
            index , pos = text.extract(page, ' data-embed-id="', '"', pos)
            width , pos = text.extract(page, ' width="', '"', pos)
            height, pos = text.extract(page, ' height="', '"', pos)
        else:
            # normal image
            index = self.extract_data(image, 'href', r'[^"]+-(\d+)').group(1)
            url, pos = text.extract(image, ' data-super-full-img="', '"', match.end())
        if url:
            width , pos = text.extract(image, ' data-super-full-width="', '"', pos)
            height, pos = text.extract(image, ' data-super-full-height="', '"', pos)
        else:
            url, pos = text.extract(image, ' data-super-img="', '"', pos)
            if url:
                width , pos = text.extract(image, ' data-super-width="', '"', pos)
                height, pos = text.extract(image, ' data-super-height="', '"', pos)
-        data = {
+            else:
-            "index": index,
+                page = self.request(hmatch.group(0)).text
-            "title": match.group(1),
+                _     , pos = text.extract(page, ' class="dev-content-normal "', '')
-            "artist": match.group(2),
+                url   , pos = text.extract(page, ' src="', '"', pos)
-            "date": match.group(3),
+                width , pos = text.extract(page, ' width="', '"', pos)
                height, pos = text.extract(page, ' height="', '"', pos)
        return url, text.nameext_from_url(url, {
            "index": hmatch.group(1),
            "title": text.unescape(tmatch.group(1)),
            "artist": tmatch.group(2),
            "date": tmatch.group(3),
            "width": width,
            "height": height,
-        }
+        })
        return url, text.nameext_from_url(url, data)
    @staticmethod
    def extract_data(txt, attr, pattern):
        """Apply a regex to the value of an HTML attribute.

        Builds the begin marker ``' <attr>="'`` and hands it, together with
        a closing ``'"'``, to ``text.extract`` to pull the attribute value
        out of ``txt``.  The extracted value is then passed to ``re.match``,
        so ``pattern`` is anchored at the start of the value.

        Returns whatever ``re.match`` returns: a match object, or ``None``
        if the pattern does not match.
        """
        begin_marker = ' %s="' % attr
        # text.extract yields a (value, position) pair; only the value is used
        attr_value, _pos = text.extract(txt, begin_marker, '"')
        return re.match(pattern, attr_value)