[myportfolio] fix extraction

3 years ago · d108421461
parent 8b22d4e667
commit d108421461
1 changed file with 12 additions and 10 deletions
--- a/gallery_dl/extractor/myportfolio.py
+++ b/gallery_dl/extractor/myportfolio.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
-# Copyright 2018-2019 Mike Fährmann
+# Copyright 2018-2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -51,9 +51,11 @@ class MyportfolioGalleryExtractor(Extractor):
        self.prefix = "myportfolio:" if domain1 else ""
    def items(self):
        yield Message.Version, 1
        url = "https://" + self.domain + (self.path or "")
-        page = self.request(url).text
+        response = self.request(url)
+        if response.history and response.url.endswith(".adobe.com/missing"):
+            raise exception.NotFoundError()
+        page = response.text
        projects = text.extract(
            page, '<section class="project-covers', '</section>')[0]
@@ -78,12 +80,12 @@ class MyportfolioGalleryExtractor(Extractor):
        # <user> and <title> can contain a "-" as well, so we get the title
        # from somewhere else and cut that amount from the og:title content
-        user, pos = text.extract(
+        extr = text.extract_from(page)
-            page, 'property=og:title content="', '"')
+        user = extr('property="og:title" content="', '"') or \
-        desc, pos = text.extract(
+            extr('property=og:title content="', '"')
-            page, 'property=og:description content="', '"', pos)
+        descr = extr('property="og:description" content="', '"') or \
-        title, pos = text.extract(
+            extr('property=og:description content="', '"')
-            page, '<h1 ', '</h1>', pos)
+        title = extr('<h1 ', '</h1>')
        if title:
            title = title.partition(">")[2]
@@ -96,7 +98,7 @@ class MyportfolioGalleryExtractor(Extractor):
        return {
            "user": text.unescape(user),
            "title": text.unescape(title),
-            "description": text.unescape(desc or ""),
+            "description": text.unescape(descr),
        }
    @staticmethod