[myportfolio] fix extraction

pull/1529/head
Mike Fährmann 3 years ago
parent 8b22d4e667
commit d108421461
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018-2019 Mike Fährmann
# Copyright 2018-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -51,9 +51,11 @@ class MyportfolioGalleryExtractor(Extractor):
self.prefix = "myportfolio:" if domain1 else ""
def items(self):
yield Message.Version, 1
url = "https://" + self.domain + (self.path or "")
page = self.request(url).text
response = self.request(url)
if response.history and response.url.endswith(".adobe.com/missing"):
raise exception.NotFoundError()
page = response.text
projects = text.extract(
page, '<section class="project-covers', '</section>')[0]
@@ -78,12 +80,12 @@ class MyportfolioGalleryExtractor(Extractor):
# <user> and <title> can contain a "-" as well, so we get the title
# from somewhere else and cut that amount from the og:title content
user, pos = text.extract(
page, 'property=og:title content="', '"')
desc, pos = text.extract(
page, 'property=og:description content="', '"', pos)
title, pos = text.extract(
page, '<h1 ', '</h1>', pos)
extr = text.extract_from(page)
user = extr('property="og:title" content="', '"') or \
extr('property=og:title content="', '"')
descr = extr('property="og:description" content="', '"') or \
extr('property=og:description content="', '"')
title = extr('<h1 ', '</h1>')
if title:
title = title.partition(">")[2]
@@ -96,7 +98,7 @@ class MyportfolioGalleryExtractor(Extractor):
return {
"user": text.unescape(user),
"title": text.unescape(title),
"description": text.unescape(desc or ""),
"description": text.unescape(descr),
}
@staticmethod

Loading…
Cancel
Save