|
|
|
@ -23,18 +23,24 @@ class MyportfolioGalleryExtractor(Extractor):
|
|
|
|
|
r"(?:https?://)?([^.]+\.myportfolio\.com))"
|
|
|
|
|
r"(/[^/?&#]+)?")
|
|
|
|
|
test = (
|
|
|
|
|
("https://hannahcosgrove.myportfolio.com/niamh-1", {
|
|
|
|
|
"url": "8cbd73a73e5bf3b4f5d1b1d4a1eb114c01a72a66",
|
|
|
|
|
"keyword": "7a460bb5641e648ae70702ff91c2fb11054b0e0b",
|
|
|
|
|
("https://andrewling.myportfolio.com/volvo-xc-90-hybrid", {
|
|
|
|
|
"url": "acea0690c76db0e5cf267648cefd86e921bc3499",
|
|
|
|
|
"keyword": "6ac6befe2ee0af921d24cf1dd4a4ed71be06db6d",
|
|
|
|
|
}),
|
|
|
|
|
("https://hannahcosgrove.myportfolio.com/lfw", {
|
|
|
|
|
"pattern": r"https://hannahcosgrove\.myportfolio\.com/[^/?&#+]+$",
|
|
|
|
|
"count": ">= 8",
|
|
|
|
|
("https://andrewling.myportfolio.com/", {
|
|
|
|
|
"pattern": r"https://andrewling\.myportfolio\.com/[^/?&#+]+$",
|
|
|
|
|
"count": ">= 6",
|
|
|
|
|
}),
|
|
|
|
|
# no explicit title
|
|
|
|
|
("https://stevenilousphotography.myportfolio.com/society", {
|
|
|
|
|
"keyword": "49e7ff6322645c22b409280656202c2736a380c9",
|
|
|
|
|
}),
|
|
|
|
|
# custom domain
|
|
|
|
|
("myportfolio:https://tooco.com.ar/6-of-diamonds-paradise-bird", {
|
|
|
|
|
"count": 3,
|
|
|
|
|
}),
|
|
|
|
|
("myportfolio:https://tooco.com.ar/", {
|
|
|
|
|
"pattern": pattern,
|
|
|
|
|
"count": ">= 40",
|
|
|
|
|
}),
|
|
|
|
|
)
|
|
|
|
@ -80,8 +86,11 @@ class MyportfolioGalleryExtractor(Extractor):
|
|
|
|
|
title, pos = text.extract(
|
|
|
|
|
page, '<h1 ', '</h1>', pos)
|
|
|
|
|
|
|
|
|
|
title = title.partition(">")[2]
|
|
|
|
|
user = user[:-len(title)-3]
|
|
|
|
|
if title:
|
|
|
|
|
title = title.partition(">")[2]
|
|
|
|
|
user = user[:-len(title)-3]
|
|
|
|
|
else:
|
|
|
|
|
user, _, title = user.partition(" - ")
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"user": text.unescape(user),
|
|
|
|
|