[luscious] fix tag extraction

pull/40/head
Mike Fährmann 7 years ago
parent b8862ff15e
commit f98e3e8002
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -290,7 +290,7 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
pattern = [r"(?:https?://)?([^.]+)\.deviantart\.com" pattern = [r"(?:https?://)?([^.]+)\.deviantart\.com"
r"/favourites/(\d+)/([^/?&#]+)"] r"/favourites/(\d+)/([^/?&#]+)"]
test = [("http://rosuuri.deviantart.com/favourites/58951174/Useful", { test = [("http://rosuuri.deviantart.com/favourites/58951174/Useful", {
"url": "22a3858a1efb150d11c3f4e63cf9082ad70c6ea0", "url": "f0c12581060aab9699289817b39804d9eb88f675",
"keyword": "e0ed920fb3dfdad9294be592be2eeb3dc1258a6a", "keyword": "e0ed920fb3dfdad9294be592be2eeb3dc1258a6a",
})] })]

@@ -51,8 +51,8 @@ class LusciousAlbumExtractor(AsynchronousExtractor):
def get_metadata(self, page): def get_metadata(self, page):
"""Collect metadata for extractor-job""" """Collect metadata for extractor-job"""
data = text.extract_all(page, ( data = text.extract_all(page, (
("title" , '"og:title" content="', '"'),
("tags" , '<meta name="keywords" content="', '"'), ("tags" , '<meta name="keywords" content="', '"'),
("title" , '"og:title" content="', '"'),
(None , '<li class="user_info">', ''), (None , '<li class="user_info">', ''),
("count" , '<p>', ' '), ("count" , '<p>', ' '),
(None , '<p>Section:', ''), (None , '<p>Section:', ''),

@@ -66,7 +66,7 @@ skip = [
"exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie",
"archivedmoe", "archiveofsins", "thebarchive", "archivedmoe", "archiveofsins", "thebarchive",
# temporary issues # temporary issues
"luscious", # ssl cert issues "imgtrex", # 504
] ]
# enable selective testing for direct calls # enable selective testing for direct calls
if __name__ == '__main__' and len(sys.argv) > 1: if __name__ == '__main__' and len(sys.argv) > 1:

Loading…
Cancel
Save