diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py index bea457f4..3521298f 100644 --- a/gallery_dl/extractor/slideshare.py +++ b/gallery_dl/extractor/slideshare.py @@ -30,21 +30,20 @@ class SlidesharePresentationExtractor(GalleryExtractor): "count": 19, "content": "2b6a191eab60b3978fdacfecf2da302dd45bc108", "keyword": { - "comments": "0", "description": "Get Started with SlideShare - " "A Beginngers Guide for Creators", - "likes": r"re:\d{3,}", + "likes": int, "presentation": "get-started-with-slide-share", - "published": "dt:2015-05-20 00:00:00", + "date": "dt:2015-05-20 17:38:21", "title": "Getting Started With SlideShare", "user": "Slideshare", - "views": r"re:\d{7,}", + "views": int, }, }), # long title and description (("https://www.slideshare.net/pragmaticsolutions/warum-sie-nicht-ihren" "-mitarbeitenden-ndern-sollten-sondern-ihr-managementsystem"), { - "url": "cf70ca99f57f61affab47ebf8583eb564b21e3a7", + "url": "d8952260f8bec337dd809a958ec8091350393f6b", "keyword": { "title": "Warum Sie nicht Ihren Mitarbeitenden ändern " "sollten, sondern Ihr Managementsystem", @@ -58,7 +57,7 @@ class SlidesharePresentationExtractor(GalleryExtractor): # mobile URL (("https://www.slideshare.net" "/mobile/uqudent/introduction-to-fixed-prosthodontics"), { - "url": "43eda2adf4dd221a251c8df794dfb82649e94647", + "url": "72c431cb1eccbb6794f608ecbbc01d52e8768159", }), ) @@ -69,43 +68,31 @@ class SlidesharePresentationExtractor(GalleryExtractor): GalleryExtractor.__init__(self, match, url) def metadata(self, page): - extr = text.extract_from(page) - descr = extr('', '') - published = extr('
', '
') - - if descr.endswith("…"): - alt_descr = extr('slideshow-description-text"', '

') - if alt_descr: - descr = text.remove_html(alt_descr.partition(">")[2]).strip() + data = util.json_loads(text.extr( + page, 'id="__NEXT_DATA__" type="application/json">', '')) + self.slideshow = slideshow = data["props"]["pageProps"]["slideshow"] return { - "user": self.user, + "user" : slideshow["username"], "presentation": self.presentation, - "title": text.unescape(title.strip()), - "description": text.unescape(descr), - "views": views, - "likes": likes, - "comments": comments, - "published": text.parse_datetime( - published.strip(), "%b. %d, %Y"), + "title" : slideshow["title"].strip(), + "description" : slideshow["description"].strip(), + "views" : slideshow["views"], + "likes" : slideshow["likes"], + "date" : text.parse_datetime( + slideshow["createdAt"], "%Y-%m-%d %H:%M:%S %Z"), } - @staticmethod - def images(page): - data = util.json_loads(text.extract( - page, "xtend(true, slideshare_object.slideshow_config, ", ");")[0]) + def images(self, page): + parts = self.slideshow["slideImages"][0]["baseUrl"].split("/") - # useing 'stripped_title' here is technically wrong, but it works all - # the same, slideshare doesn't seem to care what characters go there - begin = "https://image.slidesharecdn.com/{}/95/{}-".format( - data["ppt_location"], data["stripped_title"]) - end = "-1024.jpg?cb=" + str(data["timestamp"]) + begin = "{}/95/{}-".format( + "/".join(parts[:4]), + self.slideshow["strippedTitle"], + ) + end = "-1024.jpg?" + parts[-1].rpartition("?")[2] return [ (begin + str(n) + end, None) - for n in range(1, data["slide_count"]+1) + for n in range(1, self.slideshow["totalSlides"]+1) ]