diff --git a/CHANGELOG.md b/CHANGELOG.md index a1a53bf1..a652f561 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## Unreleased +- Added support for: + - `slideshare` - https://www.slideshare.net/ ([#54](https://github.com/mikf/gallery-dl/issues/54)) - Added pool- and post-extractors for `sankaku` ## 1.1.0 - 2017-12-08 diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index a619d834..8b5cae01 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -66,6 +66,7 @@ Sankaku Channel https://chan.sankakucomplex.com/ Pools, Posts, Tag-Searc Sea Otter Scans https://reader.seaotterscans.com/ Chapters, Manga Sen Manga http://raw.senmanga.com/ Chapters Sense-Scans http://sensescans.com/ Chapters, Manga +SlideShare https://www.slideshare.net/ Presentations Spectrum Nexus |http://www.thes-0| Chapters, Manga The /b/ Archive https://thebarchive.com/ Threads Tumblr https://www.tumblr.com/ Images from Users, Posts, Tag-Searches diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py index 5d1248f9..41bf8651 100644 --- a/gallery_dl/extractor/slideshare.py +++ b/gallery_dl/extractor/slideshare.py @@ -9,7 +9,7 @@ """Extract images from https://www.slideshare.net/""" from .common import Extractor, Message -from .. import text +from .. import text, util class SlideshareExtractor(Extractor): @@ -17,14 +17,21 @@ class SlideshareExtractor(Extractor): category = "slideshare" subcategory = "presentation" directory_fmt = ["{category}", "{user}"] - filename_fmt = "{presentation}-{num}.{extension}" + filename_fmt = "{presentation}-{num:>02}.{extension}" pattern = [r"(?:https?://)?(?:www\.)?slideshare\.net/" - r"([^/]+)/([^/]+)"] + r"([^/?&#]+)/([^/?&#]+)"] test = [ - ("https://www.slideshare.net/Slideshare/get-started-with-slide-share", { + (("https://www.slideshare.net/" + "Slideshare/get-started-with-slide-share"), { "url": "23685fb9b94b32c77a547d45dc3a82fe7579ea18", "content": "ee54e54898778e92696a7afec3ffabdbd98eb0cc", }), + + # long title + (("https://www.slideshare.net/pragmaticsolutions/warum-sie-nicht-ihren" + "-mitarbeitenden-ndern-sollten-sondern-ihr-managementsystem"), { + "url": "cf70ca99f57f61affab47ebf8583eb564b21e3a7", + }), ] def __init__(self, match): @@ -32,7 +39,8 @@ class SlideshareExtractor(Extractor): self.user, self.presentation = match.groups() def items(self): - page = self.request("https://www.slideshare.net/" + self.user + "/" + self.presentation).text + page = self.request("https://www.slideshare.net/" + self.user + + "/" + self.presentation).text data = self.get_job_metadata(page) imgs = self.get_image_urls(page) data["count"] = len(imgs) @@ -43,17 +51,29 @@ class SlideshareExtractor(Extractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" - metadata = {} - - text.extract_all(page, ( - ('title', '', ''), - ('description', ''), - ), values=metadata) + descr, pos = text.extract( + page, '', '', pos) + views, pos = text.extract( + page, '', 'views<', pos) + published, pos = text.extract( + page, '