From de14b7ad7ad0d8f61abaaa6c46c1cfc05a93bb9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Thu, 15 Apr 2021 17:15:59 +0200
Subject: [PATCH] [slideshare] fix extraction
---
gallery_dl/extractor/slideshare.py | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index 0b970cc3..15dbb859 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://www.slideshare.net/"""
+"""Extractors for https://www.slideshare.net/"""
from .common import Extractor, Message
from .. import text
@@ -58,15 +58,16 @@ class SlidesharePresentationExtractor(Extractor):
"""Collect metadata for extractor-job"""
descr, pos = text.extract(
page, '', '', pos)
+ category, pos = text.extract(
+ page, '', '
', pos)
views, pos = text.extract(
- page, '', 'views<', pos)
+ page, '', '
', pos)
published, pos = text.extract(
- page, '
', pos)
+ page, '', '
', pos)
if descr.endswith("…") and alt_descr:
descr = text.remove_html(alt_descr).strip()
@@ -76,8 +77,9 @@ class SlidesharePresentationExtractor(Extractor):
"presentation": self.presentation,
"title": text.unescape(title.strip()),
"description": text.unescape(descr),
- "views": text.parse_int(views.replace(",", "")),
- "published": published,
+ "views": text.parse_int(views.rpartition(
+ " views")[0].replace(",", "")),
+ "published": published.strip(),
}
@staticmethod