# -*- coding: utf-8 -*-

# Copyright 2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://webmshare.com/"""

from .common import Extractor, Message
from .. import text


class WebmshareVideoExtractor(Extractor):
    """Extractor for a single video hosted on webmshare.com

    The video page is fetched once and its metadata is scraped from the
    Open Graph <meta> properties and two other page markers; the result
    is emitted as one directory message followed by one URL message.
    """
    category = "webmshare"
    subcategory = "video"
    root = "https://webmshare.com"
    filename_fmt = "{id}{title:? //}.{extension}"
    archive_fmt = "{id}"
    # Optional scheme, optional "s<digits>." subdomain, optional
    # "play/" or "download-webm/" path prefix; group 1 captures the
    # video ID (three or more word characters).
    pattern = (r"(?:https?://)?(?:s\d+\.)?webmshare\.com"
               r"/(?:play/|download-webm/)?(\w{3,})")
    test = (
        ("https://webmshare.com/O9mWY", {
            "keyword": {
                "date": "dt:2022-12-04 00:00:00",
                "extension": "webm",
                "filename": "O9mWY",
                "height": 568,
                "id": "O9mWY",
                "thumb": "https://s1.webmshare.com/t/O9mWY.jpg",
                "title": "Yeah buddy over here",
                "url": "https://s1.webmshare.com/O9mWY.webm",
                "views": int,
                "width": 320,
            },
        }),
        ("https://s1.webmshare.com/zBGAg.webm", {
            "keyword": {
                "date": "dt:2018-12-07 00:00:00",
                "height": 1080,
                "id": "zBGAg",
                "thumb": "https://s1.webmshare.com/t/zBGAg.jpg",
                "title": "",
                "url": "https://s1.webmshare.com/zBGAg.webm",
                "views": int,
                "width": 1920,
            },
        }),
        ("https://webmshare.com/play/zBGAg"),
        ("https://webmshare.com/download-webm/zBGAg"),
    )

    def __init__(self, match):
        """Store the video ID captured by group 1 of 'pattern'"""
        Extractor.__init__(self, match)
        self.video_id = match.group(1)

    def items(self):
        """Yield a Directory message and a Url message for the video

        Both messages carry the same metadata dict, with the keys
        title, thumb, url, width, height, date, views, id, filename
        and extension.
        """
        page_url = "{}/{}".format(self.root, self.video_id)
        extr = text.extract_from(self.request(page_url).text)

        # NOTE(review): the extr() calls are kept in their original
        # order. Presumably each call resumes searching after the
        # previous match, so reordering them could change the results;
        # confirm against text.extract_from before rearranging.
        og_title = extr('property="og:title" content="', '"')
        # Keep only what precedes the last " — " in og:title.
        title = text.unescape(og_title.rpartition(" — ")[0])
        if title == "webmshare":
            # A title that is just the literal "webmshare" is reported
            # as an empty title.
            title = ""

        # The og:image / og:video values get an explicit "https:" prefix.
        thumb = "https:" + extr('property="og:image" content="', '"')
        video_url = "https:" + extr('property="og:video" content="', '"')
        width = text.parse_int(
            extr('property="og:video:width" content="', '"'))
        height = text.parse_int(
            extr('property="og:video:height" content="', '"'))
        added = extr("Added ", "<")
        date = text.parse_datetime(added, "%B %d, %Y")
        views = text.parse_int(extr('glyphicon-eye-open">', '<'))

        data = {
            "title": title,
            "thumb": thumb,
            "url": video_url,
            "width": width,
            "height": height,
            "date": date,
            "views": views,
            "id": self.video_id,
            "filename": self.video_id,
            "extension": "webm",
        }

        yield Message.Directory, data
        yield Message.Url, video_url, data
@@ -114,6 +114,7 @@ CATEGORY_MAP = { "vk" : "VK", "vsco" : "VSCO", "wallpapercave" : "Wallpaper Cave", + "webmshare" : "webmshare", "webtoons" : "Webtoon", "wikiart" : "WikiArt.org", "xbunkr" : "xBunkr",