[webmshare] add 'video' extractor (#2410)

2 years ago · 1317625ec4
parent 90a9c0790f
commit 1317625ec4
4 changed files with 92 additions and 0 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -889,6 +889,12 @@ Consider all sites to be NSFW unless otherwise known.
    <td>Favorites, Folders, Journals, Submissions</td>
    <td><a href="configuration.rst#extractorweasylapi-key">API Key</a></td>
 </tr>
+<tr>
+    <td>webmshare</td>
+    <td>https://webmshare.com/</td>
+    <td>Videos</td>
+    <td></td>
+</tr>
 <tr>
    <td>Webtoon</td>
    <td>https://www.webtoons.com/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -151,6 +151,7 @@ modules = [
    "wallpapercave",
    "warosu",
    "weasyl",
+    "webmshare",
    "webtoons",
    "weibo",
    "wikiart",
--- a/gallery_dl/extractor/webmshare.py
+++ b/gallery_dl/extractor/webmshare.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://webmshare.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class WebmshareVideoExtractor(Extractor):
+    """Extractor for a single webmshare video, addressed by its video ID"""
+    category = "webmshare"
+    subcategory = "video"
+    root = "https://webmshare.com"
+    filename_fmt = "{id}{title:? //}.{extension}"
+    archive_fmt = "{id}"
+    pattern = (r"(?:https?://)?(?:s\d+\.)?webmshare\.com"  # "sN." host optional
+               r"/(?:play/|download-webm/)?(\w{3,})")  # group 1: video ID
+    test = (  # sample URLs; "keyword" presumably lists expected metadata
+        ("https://webmshare.com/O9mWY", {
+            "keyword": {
+                "date": "dt:2022-12-04 00:00:00",
+                "extension": "webm",
+                "filename": "O9mWY",
+                "height": 568,
+                "id": "O9mWY",
+                "thumb": "https://s1.webmshare.com/t/O9mWY.jpg",
+                "title": "Yeah buddy over here",
+                "url": "https://s1.webmshare.com/O9mWY.webm",
+                "views": int,
+                "width": 320,
+            },
+        }),
+        ("https://s1.webmshare.com/zBGAg.webm", {
+            "keyword": {
+                "date": "dt:2018-12-07 00:00:00",
+                "height": 1080,
+                "id": "zBGAg",
+                "thumb": "https://s1.webmshare.com/t/zBGAg.jpg",
+                "title": "",
+                "url": "https://s1.webmshare.com/zBGAg.webm",
+                "views": int,
+                "width": 1920,
+            },
+        }),
+        ("https://webmshare.com/play/zBGAg"),
+        ("https://webmshare.com/download-webm/zBGAg"),
+    )
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.video_id = match.group(1)  # ID captured by 'pattern'
+
+    def items(self):
+        url = "{}/{}".format(self.root, self.video_id)  # <root>/<id> page
+        extr = text.extract_from(self.request(url).text)
+        # NOTE(review): extr() presumably scans forward; keep page order
+        data = {
+            "title": text.unescape(extr(  # keeps text before the last " — "
+                'property="og:title" content="', '"').rpartition(" — ")[0]),
+            "thumb": "https:" + extr('property="og:image" content="', '"'),
+            "url"  : "https:" + extr('property="og:video" content="', '"'),
+            "width": text.parse_int(extr(
+                'property="og:video:width" content="', '"')),
+            "height": text.parse_int(extr(
+                'property="og:video:height" content="', '"')),
+            "date" : text.parse_datetime(extr(
+                "<small>Added ", "<"), "%B %d, %Y"),  # month name, day, year
+            "views": text.parse_int(extr('glyphicon-eye-open"></span>', '<')),
+            "id"       : self.video_id,
+            "filename" : self.video_id,
+            "extension": "webm",  # fixed, not read from the page
+        }
+        # a title of exactly "webmshare" is treated as no title
+        if data["title"] == "webmshare":
+            data["title"] = ""
+        # one Directory message, then one Url message, sharing 'data'
+        yield Message.Directory, data
+        yield Message.Url, data["url"], data
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -114,6 +114,7 @@ CATEGORY_MAP = {
    "vk"             : "VK",
    "vsco"           : "VSCO",
    "wallpapercave"  : "Wallpaper Cave",
+    "webmshare"      : "webmshare",
    "webtoons"       : "Webtoon",
    "wikiart"        : "WikiArt.org",
    "xbunkr"         : "xBunkr",