[webmshare] add 'video' extractor (#2410)

pull/3414/head
Mike Fährmann 2 years ago
parent 90a9c0790f
commit 1317625ec4
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -889,6 +889,12 @@ Consider all sites to be NSFW unless otherwise known.
<td>Favorites, Folders, Journals, Submissions</td>
<td><a href="configuration.rst#extractorweasylapi-key">API Key</a></td>
</tr>
<tr>
<td>webmshare</td>
<td>https://webmshare.com/</td>
<td>Videos</td>
<td></td>
</tr>
<tr>
<td>Webtoon</td>
<td>https://www.webtoons.com/</td>

@ -151,6 +151,7 @@ modules = [
"wallpapercave",
"warosu",
"weasyl",
"webmshare",
"webtoons",
"weibo",
"wikiart",

@ -0,0 +1,84 @@
# -*- coding: utf-8 -*-
# Copyright 2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://webmshare.com/"""
from .common import Extractor, Message
from .. import text
class WebmshareVideoExtractor(Extractor):
    """Extractor for webmshare videos"""
    category = "webmshare"
    subcategory = "video"
    root = "https://webmshare.com"
    filename_fmt = "{id}{title:? //}.{extension}"
    archive_fmt = "{id}"
    # Accepts the plain video page, the /play/ and /download-webm/ pages,
    # and direct links on the numbered "s<N>." hosts; group 1 is the video ID.
    pattern = (r"(?:https?://)?(?:s\d+\.)?webmshare\.com"
               r"/(?:play/|download-webm/)?(\w{3,})")
    test = (
        ("https://webmshare.com/O9mWY", {
            "keyword": {
                "date": "dt:2022-12-04 00:00:00",
                "extension": "webm",
                "filename": "O9mWY",
                "height": 568,
                "id": "O9mWY",
                "thumb": "https://s1.webmshare.com/t/O9mWY.jpg",
                "title": "Yeah buddy over here",
                "url": "https://s1.webmshare.com/O9mWY.webm",
                "views": int,
                "width": 320,
            },
        }),
        ("https://s1.webmshare.com/zBGAg.webm", {
            "keyword": {
                "date": "dt:2018-12-07 00:00:00",
                "height": 1080,
                "id": "zBGAg",
                "thumb": "https://s1.webmshare.com/t/zBGAg.jpg",
                "title": "",
                "url": "https://s1.webmshare.com/zBGAg.webm",
                "views": int,
                "width": 1920,
            },
        }),
        ("https://webmshare.com/play/zBGAg"),
        ("https://webmshare.com/download-webm/zBGAg"),
    )

    def __init__(self, match):
        """Store the video ID captured by the first group of 'pattern'"""
        Extractor.__init__(self, match)
        self.video_id = match.group(1)

    def items(self):
        """Yield one Directory and one Url message for the video

        Metadata is scraped from the video's HTML page at
        '<root>/<video_id>'.
        """
        url = "{}/{}".format(self.root, self.video_id)
        extr = text.extract_from(self.request(url).text)

        # The extr() calls below are kept in their original order.
        og_title = extr('property="og:title" content="', '"')

        # str.rpartition() raises "ValueError: empty separator" when given
        # "", so a real separator is required here.
        # NOTE(review): " \u2014 " (space, em dash, space) is assumed to be
        # what separates the video title from the site name in og:title;
        # confirm against a live page.
        title, sep, _ = og_title.rpartition(" \u2014 ")
        if not sep:
            # no separator found: keep the whole value instead of
            # silently discarding the title
            title = og_title

        data = {
            "title": text.unescape(title),
            "thumb": "https:" + extr('property="og:image" content="', '"'),
            "url"  : "https:" + extr('property="og:video" content="', '"'),
            "width": text.parse_int(extr(
                'property="og:video:width" content="', '"')),
            "height": text.parse_int(extr(
                'property="og:video:height" content="', '"')),
            "date" : text.parse_datetime(extr(
                "<small>Added ", "<"), "%B %d, %Y"),
            "views": text.parse_int(extr('glyphicon-eye-open"></span>', '<')),
            "id"       : self.video_id,
            "filename" : self.video_id,
            "extension": "webm",
        }

        # a title equal to the bare site name is reported as empty
        if data["title"] == "webmshare":
            data["title"] = ""

        yield Message.Directory, data
        yield Message.Url, data["url"], data

@ -114,6 +114,7 @@ CATEGORY_MAP = {
"vk" : "VK",
"vsco" : "VSCO",
"wallpapercave" : "Wallpaper Cave",
"webmshare" : "webmshare",
"webtoons" : "Webtoon",
"wikiart" : "WikiArt.org",
"xbunkr" : "xBunkr",

Loading…
Cancel
Save