You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
gallery-dl/gallery_dl/extractor/soundgasm.py

66 lines
2.5 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
# Copyright 2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://soundgasm.net/"""
from .common import Extractor, Message
from .. import text
class SoundgasmAudioExtractor(Extractor):
    """Extractor for audio clips from soundgasm.net"""
    category = "soundgasm"
    root = "https://soundgasm.net"
    # Format templates; the placeholders name keys of the metadata dict
    # built in items() ("filename"/"extension" are presumably filled in
    # by text.nameext_from_url() -- see the "keyword" test data below).
    directory_fmt = ("{category}", "{user}")
    filename_fmt = "{title}.{extension}"
    archive_fmt = "{user}_{filename}"
    # Capture group 1 is the user name, group 2 the clip slug.
    pattern = r"(?:https?://)?(?:www\.)?soundgasm\.net/u/([^/?#]+)/([^/?#]+)"
    # One test case: (clip URL, expected media-URL pattern and metadata).
    test = (
        (("https://soundgasm.net/u/ClassWarAndPuppies2"
          "/687-Otto-von-Toontown-12822"), {
            "pattern": r"https://media\.soundgasm\.net/sounds"
                       r"/26cb2b23b2f2c6094b40ee3a9167271e274b570a\.m4a",
            "keyword": {
                "description": "We celebrate todays important prisoner swap, "
                               "and finally bring the 2022 mid-terms to a clos"
                               "e with Raphael Warnocks defeat of Herschel Wa"
                               "lker in Georgia. Then, we take a look at the Q"
                               "anon-addled attempt to overthrow the German go"
                               "vernment and install Heinrich XIII Prince of R"
                               "euss as kaiser.",
                "extension": "m4a",
                "filename": "26cb2b23b2f2c6094b40ee3a9167271e274b570a",
                "slug": "687-Otto-von-Toontown-12822",
                "title": "687 - Otto von Toontown (12/8/22)",
                "user": "ClassWarAndPuppies2",
            },
        }),
    )

    def __init__(self, match):
        """Store the user name and clip slug captured by 'pattern'"""
        Extractor.__init__(self, match)
        captured = match.groups()
        self.user, self.slug = captured

    def items(self):
        """Yield one Directory and one Url message for the requested clip"""
        page_url = "{}/u/{}/{}".format(self.root, self.user, self.slug)
        page = self.request(page_url).text
        extr = text.extract_from(page)

        # The extr() calls are deliberately kept in this order
        # (title -> description -> media block); extr presumably scans
        # forward through the page, so reordering could change results.
        title = text.unescape(extr('aria-label="title">', "<"))
        description = text.unescape(text.remove_html(
            extr('class="jp-description">', '</div>')))

        metadata = {
            "user"       : self.user,
            "slug"       : self.slug,
            "title"      : title,
            "description": description,
        }

        # Body of the '"setMedia", {...}' block; the media URL is taken
        # from the first ': "<value>"' entry inside it.
        media_block = extr('"setMedia", {', '}')
        media_url = text.extr(media_block, ': "', '"')

        yield Message.Directory, metadata
        # nameext_from_url() is evaluated only after the Directory message
        # has been handed out, exactly as in the single-expression form.
        yield Message.Url, media_url, text.nameext_from_url(
            media_url, metadata)