You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
gallery-dl/gallery_dl/extractor/uploadir.py

91 lines
2.9 KiB

# -*- coding: utf-8 -*-
# Copyright 2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://uploadir.com/"""
from .common import Extractor, Message
from .. import text
class UploadirFileExtractor(Extractor):
    """Extractor for uploadir files

    Matches uploadir.com URLs of the form '/u/<id>', '/uploads/<id>' and
    '/user/uploads/<id>' and yields a single downloadable file per URL.
    """
    category = "uploadir"
    subcategory = "file"
    root = "https://uploadir.com"
    filename_fmt = "{filename} ({id}).{extension}"
    archive_fmt = "{id}"
    # group 1 captures the file ID following '/u/' or '/uploads/'
    pattern = r"(?:https?://)?uploadir\.com/(?:user/)?u(?:ploads)?/([^/?#]+)"
    test = (
        # image
        ("https://uploadir.com/u/rd3t46ry", {
            "pattern": r"https://uploadir\.com/u/rd3t46ry",
            "count": 1,
            "keyword": {
                "extension": "jpg",
                "filename": "Chloe and Rachel 4K jpg",
                "id": "rd3t46ry",
            },
        }),
        # archive
        ("https://uploadir.com/uploads/gxe8ti9v/downloads/new", {
            "pattern": r"https://uploadir\.com/uploads/gxe8ti9v/downloads",
            "count": 1,
            "keyword": {
                "extension": "zip",
                "filename": "NYAN-Mods-Pack#1",
                "id": "gxe8ti9v",
            },
        }),
        # utf-8 filename
        ("https://uploadir.com/u/fllda6xl", {
            "pattern": r"https://uploadir\.com/u/fllda6xl",
            "count": 1,
            "keyword": {
                "extension": "png",
                "filename": "_圖片_🖼_image_",
                "id": "fllda6xl",
            },
        }),
        ("https://uploadir.com/uploads/rd3t46ry"),
        ("https://uploadir.com/user/uploads/rd3t46ry"),
    )

    def __init__(self, match):
        """Store the file ID captured by group 1 of 'pattern'"""
        Extractor.__init__(self, match)
        self.file_id = match.group(1)

    def items(self):
        """Yield a Directory message and a Url message for the file

        A HEAD request to '<root>/u/<id>' is sent without following
        redirects. A 3xx response leads to an HTML page containing a
        download form; any other response is treated as the file itself.
        """
        url = "{}/u/{}".format(self.root, self.file_id)
        response = self.request(url, method="HEAD", allow_redirects=False)

        if 300 <= response.status_code < 400:
            # redirected to a download page:
            # read file name, form target, and form token from its HTML
            url = response.headers["Location"]
            extr = text.extract_from(self.request(url).text)

            # the three values are extracted in the order
            # the original code reads them from the page
            name = text.unescape(extr("<h2>", "</h2>").strip())
            url = self.root + extr('class="form" action="', '"')
            token = extr('name="authenticity_token" value="', '"')

            # NOTE(review): the '_http_*' keys presumably instruct the
            # downloader to submit the form via POST -- confirm
            data = text.nameext_from_url(name, {
                "_http_method": "POST",
                "_http_data"  : {
                    "authenticity_token": token,
                    "upload_id": self.file_id,
                },
            })

        else:
            # direct response: take the file name from Content-Disposition.
            # A missing header yields None; fall back to an empty string
            # so the string operations below do not raise AttributeError.
            hcd = response.headers.get("Content-Disposition") or ""
            # prefer the UTF-8 'filename*' parameter over plain 'filename'
            name = (hcd.partition("filename*=UTF-8''")[2] or
                    text.extr(hcd, 'filename="', '"'))
            data = text.nameext_from_url(name)

        data["id"] = self.file_id
        yield Message.Directory, data
        yield Message.Url, url, data