[cien] add 'files' option (#2885)

pull/5951/head
Mike Fährmann 2 months ago
parent f68b0efc1b
commit f87783ff26
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -1448,6 +1448,22 @@ Description
Process reposts.
extractor.cien.files
--------------------
Type
``list`` of ``strings``
Default
``["image", "video", "download", "gallery"]``
Description
Determines the type and order of files to be downloaded.
Available types are
``image``,
``video``,
``download``,
``gallery``.
extractor.cyberdrop.domain
--------------------------
Type

@@ -82,10 +82,24 @@ class CienArticleExtractor(CienExtractor):
def _extract_files(self, page): def _extract_files(self, page):
files = [] files = []
self._extract_files_image(page, files) filetypes = self.config("files")
self._extract_files_video(page, files) if filetypes is None:
self._extract_files_attachment(page, files) self._extract_files_image(page, files)
self._extract_files_gallery(page, files) self._extract_files_video(page, files)
self._extract_files_download(page, files)
self._extract_files_gallery(page, files)
else:
generators = {
"image" : self._extract_files_image,
"video" : self._extract_files_video,
"download": self._extract_files_download,
"gallery" : self._extract_files_gallery,
"gallerie": self._extract_files_gallery,
}
if isinstance(filetypes, str):
filetypes = filetypes.split(",")
for ft in filetypes:
generators[ft.rstrip("s")](page, files)
return files return files
@@ -114,14 +128,14 @@ class CienArticleExtractor(CienExtractor):
file["type"] = "video" file["type"] = "video"
files.append(file) files.append(file)
def _extract_files_attachment(self, page, files): def _extract_files_download(self, page, files):
for download in text.extract_iter( for download in text.extract_iter(
page, 'class="downloadBlock', "</div>"): page, 'class="downloadBlock', "</div>"):
name = text.extr(download, "<p>", "<") name = text.extr(download, "<p>", "<")
file = text.nameext_from_url(name.rpartition(" ")[0]) file = text.nameext_from_url(name.rpartition(" ")[0])
file["url"] = text.extr(download, ' href="', '"') file["url"] = text.extr(download, ' href="', '"')
file["type"] = "attachment" file["type"] = "download"
files.append(file) files.append(file)
def _extract_files_gallery(self, page, files): def _extract_files_gallery(self, page, files):

@@ -50,12 +50,12 @@ __tests__ = (
"#url" : "https://ci-en.dlsite.com/creator/25509/article/1172460", "#url" : "https://ci-en.dlsite.com/creator/25509/article/1172460",
"#category": ("", "cien", "article"), "#category": ("", "cien", "article"),
"#class" : cien.CienArticleExtractor, "#class" : cien.CienArticleExtractor,
"#range" : "3", "#options" : {"files": "download"},
"#pattern" : r"https://media\.ci-en\.jp/private/attachment/creator/00025509/7fd3c039d2277ba9541e82592aca6f6751f6c268404038ccbf1112bcf2f93357/upload/.+\.zip\?px-time=.+", "#pattern" : r"https://media\.ci-en\.jp/private/attachment/creator/00025509/7fd3c039d2277ba9541e82592aca6f6751f6c268404038ccbf1112bcf2f93357/upload/.+\.zip\?px-time=.+",
"filename" : "VP 1.05.4 Tim-v9 ENG rec v3", "filename" : "VP 1.05.4 Tim-v9 ENG rec v3",
"extension": "zip", "extension": "zip",
"type" : "attachment", "type" : "download",
}, },
{ {

Loading…
Cancel
Save