- rewrite using BaseExtractor - support most Wiki* domains - update docs/supportedsites - add testspull/2340/head
parent
221f54309c
commit
c3c1635ef3
@ -1,172 +1,144 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2022-2022 Ailothaen
|
# Copyright 2022 Ailothaen
|
||||||
|
# Copyright 2024 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
"""Extractors for Wikimedia and Wikipedia.
|
"""Extractors for Wikimedia and Wikipedia"""
|
||||||
(Other Mediawiki instances use the same API,so a similar extractor
|
|
||||||
could be written)
|
|
||||||
|
|
||||||
Various reference:
|
from .common import BaseExtractor, Message
|
||||||
https://www.mediawiki.org/wiki/API:Query
|
from .. import text
|
||||||
https://opendata.stackexchange.com/questions/13381/wikimedia-commons-api-image-by-category
|
|
||||||
"""
|
|
||||||
|
|
||||||
from .common import Extractor, Message
|
|
||||||
import time
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
class WikimediaExtractor(BaseExtractor):
|
||||||
class WikimediaArticleExtractor(Extractor):
|
"""Base class for wikimedia extractors"""
|
||||||
category = "wikimedia"
|
basecategory = "wikimedia"
|
||||||
subcategory = "article"
|
|
||||||
filename_fmt = "{filename}.{extension}"
|
|
||||||
archive_fmt = "a_{sha1}"
|
|
||||||
pattern = r"https?://([a-z]{2,})\.wikipedia\.org/wiki/([^#/\?]+)"
|
|
||||||
directory_fmt = ("{category}", "{page}")
|
directory_fmt = ("{category}", "{page}")
|
||||||
test = (
|
archive_fmt = "{sha1}"
|
||||||
("https://en.wikipedia.org/wiki/Athena"),
|
request_interval = (1.0, 2.0)
|
||||||
("https://zh.wikipedia.org/wiki/太阳"),
|
|
||||||
("https://simple.wikipedia.org/wiki/Hydrogen", {
|
|
||||||
"count": ">= 2"
|
|
||||||
})
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
self.lang, self.page = match.groups()
|
self.title = match.group(match.lastindex)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
continuation = None
|
for info in self._pagination(self.params):
|
||||||
gimcontinuation = None
|
image = info["imageinfo"][0]
|
||||||
|
|
||||||
|
image["metadata"] = {
|
||||||
|
m["name"]: m["value"]
|
||||||
|
for m in image["metadata"]}
|
||||||
|
image["commonmetadata"] = {
|
||||||
|
m["name"]: m["value"]
|
||||||
|
for m in image["commonmetadata"]}
|
||||||
|
|
||||||
|
filename = image["canonicaltitle"]
|
||||||
|
image["filename"], _, image["extension"] = \
|
||||||
|
filename.partition(":")[2].rpartition(".")
|
||||||
|
image["date"] = text.parse_datetime(
|
||||||
|
image["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
|
||||||
|
image["page"] = self.title
|
||||||
|
|
||||||
|
yield Message.Directory, image
|
||||||
|
yield Message.Url, image["url"], image
|
||||||
|
|
||||||
|
def _pagination(self, params):
|
||||||
|
"""
|
||||||
|
https://www.mediawiki.org/wiki/API:Query
|
||||||
|
https://opendata.stackexchange.com/questions/13381
|
||||||
|
"""
|
||||||
|
|
||||||
|
url = self.root + "/w/api.php"
|
||||||
|
params["action"] = "query"
|
||||||
|
params["format"] = "json"
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
if continuation is None:
|
data = self.request(url, params=params).json()
|
||||||
file_list_request = self.request(
|
|
||||||
"https://{lang}.wikipedia.org/w/api.php?action=query&generator=images&format=json&titles={page}&prop=imageinfo&iiprop=timestamp|user|userid|comment|canonicaltitle|url|size|sha1|mime|metadata|commonmetadata|extmetadata|bitdepth".format( # noqa
|
|
||||||
lang=self.lang, page=self.page
|
|
||||||
)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
file_list_request = self.request(
|
|
||||||
"https://{lang}.wikipedia.org/w/api.php?action=query&generator=images&format=json&titles={page}&prop=imageinfo&iiprop=timestamp|user|userid|comment|canonicaltitle|url|size|sha1|mime|metadata|commonmetadata|extmetadata|bitdepth&continue={continuation}&gimcontinue={gimcontinuation}".format( # noqa
|
|
||||||
lang=self.lang,
|
|
||||||
page=self.page,
|
|
||||||
continuation=continuation,
|
|
||||||
gimcontinuation=gimcontinuation,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
file_list = file_list_request.json()
|
|
||||||
|
|
||||||
for file_index in list(file_list["query"]["pages"]):
|
|
||||||
image = file_list["query"]["pages"][file_index]["imageinfo"][0]
|
|
||||||
|
|
||||||
metadata = image
|
|
||||||
metadata["filename"] = WikimediaUtils.clean_name(
|
|
||||||
image["canonicaltitle"]
|
|
||||||
)[0]
|
|
||||||
metadata["extension"] = WikimediaUtils.clean_name(
|
|
||||||
image["canonicaltitle"]
|
|
||||||
)[1]
|
|
||||||
|
|
||||||
yield Message.Directory, {"page": self.page, "lang": self.lang}
|
|
||||||
yield Message.Url, image["url"], image
|
|
||||||
else:
|
|
||||||
# We arrived at the end of the response
|
|
||||||
# checking if there are more files to retrieve
|
|
||||||
try:
|
|
||||||
continuation_info = file_list["continue"]
|
|
||||||
except KeyError:
|
|
||||||
# No more continuation info: all files were retrieved
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
# Continuation info is present
|
|
||||||
# there are still files to retrieve
|
|
||||||
continuation = continuation_info["continue"]
|
|
||||||
gimcontinuation = continuation_info["gimcontinue"]
|
|
||||||
|
|
||||||
# giving a rest to Wikipedia API
|
|
||||||
time.sleep(1)
|
|
||||||
|
|
||||||
|
|
||||||
class WikimediaCategoryExtractor(Extractor):
|
|
||||||
category = "wikimedia"
|
|
||||||
subcategory = "category"
|
|
||||||
filename_fmt = "{filename}.{extension}"
|
|
||||||
archive_fmt = "c_{sha1}"
|
|
||||||
pattern = r"https?://commons.wikimedia.org/wiki/Category:([^#/\?]+)"
|
|
||||||
directory_fmt = ("{category}", "{page}")
|
|
||||||
|
|
||||||
test = (
|
try:
|
||||||
("https://commons.wikimedia.org/wiki/Category:Network_maps_of_the_Paris_Metro"), # noqa
|
pages = data["query"]["pages"]
|
||||||
("https://commons.wikimedia.org/wiki/Category:Tyto_alba_in_flight_(captive)", { # noqa
|
except KeyError:
|
||||||
"count": ">= 21"
|
pass
|
||||||
})
|
else:
|
||||||
)
|
yield from pages.values()
|
||||||
|
|
||||||
|
try:
|
||||||
|
continuation = data["continue"]
|
||||||
|
except KeyError:
|
||||||
|
break
|
||||||
|
params.update(continuation)
|
||||||
|
|
||||||
|
|
||||||
|
BASE_PATTERN = WikimediaExtractor.update({
|
||||||
|
"wikipedia": {
|
||||||
|
"root": None,
|
||||||
|
"pattern": r"[a-z]{2,}\.wikipedia\.org",
|
||||||
|
},
|
||||||
|
"wiktionary": {
|
||||||
|
"root": None,
|
||||||
|
"pattern": r"[a-z]{2,}\.wiktionary\.org",
|
||||||
|
},
|
||||||
|
"wikiquote": {
|
||||||
|
"root": None,
|
||||||
|
"pattern": r"[a-z]{2,}\.wikiquote\.org",
|
||||||
|
},
|
||||||
|
"wikibooks": {
|
||||||
|
"root": None,
|
||||||
|
"pattern": r"[a-z]{2,}\.wikibooks\.org",
|
||||||
|
},
|
||||||
|
"wikisource": {
|
||||||
|
"root": None,
|
||||||
|
"pattern": r"[a-z]{2,}\.wikisource\.org",
|
||||||
|
},
|
||||||
|
"wikinews": {
|
||||||
|
"root": None,
|
||||||
|
"pattern": r"[a-z]{2,}\.wikinews\.org",
|
||||||
|
},
|
||||||
|
"wikiversity": {
|
||||||
|
"root": None,
|
||||||
|
"pattern": r"[a-z]{2,}\.wikiversity\.org",
|
||||||
|
},
|
||||||
|
"wikispecies": {
|
||||||
|
"root": "https://species.wikimedia.org",
|
||||||
|
"pattern": r"species\.wikimedia\.org",
|
||||||
|
},
|
||||||
|
"wikimediacommons": {
|
||||||
|
"root": "https://commons.wikimedia.org",
|
||||||
|
"pattern": r"commons\.wikimedia\.org",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
class WikimediaArticleExtractor(WikimediaExtractor):
|
||||||
|
"""Extractor for wikimedia articles"""
|
||||||
|
subcategory = "article"
|
||||||
|
pattern = BASE_PATTERN + r"/wiki/(?!Category:)([^/?#]+)"
|
||||||
|
example = "https://en.wikipedia.org/wiki/TITLE"
|
||||||
|
|
||||||
def __init__(self, match):
|
def _init(self):
|
||||||
Extractor.__init__(self, match)
|
self.params = {
|
||||||
self.page = match.groups()[0]
|
"generator": "images",
|
||||||
|
"titles" : self.title,
|
||||||
|
"prop" : "imageinfo",
|
||||||
|
"iiprop": "timestamp|user|userid|comment|canonicaltitle|url|size|"
|
||||||
|
"sha1|mime|metadata|commonmetadata|extmetadata|bitdepth",
|
||||||
|
}
|
||||||
|
|
||||||
def items(self):
|
|
||||||
continuation = None
|
|
||||||
gcmcontinuation = None
|
|
||||||
|
|
||||||
while True:
|
class WikimediaCategoryExtractor(WikimediaExtractor):
|
||||||
if continuation is None:
|
subcategory = "category"
|
||||||
file_list_request = self.request(
|
pattern = BASE_PATTERN + r"/wiki/(Category:[^/?#]+)"
|
||||||
"https://commons.wikimedia.org/w/api.php?action=query&generator=categorymembers&gcmtitle=Category:{page}&gcmtype=file&prop=imageinfo&format=json&iiprop=timestamp|user|userid|comment|canonicaltitle|url|size|sha1|mime|metadata|commonmetadata|extmetadata|bitdepth".format( # noqa
|
example = "https://commons.wikimedia.org/wiki/Category:NAME"
|
||||||
page=self.page
|
|
||||||
)
|
def _init(self):
|
||||||
)
|
self.params = {
|
||||||
else:
|
"generator": "categorymembers",
|
||||||
file_list_request = self.request(
|
"gcmtitle" : self.title,
|
||||||
"https://commons.wikimedia.org/w/api.php?action=query&generator=categorymembers&gcmtitle=Category:{page}&gcmtype=file&prop=imageinfo&format=json&iiprop=timestamp|user|userid|comment|canonicaltitle|url|size|sha1|mime|metadata|commonmetadata|extmetadata|bitdepth&continue={continuation}&gcmcontinue={gcmcontinuation}".format( # noqa
|
"gcmtype" : "file",
|
||||||
page=self.page,
|
"prop" : "imageinfo",
|
||||||
continuation=continuation,
|
"iiprop": "timestamp|user|userid|comment|canonicaltitle|url|size|"
|
||||||
gcmcontinuation=gcmcontinuation,
|
"sha1|mime|metadata|commonmetadata|extmetadata|bitdepth",
|
||||||
)
|
}
|
||||||
)
|
|
||||||
file_list = file_list_request.json()
|
|
||||||
|
|
||||||
for file_index in list(file_list["query"]["pages"]):
|
|
||||||
image = file_list["query"]["pages"][file_index]["imageinfo"][0]
|
|
||||||
|
|
||||||
metadata = image
|
|
||||||
metadata["filename"] = WikimediaUtils.clean_name(
|
|
||||||
image["canonicaltitle"]
|
|
||||||
)[0]
|
|
||||||
metadata["extension"] = WikimediaUtils.clean_name(
|
|
||||||
image["canonicaltitle"]
|
|
||||||
)[1]
|
|
||||||
|
|
||||||
yield Message.Directory, {"page": self.page, "lang": "common"}
|
|
||||||
yield Message.Url, image["url"], image
|
|
||||||
else:
|
|
||||||
# We arrived at the end of the response
|
|
||||||
# checking if there are more files to retrieve
|
|
||||||
try:
|
|
||||||
continuation_info = file_list["continue"]
|
|
||||||
except KeyError:
|
|
||||||
# No more continuation info: all files were retrieved
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
# Continuation info is present
|
|
||||||
# there are still files to retrieve
|
|
||||||
continuation = continuation_info["continue"]
|
|
||||||
gcmcontinuation = continuation_info["gcmcontinue"]
|
|
||||||
|
|
||||||
# giving a rest to Wikipedia API
|
|
||||||
time.sleep(1)
|
|
||||||
|
|
||||||
|
|
||||||
class WikimediaUtils:
|
|
||||||
@staticmethod
|
|
||||||
def clean_name(name):
|
|
||||||
name = re.sub(r"^\w+:", "", name)
|
|
||||||
filename = ".".join(name.split(".")[:-1])
|
|
||||||
extension = name.split(".")[-1]
|
|
||||||
return filename, extension
|
|
||||||
|
@ -0,0 +1,23 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import wikimedia
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
|
||||||
|
{
|
||||||
|
"#url" : "https://www.wikibooks.org/wiki/Title",
|
||||||
|
"#category": ("wikimedia", "wikibooks", "article"),
|
||||||
|
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://en.wikibooks.org/wiki/Category:Title",
|
||||||
|
"#category": ("wikimedia", "wikibooks", "category"),
|
||||||
|
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
)
|
@ -0,0 +1,23 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import wikimedia
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
|
||||||
|
{
|
||||||
|
"#url" : "https://commons.wikimedia.org/wiki/File:Starr-050516-1367-Pimenta_dioica-flowers-Maunaloa-Molokai_(24762757525).jpg",
|
||||||
|
"#category": ("wikimedia", "wikimediacommons", "article"),
|
||||||
|
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://commons.wikimedia.org/wiki/Category:Network_maps_of_the_Paris_Metro",
|
||||||
|
"#category": ("wikimedia", "wikimediacommons", "category"),
|
||||||
|
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
)
|
@ -0,0 +1,23 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import wikimedia
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
|
||||||
|
{
|
||||||
|
"#url" : "https://www.wikinews.org/wiki/Title",
|
||||||
|
"#category": ("wikimedia", "wikinews", "article"),
|
||||||
|
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://en.wikinews.org/wiki/Category:Title",
|
||||||
|
"#category": ("wikimedia", "wikinews", "category"),
|
||||||
|
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
)
|
@ -0,0 +1,53 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import wikimedia
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
|
||||||
|
{
|
||||||
|
"#url" : "https://www.wikipedia.org/wiki/Title",
|
||||||
|
"#category": ("wikimedia", "wikipedia", "article"),
|
||||||
|
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://en.wikipedia.org/wiki/Athena",
|
||||||
|
"#category": ("wikimedia", "wikipedia", "article"),
|
||||||
|
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||||
|
"#pattern" : r"https://upload.wikimedia.org/wikipedia/.+",
|
||||||
|
"#count" : range(50, 100),
|
||||||
|
|
||||||
|
"bitdepth" : int,
|
||||||
|
"canonicaltitle": str,
|
||||||
|
"comment" : str,
|
||||||
|
"commonmetadata": dict,
|
||||||
|
"date" : "type:datetime",
|
||||||
|
"descriptionshorturl": str,
|
||||||
|
"descriptionurl": str,
|
||||||
|
"extension" : str,
|
||||||
|
"extmetadata" : dict,
|
||||||
|
"filename" : str,
|
||||||
|
"height" : int,
|
||||||
|
"metadata" : dict,
|
||||||
|
"mime" : r"re:image/\w+",
|
||||||
|
"page" : "Athena",
|
||||||
|
"sha1" : r"re:^[0-9a-f]{40}$",
|
||||||
|
"size" : int,
|
||||||
|
"timestamp" : str,
|
||||||
|
"url" : str,
|
||||||
|
"user" : str,
|
||||||
|
"userid" : int,
|
||||||
|
"width" : int,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://en.wikipedia.org/wiki/Category:Physics",
|
||||||
|
"#category": ("wikimedia", "wikipedia", "category"),
|
||||||
|
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
)
|
@ -0,0 +1,23 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import wikimedia
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
|
||||||
|
{
|
||||||
|
"#url" : "https://www.wikiquote.org/wiki/Title",
|
||||||
|
"#category": ("wikimedia", "wikiquote", "article"),
|
||||||
|
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://en.wikiquote.org/wiki/Category:Title",
|
||||||
|
"#category": ("wikimedia", "wikiquote", "category"),
|
||||||
|
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
)
|
@ -0,0 +1,23 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import wikimedia
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
|
||||||
|
{
|
||||||
|
"#url" : "https://www.wikisource.org/wiki/Title",
|
||||||
|
"#category": ("wikimedia", "wikisource", "article"),
|
||||||
|
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://en.wikisource.org/wiki/Category:Title",
|
||||||
|
"#category": ("wikimedia", "wikisource", "category"),
|
||||||
|
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
)
|
@ -0,0 +1,25 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import wikimedia
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
|
||||||
|
{
|
||||||
|
"#url" : "https://species.wikimedia.org/wiki/Geranospiza",
|
||||||
|
"#category": ("wikimedia", "wikispecies", "article"),
|
||||||
|
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||||
|
"#urls" : "https://upload.wikimedia.org/wikipedia/commons/0/01/Geranospiza_caerulescens.jpg",
|
||||||
|
"#sha1_content": "3a17c14b15489928e4154f826af1c42afb5a523e",
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://species.wikimedia.org/wiki/Category:Names",
|
||||||
|
"#category": ("wikimedia", "wikispecies", "category"),
|
||||||
|
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
)
|
@ -0,0 +1,23 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import wikimedia
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
|
||||||
|
{
|
||||||
|
"#url" : "https://www.wikiversity.org/wiki/Title",
|
||||||
|
"#category": ("wikimedia", "wikiversity", "article"),
|
||||||
|
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://en.wikiversity.org/wiki/Category:Title",
|
||||||
|
"#category": ("wikimedia", "wikiversity", "category"),
|
||||||
|
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
)
|
@ -0,0 +1,23 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import wikimedia
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
|
||||||
|
{
|
||||||
|
"#url" : "https://www.wiktionary.org/wiki/Word",
|
||||||
|
"#category": ("wikimedia", "wiktionary", "article"),
|
||||||
|
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://en.wiktionary.org/wiki/Category:Words",
|
||||||
|
"#category": ("wikimedia", "wiktionary", "category"),
|
||||||
|
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
)
|
Loading…
Reference in new issue