[wikimedia] add 'wiki' extractor

pull/6050/head
ClosedPort22 1 month ago
parent a3b2c88fbe
commit 33d2ddd9fb
No known key found for this signature in database

@ -193,3 +193,19 @@ class WikimediaArticleExtractor(WikimediaExtractor):
def prepare(self, image): def prepare(self, image):
WikimediaExtractor.prepare(image) WikimediaExtractor.prepare(image)
image["page"] = self.title image["page"] = self.title
class WikimediaWikiExtractor(WikimediaExtractor):
    """Extractor covering every file hosted on a MediaWiki instance"""
    subcategory = "wiki"
    pattern = BASE_PATTERN + r"/?$"
    example = "https://en.wikipedia.org/"

    def __init__(self, match):
        """Initialize the base extractor and set the API query parameters"""
        WikimediaExtractor.__init__(self, match)

        # Page listing is done through the 'allpages' generator,
        # restricted to a single namespace.
        # ref: https://www.mediawiki.org/wiki/API:Allpages
        file_namespace = 6  # numeric ID of the "File" namespace
        self.params = dict(generator="allpages", gapnamespace=file_namespace)

@ -98,4 +98,12 @@ __tests__ = (
"#class" : wikimedia.WikimediaArticleExtractor, "#class" : wikimedia.WikimediaArticleExtractor,
}, },
{
"#url" : "https://youtube.fandom.com",
"#category": ("wikimedia", "fandom-youtube", "wiki"),
"#class" : wikimedia.WikimediaWikiExtractor,
"#range" : "1-20",
"#count" : 20,
},
) )

@ -50,4 +50,12 @@ __tests__ = (
"#class" : wikimedia.WikimediaArticleExtractor, "#class" : wikimedia.WikimediaArticleExtractor,
}, },
{
"#url" : "https://en.wikipedia.org",
"#category": ("wikimedia", "wikipedia", "wiki"),
"#class" : wikimedia.WikimediaWikiExtractor,
"#range" : "1-10",
"#count" : 10,
},
) )

Loading…
Cancel
Save