rewrite URL patterns to use only 1 per extractor

pull/170/head
Mike Fährmann 6 years ago
parent 0e46db6f45
commit 34bab080ae
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -24,7 +24,7 @@ class ArtstationExtractor(Extractor):
def __init__(self, match=None):
Extractor.__init__(self)
self.user = match.group(1) if match else None
self.user = match.group(1) or match.group(2) if match else None
self.external = self.config("external", False)
def items(self):
@ -123,10 +123,9 @@ class ArtstationExtractor(Extractor):
class ArtstationUserExtractor(ArtstationExtractor):
"""Extractor for all projects of an artstation user"""
subcategory = "user"
pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?&#]+)(?:/albums/all)?/?$",
r"(?:https?://)?((?!www)\w+)\.artstation\.com"
r"(?:/(?:projects/?)?)?$"]
pattern = [r"(?:https?://)?(?:(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?&#]+)(?:/albums/all)?"
r"|((?!www)\w+)\.artstation\.com(?:/projects)?)/?$"]
test = [
("https://www.artstation.com/gaerikim/", {
"pattern": r"https://\w+\.artstation\.com/p/assets"
@ -149,10 +148,9 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
directory_fmt = ["{category}", "{userinfo[username]}", "Albums",
"{album[id]} - {album[title]}"]
archive_fmt = "a_{album[id]}_{asset[id]}"
pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?&#]+)/albums/(\d+)",
r"(?:https?://)?((?!www)\w+)\.artstation\.com"
r"/albums/(\d+)"]
pattern = [r"(?:https?://)?(?:(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?&#]+)"
r"|((?!www)\w+)\.artstation\.com)/albums/(\d+)"]
test = [
("https://www.artstation.com/huimeiye/albums/770899", {
"count": 2,
@ -165,7 +163,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
self.album_id = text.parse_int(match.group(2))
self.album_id = text.parse_int(match.group(3))
def metadata(self):
userinfo = self.get_user_info(self.user)

@ -21,10 +21,8 @@ class E621Extractor(booru.MoebooruPageMixin, booru.BooruExtractor):
class E621TagExtractor(booru.TagMixin, E621Extractor):
"""Extractor for images from e621.net based on search-tags"""
pattern = [
r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/(?P<tags>[^/?&#]+)",
r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=(?P<tags>[^&#]+)",
]
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post"
r"(?:/index/\d+/|\?tags=)(?P<tags>[^/?&#]+)"]
test = [
("https://e621.net/post/index/1/anry", {
"url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",

@ -45,9 +45,10 @@ class FlickrImageExtractor(FlickrExtractor):
"""Extractor for individual images from flickr.com"""
subcategory = "image"
archive_fmt = "{id}"
pattern = [r"(?:https?://)?(?:www\.|m\.)?flickr\.com/photos/[^/]+/(\d+)",
r"(?:https?://)?[^.]+\.static\.?flickr\.com/(?:\d+/)+(\d+)_",
r"(?:https?://)?flic\.kr/(p)/([A-Za-z1-9]+)"]
pattern = [r"(?:https?://)?(?:"
r"(?:(?:www\.|m\.)?flickr\.com/photos/[^/]+/"
r"|[^.]+\.static\.?flickr\.com/(?:\d+/)+)(\d+)"
r"|flic\.kr/p/([A-Za-z1-9]+))"]
test = [
("https://www.flickr.com/photos/departingyyz/16089302239", {
"url": "7f0887f5953f61c8b79a695cb102ea309c0346b0",
@ -66,7 +67,7 @@ class FlickrImageExtractor(FlickrExtractor):
def __init__(self, match):
FlickrExtractor.__init__(self, match)
if self.item_id == "p":
if not self.item_id:
alphabet = ("123456789abcdefghijkmnopqrstu"
"vwxyzABCDEFGHJKLMNPQRSTUVWXYZ")
self.item_id = util.bdecode(match.group(2), alphabet)

@ -133,10 +133,9 @@ class ImagefapUserExtractor(ImagefapExtractor):
"""Extractor for all galleries from a user at imagefap.com"""
subcategory = "user"
categorytransfer = True
pattern = [(r"(?:https?://)?(?:www\.)?imagefap\.com"
r"/profile(?:\.php\?user=|/)([^/?&#]+)"),
(r"(?:https?://)?(?:www\.)?imagefap\.com"
r"/usergallery\.php\?userid=(\d+)")]
pattern = [r"(?:https?://)?(?:www\.)?imagefap\.com/"
r"(?:profile(?:\.php\?user=|/)([^/?&#]+)"
r"|usergallery\.php\?userid=(\d+))"]
test = [
("https://www.imagefap.com/profile/LucyRae/galleries", {
"url": "d941aa906f56a75972a7a5283030eb9a8d27a4fd",
@ -149,12 +148,7 @@ class ImagefapUserExtractor(ImagefapExtractor):
def __init__(self, match):
ImagefapExtractor.__init__(self)
try:
self.user_id = int(match.group(1))
self.user = None
except ValueError:
self.user_id = None
self.user = match.group(1)
self.user, self.user_id = match.groups()
def items(self):
yield Message.Version, 1

@ -70,9 +70,8 @@ class ImagehostImageExtractor(SharedConfigMixin, Extractor):
class ImxtoImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imx.to"""
category = "imxto"
pattern = [r"(?:https?://)?(?:www\.)?(imx\.to/i/(\w+))",
r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)"
r"/img-([a-z0-9]+)\.html)"]
pattern = [r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)"
r"/(?:i/|img-)(\w+)(\.html)?)"]
test = (
("https://imx.to/i/1qdeva", { # new-style URL
"url": "ab2173088a6cdef631d7a47dec4a5da1c6a00130",

@ -53,9 +53,8 @@ class ImgurImageExtractor(ImgurExtractor):
subcategory = "image"
filename_fmt = "{category}_{hash}{title:?_//}.{extension}"
archive_fmt = "{hash}"
pattern = [(r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
r"/(?!gallery)(\w{7}|\w{5})"),
(r"(?:https?://)?i\.imgur\.com/(\w{7}|\w{5})[sbtmlh]?\.")]
pattern = [r"(?:https?://)?(?:www\.|[im]\.|)?imgur\.com"
r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?"]
test = [
("https://imgur.com/21yMxCS", {
"url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",

@ -28,11 +28,7 @@ class MangapandaMangaExtractor(MangapandaBase, MangareaderMangaExtractor):
class MangapandaChapterExtractor(MangapandaBase, MangareaderChapterExtractor):
"""Extractor for manga-chapters from mangapanda.com"""
pattern = [
(r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?&#]+)/(\d+))"),
(r"(?:https?://)?(?:www\.)?mangapanda\.com"
r"(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+)\.html)"),
]
pattern = [r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?&#]+)/(\d+))"]
test = [("https://www.mangapanda.com/red-storm/2", {
"url": "1f633f776e950531ba9b1e81965316458e785261",
"keyword": "32b5e84017c2bf5f122b339ecf40899e41f18cc9",

@ -60,11 +60,7 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
"""Extractor for manga-chapters from mangareader.net"""
archive_fmt = "{manga}_{chapter}_{page}"
pattern = [
(r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/?&#]+)/(\d+))"),
(r"(?:https?://)?(?:www\.)?mangareader\.net"
r"(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+)\.html)"),
]
pattern = [r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/?&#]+)/(\d+))"]
test = [(("https://www.mangareader.net/"
"karate-shoukoushi-kohinata-minoru/11"), {
"url": "061cc92a07edf17bb991ce0821fa4c77a147a860",

@ -82,10 +82,9 @@ class PixivExtractor(Extractor):
class PixivUserExtractor(PixivExtractor):
"""Extractor for works of a pixiv-user"""
subcategory = "user"
pattern = [(r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"),
(r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+)()")]
pattern = [r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/"
r"(?:member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"
r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))"]
test = [
("http://www.pixiv.net/member_illust.php?id=173530", {
"url": "852c31ad83b6840bacbce824d85f2a997889efb7",
@ -107,7 +106,7 @@ class PixivUserExtractor(PixivExtractor):
def __init__(self, match):
PixivExtractor.__init__(self)
self.user_id = match.group(1)
self.user_id = match.group(1) or match.group(3)
self.query = text.parse_query(match.group(2))
def works(self):
@ -153,13 +152,11 @@ class PixivMeExtractor(PixivExtractor):
class PixivWorkExtractor(PixivExtractor):
"""Extractor for a single pixiv work/illustration"""
subcategory = "work"
pattern = [(r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/member(?:_illust)?\.php\?(?:[^&]+&)*illust_id=(\d+)"),
(r"(?:https?://)?i(?:\d+\.pixiv|\.pximg)\.net"
r"/(?:(?:.*/)?img-[^/]+/img/\d{4}(?:/\d\d){5}"
r"|img\d+/img/[^/]+)/(\d+)"),
(r"(?:https?://)?img\d*\.pixiv\.net/img/[^/]+/(\d+)"),
(r"(?:https?://)?(?:www\.)?pixiv\.net/i/(\d+)")]
pattern = [r"(?:https?://)?(?:(?:www\.|touch\.)?pixiv\.net"
r"/member(?:_illust)?\.php\?(?:[^&]+&)*illust_id=(\d+)"
r"|(?:i(?:\d+\.pixiv|\.pximg)\.net"
r"/(?:(?:.*/)?img-[^/]+/img/\d{4}(?:/\d\d){5}|img\d+/img/[^/]+)"
r"|img\d*\.pixiv\.net/img/[^/]+|(?:www\.)?pixiv\.net/i)/(\d+))"]
test = [
(("http://www.pixiv.net/member_illust.php"
"?mode=medium&illust_id=966412"), {
@ -187,7 +184,7 @@ class PixivWorkExtractor(PixivExtractor):
def __init__(self, match):
PixivExtractor.__init__(self)
self.illust_id = match.group(1)
self.illust_id = match.group(1) or match.group(2)
self.load_ugoira = True
self.work = None

@ -166,10 +166,9 @@ class SeigaImageExtractor(SeigaExtractor):
"""Extractor for single images from seiga.nicovideo.jp"""
subcategory = "image"
filename_fmt = "{category}_{image_id}.{extension}"
pattern = [(r"(?:https?://)?(?:www\.|seiga\.)?nicovideo\.jp/"
r"(?:seiga/im|image/source/)(\d+)"),
(r"(?:https?://)?lohas\.nicoseiga\.jp/"
r"(?:priv|o)/[^/]+/\d+/(\d+)")]
pattern = [r"(?:https?://)?(?:"
r"(?:www\.|seiga\.)?nicovideo\.jp/(?:seiga/im|image/source/)"
r"|lohas\.nicoseiga\.jp/(?:priv|o)/[^/]+/\d+/)(\d+)"]
test = [
("http://seiga.nicovideo.jp/seiga/im5977527", {
"keyword": "f66ba5de33d4ce2cb57f23bb37e1e847e0771c10",

Loading…
Cancel
Save