simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable
pull/170/head
Mike Fährmann 6 years ago
parent 34bab080ae
commit 6284731107
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2018 Mike Fährmann
# Copyright 2017-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -16,15 +16,15 @@ class FutabaThreadExtractor(Extractor):
"""Extractor for images from threads on www.2chan.net"""
category = "2chan"
subcategory = "thread"
directory_fmt = ["{category}", "{board_name}", "{thread}"]
directory_fmt = ("{category}", "{board_name}", "{thread}")
filename_fmt = "{tim}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
urlfmt = "https://{server}.2chan.net/{board}/src/{filename}"
pattern = [r"(?:https?://)?(([^.]+)\.2chan\.net/([^/]+)/res/(\d+))"]
test = [("http://dec.2chan.net/70/res/947.htm", {
pattern = r"(?:https?://)?(([^.]+)\.2chan\.net/([^/]+)/res/(\d+))"
test = ("http://dec.2chan.net/70/res/947.htm", {
"url": "c5c12b80b290e224b6758507b3bb952044f4595b",
"keyword": "4bd22e7a9c3636faecd6ea7082509e8655e10dd0",
})]
})
def __init__(self, match):
Extractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -29,29 +29,29 @@ class ThreedeebooruExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
class ThreedeebooruTagExtractor(booru.TagMixin,
ThreedeebooruExtractor):
"""Extractor for images from behoimi.org based on search-tags"""
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post"
r"(?:/(?:index)?)?\?tags=(?P<tags>[^&#]+)"]
test = [("http://behoimi.org/post?tags=himekawa_azuru+dress", {
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org/post"
r"(?:/(?:index)?)?\?tags=(?P<tags>[^&#]+)")
test = ("http://behoimi.org/post?tags=himekawa_azuru+dress", {
"url": "ecb30c6aaaf8a6ff8f55255737a9840832a483c1",
"content": "11cbda40c287e026c1ce4ca430810f761f2d0b2a",
})]
})
class ThreedeebooruPoolExtractor(booru.PoolMixin,
ThreedeebooruExtractor):
"""Extractor for image-pools from behoimi.org"""
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P<pool>\d+)"]
test = [("http://behoimi.org/pool/show/27", {
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P<pool>\d+)"
test = ("http://behoimi.org/pool/show/27", {
"url": "da75d2d1475449d5ef0c266cb612683b110a30f2",
"content": "fd5b37c5c6c2de4b4d6f1facffdefa1e28176554",
})]
})
class ThreedeebooruPostExtractor(booru.PostMixin,
ThreedeebooruExtractor):
"""Extractor for single images from behoimi.org"""
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P<post>\d+)"]
test = [("http://behoimi.org/post/show/140852", {
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P<post>\d+)"
test = ("http://behoimi.org/post/show/140852", {
"url": "ce874ea26f01d6c94795f3cc3aaaaa9bc325f2f6",
"content": "26549d55b82aa9a6c1686b96af8bfcfa50805cd4",
"options": (("tags", True),),
@ -61,19 +61,19 @@ class ThreedeebooruPostExtractor(booru.PostMixin,
"tags_model": "himekawa_azuru",
"tags_general": str,
},
})]
})
class ThreedeebooruPopularExtractor(booru.MoebooruPopularMixin,
ThreedeebooruExtractor):
"""Extractor for popular images from behoimi.org"""
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org"
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org"
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
r"(?:\?(?P<query>[^#]*))?"]
test = [("http://behoimi.org/post/popular_by_month?month=2&year=2013", {
r"(?:\?(?P<query>[^#]*))?")
test = ("http://behoimi.org/post/popular_by_month?month=2&year=2013", {
"url": "a447e115fdab60c25ab71c4fdb1b9f509bc23f99",
"count": 20,
})]
})
def __init__(self, match):
super().__init__(match)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2017 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -15,9 +15,9 @@ from .. import text
class FourchanThreadExtractor(chan.ChanThreadExtractor):
"""Extractor for images from threads from 4chan.org"""
category = "4chan"
pattern = [r"(?:https?://)?boards\.4chan(?:nel)?\.org"
r"/([^/]+)/thread/(\d+)"]
test = [
pattern = (r"(?:https?://)?boards\.4chan(?:nel)?\.org"
r"/([^/]+)/thread/(\d+)")
test = (
("https://boards.4chan.org/tg/thread/15396072/", {
"url": "39082ad166161966d7ba8e37f2173a824eb540f0",
"keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
@ -27,7 +27,7 @@ class FourchanThreadExtractor(chan.ChanThreadExtractor):
"url": "39082ad166161966d7ba8e37f2173a824eb540f0",
"keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
}),
]
)
api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"
file_url = "https://i.4cdn.org/{board}/{tim}{ext}"

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2018 Mike Fährmann
# Copyright 2014-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -15,11 +15,11 @@ class InfinitychanThreadExtractor(chan.ChanThreadExtractor):
"""Extractor for images from threads from 8ch.net"""
category = "8chan"
filename_fmt = "{time}-{filename}{ext}"
pattern = [r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+)"]
test = [("https://8ch.net/builders/res/3.html", {
pattern = r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+)"
test = ("https://8ch.net/builders/res/3.html", {
"url": "5d85c0509f907f217aea379f862b41bf3d01f645",
"keyword": "0c497190c0c0f826925fde09815351d01869c783",
})]
})
api_url = "https://8ch.net/{board}/res/{thread}.json"
file_url = "https://media.8ch.net/{board}/src/{tim}{ext}"
file_url_v2 = "https://media.8ch.net/file_store/{tim}{ext}"

@ -103,16 +103,14 @@ def find(url):
def add(klass):
"""Add 'klass' to the list of available extractors"""
for pattern in klass.pattern:
_cache.append((re.compile(pattern), klass))
_cache.append((re.compile(klass.pattern), klass))
def add_module(module):
"""Add all extractors in 'module' to the list of available extractors"""
tuples = [
(re.compile(pattern), klass)
(re.compile(klass.pattern), klass)
for klass in _get_classes(module)
for pattern in klass.pattern
]
_cache.extend(tuples)
return tuples

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -18,7 +18,7 @@ class ArtstationExtractor(Extractor):
"""Base class for artstation extractors"""
category = "artstation"
filename_fmt = "{category}_{id}_{asset[id]}_{title}.{extension}"
directory_fmt = ["{category}", "{userinfo[username]}"]
directory_fmt = ("{category}", "{userinfo[username]}")
archive_fmt = "{asset[id]}"
root = "https://www.artstation.com"
@ -123,19 +123,19 @@ class ArtstationExtractor(Extractor):
class ArtstationUserExtractor(ArtstationExtractor):
"""Extractor for all projects of an artstation user"""
subcategory = "user"
pattern = [r"(?:https?://)?(?:(?:www\.)?artstation\.com"
pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?&#]+)(?:/albums/all)?"
r"|((?!www)\w+)\.artstation\.com(?:/projects)?)/?$"]
test = [
r"|((?!www)\w+)\.artstation\.com(?:/projects)?)/?$")
test = (
("https://www.artstation.com/gaerikim/", {
"pattern": r"https://\w+\.artstation\.com/p/assets"
r"/images/images/\d+/\d+/\d+/large/[^/]+",
"count": ">= 6",
}),
("https://www.artstation.com/gaerikim/albums/all/", None),
("https://gaerikim.artstation.com/", None),
("https://gaerikim.artstation.com/projects/", None),
]
("https://www.artstation.com/gaerikim/albums/all/"),
("https://gaerikim.artstation.com/"),
("https://gaerikim.artstation.com/projects/"),
)
def projects(self):
url = "{}/users/{}/projects.json".format(self.root, self.user)
@ -145,21 +145,21 @@ class ArtstationUserExtractor(ArtstationExtractor):
class ArtstationAlbumExtractor(ArtstationExtractor):
"""Extractor for all projects in an artstation album"""
subcategory = "album"
directory_fmt = ["{category}", "{userinfo[username]}", "Albums",
"{album[id]} - {album[title]}"]
directory_fmt = ("{category}", "{userinfo[username]}", "Albums",
"{album[id]} - {album[title]}")
archive_fmt = "a_{album[id]}_{asset[id]}"
pattern = [r"(?:https?://)?(?:(?:www\.)?artstation\.com"
pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?&#]+)"
r"|((?!www)\w+)\.artstation\.com)/albums/(\d+)"]
test = [
r"|((?!www)\w+)\.artstation\.com)/albums/(\d+)")
test = (
("https://www.artstation.com/huimeiye/albums/770899", {
"count": 2,
}),
("https://www.artstation.com/huimeiye/albums/770898", {
"exception": exception.NotFoundError,
}),
("https://huimeiye.artstation.com/albums/770899", None),
]
("https://huimeiye.artstation.com/albums/770899"),
)
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
@ -189,11 +189,11 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
class ArtstationLikesExtractor(ArtstationExtractor):
"""Extractor for liked projects of an artstation user"""
subcategory = "likes"
directory_fmt = ["{category}", "{userinfo[username]}", "Likes"]
directory_fmt = ("{category}", "{userinfo[username]}", "Likes")
archive_fmt = "f_{userinfo[id]}_{asset[id]}"
pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?&#]+)/likes/?"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?&#]+)/likes/?")
test = (
("https://www.artstation.com/mikf/likes", {
"pattern": r"https://\w+\.artstation\.com/p/assets"
r"/images/images/\d+/\d+/\d+/large/[^/]+",
@ -203,7 +203,7 @@ class ArtstationLikesExtractor(ArtstationExtractor):
("https://www.artstation.com/sungchoi/likes", {
"count": 0,
}),
]
)
def projects(self):
url = "{}/users/{}/likes.json".format(self.root, self.user)
@ -214,20 +214,20 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
"""Extractor for submissions of artstation challenges"""
subcategory = "challenge"
filename_fmt = "{submission_id}_{asset_id}_{name}.{extension}"
directory_fmt = ["{category}", "Challenges",
"{challenge[id]} - {challenge[title]}"]
directory_fmt = ("{category}", "Challenges",
"{challenge[id]} - {challenge[title]}")
archive_fmt = "c_{challenge[id]}_{asset_id}"
pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
r"/contests/[^/?&#]+/challenges/(\d+)"
r"/?(?:\?sorting=([a-z]+))?"]
test = [
(("https://www.artstation.com/contests/thu-2017/challenges/20"), None),
r"/?(?:\?sorting=([a-z]+))?")
test = (
("https://www.artstation.com/contests/thu-2017/challenges/20"),
(("https://www.artstation.com/contests/beyond-human"
"/challenges/23?sorting=winners"), {
"range": "1-30",
"count": 30,
}),
]
)
def __init__(self, match):
ArtstationExtractor.__init__(self)
@ -270,13 +270,11 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
class ArtstationSearchExtractor(ArtstationExtractor):
"""Extractor for artstation search results"""
subcategory = "search"
directory_fmt = ["{category}", "Searches", "{search[searchterm]}"]
directory_fmt = ("{category}", "Searches", "{search[searchterm]}")
archive_fmt = "s_{search[searchterm]}_{asset[id]}"
pattern = [r"(?:https?://)?(?:\w+\.)?artstation\.com"
r"/search/?\?([^#]+)"]
test = [
("https://www.artstation.com/search?sorting=recent&q=ancient", None),
]
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
r"/search/?\?([^#]+)")
test = ("https://www.artstation.com/search?sorting=recent&q=ancient",)
def __init__(self, match):
ArtstationExtractor.__init__(self)
@ -305,9 +303,9 @@ class ArtstationSearchExtractor(ArtstationExtractor):
class ArtstationImageExtractor(ArtstationExtractor):
"""Extractor for images from a single artstation project"""
subcategory = "image"
pattern = [r"(?:https?://)?(?:\w+\.)?artstation\.com"
r"/(?:artwork|projects|search)/(\w+)"]
test = [
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
r"/(?:artwork|projects|search)/(\w+)")
test = (
("https://www.artstation.com/artwork/LQVJr", {
"pattern": r"https?://\w+\.artstation\.com/p/assets"
r"/images/images/008/760/279/large/.+",
@ -326,8 +324,8 @@ class ArtstationImageExtractor(ArtstationExtractor):
"pattern": "ytdl:https://www.youtube.com/embed/JNFfJtwwrU0",
}),
# different URL pattern
("https://sungchoi.artstation.com/projects/LQVJr", None),
]
("https://sungchoi.artstation.com/projects/LQVJr"),
)
def __init__(self, match):
ArtstationExtractor.__init__(self)

@ -45,11 +45,11 @@ class BehanceExtractor(Extractor):
class BehanceGalleryExtractor(BehanceExtractor):
"""Extractor for image galleries from www.behance.net"""
subcategory = "gallery"
directory_fmt = ["{category}", "{owners:J, }", "{id} {name}"]
directory_fmt = ("{category}", "{owners:J, }", "{id} {name}")
filename_fmt = "{category}_{id}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
pattern = [r"(?:https?://)?(?:www\.)?behance\.net/gallery/(\d+)"]
test = [
pattern = r"(?:https?://)?(?:www\.)?behance\.net/gallery/(\d+)"
test = (
("https://www.behance.net/gallery/17386197/A-Short-Story", {
"count": 2,
"url": "ab79bd3bef8d3ae48e6ac74fd995c1dfaec1b7d2",
@ -67,7 +67,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
"url": "0258fe194fe7d828d6f2c7f6086a9a0a4140db1d",
"keyword": {"owners": ["Alex Strohl"]},
}),
]
)
def __init__(self, match):
BehanceExtractor.__init__(self)
@ -127,11 +127,11 @@ class BehanceUserExtractor(BehanceExtractor):
"""Extractor for a user's galleries from www.behance.net"""
subcategory = "user"
categorytransfer = True
pattern = [r"(?:https?://)?(?:www\.)?behance\.net/([^/?&#]+)/?$"]
test = [("https://www.behance.net/alexstrohl", {
pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?&#]+)/?$"
test = ("https://www.behance.net/alexstrohl", {
"count": ">= 8",
"pattern": BehanceGalleryExtractor.pattern[0],
})]
"pattern": BehanceGalleryExtractor.pattern,
})
def __init__(self, match):
BehanceExtractor.__init__(self)
@ -155,11 +155,11 @@ class BehanceCollectionExtractor(BehanceExtractor):
"""Extractor for a collection's galleries from www.behance.net"""
subcategory = "collection"
categorytransfer = True
pattern = [r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"]
test = [("https://www.behance.net/collection/170615607/Sky", {
pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
test = ("https://www.behance.net/collection/170615607/Sky", {
"count": ">= 13",
"pattern": BehanceGalleryExtractor.pattern[0],
})]
"pattern": BehanceGalleryExtractor.pattern,
})
def __init__(self, match):
BehanceExtractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -22,12 +22,12 @@ class BobxExtractor(Extractor):
class BobxGalleryExtractor(BobxExtractor):
"""Extractor for individual image galleries on bobx.com"""
subcategory = "gallery"
directory_fmt = ["{category}", "{model}", "{title}"]
directory_fmt = ("{category}", "{model}", "{title}")
filename_fmt = "{model}_{image_id}_{num:>03}.{extension}"
archive_fmt = "{image_id}"
pattern = [r"(?:https?://)?(?:www\.)?bobx\.com"
r"/([^/]+/[^/]+/photoset/[\w-]+)-\d+-\d+-\d+\.html"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?bobx\.com"
r"/([^/]+/[^/]+/photoset/[\w-]+)-\d+-\d+-\d+\.html")
test = (
(("http://www.bobx.com/idol/mikoto-hibi"
"/photoset/wpb-2018-_11-0-2-8.html"), {
"url": "93972d6a661f6627e963d62c9d15531e6b36a389",
@ -39,7 +39,7 @@ class BobxGalleryExtractor(BobxExtractor):
"url": "f5d6c0cd0881ae6f504c21a90d86e3464dc54e8e",
"keyword": "43395ac200deaaa50627da666bd02c8f1f86a59d",
}),
]
)
def __init__(self, match):
BobxExtractor.__init__(self)
@ -92,10 +92,10 @@ class BobxGalleryExtractor(BobxExtractor):
class BobxIdolExtractor(BobxExtractor):
"""Extractor for an idol's image galleries on bobx.com"""
subcategory = "idol"
pattern = [r"(?:https?://)?(?:www\.)?bobx\.com/([^/]+/[^/?&#]+)/?$"]
test = [("http://www.bobx.com/idol/rin-okabe/", {
pattern = r"(?:https?://)?(?:www\.)?bobx\.com/([^/]+/[^/?&#]+)/?$"
test = ("http://www.bobx.com/idol/rin-okabe/", {
"url": "74d80bfcd53b738b31909bb42e5cc97c41b475b8",
})]
})
def __init__(self, match):
BobxExtractor.__init__(self)

@ -137,7 +137,7 @@ class GelbooruPageMixin():
class TagMixin():
"""Extraction of images based on search-tags"""
subcategory = "tag"
directory_fmt = ["{category}", "{search_tags}"]
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
def __init__(self, match):
@ -153,7 +153,7 @@ class TagMixin():
class PoolMixin():
"""Extraction of image-pools"""
subcategory = "pool"
directory_fmt = ["{category}", "pool", "{pool}"]
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
def __init__(self, match):
@ -212,7 +212,7 @@ class PostMixin():
class PopularMixin():
"""Extraction and metadata handling for Danbooru v2"""
subcategory = "popular"
directory_fmt = ["{category}", "popular", "{scale}", "{date}"]
directory_fmt = ("{category}", "popular", "{scale}", "{date}")
archive_fmt = "P_{scale[0]}_{date}_{id}"
page_start = None
sort = True

@ -16,7 +16,7 @@ class ChanThreadExtractor(Extractor):
"""Base class for extractors for Futaba Channel-like boards"""
category = "chan"
subcategory = "thread"
directory_fmt = ["{category}", "{board}", "{thread} - {title}"]
directory_fmt = ("{category}", "{board}", "{thread} - {title}")
filename_fmt = "{tim}-{filename}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
api_url = ""

@ -26,7 +26,7 @@ class Extractor():
category = ""
subcategory = ""
categorytransfer = False
directory_fmt = ["{category}"]
directory_fmt = ("{category}",)
filename_fmt = "{name}.{extension}"
archive_fmt = ""
cookiedomain = ""
@ -193,9 +193,9 @@ class Extractor():
class ChapterExtractor(Extractor):
subcategory = "chapter"
directory_fmt = [
directory_fmt = (
"{category}", "{manga}",
"{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}"]
"{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}")
filename_fmt = (
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
archive_fmt = (

@ -37,8 +37,8 @@ class DanbooruExtractor(booru.DanbooruPageMixin, booru.BooruExtractor):
class DanbooruTagExtractor(booru.TagMixin, DanbooruExtractor):
"""Extractor for images from danbooru based on search-tags"""
pattern = [BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)"]
test = [
pattern = BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)"
test = (
("https://danbooru.donmai.us/posts?tags=bonocho", {
"content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
}),
@ -46,38 +46,38 @@ class DanbooruTagExtractor(booru.TagMixin, DanbooruExtractor):
("https://danbooru.donmai.us/posts?tags=canvas_%28cocktail_soft%29", {
"count": ">= 50",
}),
("https://hijiribe.donmai.us/posts?tags=bonocho", None),
("https://sonohara.donmai.us/posts?tags=bonocho", None),
("https://safebooru.donmai.us/posts?tags=bonocho", None),
]
("https://hijiribe.donmai.us/posts?tags=bonocho"),
("https://sonohara.donmai.us/posts?tags=bonocho"),
("https://safebooru.donmai.us/posts?tags=bonocho"),
)
class DanbooruPoolExtractor(booru.PoolMixin, DanbooruExtractor):
"""Extractor for image-pools from danbooru"""
pattern = [BASE_PATTERN + r"/pools/(?P<pool>\d+)"]
test = [("https://danbooru.donmai.us/pools/7659", {
pattern = BASE_PATTERN + r"/pools/(?P<pool>\d+)"
test = ("https://danbooru.donmai.us/pools/7659", {
"content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
})]
})
class DanbooruPostExtractor(booru.PostMixin, DanbooruExtractor):
"""Extractor for single images from danbooru"""
pattern = [BASE_PATTERN + r"/posts/(?P<post>\d+)"]
test = [("https://danbooru.donmai.us/posts/294929", {
pattern = BASE_PATTERN + r"/posts/(?P<post>\d+)"
test = ("https://danbooru.donmai.us/posts/294929", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
})]
})
class DanbooruPopularExtractor(booru.PopularMixin, DanbooruExtractor):
"""Extractor for popular images from danbooru"""
pattern = [BASE_PATTERN + r"/explore/posts/popular(?:\?(?P<query>[^#]*))?"]
test = [
("https://danbooru.donmai.us/explore/posts/popular", None),
pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?(?P<query>[^#]*))?"
test = (
("https://danbooru.donmai.us/explore/posts/popular"),
(("https://danbooru.donmai.us/explore/posts/popular"
"?date=2013-06-06+03%3A34%3A22+-0400&scale=week"), {
"count": ">= 1",
}),
]
)
def __init__(self, match):
super().__init__(match)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -29,7 +29,7 @@ BASE_PATTERN = (
class DeviantartExtractor(Extractor):
"""Base class for deviantart extractors"""
category = "deviantart"
directory_fmt = ["{category}", "{author[username]!l}"]
directory_fmt = ("{category}", "{author[username]!l}")
filename_fmt = "{category}_{index}_{title}.{extension}"
root = "https://www.deviantart.com"
@ -202,8 +202,8 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
"""Extractor for all deviations from an artist's gallery"""
subcategory = "gallery"
archive_fmt = "g_{username}_{index}.{extension}"
pattern = [BASE_PATTERN + r"(?:/(?:gallery/?(?:\?catpath=/)?)?)?$"]
test = [
pattern = BASE_PATTERN + r"(?:/(?:gallery/?(?:\?catpath=/)?)?)?$"
test = (
("https://www.deviantart.com/shimoda7/gallery/", {
"pattern": r"https://(s3.amazonaws.com/origin-(img|orig)"
r".deviantart.net/|images-wixmp-\w+.wixmp.com/)",
@ -258,11 +258,11 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
("https://www.deviantart.com/shimoda8/gallery/", {
"exception": exception.NotFoundError,
}),
("https://www.deviantart.com/shimoda7/gallery/?catpath=/", None),
("https://shimoda7.deviantart.com/gallery/", None),
("https://yakuzafc.deviantart.com/", None),
("https://shimoda7.deviantart.com/gallery/?catpath=/", None),
]
("https://www.deviantart.com/shimoda7/gallery/?catpath=/"),
("https://shimoda7.deviantart.com/gallery/"),
("https://yakuzafc.deviantart.com/"),
("https://shimoda7.deviantart.com/gallery/?catpath=/"),
)
def deviations(self):
if self.flat and not self.group:
@ -275,10 +275,10 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
class DeviantartFolderExtractor(DeviantartExtractor):
"""Extractor for deviations inside an artist's gallery folder"""
subcategory = "folder"
directory_fmt = ["{category}", "{folder[owner]}", "{folder[title]}"]
directory_fmt = ("{category}", "{folder[owner]}", "{folder[title]}")
archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
pattern = [BASE_PATTERN + r"/gallery/(\d+)/([^/?&#]+)"]
test = [
pattern = BASE_PATTERN + r"/gallery/(\d+)/([^/?&#]+)"
test = (
("https://www.deviantart.com/shimoda7/gallery/722019/Miscellaneous", {
"count": 5,
"options": (("original", False),),
@ -287,9 +287,9 @@ class DeviantartFolderExtractor(DeviantartExtractor):
"count": ">= 4",
"options": (("original", False),),
}),
("https://shimoda7.deviantart.com/gallery/722019/Miscellaneous", None),
("https://yakuzafc.deviantart.com/gallery/37412168/Crafts", None),
]
("https://shimoda7.deviantart.com/gallery/722019/Miscellaneous"),
("https://yakuzafc.deviantart.com/gallery/37412168/Crafts"),
)
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
@ -312,8 +312,8 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
"""Extractor for single deviations"""
subcategory = "deviation"
archive_fmt = "{index}.{extension}"
pattern = [BASE_PATTERN + r"/(?:art|journal)/[^/?&#]+-\d+"]
test = [
pattern = BASE_PATTERN + r"/(?:art|journal)/[^/?&#]+-\d+"
test = (
(("https://www.deviantart.com/shimoda7/art/"
"For-the-sake-of-a-memory-10073852"), {
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
@ -326,12 +326,12 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
"pattern": (r"https?://s3\.amazonaws\.com/origin-orig\."
r"deviantart\.net/a383/f/2013/135/e/7/[^.]+\.jpg\?"),
}),
(("https://shimoda7.deviantart.com/art/"
"For-the-sake-of-a-memory-10073852"), None),
("https://zzz.deviantart.com/art/zzz-1234567890", None),
(("https://myria-moon.deviantart.com/art/"
"Aime-Moi-part-en-vadrouille-261986576"), None),
]
("https://shimoda7.deviantart.com"
"/art/For-the-sake-of-a-memory-10073852"),
("https://myria-moon.deviantart.com"
"/art/Aime-Moi-part-en-vadrouille-261986576"),
("https://zzz.deviantart.com/art/zzz-1234567890"),
)
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
@ -351,20 +351,20 @@ class DeviantartStashExtractor(DeviantartDeviationExtractor):
"""Extractor for sta.sh-ed deviations"""
subcategory = "stash"
archive_fmt = "{index}.{extension}"
pattern = [r"(?:https?://)?sta\.sh/()()[a-z0-9]+"]
test = [
pattern = r"(?:https?://)?sta\.sh/()()[a-z0-9]+"
test = (
("https://sta.sh/022c83odnaxc", {
"pattern": r"https://s3.amazonaws.com/origin-orig.deviantart.net",
"count": 1,
}),
("https://sta.sh/21jf51j7pzl2", {
"pattern": pattern[0],
"pattern": pattern,
"count": 4,
}),
("https://sta.sh/abcdefghijkl", {
"exception": exception.HttpError,
}),
]
)
def deviations(self):
page = self.request(self.url).text
@ -382,17 +382,17 @@ class DeviantartStashExtractor(DeviantartDeviationExtractor):
class DeviantartFavoriteExtractor(DeviantartExtractor):
"""Extractor for an artist's favorites"""
subcategory = "favorite"
directory_fmt = ["{category}", "{username}", "Favourites"]
directory_fmt = ("{category}", "{username}", "Favourites")
archive_fmt = "f_{username}_{index}.{extension}"
pattern = [BASE_PATTERN + r"/favourites/?(?:\?catpath=/)?$"]
test = [
pattern = BASE_PATTERN + r"/favourites/?(?:\?catpath=/)?$"
test = (
("https://www.deviantart.com/h3813067/favourites/", {
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}),
("https://www.deviantart.com/h3813067/favourites/?catpath=/", None),
("https://h3813067.deviantart.com/favourites/", None),
("https://h3813067.deviantart.com/favourites/?catpath=/", None),
]
("https://www.deviantart.com/h3813067/favourites/?catpath=/"),
("https://h3813067.deviantart.com/favourites/"),
("https://h3813067.deviantart.com/favourites/?catpath=/"),
)
def deviations(self):
folders = self.api.collections_folders(self.user)
@ -408,19 +408,19 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
class DeviantartCollectionExtractor(DeviantartExtractor):
"""Extractor for a single favorite collection"""
subcategory = "collection"
directory_fmt = ["{category}", "{collection[owner]}",
"Favourites", "{collection[title]}"]
directory_fmt = ("{category}", "{collection[owner]}",
"Favourites", "{collection[title]}")
archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
pattern = [BASE_PATTERN + r"/favourites/(\d+)/([^/?&#]+)"]
test = [
pattern = BASE_PATTERN + r"/favourites/(\d+)/([^/?&#]+)"
test = (
(("https://www.deviantart.com/pencilshadings"
"/favourites/70595441/3D-Favorites"), {
"count": ">= 20",
"options": (("original", False),),
}),
(("https://pencilshadings.deviantart.com"
"/favourites/70595441/3D-Favorites"), None),
]
("https://pencilshadings.deviantart.com"
"/favourites/70595441/3D-Favorites"),
)
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
@ -442,10 +442,10 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
class DeviantartJournalExtractor(DeviantartExtractor):
"""Extractor for an artist's journals"""
subcategory = "journal"
directory_fmt = ["{category}", "{username}", "Journal"]
directory_fmt = ("{category}", "{username}", "Journal")
archive_fmt = "j_{username}_{index}.{extension}"
pattern = [BASE_PATTERN + r"/(?:journal|blog)/?(?:\?catpath=/)?$"]
test = [
pattern = BASE_PATTERN + r"/(?:journal|blog)/?(?:\?catpath=/)?$"
test = (
("https://www.deviantart.com/angrywhitewanker/journal/", {
"url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44",
}),
@ -457,10 +457,10 @@ class DeviantartJournalExtractor(DeviantartExtractor):
"count": 0,
"options": (("journals", "none"),),
}),
("https://www.deviantart.com/shimoda7/journal/?catpath=/", None),
("https://angrywhitewanker.deviantart.com/journal/", None),
("https://shimoda7.deviantart.com/journal/?catpath=/", None),
]
("https://www.deviantart.com/shimoda7/journal/?catpath=/"),
("https://angrywhitewanker.deviantart.com/journal/"),
("https://shimoda7.deviantart.com/journal/?catpath=/"),
)
def deviations(self):
return self.api.browse_user_journals(self.user, self.offset)
@ -469,17 +469,17 @@ class DeviantartJournalExtractor(DeviantartExtractor):
class DeviantartPopularExtractor(DeviantartExtractor):
"""Extractor for popular deviations"""
subcategory = "popular"
directory_fmt = ["{category}", "Popular",
"{popular[range]}", "{popular[search]}"]
directory_fmt = ("{category}", "Popular",
"{popular[range]}", "{popular[search]}")
archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}"
pattern = [r"(?:https?://)?www\.deviantart\.com"
r"((?:/\w+)*)/(?:popular-([^/?&#]+))/?(?:\?([^#]*))?"]
test = [
pattern = (r"(?:https?://)?www\.deviantart\.com"
r"((?:/\w+)*)/(?:popular-([^/?&#]+))/?(?:\?([^#]*))?")
test = (
("https://www.deviantart.com/popular-24-hours/?q=tree+house", {
"options": (("original", False),),
}),
("https://www.deviantart.com/artisan/popular-all-time/?q=tree", None),
]
("https://www.deviantart.com/artisan/popular-all-time/?q=tree"),
)
def __init__(self, match):
DeviantartExtractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2018 Mike Fährmann
# Copyright 2017-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -17,10 +17,10 @@ class DirectlinkExtractor(Extractor):
category = "directlink"
filename_fmt = "{domain}/{path}"
archive_fmt = "{domain}/{path}"
pattern = [r"https?://(?P<domain>[^/]+)/(?P<path>[^?&#]+\."
pattern = (r"https?://(?P<domain>[^/]+)/(?P<path>[^?&#]+\."
r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$"]
test = [
r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$")
test = (
(("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), {
"url": "18c5d00077332e98e53be9fed2ee4be66154b88d",
"keyword": "66bce3a0a6872d8497e1984eb49d54a3ed0d3d5e",
@ -35,7 +35,7 @@ class DirectlinkExtractor(Extractor):
"url": "2627e8140727fdf743f86fe18f69f99a052c9718",
"keyword": "c658b8b6213e46be15a25e492df385ece5771bdf",
}),
]
)
def __init__(self, match):
Extractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -17,8 +17,8 @@ import json
class DynastyscansChapterExtractor(ChapterExtractor):
"""Extractor for manga-chapters from dynasty-scans.com"""
category = "dynastyscans"
pattern = [r"(?:https?://)?(?:www\.)?dynasty-scans\.com/chapters/([^/]+)"]
test = [
pattern = r"(?:https?://)?(?:www\.)?dynasty-scans\.com/chapters/([^/]+)"
test = (
(("http://dynasty-scans.com/chapters/"
"hitoribocchi_no_oo_seikatsu_ch33"), {
"url": "dce64e8c504118f1ab4135c00245ea12413896cb",
@ -29,7 +29,7 @@ class DynastyscansChapterExtractor(ChapterExtractor):
"url": "dbe5bbb74da2edcfb1832895a484e2a40bc8b538",
"keyword": "1208a102d9a1bb0b0c740a67996d9b26a9357b64",
}),
]
)
root = "https://dynasty-scans.com"
def __init__(self, match):

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2018 Mike Fährmann
# Copyright 2014-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -21,30 +21,30 @@ class E621Extractor(booru.MoebooruPageMixin, booru.BooruExtractor):
class E621TagExtractor(booru.TagMixin, E621Extractor):
"""Extractor for images from e621.net based on search-tags"""
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post"
r"(?:/index/\d+/|\?tags=)(?P<tags>[^/?&#]+)"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?e621\.net/post"
r"(?:/index/\d+/|\?tags=)(?P<tags>[^/?&#]+)")
test = (
("https://e621.net/post/index/1/anry", {
"url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
"content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
}),
("https://e621.net/post?tags=anry", None),
]
("https://e621.net/post?tags=anry"),
)
class E621PoolExtractor(booru.PoolMixin, E621Extractor):
"""Extractor for image-pools from e621.net"""
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(?P<pool>\d+)"]
test = [("https://e621.net/pool/show/73", {
pattern = r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(?P<pool>\d+)"
test = ("https://e621.net/pool/show/73", {
"url": "842f2fb065c7c339486a9b1d689020b8569888ed",
"content": "c2c87b7a9150509496cddc75ccab08109922876a",
})]
})
class E621PostExtractor(booru.PostMixin, E621Extractor):
"""Extractor for single images from e621.net"""
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/show/(?P<post>\d+)"]
test = [("https://e621.net/post/show/535", {
pattern = r"(?:https?://)?(?:www\.)?e621\.net/post/show/(?P<post>\d+)"
test = ("https://e621.net/post/show/535", {
"url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
"content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
"options": (("tags", True),),
@ -53,17 +53,17 @@ class E621PostExtractor(booru.PostMixin, E621Extractor):
"tags_general": str,
"tags_species": str,
},
})]
})
class E621PopularExtractor(booru.MoebooruPopularMixin, E621Extractor):
"""Extractor for popular images from e621.net"""
pattern = [r"(?:https?://)?(?:www\.)?e621\.net"
pattern = (r"(?:https?://)?(?:www\.)?e621\.net"
r"/post/popular_by_(?P<scale>day|week|month)"
r"(?:\?(?P<query>[^#]*))?"]
test = [("https://e621.net/post/popular_by_month?month=6&year=2013", {
r"(?:\?(?P<query>[^#]*))?")
test = ("https://e621.net/post/popular_by_month?month=6&year=2013", {
"count": 32,
})]
})
def __init__(self, match):
super().__init__(match)

@ -23,7 +23,7 @@ BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org"
class ExhentaiExtractor(Extractor):
"""Base class for exhentai extractors"""
category = "exhentai"
directory_fmt = ["{category}", "{gallery_id}"]
directory_fmt = ("{category}", "{gallery_id}")
filename_fmt = "{gallery_id}_{num:>04}_{image_token}_{name}.{extension}"
archive_fmt = "{gallery_id}_{num}"
cookiedomain = ".exhentai.org"
@ -104,10 +104,10 @@ class ExhentaiExtractor(Extractor):
class ExhentaiGalleryExtractor(ExhentaiExtractor):
"""Extractor for image galleries from exhentai.org"""
subcategory = "gallery"
pattern = [BASE_PATTERN +
pattern = (BASE_PATTERN +
r"(?:/g/(\d+)/([\da-f]{10})"
r"|/s/([\da-f]{10})/(\d+)-(\d+))"]
test = [
r"|/s/([\da-f]{10})/(\d+)-(\d+))")
test = (
("https://exhentai.org/g/960460/4f0e369d82/", {
"keyword": "ba0785e49e3877cfa3f91c1ad9a5ac7816339bf5",
"content": "493d759de534355c9f55f8e365565b62411de146",
@ -121,9 +121,9 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
("https://exhentai.org/s/3957343c3b/960460-5", {
"count": 2,
}),
("https://e-hentai.org/g/960460/4f0e369d82/", None),
("https://g.e-hentai.org/g/960460/4f0e369d82/", None),
]
("https://e-hentai.org/g/960460/4f0e369d82/"),
("https://g.e-hentai.org/g/960460/4f0e369d82/"),
)
def __init__(self, match):
ExhentaiExtractor.__init__(self)
@ -324,17 +324,17 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
class ExhentaiSearchExtractor(ExhentaiExtractor):
"""Extractor for exhentai search results"""
subcategory = "search"
pattern = [BASE_PATTERN + r"/?\?(.*)$"]
test = [
("https://exhentai.org/?f_search=touhou", None),
pattern = BASE_PATTERN + r"/?\?(.*)$"
test = (
("https://exhentai.org/?f_search=touhou"),
(("https://exhentai.org/?f_doujinshi=0&f_manga=0&f_artistcg=0"
"&f_gamecg=0&f_western=0&f_non-h=1&f_imageset=0&f_cosplay=0"
"&f_asianporn=0&f_misc=0&f_search=touhou&f_apply=Apply+Filter"), {
"pattern": ExhentaiGalleryExtractor.pattern[0],
"pattern": ExhentaiGalleryExtractor.pattern,
"range": "1-30",
"count": 30,
}),
]
)
def __init__(self, match):
ExhentaiExtractor.__init__(self)
@ -388,12 +388,12 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
"""Extractor for favorited exhentai galleries"""
subcategory = "favorite"
pattern = [BASE_PATTERN + r"/favorites\.php(?:\?(.*))?"]
test = [
("https://exhentai.org/favorites.php", None),
pattern = BASE_PATTERN + r"/favorites\.php(?:\?(.*))?"
test = (
("https://exhentai.org/favorites.php"),
("https://exhentai.org/favorites.php?favcat=1&f_search=touhou"
"&f_apply=Search+Favorites", None),
]
"&f_apply=Search+Favorites"),
)
def __init__(self, match):
ExhentaiSearchExtractor.__init__(self, match)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2018 Mike Fährmann
# Copyright 2017-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -16,9 +16,9 @@ import json
class FallenangelsChapterExtractor(ChapterExtractor):
"""Extractor for manga-chapters from fascans.com"""
category = "fallenangels"
pattern = [(r"(?:https?://)?(manga|truyen)\.fascans\.com"
r"/manga/([^/]+)/(\d+)(\.[^/?&#]+)?")]
test = [
pattern = (r"(?:https?://)?(manga|truyen)\.fascans\.com"
r"/manga/([^/]+)/(\d+)(\.[^/?&#]+)?")
test = (
("https://manga.fascans.com/manga/chronos-ruler/20/1", {
"url": "4604a7914566cc2da0ff789aa178e2d1c8c241e3",
"keyword": "4e1722cf0ed8ee5fc5c64147ac3f39342e767cd8",
@ -30,7 +30,7 @@ class FallenangelsChapterExtractor(ChapterExtractor):
("http://manga.fascans.com/manga/rakudai-kishi-no-eiyuutan/19.5", {
"keyword": "bf7dd1c462a80ffe50b92fec00b7acda2f8b800e",
}),
]
)
def __init__(self, match):
self.version, self.manga, self.chapter, self.minor = match.groups()
@ -64,9 +64,9 @@ class FallenangelsChapterExtractor(ChapterExtractor):
class FallenangelsMangaExtractor(MangaExtractor):
"""Extractor for manga from fascans.com"""
category = "fallenangels"
pattern = [r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$"]
pattern = r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$"
scheme = "https"
test = [
test = (
("http://manga.fascans.com/manga/trinity-seven", {
"url": "92699a250ff7d5adcf4b06e6a45b0c05f3426643",
"keyword": "afc785c37da7c48e639d3a596e8e0401482b628f",
@ -75,7 +75,7 @@ class FallenangelsMangaExtractor(MangaExtractor):
"url": "51a731a6b82d5eb7a335fbae6b02d06aeb2ab07b",
"keyword": "2d2a2a5d9ea5925eb9a47bb13d848967f3af086c",
}),
]
)
def __init__(self, match):
MangaExtractor.__init__(self, match)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2018 Mike Fährmann
# Copyright 2017-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -45,11 +45,11 @@ class FlickrImageExtractor(FlickrExtractor):
"""Extractor for individual images from flickr.com"""
subcategory = "image"
archive_fmt = "{id}"
pattern = [r"(?:https?://)?(?:"
pattern = (r"(?:https?://)?(?:"
r"(?:(?:www\.|m\.)?flickr\.com/photos/[^/]+/"
r"|[^.]+\.static\.?flickr\.com/(?:\d+/)+)(\d+)"
r"|flic\.kr/p/([A-Za-z1-9]+))"]
test = [
r"|flic\.kr/p/([A-Za-z1-9]+))")
test = (
("https://www.flickr.com/photos/departingyyz/16089302239", {
"url": "7f0887f5953f61c8b79a695cb102ea309c0346b0",
"keyword": "5ecdaf0192802451b7daca9b81f393f207ff7ee9",
@ -63,7 +63,7 @@ class FlickrImageExtractor(FlickrExtractor):
"url": "92c54a00f31040c349cb2abcb1b9abe30cc508ae"}),
("https://www.flickr.com/photos/zzz/16089302238", {
"exception": exception.NotFoundError}),
]
)
def __init__(self, match):
FlickrExtractor.__init__(self, match)
@ -107,12 +107,12 @@ class FlickrImageExtractor(FlickrExtractor):
class FlickrAlbumExtractor(FlickrExtractor):
"""Extractor for photo albums from flickr.com"""
subcategory = "album"
directory_fmt = ["{category}", "{subcategory}s",
"{album[id]} - {album[title]}"]
directory_fmt = ("{category}", "{subcategory}s",
"{album[id]} - {album[title]}")
archive_fmt = "a_{album[id]}_{id}"
pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/"
r"photos/([^/]+)/(?:album|set)s(?:/(\d+))?"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?flickr\.com/"
r"photos/([^/]+)/(?:album|set)s(?:/(\d+))?")
test = (
(("https://www.flickr.com/photos/shona_s/albums/72157633471741607"), {
"url": "baf4a3d1b15afcecf9638000a12c0eb3d5df9024",
"keyword": "b579f19134ab8217f05979e52adf7712898492c7",
@ -121,7 +121,7 @@ class FlickrAlbumExtractor(FlickrExtractor):
"url": "657d541470482e0d69deec33ab97a6d7d4af6fe4",
"keyword": "736a41a7d702f7fe00edc957ae201d84f745e654",
}),
]
)
def __init__(self, match):
FlickrExtractor.__init__(self, match)
@ -156,16 +156,16 @@ class FlickrAlbumExtractor(FlickrExtractor):
class FlickrGalleryExtractor(FlickrExtractor):
"""Extractor for photo galleries from flickr.com"""
subcategory = "gallery"
directory_fmt = ["{category}", "galleries",
"{user[username]} {gallery[id]}"]
directory_fmt = ("{category}", "galleries",
"{user[username]} {gallery[id]}")
archive_fmt = "g_{gallery[id]}_{id}"
pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/"
r"photos/([^/]+)/galleries/(\d+)"]
test = [(("https://www.flickr.com/photos/flickr/"
"galleries/72157681572514792/"), {
pattern = (r"(?:https?://)?(?:www\.)?flickr\.com/"
r"photos/([^/]+)/galleries/(\d+)")
test = (("https://www.flickr.com/photos/flickr/"
"galleries/72157681572514792/"), {
"url": "1d012592bc7ce3a24b2b025b1176a31e947122f6",
"keyword": "30cdec50e125f1cdf2425eab6052590535323c2d",
})]
})
def __init__(self, match):
FlickrExtractor.__init__(self, match)
@ -186,13 +186,12 @@ class FlickrGalleryExtractor(FlickrExtractor):
class FlickrGroupExtractor(FlickrExtractor):
"""Extractor for group pools from flickr.com"""
subcategory = "group"
directory_fmt = ["{category}", "{subcategory}s", "{group[groupname]}"]
directory_fmt = ("{category}", "{subcategory}s", "{group[groupname]}")
archive_fmt = "G_{group[nsid]}_{id}"
pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/groups/([^/]+)"]
test = [("https://www.flickr.com/groups/bird_headshots/", {
"pattern": (r"https?://farm\d+\.staticflickr\.com"
r"/\d+/\d+_[0-9a-f]+(_[a-z])?\.[a-z]+"),
})]
pattern = r"(?:https?://)?(?:www\.)?flickr\.com/groups/([^/]+)"
test = ("https://www.flickr.com/groups/bird_headshots/", {
"pattern": FlickrImageExtractor.pattern,
})
def data(self):
self.group = self.api.urls_lookupGroup(self.item_id)
@ -205,13 +204,13 @@ class FlickrGroupExtractor(FlickrExtractor):
class FlickrUserExtractor(FlickrExtractor):
"""Extractor for the photostream of a flickr user"""
subcategory = "user"
directory_fmt = ["{category}", "{user[username]}"]
directory_fmt = ("{category}", "{user[username]}")
archive_fmt = "u_{user[nsid]}_{id}"
pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/photos/([^/]+)/?$"]
test = [("https://www.flickr.com/photos/shona_s/", {
pattern = r"(?:https?://)?(?:www\.)?flickr\.com/photos/([^/]+)/?$"
test = ("https://www.flickr.com/photos/shona_s/", {
"url": "d125b536cd8c4229363276b6c84579c394eec3a2",
"keyword": "2cdeae22cd9c3ff19ce905215f3782a7494d8264",
})]
})
def photos(self):
return self.api.people_getPhotos(self.user["nsid"])
@ -220,13 +219,13 @@ class FlickrUserExtractor(FlickrExtractor):
class FlickrFavoriteExtractor(FlickrExtractor):
"""Extractor for favorite photos of a flickr user"""
subcategory = "favorite"
directory_fmt = ["{category}", "{subcategory}s", "{user[username]}"]
directory_fmt = ("{category}", "{subcategory}s", "{user[username]}")
archive_fmt = "f_{user[nsid]}_{id}"
pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/photos/([^/]+)/favorites"]
test = [("https://www.flickr.com/photos/shona_s/favorites", {
pattern = r"(?:https?://)?(?:www\.)?flickr\.com/photos/([^/]+)/favorites"
test = ("https://www.flickr.com/photos/shona_s/favorites", {
"url": "5129b3f5bfa83cc25bdae3ce476036de1488dad2",
"keyword": "0e1c9521b6051411b585c9b41a4dc0bcde20e616",
})]
})
def photos(self):
return self.api.favorites_getList(self.user["nsid"])
@ -235,14 +234,14 @@ class FlickrFavoriteExtractor(FlickrExtractor):
class FlickrSearchExtractor(FlickrExtractor):
"""Extractor for flickr photos based on search results"""
subcategory = "search"
directory_fmt = ["{category}", "{subcategory}", "{search[text]}"]
directory_fmt = ("{category}", "{subcategory}", "{search[text]}")
archive_fmt = "s_{search}_{id}"
pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/search/?\?([^#]+)"]
test = [
(("https://flickr.com/search/?text=mountain"), None),
(("https://flickr.com/search/?text=tree%20cloud%20house"
"&color_codes=4&styles=minimalism"), None),
]
pattern = r"(?:https?://)?(?:www\.)?flickr\.com/search/?\?([^#]+)"
test = (
("https://flickr.com/search/?text=mountain"),
("https://flickr.com/search/?text=tree%20cloud%20house"
"&color_codes=4&styles=minimalism"),
)
def __init__(self, match):
FlickrExtractor.__init__(self, match)

@ -19,8 +19,8 @@ class FoolfuukaThreadExtractor(SharedConfigMixin, Extractor):
"""Base extractor for FoolFuuka based boards/archives"""
basecategory = "foolfuuka"
subcategory = "thread"
directory_fmt = ["{category}", "{board[shortname]}",
"{thread_num}{title:? - //}"]
directory_fmt = ("{category}", "{board[shortname]}",
"{thread_num}{title:? - //}")
filename_fmt = "{media[media]}"
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
root = ""
@ -96,7 +96,7 @@ def generate_extractors():
Extr.__name__ = Extr.__qualname__ = name + "ThreadExtractor"
Extr.__doc__ = "Extractor for threads on " + domain
Extr.category = category
Extr.pattern = [r"(?:https?://)?" + pattern + r"/([^/]+)/thread/(\d+)"]
Extr.pattern = r"(?:https?://)?" + pattern + r"/([^/]+)/thread/(\d+)"
Extr.test = info.get("test")
Extr.root = root
if info.get("remote") == "simple":
@ -109,13 +109,13 @@ EXTRACTORS = {
"name": "fourplebs",
"root": "https://archive.4plebs.org",
"pattern": r"(?:archive\.)?4plebs\.org",
"test": [("https://archive.4plebs.org/tg/thread/54059290", {
"test": ("https://archive.4plebs.org/tg/thread/54059290", {
"url": "07452944164b602502b02b24521f8cee5c484d2a",
})],
}),
},
"archivedmoe": {
"root": "https://archived.moe",
"test": [
"test": (
("https://archived.moe/gd/thread/309639/", {
"url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
"content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573",
@ -123,60 +123,60 @@ EXTRACTORS = {
("https://archived.moe/a/thread/159767162/", {
"url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
}),
],
),
},
"archiveofsins": {
"root": "https://archiveofsins.com",
"pattern": r"(?:www\.)?archiveofsins\.com",
"test": [("https://archiveofsins.com/h/thread/4668813/", {
"test": ("https://archiveofsins.com/h/thread/4668813/", {
"url": "f612d287087e10a228ef69517cf811539db9a102",
"content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
})],
}),
},
"b4k": {
"root": "https://arch.b4k.co",
"remote": "simple",
"test": [("https://arch.b4k.co/meta/thread/196/", {
"test": ("https://arch.b4k.co/meta/thread/196/", {
"url": "cdd4931ac1cd00264b0b54e2e3b0d8f6ae48957e",
})],
}),
},
"desuarchive": {
"root": "https://desuarchive.org",
"test": [("https://desuarchive.org/a/thread/159542679/", {
"test": ("https://desuarchive.org/a/thread/159542679/", {
"url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406",
})],
}),
},
"fireden": {
"root": "https://boards.fireden.net",
"test": [("https://boards.fireden.net/a/thread/159803223/", {
"test": ("https://boards.fireden.net/a/thread/159803223/", {
"url": "01b7baacfb0656a68e566368290e3072b27f86c9",
})],
}),
},
"nyafuu": {
"root": "https://archive.nyafuu.org",
"pattern": r"(?:archive\.)?nyafuu\.org",
"test": [("https://archive.nyafuu.org/c/thread/2849220/", {
"test": ("https://archive.nyafuu.org/c/thread/2849220/", {
"url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
})],
}),
},
"rbt": {
"root": "https://rbt.asia",
"pattern": r"(?:rbt\.asia|(?:archive\.)?rebeccablacktech\.com)",
"test": [
"test": (
("https://rbt.asia/g/thread/61487650/", {
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
}),
("https://archive.rebeccablacktech.com/g/thread/61487650/", {
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
}),
],
),
},
"thebarchive": {
"root": "https://thebarchive.com",
"pattern": r"thebarchive\.com",
"test": [("https://thebarchive.com/b/thread/739772332/", {
"test": ("https://thebarchive.com/b/thread/739772332/", {
"url": "e8b18001307d130d67db31740ce57c8561b5d80c",
})],
}),
},
}

@ -38,8 +38,8 @@ class FoolslideBase(SharedConfigMixin):
class FoolslideChapterExtractor(FoolslideBase, Extractor):
"""Base class for chapter extractors for FoOlSlide based sites"""
subcategory = "chapter"
directory_fmt = [
"{category}", "{manga}", "{chapter_string}"]
directory_fmt = (
"{category}", "{manga}", "{chapter_string}")
filename_fmt = (
"{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
archive_fmt = "{id}"
@ -153,8 +153,8 @@ def generate_extractors():
ChExtr.__name__ = ChExtr.__qualname__ = name + "ChapterExtractor"
ChExtr.__doc__ = "Extractor for manga-chapters from " + domain
ChExtr.category = category
ChExtr.pattern = [r"(?:https?://)?" + pattern +
r"(/read/[^/?&#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"]
ChExtr.pattern = (r"(?:https?://)?" + pattern +
r"(/read/[^/?&#]+/[a-z-]+/\d+/\d+(?:/\d+)?)")
ChExtr.test = info.get("test-chapter")
ChExtr.root = root
if "decode" in info:
@ -167,7 +167,7 @@ def generate_extractors():
MaExtr.__name__ = MaExtr.__qualname__ = name + "MangaExtractor"
MaExtr.__doc__ = "Extractor for manga from " + domain
MaExtr.category = category
MaExtr.pattern = [r"(?:https?://)?" + pattern + r"(/series/[^/?&#]+)"]
MaExtr.pattern = r"(?:https?://)?" + pattern + r"(/series/[^/?&#]+)"
MaExtr.test = info.get("test-manga")
MaExtr.root = root
symtable[MaExtr.__name__] = MaExtr
@ -176,65 +176,59 @@ def generate_extractors():
EXTRACTORS = {
"dokireader": {
"root": "https://kobato.hologfx.com/reader",
"test-chapter": [
"test-chapter":
(("https://kobato.hologfx.com/reader/read/"
"hitoribocchi_no_oo_seikatsu/en/3/34"), {
"keyword": "998d1d523da028284b8dd4b7b54ceae4af6cb65a",
}),
],
"test-manga": [
"test-manga":
(("https://kobato.hologfx.com/reader/series/"
"boku_ha_ohimesama_ni_narenai/"), {
"url": "1c1f5a7258ce4f631f5fc32be548d78a6a57990d",
"keyword": "614d89a6045b85c822cbd3e67578ea7577dfc995",
}),
],
},
"jaiminisbox": {
"root": "https://jaiminisbox.com/reader",
"pattern": r"(?:www\.)?jaiminisbox\.com/reader",
"decode": "base64",
"test-chapter": [
"test-chapter": (
("https://jaiminisbox.com/reader/read/uratarou/en/0/1/", {
"keyword": "d8919bc8f0351b44e938862214e654401962b5a5",
}),
("https://jaiminisbox.com/reader/read/dr-stone/en/0/16/", {
"keyword": "9b658599651f1ae87cab3e0e29dd21e8337a362c",
}),
],
"test-manga": [
),
"test-manga":
("https://jaiminisbox.com/reader/series/sora_no_kian/", {
"url": "66612be177dc3b3fa1d1f537ef02f4f701b163ea",
"keyword": "0908a4145bb03acc4210f5d01169988969f5acd1",
}),
]
},
"kireicake": {
"root": "https://reader.kireicake.com",
"test-chapter": [
"test-chapter":
("https://reader.kireicake.com/read/wonderland/en/1/1/", {
"url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e",
"keyword": "47e0cf69f95ab3b820bda05014aec38d3b824018",
}),
],
"test-manga": [
"test-manga":
("https://reader.kireicake.com/series/wonderland/", {
"url": "d067b649af1cc88fa8c8b698fde04a10909fd169",
"keyword": "99caa336a9d48e27e3b8e56a0a1e6faf9fc13a51",
}),
],
},
"powermanga": {
"root": "https://read.powermanga.org",
"pattern": r"read(?:er)?\.powermanga\.org",
"test-chapter": [
"test-chapter":
(("https://read.powermanga.org"
"/read/one_piece_digital_colour_comics/en/0/75/"), {
"url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
"keyword": "9985bcb78491dff9c725958b06bba606be51b6d3",
}),
],
"test-manga": [
"test-manga":
(("https://read.powermanga.org"
"/series/one_piece_digital_colour_comics/"), {
"count": ">= 1",
@ -250,50 +244,46 @@ EXTRACTORS = {
"volume": int,
},
}),
],
},
"seaotterscans": {
"root": "https://reader.seaotterscans.com",
"test-chapter": [
"test-chapter":
("https://reader.seaotterscans.com/read/100_days/en/0/5/", {
"url": "63d46b8883cc652dfe8bd5be4492160dd31f06a8",
"keyword": "5349c2fbaa88070e6af600de17a6c4e212243e8e",
}),
],
"test-manga": [
"test-manga":
("https://reader.seaotterscans.com/series/marry_me/", {
"url": "fdbacabfa566a6baeb3f01bb46cbda0577bd4bbe",
"keyword": "61d3388d73df12f64361892b47a9398df4a5947c",
}),
],
},
"sensescans": {
"root": "http://sensescans.com/reader",
"pattern": r"(?:(?:www\.)?sensescans\.com/reader"
r"|reader\.sensescans\.com)",
"test-chapter": [
(("http://reader.sensescans.com/read/"
"test-chapter": (
(("http://sensescans.com/reader/read/"
"magi__labyrinth_of_magic/en/37/369/"), {
"url": "a399ef037cdfbc25b09d435cc2ea1e3e454a6812",
"keyword": "43ba75615d3e77d507808b0f3a8fd7fc72232a60",
}),
(("http://sensescans.com/reader/read/"
(("http://reader.sensescans.com/read/"
"magi__labyrinth_of_magic/en/37/369/"), {
"url": "a399ef037cdfbc25b09d435cc2ea1e3e454a6812",
"keyword": "43ba75615d3e77d507808b0f3a8fd7fc72232a60",
}),
],
"test-manga": [
),
"test-manga":
("http://sensescans.com/reader/series/hakkenden/", {
"url": "2360ccb0ead0ff2f5e27b7aef7eb17b9329de2f2",
"keyword": "122cf92c32e6428c50f56ffaf29d06b96750ed71",
}),
],
},
"worldthree": {
"root": "http://www.slide.world-three.org",
"pattern": r"(?:www\.)?slide\.world-three\.org",
"test-chapter": [
"test-chapter": (
(("http://www.slide.world-three.org"
"/read/black_bullet/en/2/7/page/1"), {
"url": "be2f04f6e2d311b35188094cfd3e768583271584",
@ -304,13 +294,12 @@ EXTRACTORS = {
"url": "6028ea5ca282744f925dfad92eeb98509f9cc78c",
"keyword": "d478e9f20847deb1844dba318acaa8b91c19468a",
}),
],
"test-manga": [
),
"test-manga":
("http://www.slide.world-three.org/series/black_bullet/", {
"url": "5743b93512d26e6b540d90a7a5d69208b6d4a738",
"keyword": "3a24f1088b4d7f3b798a96163f21ca251293a120",
}),
],
},
}

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2018 Mike Fährmann
# Copyright 2014-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -75,9 +75,9 @@ class GelbooruExtractor(booru.XmlParserMixin,
class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
"""Extractor for images from gelbooru.com based on search-tags"""
pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
r"\?page=post&s=list&tags=(?P<tags>[^&#]+)"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
test = (
("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", {
"count": 5,
}),
@ -85,7 +85,7 @@ class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
"options": (("api", False),),
"count": 5,
}),
]
)
def __init__(self, match):
super().__init__(match)
@ -107,11 +107,11 @@ class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
class GelbooruPoolExtractor(booru.GelbooruPoolMixin, GelbooruExtractor):
"""Extractor for image-pools from gelbooru.com"""
pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
r"\?page=pool&s=show&id=(?P<pool>\d+)"]
test = [("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
r"\?page=pool&s=show&id=(?P<pool>\d+)")
test = ("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
"count": 6,
})]
})
def get_posts(self):
return util.advance(self.posts, self.page_start)
@ -119,12 +119,12 @@ class GelbooruPoolExtractor(booru.GelbooruPoolMixin, GelbooruExtractor):
class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor):
"""Extractor for single images from gelbooru.com"""
pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
r"\?page=post&s=view&id=(?P<post>\d+)"]
test = [("https://gelbooru.com/index.php?page=post&s=view&id=313638", {
pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
r"\?page=post&s=view&id=(?P<post>\d+)")
test = ("https://gelbooru.com/index.php?page=post&s=view&id=313638", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
"count": 1,
})]
})
def get_posts(self):
return (self.post,)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2018 Mike Fährmann
# Copyright 2017-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -39,9 +39,9 @@ class GfycatExtractor(Extractor):
class GfycatImageExtractor(GfycatExtractor):
"""Extractor for individual images from gfycat.com"""
subcategory = "image"
pattern = [r"(?:https?://)?(?:\w+\.)?gfycat\.com"
r"/(?:gifs/detail/|\w+/)?([A-Za-z]+)"]
test = [
pattern = (r"(?:https?://)?(?:\w+\.)?gfycat\.com"
r"/(?:gifs/detail/|\w+/)?([A-Za-z]+)")
test = (
("https://gfycat.com/GrayGenerousCowrie", {
"url": "e0b5e1d7223108249b15c3c7898dd358dbfae045",
"content": "5786028e04b155baa20b87c5f4f77453cd5edc37",
@ -67,10 +67,10 @@ class GfycatImageExtractor(GfycatExtractor):
("https://gfycat.com/detail/UnequaledHastyAnkole?tagname=aww", {
"url": "e24c9f69897fd223343782425a429c5cab6a768e",
}),
("https://gfycat.com/gifs/detail/UnequaledHastyAnkole", None),
("https://gfycat.com/ifr/UnequaledHastyAnkole", None),
("https://gfycat.com/ru/UnequaledHastyAnkole", None),
]
("https://gfycat.com/gifs/detail/UnequaledHastyAnkole"),
("https://gfycat.com/ifr/UnequaledHastyAnkole"),
("https://gfycat.com/ru/UnequaledHastyAnkole"),
)
def __init__(self, match):
GfycatExtractor.__init__(self)

@ -44,12 +44,12 @@ class HbrowseBase():
class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):
"""Extractor for manga from hbrowse.com"""
pattern = [r"(?:https?://)?((?:www\.)?hbrowse\.com/\d+)/?$"]
pattern = r"(?:https?://)?((?:www\.)?hbrowse\.com/\d+)/?$"
reverse = False
test = [("https://www.hbrowse.com/10363", {
test = ("https://www.hbrowse.com/10363", {
"url": "b89682bfb86c11d2af0dc47463804ec3ac4aadd6",
"keyword": "4b15fda1858a69de1fbf5afddfe47dd893397312",
})]
})
def chapters(self, page):
results = []
@ -72,16 +72,16 @@ class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):
class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor):
"""Extractor for manga-chapters from hbrowse.com"""
directory_fmt = ["{category}", "{manga_id} {manga}", "c{chapter:>05}"]
directory_fmt = ("{category}", "{manga_id} {manga}", "c{chapter:>05}")
filename_fmt = ("{category}_{manga_id}_{chapter:>05}_"
"{page:>03}.{extension}")
archive_fmt = "{manga_id}_{chapter}_{page}"
pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/c(\d+)"]
test = [("https://www.hbrowse.com/10363/c00000", {
pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/c(\d+)"
test = ("https://www.hbrowse.com/10363/c00000", {
"url": "6feefbc9f4b98e20d8425ddffa9dd111791dc3e6",
"keyword": "95ec73a58aeac57f4dd20f0fa0c2812b045a30e8",
"content": "44578ebbe176c2c27434966aef22945787e2781e",
})]
})
def __init__(self, match):
self.gid, self.chapter = match.groups()

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2018 Mike Fährmann
# Copyright 2016-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -18,8 +18,8 @@ class Hentai2readMangaExtractor(MangaExtractor):
"""Extractor for hmanga from hentai2read.com"""
category = "hentai2read"
scheme = "https"
pattern = [r"(?:https?://)?(?:www\.)?(hentai2read\.com/[^/]+/?)$"]
test = [
pattern = r"(?:https?://)?(?:www\.)?(hentai2read\.com/[^/]+/?)$"
test = (
("http://hentai2read.com/amazon_elixir/", {
"url": "273073752d418ec887d7f7211e42b832e8c403ba",
"keyword": "13c1ce7e15cbb941f01c843b0e89adc993d939ac",
@ -28,7 +28,7 @@ class Hentai2readMangaExtractor(MangaExtractor):
"url": "6595f920a3088a15c2819c502862d45f8eb6bea6",
"keyword": "675c7b7a4fa52cf569c283553bd16b4200a5cd36",
}),
]
)
def chapters(self, page):
results = []
@ -60,11 +60,11 @@ class Hentai2readChapterExtractor(ChapterExtractor):
"""Extractor for a single manga chapter from hentai2read.com"""
category = "hentai2read"
archive_fmt = "{chapter_id}_{page}"
pattern = [r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/(\d+)"]
test = [("http://hentai2read.com/amazon_elixir/1/", {
pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/(\d+)"
test = ("http://hentai2read.com/amazon_elixir/1/", {
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
"keyword": "9845105898d28c6a540cffdea60a1a20fab52431",
})]
})
def __init__(self, match):
url_title, self.chapter = match.groups()

@ -16,13 +16,13 @@ import re
class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor):
"""Extractor for manga-chapters from hentai.cafe"""
category = "hentaicafe"
directory_fmt = ["{category}", "{manga}"]
pattern = [r"(?:https?://)?(?:www\.)?hentai\.cafe"
r"(/manga/read/[^/?&#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"]
test = [("https://hentai.cafe/manga/read/saitom-box/en/0/1/", {
directory_fmt = ("{category}", "{manga}")
pattern = (r"(?:https?://)?(?:www\.)?hentai\.cafe"
r"(/manga/read/[^/?&#]+/[a-z-]+/\d+/\d+(?:/\d+)?)")
test = ("https://hentai.cafe/manga/read/saitom-box/en/0/1/", {
"url": "8c6a8c56875ba3ed7ab0a74a64f9960077767fc2",
"keyword": "1b24a3e8625b89d160d01ce3eb5e5eb12fbbf648",
})]
})
root = "https://hentai.cafe"
def get_metadata(self, page):
@ -37,9 +37,9 @@ class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor):
class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
"""Extractor for manga from hentai.cafe"""
category = "hentaicafe"
pattern = [r"(?:https?://)?" + r"(?:www\.)?hentai\.cafe"
r"((?:/manga/series)?/[^/?&#]+)/?$"]
test = [
pattern = (r"(?:https?://)?" + r"(?:www\.)?hentai\.cafe"
r"((?:/manga/series)?/[^/?&#]+)/?$")
test = (
# single chapter
("https://hentai.cafe/hazuki-yuuto-summer-blues/", {
"url": "f8e24a07d6fbb7c6a6ec5ad8ad8faf2436f8751b",
@ -53,7 +53,7 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
"url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076",
"keyword": "46012b857eb1a1394bc55c0efe7aa4e7f704d10d",
}),
]
)
root = "https://hentai.cafe"
reverse = False

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -15,7 +15,7 @@ from .. import text, util, exception
class HentaifoundryExtractor(Extractor):
"""Base class for hentaifoundry extractors"""
category = "hentaifoundry"
directory_fmt = ["{category}", "{user}"]
directory_fmt = ("{category}", "{user}")
filename_fmt = "{category}_{index}_{title}.{extension}"
archive_fmt = "{index}"
root = "https://www.hentai-foundry.com"
@ -120,17 +120,17 @@ class HentaifoundryExtractor(Extractor):
class HentaifoundryUserExtractor(HentaifoundryExtractor):
"""Extractor for all images of a hentai-foundry-user"""
subcategory = "user"
pattern = [r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
r"/(?:pictures/user/([^/]+)(?:/page/(\d+))?/?$"
r"|user/([^/]+)/profile)"]
test = [
r"|user/([^/]+)/profile)")
test = (
("https://www.hentai-foundry.com/pictures/user/Tenpura", {
"url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28",
"keyword": "d56e75566dc7dfe71d2ebd08c056a47f8832372d",
}),
("https://www.hentai-foundry.com/pictures/user/Tenpura/page/3", None),
("https://www.hentai-foundry.com/user/Tenpura/profile", None),
]
("https://www.hentai-foundry.com/pictures/user/Tenpura/page/3"),
("https://www.hentai-foundry.com/user/Tenpura/profile"),
)
def __init__(self, match):
HentaifoundryExtractor.__init__(
@ -146,17 +146,17 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor):
class HentaifoundryScrapsExtractor(HentaifoundryExtractor):
"""Extractor for scrap images of a hentai-foundry-user"""
subcategory = "scraps"
directory_fmt = ["{category}", "{user}", "Scraps"]
pattern = [r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
r"/pictures/user/([^/]+)/scraps(?:/page/(\d+))?"]
test = [
directory_fmt = ("{category}", "{user}", "Scraps")
pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
r"/pictures/user/([^/]+)/scraps(?:/page/(\d+))?")
test = (
("https://www.hentai-foundry.com/pictures/user/Evulchibi/scraps", {
"url": "00a11e30b73ff2b00a1fba0014f08d49da0a68ec",
"keyword": "8c9a2ad4bf20247bcebb7aef3cfe7016f35da4a7",
}),
(("https://www.hentai-foundry.com"
"/pictures/user/Evulchibi/scraps/page/3"), None),
]
("https://www.hentai-foundry.com"
"/pictures/user/Evulchibi/scraps/page/3"),
)
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match.group(1), match.group(2))
@ -171,18 +171,18 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor):
class HentaifoundryFavoriteExtractor(HentaifoundryExtractor):
"""Extractor for favorite images of a hentai-foundry-user"""
subcategory = "favorite"
directory_fmt = ["{category}", "{user}", "Favorites"]
directory_fmt = ("{category}", "{user}", "Favorites")
archive_fmt = "f_{user}_{index}"
pattern = [r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
r"/user/([^/]+)/faves/pictures(?:/page/(\d+))?"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
r"/user/([^/]+)/faves/pictures(?:/page/(\d+))?")
test = (
("https://www.hentai-foundry.com/user/Tenpura/faves/pictures", {
"url": "56f9ae2e89fe855e9fe1da9b81e5ec6212b0320b",
"keyword": "0ab79552ae2fbfcf501ebbebcf19c2dfc9b5eb4e",
}),
("https://www.hentai-foundry.com"
"/user/Tenpura/faves/pictures/page/3", None),
]
"/user/Tenpura/faves/pictures/page/3"),
)
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match.group(1), match.group(2))
@ -192,11 +192,11 @@ class HentaifoundryFavoriteExtractor(HentaifoundryExtractor):
class HentaifoundryRecentExtractor(HentaifoundryExtractor):
"""Extractor for 'Recent Pictures' on hentaifoundry.com"""
subcategory = "recent"
directory_fmt = ["{category}", "Recent Pictures", "{date}"]
directory_fmt = ("{category}", "Recent Pictures", "{date}")
archive_fmt = "r_{index}"
pattern = [r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
r"/pictures/recent/(\d+-\d+-\d+)(?:/page/(\d+))?"]
test = [("http://www.hentai-foundry.com/pictures/recent/2018-09-20", None)]
pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
r"/pictures/recent/(\d+-\d+-\d+)(?:/page/(\d+))?")
test = ("http://www.hentai-foundry.com/pictures/recent/2018-09-20",)
def __init__(self, match):
HentaifoundryExtractor.__init__(self, "", match.group(2))
@ -211,11 +211,11 @@ class HentaifoundryRecentExtractor(HentaifoundryExtractor):
class HentaifoundryPopularExtractor(HentaifoundryExtractor):
"""Extractor for popular images on hentaifoundry.com"""
subcategory = "popular"
directory_fmt = ["{category}", "Popular Pictures"]
directory_fmt = ("{category}", "Popular Pictures")
archive_fmt = "p_{index}"
pattern = [r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
r"/pictures/popular(?:/page/(\d+))?"]
test = [("http://www.hentai-foundry.com/pictures/popular", None)]
pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
r"/pictures/popular(?:/page/(\d+))?")
test = ("http://www.hentai-foundry.com/pictures/popular",)
def __init__(self, match):
HentaifoundryExtractor.__init__(self, "", match.group(1))
@ -225,9 +225,9 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor):
class HentaifoundryImageExtractor(HentaifoundryExtractor):
"""Extractor for a single image from hentaifoundry.com"""
subcategory = "image"
pattern = [r"(?:https?://)?(?:www\.|pictures\.)?hentai-foundry\.com"
r"/(?:pictures/user|[^/])/([^/]+)/(\d+)"]
test = [
pattern = (r"(?:https?://)?(?:www\.|pictures\.)?hentai-foundry\.com"
r"/(?:pictures/user|[^/])/([^/]+)/(\d+)")
test = (
(("https://www.hentai-foundry.com"
"/pictures/user/Tenpura/407501/shimakaze"), {
"url": "fbf2fd74906738094e2575d2728e8dc3de18a8a3",
@ -237,9 +237,9 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
("https://www.hentai-foundry.com/pictures/user/Tenpura/340853/", {
"exception": exception.HttpError,
}),
(("https://pictures.hentai-foundry.com"
"/t/Tenpura/407501/Tenpura-407501-shimakaze.png"), None),
]
("https://pictures.hentai-foundry.com"
"/t/Tenpura/407501/Tenpura-407501-shimakaze.png"),
)
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match.group(1))

@ -17,14 +17,14 @@ class HentaifoxGalleryExtractor(ChapterExtractor):
category = "hentaifox"
subcategory = "gallery"
filename_fmt = "{category}_{gallery_id}_{page:>03}.{extension}"
directory_fmt = ["{category}", "{gallery_id} {title}"]
directory_fmt = ("{category}", "{gallery_id} {title}")
archive_fmt = "{gallery_id}_{page}"
pattern = [r"(?:https?://)?(?:www\.)?hentaifox\.com/gallery/(\d+)"]
test = [("https://hentaifox.com/gallery/56622/", {
pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com/gallery/(\d+)"
test = ("https://hentaifox.com/gallery/56622/", {
"pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
"count": 24,
"keyword": "80fc0fb5db9626fffb078dd2e4f9aff4a9348686",
})]
})
root = "https://hentaifox.com"
def __init__(self, match):
@ -62,16 +62,15 @@ class HentaifoxSearchExtractor(Extractor):
"""Extractor for search results and listings on hentaifox.com"""
category = "hentaifox"
subcategory = "search"
pattern = [r"(?:https?://)?(?:www\.)?hentaifox\.com"
r"(/(?:parody|tag|artist|character|search)/[^/?%#]+)"]
test = [
("https://hentaifox.com/parody/touhou-project/", None),
("https://hentaifox.com/tag/full-color/", None),
("https://hentaifox.com/character/reimu-hakurei/", None),
("https://hentaifox.com/artist/distance/", None),
("https://hentaifox.com/search/touhou/", None,),
pattern = (r"(?:https?://)?(?:www\.)?hentaifox\.com"
r"(/(?:parody|tag|artist|character|search)/[^/?%#]+)")
test = (
("https://hentaifox.com/parody/touhou-project/"),
("https://hentaifox.com/character/reimu-hakurei/"),
("https://hentaifox.com/artist/distance/"),
("https://hentaifox.com/search/touhou/"),
("https://hentaifox.com/tag/full-colour/", {
"pattern": HentaifoxGalleryExtractor.pattern[0],
"pattern": HentaifoxGalleryExtractor.pattern,
"count": ">= 40",
"keyword": {
"url": str,
@ -81,7 +80,7 @@ class HentaifoxSearchExtractor(Extractor):
"tags": list,
},
}),
]
)
root = "https://hentaifox.com"
def __init__(self, match):

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2018 Mike Fährmann
# Copyright 2016-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -10,16 +10,16 @@
from .common import ChapterExtractor, MangaExtractor
from .. import text
import re
import json
import re
class HentaihereMangaExtractor(MangaExtractor):
"""Extractor for hmanga from hentaihere.com"""
category = "hentaihere"
pattern = [r"(?:https?://)?(?:www\.)?(hentaihere\.com/m/S\d+)/?$"]
pattern = r"(?:https?://)?(?:www\.)?(hentaihere\.com/m/S\d+)/?$"
scheme = "https"
test = [
test = (
("https://hentaihere.com/m/S13812", {
"url": "d1ba6e28bb2162e844f8559c2b2725ba0a093559",
"keyword": "13c1ce7e15cbb941f01c843b0e89adc993d939ac",
@ -28,7 +28,7 @@ class HentaihereMangaExtractor(MangaExtractor):
"url": "6c5239758dc93f6b1b4175922836c10391b174f7",
"keyword": "675c7b7a4fa52cf569c283553bd16b4200a5cd36",
}),
]
)
def chapters(self, page):
results = []
@ -60,11 +60,11 @@ class HentaihereChapterExtractor(ChapterExtractor):
"""Extractor for a single manga chapter from hentaihere.com"""
category = "hentaihere"
archive_fmt = "{chapter_id}_{page}"
pattern = [r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/(\d+)"]
test = [("https://hentaihere.com/m/S13812/1/1/", {
pattern = r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/(\d+)"
test = ("https://hentaihere.com/m/S13812/1/1/", {
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
"keyword": "e9382a9be337abce3db2b1132e85751379dc05c5",
})]
})
def __init__(self, match):
self.manga_id, self.chapter = match.groups()

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -17,11 +17,11 @@ class HitomiGalleryExtractor(ChapterExtractor):
"""Extractor for image galleries from hitomi.la"""
category = "hitomi"
subcategory = "gallery"
directory_fmt = ["{category}", "{gallery_id} {title}"]
directory_fmt = ("{category}", "{gallery_id} {title}")
filename_fmt = "{category}_{gallery_id}_{page:>03}_{name}.{extension}"
archive_fmt = "{gallery_id}_{page}"
pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)"]
test = [
pattern = r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)"
test = (
("https://hitomi.la/galleries/867789.html", {
"url": "cb759868d090fe0e2655c3e29ebf146054322b6d",
"keyword": "85e453d01ee7f137669e75a764ccdc65ca092ad2",
@ -30,8 +30,8 @@ class HitomiGalleryExtractor(ChapterExtractor):
# "aa" subdomain for gallery-id ending in 1 (#142)
"pattern": r"https://aa\.hitomi\.la/",
}),
("https://hitomi.la/reader/867789.html", None),
]
("https://hitomi.la/reader/867789.html"),
)
def __init__(self, match):
self.gid = text.parse_int(match.group(1))

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -21,32 +21,32 @@ class IdolcomplexExtractor(sankaku.SankakuExtractor):
class IdolcomplexTagExtractor(IdolcomplexExtractor,
sankaku.SankakuTagExtractor):
"""Extractor for images from idol.sankakucomplex.com by search-tags"""
pattern = [r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"]
test = [
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"
test = (
("https://idol.sankakucomplex.com/?tags=lyumos+wreath", {
"count": ">= 6",
"pattern": (r"https://is\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+"),
"pattern": r"https://is\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
}),
(("https://idol.sankakucomplex.com/"
"?tags=lyumos+wreath&page=3&next=694215"), None),
]
("https://idol.sankakucomplex.com"
"/?tags=lyumos+wreath&page=3&next=694215"),
)
class IdolcomplexPoolExtractor(IdolcomplexExtractor,
sankaku.SankakuPoolExtractor):
"""Extractor for image-pools from idol.sankakucomplex.com"""
pattern = [r"(?:https?://)?idol\.sankakucomplex\.com/pool/show/(\d+)"]
test = [("https://idol.sankakucomplex.com/pool/show/145", {
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/pool/show/(\d+)"
test = ("https://idol.sankakucomplex.com/pool/show/145", {
"count": 3,
})]
})
class IdolcomplexPostExtractor(IdolcomplexExtractor,
sankaku.SankakuPostExtractor):
"""Extractor for single images from idol.sankakucomplex.com"""
pattern = [r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"]
test = [("https://idol.sankakucomplex.com/post/show/694215", {
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"
test = ("https://idol.sankakucomplex.com/post/show/694215", {
"content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
"options": (("tags", True),),
"keyword": {
@ -56,4 +56,4 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor,
"tags_medium": str,
"tags_general": str,
},
})]
})

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2018 Mike Fährmann
# Copyright 2014-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -30,11 +30,11 @@ class ImagebamExtractor(Extractor):
class ImagebamGalleryExtractor(ImagebamExtractor):
"""Extractor for image galleries from imagebam.com"""
subcategory = "gallery"
directory_fmt = ["{category}", "{title} - {gallery_key}"]
directory_fmt = ("{category}", "{title} - {gallery_key}")
filename_fmt = "{num:>03}-{image_key}.{extension}"
archive_fmt = "{gallery_key}_{image_key}"
pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([0-9a-z]+)"]
test = [
pattern = r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([0-9a-z]+)"
test = (
("http://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
"url": "fb01925129a1ff1941762eaa3a2783a66de6847f",
"keyword": "9e25b8827474ac93c54855e798d60aa3cbecbd7a",
@ -43,7 +43,7 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
("http://www.imagebam.com/gallery/gsl8teckymt4vbvx1stjkyk37j70va2c", {
"exception": exception.NotFoundError,
}),
]
)
def __init__(self, match):
ImagebamExtractor.__init__(self)
@ -87,16 +87,16 @@ class ImagebamImageExtractor(ImagebamExtractor):
subcategory = "image"
filename_fmt = "{image_key}.{extension}"
archive_fmt = "{image_key}"
pattern = [r"(?:https?://)?(?:\w+\.)?imagebam\.com"
r"/(?:image/|(?:[0-9a-f]{2}/){3})([0-9a-f]+)"]
test = [
pattern = (r"(?:https?://)?(?:\w+\.)?imagebam\.com"
r"/(?:image/|(?:[0-9a-f]{2}/){3})([0-9a-f]+)")
test = (
("http://www.imagebam.com/image/94d56c502511890", {
"url": "b384893c35a01a09c58018db71ddc4cf2480be95",
"keyword": "4263d4840007524129792b8587a562b5d20c2687",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
}),
("http://images3.imagebam.com/1d/8c/44/94d56c502511890.png", None),
]
("http://images3.imagebam.com/1d/8c/44/94d56c502511890.png"),
)
def __init__(self, match):
ImagebamExtractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2018 Mike Fährmann
# Copyright 2016-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -16,7 +16,7 @@ import json
class ImagefapExtractor(Extractor):
"""Base class for imagefap extractors"""
category = "imagefap"
directory_fmt = ["{category}", "{gallery_id} {title}"]
directory_fmt = ("{category}", "{gallery_id} {title}")
filename_fmt = "{category}_{gallery_id}_{name}.{extension}"
archive_fmt = "{gallery_id}_{image_id}"
root = "https://www.imagefap.com"
@ -25,9 +25,9 @@ class ImagefapExtractor(Extractor):
class ImagefapGalleryExtractor(ImagefapExtractor):
"""Extractor for image galleries from imagefap.com"""
subcategory = "gallery"
pattern = [(r"(?:https?://)?(?:www\.)?imagefap\.com/"
r"(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)")]
test = [
pattern = (r"(?:https?://)?(?:www\.)?imagefap\.com/"
r"(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)")
test = (
("https://www.imagefap.com/pictures/7102714", {
"url": "268995eac5d01ddecd0fe58cfa9828390dc85a84",
"keyword": "3b90205f434bd1e0461bdbd5d2d9c34056b50fe6",
@ -37,8 +37,8 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
"url": "14906b4f0b8053d1d69bc730a325acb793cbc898",
"keyword": "66ccb98b69cb52f89540224260641002f41f6ece",
}),
("https://www.imagefap.com/gallery.php?gid=7102714", None),
]
("https://www.imagefap.com/gallery.php?gid=7102714"),
)
def __init__(self, match):
ImagefapExtractor.__init__(self)
@ -94,11 +94,11 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
class ImagefapImageExtractor(ImagefapExtractor):
"""Extractor for single images from imagefap.com"""
subcategory = "image"
pattern = [r"(?:https?://)?(?:www\.)?imagefap\.com/photo/(\d+)"]
test = [("https://www.imagefap.com/photo/1369341772/", {
pattern = r"(?:https?://)?(?:www\.)?imagefap\.com/photo/(\d+)"
test = ("https://www.imagefap.com/photo/1369341772/", {
"url": "b31ee405b61ff0450020a1bf11c0581ca9adb471",
"keyword": "b49940c04ed30bfc1c28ec39eb08b3be5753ce8a",
})]
})
def __init__(self, match):
ImagefapExtractor.__init__(self)
@ -133,18 +133,18 @@ class ImagefapUserExtractor(ImagefapExtractor):
"""Extractor for all galleries from a user at imagefap.com"""
subcategory = "user"
categorytransfer = True
pattern = [r"(?:https?://)?(?:www\.)?imagefap\.com/"
pattern = (r"(?:https?://)?(?:www\.)?imagefap\.com/"
r"(?:profile(?:\.php\?user=|/)([^/?&#]+)"
r"|usergallery\.php\?userid=(\d+))"]
test = [
r"|usergallery\.php\?userid=(\d+))")
test = (
("https://www.imagefap.com/profile/LucyRae/galleries", {
"url": "d941aa906f56a75972a7a5283030eb9a8d27a4fd",
}),
("https://www.imagefap.com/usergallery.php?userid=1862791", {
"url": "d941aa906f56a75972a7a5283030eb9a8d27a4fd",
}),
("https://www.imagefap.com/profile.php?user=LucyRae", None),
]
("https://www.imagefap.com/profile.php?user=LucyRae"),
)
def __init__(self, match):
ImagefapExtractor.__init__(self)

@ -70,8 +70,8 @@ class ImagehostImageExtractor(SharedConfigMixin, Extractor):
class ImxtoImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imx.to"""
category = "imxto"
pattern = [r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)"
r"/(?:i/|img-)(\w+)(\.html)?)"]
pattern = (r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)"
r"/(?:i/|img-)(\w+)(\.html)?)")
test = (
("https://imx.to/i/1qdeva", { # new-style URL
"url": "ab2173088a6cdef631d7a47dec4a5da1c6a00130",
@ -115,7 +115,7 @@ class ImxtoImageExtractor(ImagehostImageExtractor):
class AcidimgImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from acidimg.cc"""
category = "acidimg"
pattern = [r"(?:https?://)?((?:www\.)?acidimg\.cc/img-([a-z0-9]+)\.html)"]
pattern = r"(?:https?://)?((?:www\.)?acidimg\.cc/img-([a-z0-9]+)\.html)"
test = ("https://acidimg.cc/img-5acb6b9de4640.html", {
"url": "f132a630006e8d84f52d59555191ed82b3b64c04",
"keyword": "183098c59d9244650f666b6cb4df96d76d2aeae8",
@ -135,8 +135,8 @@ class AcidimgImageExtractor(ImagehostImageExtractor):
class ImagevenueImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imagevenue.com"""
category = "imagevenue"
pattern = [r"(?:https?://)?(img\d+\.imagevenue\.com"
r"/img\.php\?image=(?:[a-z]+_)?(\d+)_[^&#]+)"]
pattern = (r"(?:https?://)?(img\d+\.imagevenue\.com"
r"/img\.php\?image=(?:[a-z]+_)?(\d+)_[^&#]+)")
test = (("http://img28116.imagevenue.com/img.php"
"?image=th_52709_test_122_64lo.jpg"), {
"url": "46812995d557f2c6adf0ebd0e631e6e4e45facde",
@ -152,7 +152,7 @@ class ImagevenueImageExtractor(ImagehostImageExtractor):
class ImagetwistImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imagetwist.com"""
category = "imagetwist"
pattern = [r"(?:https?://)?((?:www\.)?imagetwist\.com/([a-z0-9]{12}))"]
pattern = r"(?:https?://)?((?:www\.)?imagetwist\.com/([a-z0-9]{12}))"
test = ("https://imagetwist.com/4e46hv31tu0q/test.jpg", {
"url": "c999dc1a5dec0525ac9eb8c092f173dfe6dba0b0",
"keyword": "30dd34dcb06b5b51c6cfff199c610b24edb7b9bc",
@ -175,7 +175,7 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
class ImgspiceImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imgspice.com"""
category = "imgspice"
pattern = [r"(?:https?://)?((?:www\.)?imgspice\.com/([^/?&#]+))"]
pattern = r"(?:https?://)?((?:www\.)?imgspice\.com/([^/?&#]+))"
test = ("https://imgspice.com/zop38mvvq29u/", {
"url": "a45833733c02b64d105363ffd8fd19f06992a2f7",
})
@ -191,8 +191,8 @@ class ImgspiceImageExtractor(ImagehostImageExtractor):
class PixhostImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from pixhost.to"""
category = "pixhost"
pattern = [r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
r"/show/\d+/(\d+)_[^/?&#]+)"]
pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
r"/show/\d+/(\d+)_[^/?&#]+)")
test = ("https://pixhost.to/show/224/96246707_test-.png", {
"url": "8f3d41fdd2dbec4c844e5ee45bf49961fbd79c67",
"keyword": "d7b19630acf8da39036581d3d5597f97da883626",
@ -211,8 +211,8 @@ class PixhostImageExtractor(ImagehostImageExtractor):
class PostimgImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from postimages.org"""
category = "postimg"
pattern = [r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)"
r"/(?:image/)?([^/?&#]+)/?)"]
pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)"
r"/(?:image/)?([^/?&#]+)/?)")
test = ("https://postimg.cc/Wtn2b3hC", {
"url": "0794cfda9b8951a8ac3aa692472484200254ab86",
"keyword": "dd8822e7d359c33dba85280fe31bea7d098cd1d1",
@ -230,8 +230,8 @@ class PostimgImageExtractor(ImagehostImageExtractor):
class TurboimagehostImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from turboimagehost.com"""
category = "turboimagehost"
pattern = [r"(?:https?://)?((?:www\.)?turboimagehost\.com"
r"/p/(\d+)/[^/?&#]+\.html)"]
pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com"
r"/p/(\d+)/[^/?&#]+\.html)")
test = ("https://www.turboimagehost.com/p/39078423/test--.png.html", {
"url": "b94de43612318771ced924cb5085976f13b3b90e",
"keyword": "c1391465dc7b590b0eb8ea2a8cd235733c6fce2b",

@ -61,11 +61,11 @@ class ImgboxExtractor(Extractor):
class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
"""Extractor for image galleries from imgbox.com"""
subcategory = "gallery"
directory_fmt = ["{category}", "{title} - {gallery_key}"]
directory_fmt = ("{category}", "{title} - {gallery_key}")
filename_fmt = "{num:>03}-{name}.{extension}"
archive_fmt = "{gallery_key}_{image_key}"
pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/g/([A-Za-z0-9]{10})"]
test = [
pattern = r"(?:https?://)?(?:www\.)?imgbox\.com/g/([A-Za-z0-9]{10})"
test = (
("https://imgbox.com/g/JaX5V5HX7g", {
"url": "678f0bca1251d810372326ea4f16582cafa800e4",
"keyword": "92499344257cf8c72695a8dab4ccc15ca7655c1e",
@ -78,7 +78,7 @@ class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
("https://imgbox.com/g/JaX5V5HX7h", {
"exception": exception.NotFoundError,
}),
]
)
def __init__(self, match):
ImgboxExtractor.__init__(self)
@ -107,8 +107,8 @@ class ImgboxImageExtractor(ImgboxExtractor):
"""Extractor for single images from imgbox.com"""
subcategory = "image"
archive_fmt = "{image_key}"
pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/([A-Za-z0-9]{8})"]
test = [
pattern = r"(?:https?://)?(?:www\.)?imgbox\.com/([A-Za-z0-9]{8})"
test = (
("https://imgbox.com/qHhw7lpG", {
"url": "d931f675a9b848fa7cb9077d6c2b14eb07bdb80f",
"keyword": "a7a65a05a49d9a0eae95d637019af55faad09c5e",
@ -117,7 +117,7 @@ class ImgboxImageExtractor(ImgboxExtractor):
("https://imgbox.com/qHhw7lpH", {
"exception": exception.NotFoundError,
}),
]
)
def __init__(self, match):
ImgboxExtractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -16,14 +16,14 @@ class ImgthGalleryExtractor(Extractor):
"""Extractor for image galleries from imgth.com"""
category = "imgth"
subcategory = "gallery"
directory_fmt = ["{category}", "{gallery_id} {title}"]
directory_fmt = ("{category}", "{gallery_id} {title}")
filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
archive_fmt = "{gallery_id}_{num}"
pattern = [r"(?:https?://)?imgth\.com/gallery/(\d+)"]
test = [("http://imgth.com/gallery/37/wallpaper-anime", {
pattern = r"(?:https?://)?imgth\.com/gallery/(\d+)"
test = ("http://imgth.com/gallery/37/wallpaper-anime", {
"url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
"keyword": "e62d14f20ded393d28c2789fcc34ea2c30bc6a7c",
})]
})
def __init__(self, match):
Extractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -53,9 +53,9 @@ class ImgurImageExtractor(ImgurExtractor):
subcategory = "image"
filename_fmt = "{category}_{hash}{title:?_//}.{extension}"
archive_fmt = "{hash}"
pattern = [r"(?:https?://)?(?:www\.|[im]\.|)?imgur\.com"
r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?"]
test = [
pattern = (r"(?:https?://)?(?:www\.|[im]\.|)?imgur\.com"
r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?")
test = (
("https://imgur.com/21yMxCS", {
"url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
@ -92,14 +92,14 @@ class ImgurImageExtractor(ImgurExtractor):
("https://imgur.com/zzzzzzz", { # not found
"exception": exception.NotFoundError,
}),
("https://www.imgur.com/21yMxCS", None), # www
("https://m.imgur.com/21yMxCS", None), # mobile
("https://imgur.com/zxaY6", None), # 5 character key
("https://i.imgur.com/21yMxCS.png", None), # direct link
("https://i.imgur.com/21yMxCSh.png", None), # direct link thumbnail
("https://i.imgur.com/zxaY6.gif", None), # direct link (short)
("https://i.imgur.com/zxaY6s.gif", None), # direct link (short; thumb)
]
("https://www.imgur.com/21yMxCS"), # www
("https://m.imgur.com/21yMxCS"), # mobile
("https://imgur.com/zxaY6"), # 5 character key
("https://i.imgur.com/21yMxCS.png"), # direct link
("https://i.imgur.com/21yMxCSh.png"), # direct link thumbnail
("https://i.imgur.com/zxaY6.gif"), # direct link (short)
("https://i.imgur.com/zxaY6s.gif"), # direct link (short; thumb)
)
def items(self):
image = self._get_data(self.item_id)
@ -113,12 +113,12 @@ class ImgurImageExtractor(ImgurExtractor):
class ImgurAlbumExtractor(ImgurExtractor):
"""Extractor for image albums from imgur.com"""
subcategory = "album"
directory_fmt = ["{category}", "{album[hash]}{album[title]:? - //}"]
directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}")
filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}"
archive_fmt = "{album[hash]}_{hash}"
pattern = [r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
r"/(?:a|gallery|t/unmuted)/(\w{7}|\w{5})"]
test = [
pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
r"/(?:a|gallery|t/unmuted)/(\w{7}|\w{5})")
test = (
("https://imgur.com/a/TcBmP", {
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
"keyword": {
@ -160,9 +160,9 @@ class ImgurAlbumExtractor(ImgurExtractor):
("https://imgur.com/a/TcBmQ", {
"exception": exception.NotFoundError,
}),
("https://www.imgur.com/a/TcBmP", None), # www
("https://m.imgur.com/a/TcBmP", None), # mobile
]
("https://www.imgur.com/a/TcBmP"), # www
("https://m.imgur.com/a/TcBmP"), # mobile
)
def items(self):
album = self._get_data("a/" + self.item_id + "/all")

@ -17,7 +17,7 @@ from .. import text
class InstagramExtractor(Extractor):
"""Base class for instagram extractors"""
category = "instagram"
directory_fmt = ["{category}", "{username}"]
directory_fmt = ("{category}", "{username}")
filename_fmt = "{media_id}.{extension}"
archive_fmt = "{media_id}"
root = "https://www.instagram.com"
@ -111,8 +111,8 @@ class InstagramExtractor(Extractor):
class InstagramImageExtractor(InstagramExtractor):
"""Extractor for PostPage"""
subcategory = "image"
pattern = [r"(?:https?://)?(?:www\.)?instagram\.com/p/([^/]+)/?"]
test = [
pattern = r"(?:https?://)?(?:www\.)?instagram\.com/p/([^/]+)/?"
test = (
# GraphImage
("https://www.instagram.com/p/BqvsDleB3lV/", {
"pattern": r"https://[^/]+\.(cdninstagram\.com|fbcdn\.net)"
@ -161,7 +161,7 @@ class InstagramImageExtractor(InstagramExtractor):
"width": int,
}
}),
]
)
def __init__(self, match):
InstagramExtractor.__init__(self)
@ -175,13 +175,11 @@ class InstagramImageExtractor(InstagramExtractor):
class InstagramUserExtractor(InstagramExtractor):
"""Extractor for ProfilePage"""
subcategory = "user"
pattern = [r"(?:https?://)?(?:www\.)?instagram\.com/(?!p/)([^/?&#]+)"]
test = [
("https://www.instagram.com/instagram/", {
"range": "1-12",
"count": ">= 12",
}),
]
pattern = r"(?:https?://)?(?:www\.)?instagram\.com/(?!p/)([^/?&#]+)"
test = ("https://www.instagram.com/instagram/", {
"range": "1-12",
"count": ">= 12",
})
def __init__(self, match):
InstagramExtractor.__init__(self)

@ -16,17 +16,17 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
"""Extractor for soundtracks from khinsider.com"""
category = "khinsider"
subcategory = "soundtrack"
directory_fmt = ["{category}", "{album}"]
directory_fmt = ("{category}", "{album}")
archive_fmt = "{album}_{name}.{extension}"
pattern = [r"(?:https?://)?downloads\.khinsider\.com/"
r"game-soundtracks/album/([^/?&#]+)"]
test = [(("https://downloads.khinsider.com/game-soundtracks/"
"album/horizon-riders-wii"), {
"pattern": (r"https?://\d+\.\d+\.\d+\.\d+/ost/horizon-riders-wii/[^/]+"
r"/Horizon%20Riders%20Wii%20-%20Full%20Soundtrack\.mp3"),
pattern = (r"(?:https?://)?downloads\.khinsider\.com"
r"/game-soundtracks/album/([^/?&#]+)")
test = (("https://downloads.khinsider.com"
"/game-soundtracks/album/horizon-riders-wii"), {
"pattern": r"https?://\d+\.\d+\.\d+\.\d+/ost/horizon-riders-wii/[^/]+"
r"/Horizon%20Riders%20Wii%20-%20Full%20Soundtrack\.mp3",
"count": 1,
"keyword": "d91cf3edee6713b536eaf3995743f0be7dc72f68",
})]
})
root = "https://downloads.khinsider.com"
def __init__(self, match):

@ -69,15 +69,15 @@ class KissmangaBase():
class KissmangaMangaExtractor(KissmangaBase, MangaExtractor):
"""Extractor for manga from kissmanga.com"""
pattern = [r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com"
r"(/Manga/[^/?&#]+/?)$"]
test = [
pattern = (r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com"
r"(/Manga/[^/?&#]+/?)$")
test = (
("https://kissmanga.com/Manga/Dropout", {
"url": "9e3a6f715b229aa3fafa42a1d5da5d65614cb532",
"keyword": "32b09711c28b481845acc32e3bb6054cfc90224d",
}),
("https://kissmanga.com/manga/feng-shen-ji", None),
]
("https://kissmanga.com/manga/feng-shen-ji"), # lowercase
)
def __init__(self, match):
MangaExtractor.__init__(self, match, self.root + match.group(1))
@ -104,9 +104,9 @@ class KissmangaMangaExtractor(KissmangaBase, MangaExtractor):
class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
"""Extractor for manga-chapters from kissmanga.com"""
pattern = [r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com"
r"(/Manga/[^/?&#]+/[^/?&#]+\?id=(\d+))"]
test = [
pattern = (r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com"
r"(/Manga/[^/?&#]+/[^/?&#]+\?id=(\d+))")
test = (
("https://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", {
"url": "46e63fd63e9e16f19bc1e6c7a45dc060815642fd",
"keyword": "4a3a9341d453541de0dbfa24cd6b2e3ed39c0182",
@ -123,8 +123,8 @@ class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
"count": 49,
"keyword": "7835a19c9fc54ec4f2b345e8be3e865cfa57da5c",
}),
("https://kissmanga.com/mAnGa/mOnStEr/Monster-79?id=7608", None),
]
("https://kissmanga.com/mAnGa/mOnStEr/Monster-79?id=7608"),
)
def __init__(self, match):
ChapterExtractor.__init__(self, self.root + match.group(1))

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -48,19 +48,19 @@ class KomikcastBase():
class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
"""Extractor for manga-chapters from komikcast.com"""
pattern = [r"(?:https?://)?(?:www\.)?komikcast\.com(/chapter/[^/?&#]+/)"]
test = [
pattern = r"(?:https?://)?(?:www\.)?komikcast\.com(/chapter/[^/?&#]+/)"
test = (
(("https://komikcast.com/chapter/"
"apotheosis-chapter-02-2-bahasa-indonesia/"), {
"url": "978d3c053d34a77f6ea6e60cbba3deda1e369be8",
"url": "2a108bf8a96753266610afef625d248f858e13f3",
"keyword": "9964a7ce7c8a518aebdccdea0e05858439c7ad92",
}),
(("https://komikcast.com/chapter/"
"tonari-no-kashiwagi-san-chapter-18b/"), {
"url": "db5594b025f9d81e4987da538b8599b8dee8851b",
"url": "aff90dd21dbb945a726778b10bdef522af7c42fe",
"keyword": "94bb85aec6654ab5af0c10419ca388fcd9c73b47",
}),
]
)
def __init__(self, match):
ChapterExtractor.__init__(self, self.root + match.group(1))
@ -90,15 +90,15 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
"""Extractor for manga from komikcast.com"""
pattern = [r"(?:https?://)?(?:www\.)?(komikcast\.com"
r"/(?:komik/)?[^/?&#]+/?)$"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?(komikcast\.com"
r"/(?:komik/)?[^/?&#]+/?)$")
test = (
("https://komikcast.com/komik/090-eko-to-issho/", {
"url": "dc798d107697d1f2309b14ca24ca9dba30c6600f",
"keyword": "3db7e23e3c108031608fbbeb9334badecd967f95",
}),
("https://komikcast.com/tonari-no-kashiwagi-san/", None),
]
("https://komikcast.com/tonari-no-kashiwagi-san/"),
)
def chapters(self, page):
results = []

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -24,33 +24,33 @@ class KonachanExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
class KonachanTagExtractor(booru.TagMixin, KonachanExtractor):
"""Extractor for images from konachan.com based on search-tags"""
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)")
test = (
("https://konachan.com/post?tags=patata", {
"content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
}),
("https://konachan.net/post?tags=patata", None),
]
("https://konachan.net/post?tags=patata"),
)
class KonachanPoolExtractor(booru.PoolMixin, KonachanExtractor):
"""Extractor for image-pools from konachan.com"""
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/pool/show/(?P<pool>\d+)"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/pool/show/(?P<pool>\d+)")
test = (
("https://konachan.com/pool/show/95", {
"content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
}),
("https://konachan.net/pool/show/95", None),
]
("https://konachan.net/pool/show/95"),
)
class KonachanPostExtractor(booru.PostMixin, KonachanExtractor):
"""Extractor for single images from konachan.com"""
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/post/show/(?P<post>\d+)"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/post/show/(?P<post>\d+)")
test = (
("https://konachan.com/post/show/205189", {
"content": "674e75a753df82f5ad80803f575818b8e46e4b65",
"options": (("tags", True),),
@ -61,22 +61,22 @@ class KonachanPostExtractor(booru.PostMixin, KonachanExtractor):
"tags_general": str,
},
}),
("https://konachan.net/post/show/205189", None),
]
("https://konachan.net/post/show/205189"),
)
class KonachanPopularExtractor(booru.MoebooruPopularMixin, KonachanExtractor):
"""Extractor for popular images from konachan.com"""
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
pattern = (r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
r"(?:\?(?P<query>[^#]*))?"]
test = [
r"(?:\?(?P<query>[^#]*))?")
test = (
("https://konachan.com/post/popular_by_month?month=11&year=2010", {
"count": 20,
}),
("https://konachan.com/post/popular_recent", None),
("https://konachan.net/post/popular_recent", None),
]
("https://konachan.com/post/popular_recent"),
("https://konachan.net/post/popular_recent"),
)
def __init__(self, match):
super().__init__(match)

@ -49,12 +49,12 @@ class LusciousExtractor(Extractor):
class LusciousAlbumExtractor(AsynchronousMixin, LusciousExtractor):
"""Extractor for image albums from luscious.net"""
subcategory = "album"
directory_fmt = ["{category}", "{gallery_id} {title}"]
directory_fmt = ("{category}", "{gallery_id} {title}")
filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
archive_fmt = "{gallery_id}_{image_id}"
pattern = [(r"(?:https?://)?(?:www\.|members\.)?luscious\.net"
r"/(?:albums|pictures/c/[^/?&#]+/album)/([^/?&#]+_(\d+))")]
test = [
pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net"
r"/(?:albums|pictures/c/[^/?&#]+/album)/([^/?&#]+_(\d+))")
test = (
("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
"url": "7e4984a271a1072ac6483e4228a045895aff86f3",
"keyword": "5ab53959f25a468455f79149461d26547669e50e",
@ -64,11 +64,11 @@ class LusciousAlbumExtractor(AsynchronousMixin, LusciousExtractor):
"url": "21cc68a7548f4d71dfd67d8caf96349dde7e791c",
"keyword": "3de82f61ad4afd0f546ab5ae5bf9c5388cc9c3db",
}),
("https://www.luscious.net/albums/okinami_277031/", None),
("https://members.luscious.net/albums/okinami_277031/", None),
("https://www.luscious.net/albums/okinami_277031/"),
("https://members.luscious.net/albums/okinami_277031/"),
("https://luscious.net/pictures/c/video_game_manga/album"
"/okinami-no-koigokoro_277031/sorted/position/id/16528978/@_1", None),
]
"/okinami-no-koigokoro_277031/sorted/position/id/16528978/@_1"),
)
def __init__(self, match):
LusciousExtractor.__init__(self)
@ -147,17 +147,17 @@ class LusciousAlbumExtractor(AsynchronousMixin, LusciousExtractor):
class LusciousSearchExtractor(LusciousExtractor):
"""Extractor for album searches on luscious.net"""
subcategory = "search"
pattern = [(r"(?:https?://)?(?:www\.|members\.)?luscious\.net"
r"/((?:albums|c)(?:/(?![^/?&#]+_\d+)[^/?&#]+)+)")]
test = [
("https://luscious.net/c/hentai/", None),
pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net"
r"/((?:albums|c)(?:/(?![^/?&#]+_\d+)[^/?&#]+)+)")
test = (
("https://luscious.net/c/hentai/"),
("https://luscious.net/albums/t2/2/c/hentai/sorted/updated"
"/tagged/+full_color/page/2/", {
"pattern": r"https://(members\.)?luscious.net/albums/[^_]+_\d+/",
"range": "20-40",
"count": 21,
}),
]
)
def __init__(self, match):
LusciousExtractor.__init__(self)

@ -41,14 +41,14 @@ class MangadexExtractor(Extractor):
class MangadexChapterExtractor(MangadexExtractor):
"""Extractor for manga-chapters from mangadex.org"""
subcategory = "chapter"
directory_fmt = [
directory_fmt = (
"{category}", "{manga}",
"{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}"]
"{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}")
filename_fmt = (
"{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
archive_fmt = "{chapter_id}_{page}"
pattern = [r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)"]
test = [
pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)"
test = (
("https://mangadex.org/chapter/122094", {
"keyword": "7bd7f82ab9d3f06976c4b68afe78d0040851ac3c",
"content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f",
@ -58,7 +58,7 @@ class MangadexChapterExtractor(MangadexExtractor):
"count": 64,
"keyword": "435e157dc5529d152458ba751ffe5bfbaf4850fb",
}),
]
)
def __init__(self, match):
MangadexExtractor.__init__(self)
@ -110,9 +110,9 @@ class MangadexMangaExtractor(MangadexExtractor):
"""Extractor for manga from mangadex.org"""
subcategory = "manga"
categorytransfer = True
pattern = [r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)"
r"/(?:title|manga)/(\d+)"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)"
r"/(?:title|manga)/(\d+)")
test = (
("https://mangadex.org/manga/2946/souten-no-koumori", {
"pattern": r"https://mangadex.org/chapter/\d+",
"keywords": {
@ -135,8 +135,8 @@ class MangadexMangaExtractor(MangadexExtractor):
("https://mangadex.org/title/13004/yorumori-no-kuni-no-sora-ni", {
"count": 0,
}),
("https://mangadex.org/title/2946/souten-no-koumori", None),
]
("https://mangadex.org/title/2946/souten-no-koumori"),
)
def __init__(self, match):
MangadexExtractor.__init__(self)

@ -15,15 +15,15 @@ from .. import text
class MangafoxChapterExtractor(ChapterExtractor):
"""Extractor for manga-chapters from fanfox.net"""
category = "mangafox"
pattern = [(r"(?:https?://)?(?:www\.|m\.)?(?:mangafox\.me|fanfox\.net)"
r"(/manga/[^/]+/((?:v(\d+)/)?c(\d+)([^/?&#]*)))")]
test = [
pattern = (r"(?:https?://)?(?:www\.|m\.)?(?:mangafox\.me|fanfox\.net)"
r"(/manga/[^/]+/((?:v(\d+)/)?c(\d+)([^/?&#]*)))")
test = (
("http://fanfox.net/manga/kidou_keisatsu_patlabor/v05/c006.2/1.html", {
"keyword": "36b570e9ef11b4748407324fe08bebbe4856e6fd",
"content": "5c50c252dcf12ffecf68801f4db8a2167265f66c",
}),
("http://mangafox.me/manga/kidou_keisatsu_patlabor/v05/c006.2/", None),
]
("http://mangafox.me/manga/kidou_keisatsu_patlabor/v05/c006.2/"),
)
root = "https://m.fanfox.net"
def __init__(self, match):

@ -23,9 +23,9 @@ class MangahereBase():
class MangahereMangaExtractor(MangahereBase, MangaExtractor):
"""Extractor for manga from mangahere.cc"""
pattern = [r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]/manga/"
r"([^/]+)/?(?:#.*)?$"]
test = [
pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]"
r"/manga/([^/]+)/?(?:#.*)?$")
test = (
("https://www.mangahere.cc/manga/aria/", {
"url": "e8971b1605d9888d978ebb2895adb1c7c37d663c",
"keyword": "951eef36a3775525a31ca78c9d9cea546f4cf2f5",
@ -34,9 +34,9 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor):
"url": "6df27c0e105d9ee0b78a7aa77340d0891e6c7fc6",
"keyword": "9542283639bd082fabf3a14b6695697d3ef15111",
}),
("http://www.mangahere.co/manga/aria/", None),
("http://m.mangahere.co/manga/aria/", None),
]
("http://www.mangahere.co/manga/aria/"),
("http://m.mangahere.co/manga/aria/"),
)
def __init__(self, match):
url = "{}/manga/{}/".format(self.root, match.group(1))
@ -68,16 +68,16 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor):
class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
"""Extractor for manga-chapters from mangahere.cc"""
pattern = [(r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]/manga/"
r"([^/]+(?:/v0*(\d+))?/c([^/?&#]+))")]
test = [
pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]/manga/"
r"([^/]+(?:/v0*(\d+))?/c([^/?&#]+))")
test = (
("https://www.mangahere.cc/manga/dongguo_xiaojie/c004.2/", {
"keyword": "0e1cee6dd377da02ad51aa810ba65db3e811aef9",
"content": "708d475f06893b88549cbd30df1e3f9428f2c884",
}),
("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/", None),
("http://m.mangahere.co/manga/dongguo_xiaojie/c003.2/", None),
]
("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
("http://m.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
)
def __init__(self, match):
self.part, self.volume, self.chapter = match.groups()

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -19,17 +19,17 @@ class MangapandaBase():
class MangapandaMangaExtractor(MangapandaBase, MangareaderMangaExtractor):
"""Extractor for manga from mangapanda.com"""
pattern = [r"(?:https?://)?((?:www\.)?mangapanda\.com/[^/?&#]+)/?$"]
test = [("https://www.mangapanda.com/mushishi", {
pattern = r"(?:https?://)?((?:www\.)?mangapanda\.com/[^/?&#]+)/?$"
test = ("https://www.mangapanda.com/mushishi", {
"url": "357f965732371cac1990fee8b480f62e29141a42",
"keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
})]
})
class MangapandaChapterExtractor(MangapandaBase, MangareaderChapterExtractor):
"""Extractor for manga-chapters from mangapanda.com"""
pattern = [r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?&#]+)/(\d+))"]
test = [("https://www.mangapanda.com/red-storm/2", {
pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?&#]+)/(\d+))"
test = ("https://www.mangapanda.com/red-storm/2", {
"url": "1f633f776e950531ba9b1e81965316458e785261",
"keyword": "32b5e84017c2bf5f122b339ecf40899e41f18cc9",
})]
})

@ -40,16 +40,16 @@ class MangaparkBase():
class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
"""Extractor for manga from mangapark.me"""
pattern = [r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
r"(/manga/[^/?&#]+)/?$"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
r"(/manga/[^/?&#]+)/?$")
test = (
("https://mangapark.me/manga/aria", {
"url": "a58be23ef3874fe9705b0b41dd462b67eaaafd9a",
"keyword": "b3b5a30aa2a326bc0ca8b74c65b5ecd4bf676ebf",
}),
("https://mangapark.net/manga/aria", None),
("https://mangapark.com/manga/aria", None),
]
("https://mangapark.net/manga/aria"),
("https://mangapark.com/manga/aria"),
)
def __init__(self, match):
self.root = self.root_fmt.format(match.group(1))
@ -79,9 +79,9 @@ class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"""Extractor for manga-chapters from mangapark.me"""
pattern = [(r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
r"/manga/([^?&#]+/i\d+)")]
test = [
pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
r"/manga/([^?&#]+/i\d+)")
test = (
("https://mangapark.me/manga/gosu/i811615/c55/1", {
"count": 50,
"keyword": "a18e07119b3317d7e795ef37ee69ce0bbb806350",
@ -95,9 +95,9 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"count": 15,
"keyword": "3abb13e6d1ea7f8808b0ec415270b3afac97f98b",
}),
("https://mangapark.net/manga/gosu/i811615/c55/1", None),
("https://mangapark.com/manga/gosu/i811615/c55/1", None),
]
("https://mangapark.net/manga/gosu/i811615/c55/1"),
("https://mangapark.com/manga/gosu/i811615/c55/1"),
)
def __init__(self, match):
tld, self.path = match.groups()

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -34,12 +34,12 @@ class MangareaderBase():
class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
"""Extractor for manga from mangareader.net"""
pattern = [r"(?:https?://)?((?:www\.)?mangareader\.net/[^/?&#]+)/?$"]
pattern = r"(?:https?://)?((?:www\.)?mangareader\.net/[^/?&#]+)/?$"
reverse = False
test = [("https://www.mangareader.net/mushishi", {
test = ("https://www.mangareader.net/mushishi", {
"url": "bc203b858b4ad76e5d77e39118a7be0350e357da",
"keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
})]
})
def chapters(self, page):
results = []
@ -60,12 +60,12 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
"""Extractor for manga-chapters from mangareader.net"""
archive_fmt = "{manga}_{chapter}_{page}"
pattern = [r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/?&#]+)/(\d+))"]
test = [(("https://www.mangareader.net/"
"karate-shoukoushi-kohinata-minoru/11"), {
pattern = r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/?&#]+)/(\d+))"
test = (("https://www.mangareader.net"
"/karate-shoukoushi-kohinata-minoru/11"), {
"url": "061cc92a07edf17bb991ce0821fa4c77a147a860",
"keyword": "2893cfcd1916859fb498f3345f1929f868fe667f",
})]
})
def __init__(self, match):
self.part, self.url_title, self.chapter = match.groups()

@ -16,12 +16,12 @@ class MangastreamChapterExtractor(ChapterExtractor):
"""Extractor for manga-chapters from mangastream.com"""
category = "mangastream"
archive_fmt = "{chapter_id}_{page}"
pattern = [(r"(?:https?://)?(?:www\.)?(?:readms\.net|mangastream\.com)/"
r"r(?:ead)?/([^/]*/([^/]+)/(\d+))")]
test = [
("https://readms.net/r/onepunch_man/087/4874/1", None),
("https://mangastream.com/r/onepunch_man/087/4874/1", None),
]
pattern = (r"(?:https?://)?(?:www\.)?(?:readms\.net|mangastream\.com)/"
r"r(?:ead)?/([^/]*/([^/]+)/(\d+))")
test = (
("https://readms.net/r/onepunch_man/087/4874/1"),
("https://mangastream.com/r/onepunch_man/087/4874/1"),
)
root = "https://readms.net"
def __init__(self, match):

@ -16,7 +16,7 @@ import re
class MastodonExtractor(Extractor):
"""Base class for mastodon extractors"""
basecategory = "mastodon"
directory_fmt = ["mastodon", "{instance}", "{account[username]}"]
directory_fmt = ("mastodon", "{instance}", "{account[username]}")
filename_fmt = "{category}_{id}_{media[id]}.{extension}"
archive_fmt = "{media[id]}"
instance = None
@ -168,8 +168,8 @@ def generate_extractors():
Extr.__doc__ = "Extractor for all images of a user on " + instance
Extr.category = category
Extr.instance = instance
Extr.pattern = [r"(?:https?://)?" + pattern +
r"/@([^/?&#]+)(?:/media)?/?$"]
Extr.pattern = (r"(?:https?://)?" + pattern +
r"/@([^/?&#]+)(?:/media)?/?$")
Extr.root = root
Extr.access_token = token
symtable[Extr.__name__] = Extr
@ -181,7 +181,7 @@ def generate_extractors():
Extr.__doc__ = "Extractor for images from a status on " + instance
Extr.category = category
Extr.instance = instance
Extr.pattern = [r"(?:https?://)?" + pattern + r"/@[^/?&#]+/(\d+)"]
Extr.pattern = r"(?:https?://)?" + pattern + r"/@[^/?&#]+/(\d+)"
Extr.root = root
Extr.access_token = token
symtable[Extr.__name__] = Extr

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -21,11 +21,11 @@ class MyportfolioGalleryExtractor(Extractor):
"""Extractor for an image gallery on www.myportfolio.com"""
category = "myportfolio"
subcategory = "gallery"
directory_fmt = ["{category}", "{user}", "{title}"]
directory_fmt = ("{category}", "{user}", "{title}")
filename_fmt = "{num:>02}.{extension}"
archive_fmt = "{user}_{name}"
pattern = [BASE_PATTERN + r"/(?!projects/?$)([^/?&#]+)"]
test = [
pattern = BASE_PATTERN + r"/(?!projects/?$)([^/?&#]+)"
test = (
("https://hannahcosgrove.myportfolio.com/chloe", {
"url": "d5cf993a05439a9d8a99590aa61e14e5ac8d0cd0",
"keyword": "cdb9ca8bdc16efa6ce04aba384f7932d1610b22f",
@ -33,7 +33,7 @@ class MyportfolioGalleryExtractor(Extractor):
("myportfolio:https://tooco.com.ar/6-of-diamonds-paradise-bird", {
"count": 3,
}),
]
)
def __init__(self, match):
Extractor.__init__(self)
@ -86,8 +86,8 @@ class MyportfolioUserExtractor(Extractor):
"""Extractor for a user's galleries on www.myportfolio.com"""
category = "myportfolio"
subcategory = "user"
pattern = [BASE_PATTERN + r"/?$"]
test = [
pattern = BASE_PATTERN + r"/?$"
test = (
("https://hannahcosgrove.myportfolio.com/", {
"pattern": r"https://hannahcosgrove\.myportfolio\.com/[^/?&#+]+$",
"count": ">= 23",
@ -95,7 +95,7 @@ class MyportfolioUserExtractor(Extractor):
("myportfolio:https://tooco.com.ar/", {
"count": ">= 40",
}),
]
)
def __init__(self, match):
Extractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -16,7 +16,7 @@ import json
class NewgroundsExtractor(Extractor):
"""Base class for newgrounds extractors"""
category = "newgrounds"
directory_fmt = ["{category}", "{user}"]
directory_fmt = ("{category}", "{user}")
filename_fmt = "{category}_{index}_{title}.{extension}"
archive_fmt = "{index}"
@ -93,14 +93,14 @@ class NewgroundsExtractor(Extractor):
class NewgroundsUserExtractor(NewgroundsExtractor):
"""Extractor for all images of a newgrounds user"""
subcategory = "user"
pattern = [r"(?:https?://)?([^.]+)\.newgrounds\.com(?:/art)?/?$"]
test = [
pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com(?:/art)?/?$"
test = (
("https://blitzwuff.newgrounds.com/art", {
"url": "24b19c4a135a09889fac7b46a74e427e4308d02b",
"keyword": "68c235e5c4ce94f2f9e001d84fe801441e5500f1",
}),
("https://blitzwuff.newgrounds.com/", None),
]
("https://blitzwuff.newgrounds.com/"),
)
def get_page_urls(self):
return self._pagination(self.root + "/art/page/1")
@ -109,10 +109,10 @@ class NewgroundsUserExtractor(NewgroundsExtractor):
class NewgroundsImageExtractor(NewgroundsExtractor):
"""Extractor for a single image from newgrounds.com"""
subcategory = "image"
pattern = [r"(?:https?://)?(?:"
pattern = (r"(?:https?://)?(?:"
r"(?:www\.)?newgrounds\.com/art/view/([^/?&#]+)/[^/?&#]+"
r"|art\.ngfiles\.com/images/\d+/\d+_([^_]+)_([^.]+))"]
test = [
r"|art\.ngfiles\.com/images/\d+/\d+_([^_]+)_([^.]+))")
test = (
("https://www.newgrounds.com/art/view/blitzwuff/ffx", {
"url": "e7778c4597a2fb74b46e5f04bb7fa1d80ca02818",
"keyword": "5738e2bf19137898204f36c5ae573826672b612c",
@ -122,7 +122,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
"url": "e7778c4597a2fb74b46e5f04bb7fa1d80ca02818",
"keyword": "5738e2bf19137898204f36c5ae573826672b612c",
}),
]
)
def __init__(self, match):
NewgroundsExtractor.__init__(self, match)
@ -141,11 +141,11 @@ class NewgroundsVideoExtractor(NewgroundsExtractor):
"""Extractor for all videos of a newgrounds user"""
subcategory = "video"
filename_fmt = "{category}_{index}.{extension}"
pattern = [r"(?:https?://)?([^.]+)\.newgrounds\.com/movies/?$"]
test = [("https://twistedgrim.newgrounds.com/movies", {
pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/movies/?$"
test = ("https://twistedgrim.newgrounds.com/movies", {
"pattern": r"ytdl:https?://www\.newgrounds\.com/portal/view/\d+",
"count": ">= 29",
})]
})
def get_page_urls(self):
return self._pagination(self.root + "/movies/page/1")

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -16,13 +16,13 @@ class NgomikChapterExtractor(ChapterExtractor):
"""Extractor for manga-chapters from ngomik.in"""
category = "ngomik"
root = "http://ngomik.in"
pattern = [r"(?:https?://)?(?:www\.)?ngomik\.in"
r"/manga/([^/?&#]+/chapter-[^/?&#]+)"]
test = [(("http://ngomik.in/manga/chuuko-demo-koi-ga-shitai"
"/chapter-21-5?style=list"), {
pattern = (r"(?:https?://)?(?:www\.)?ngomik\.in"
r"/manga/([^/?&#]+/chapter-[^/?&#]+)")
test = (("http://ngomik.in/manga/chuuko-demo-koi-ga-shitai"
"/chapter-21-5?style=list"), {
"url": "e87ed713f31d576013f179b50b4e10d7c678e53a",
"keyword": "a774caea148fc18a7d889f453dadbe3def9e0c2c",
})]
})
def __init__(self, match):
url = "{}/manga/{}?style=list".format(self.root, match.group(1))

@ -39,14 +39,14 @@ class NHentaiExtractor(Extractor):
class NhentaiGalleryExtractor(NHentaiExtractor):
"""Extractor for image galleries from nhentai.net"""
subcategory = "gallery"
directory_fmt = ["{category}", "{gallery_id} {title}"]
directory_fmt = ("{category}", "{gallery_id} {title}")
filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
archive_fmt = "{gallery_id}_{num}"
pattern = [r"(?:https?://)?nhentai\.net/g/(\d+)"]
test = [("https://nhentai.net/g/147850/", {
pattern = r"(?:https?://)?nhentai\.net/g/(\d+)"
test = ("https://nhentai.net/g/147850/", {
"url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0",
"keyword": "2f94976e657f3043a89997e22f4de8e1b22d9175",
})]
})
def __init__(self, match):
NHentaiExtractor.__init__(self)
@ -78,12 +78,12 @@ class NhentaiSearchExtractor(NHentaiExtractor):
"""Extractor for nhentai search results"""
category = "nhentai"
subcategory = "search"
pattern = [r"(?:https?://)?nhentai\.net/search/?\?([^#]+)"]
test = [("https://nhentai.net/search/?q=touhou", {
"pattern": NhentaiGalleryExtractor.pattern[0],
pattern = r"(?:https?://)?nhentai\.net/search/?\?([^#]+)"
test = ("https://nhentai.net/search/?q=touhou", {
"pattern": NhentaiGalleryExtractor.pattern,
"count": 30,
"range": "1-30",
})]
})
def __init__(self, match):
NHentaiExtractor.__init__(self)

@ -16,7 +16,7 @@ from ..cache import cache
class NijieExtractor(AsynchronousMixin, Extractor):
"""Base class for nijie extractors"""
category = "nijie"
directory_fmt = ["{category}", "{user_id}"]
directory_fmt = ("{category}", "{user_id}")
filename_fmt = "{category}_{artist_id}_{image_id}_p{index:>02}.{extension}"
archive_fmt = "{image_id}_{index}"
cookiedomain = "nijie.info"
@ -122,9 +122,9 @@ class NijieExtractor(AsynchronousMixin, Extractor):
class NijieUserExtractor(NijieExtractor):
"""Extractor for works of a nijie-user"""
subcategory = "user"
pattern = [(r"(?:https?://)?(?:www\.)?nijie\.info"
r"/members(?:_illust)?\.php\?id=(\d+)")]
test = [
pattern = (r"(?:https?://)?(?:www\.)?nijie\.info"
r"/members(?:_illust)?\.php\?id=(\d+)")
test = (
("https://nijie.info/members_illust.php?id=44", {
"url": "585d821df4716b1098660a0be426d01db4b65f2a",
"keyword": "1eb3387196f1f30d6d74a41f4c77faaadd588e52",
@ -132,8 +132,8 @@ class NijieUserExtractor(NijieExtractor):
("https://nijie.info/members_illust.php?id=43", {
"exception": exception.NotFoundError,
}),
("https://nijie.info/members.php?id=44", None),
]
("https://nijie.info/members.php?id=44"),
)
def get_image_ids(self):
return self._pagination("members_illust")
@ -142,13 +142,11 @@ class NijieUserExtractor(NijieExtractor):
class NijieDoujinExtractor(NijieExtractor):
"""Extractor for doujin entries of a nijie-user"""
subcategory = "doujin"
pattern = [(r"(?:https?://)?(?:www\.)?nijie\.info/"
r"members_dojin\.php\?id=(\d+)")]
test = [
("https://nijie.info/members_dojin.php?id=6782", {
"count": ">= 18",
}),
]
pattern = (r"(?:https?://)?(?:www\.)?nijie\.info/"
r"members_dojin\.php\?id=(\d+)")
test = ("https://nijie.info/members_dojin.php?id=6782", {
"count": ">= 18",
})
def get_image_ids(self):
return self._pagination("members_dojin")
@ -157,15 +155,13 @@ class NijieDoujinExtractor(NijieExtractor):
class NijieFavoriteExtractor(NijieExtractor):
"""Extractor for all favorites/bookmarks of a nijie-user"""
subcategory = "favorite"
directory_fmt = ["{category}", "bookmarks", "{user_id}"]
directory_fmt = ("{category}", "bookmarks", "{user_id}")
archive_fmt = "f_{user_id}_{image_id}_{index}"
pattern = [(r"(?:https?://)?(?:www\.)?nijie\.info"
r"/user_like_illust_view\.php\?id=(\d+)")]
test = [
("https://nijie.info/user_like_illust_view.php?id=44", {
"count": ">= 16",
}),
]
pattern = (r"(?:https?://)?(?:www\.)?nijie\.info"
r"/user_like_illust_view\.php\?id=(\d+)")
test = ("https://nijie.info/user_like_illust_view.php?id=44", {
"count": ">= 16",
})
def get_image_ids(self):
return self._pagination("user_like_illust_view")
@ -174,9 +170,9 @@ class NijieFavoriteExtractor(NijieExtractor):
class NijieImageExtractor(NijieExtractor):
"""Extractor for a work/image from nijie.info"""
subcategory = "image"
pattern = [r"(?:https?://)?(?:www\.)?nijie\.info"
r"/view(?:_popup)?\.php\?id=(\d+)"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?nijie\.info"
r"/view(?:_popup)?\.php\?id=(\d+)")
test = (
("https://nijie.info/view.php?id=70720", {
"url": "a10d4995645b5f260821e32c60a35f73546c2699",
"keyword": "0728fc3bbef1e192abfd59f88f07921d3d336804",
@ -185,8 +181,8 @@ class NijieImageExtractor(NijieExtractor):
("https://nijie.info/view.php?id=70724", {
"exception": exception.NotFoundError,
}),
("https://nijie.info/view_popup.php?id=70720", None),
]
("https://nijie.info/view_popup.php?id=70720"),
)
def __init__(self, match):
NijieExtractor.__init__(self)

@ -162,7 +162,7 @@ class OAuthBase(Extractor):
class OAuthDeviantart(OAuthBase):
subcategory = "deviantart"
pattern = ["oauth:deviantart$"]
pattern = "oauth:deviantart$"
redirect_uri = "https://mikf.github.io/gallery-dl/oauth-redirect.html"
def items(self):
@ -181,7 +181,7 @@ class OAuthDeviantart(OAuthBase):
class OAuthFlickr(OAuthBase):
subcategory = "flickr"
pattern = ["oauth:flickr$"]
pattern = "oauth:flickr$"
def __init__(self, match):
OAuthBase.__init__(self, match)
@ -202,7 +202,7 @@ class OAuthFlickr(OAuthBase):
class OAuthReddit(OAuthBase):
subcategory = "reddit"
pattern = ["oauth:reddit$"]
pattern = "oauth:reddit$"
def items(self):
yield Message.Version, 1
@ -219,7 +219,7 @@ class OAuthReddit(OAuthBase):
class OAuthSmugmug(OAuthBase):
subcategory = "smugmug"
pattern = ["oauth:smugmug$"]
pattern = "oauth:smugmug$"
def __init__(self, match):
OAuthBase.__init__(self, match)
@ -240,7 +240,7 @@ class OAuthSmugmug(OAuthBase):
class OAuthTumblr(OAuthBase):
subcategory = "tumblr"
pattern = ["oauth:tumblr$"]
pattern = "oauth:tumblr$"
def __init__(self, match):
OAuthBase.__init__(self, match)
@ -261,7 +261,7 @@ class OAuthTumblr(OAuthBase):
class OAuthMastodon(OAuthBase):
subcategory = "mastodon"
pattern = ["oauth:mastodon:(?:https?://)?([^/?&#]+)"]
pattern = "oauth:mastodon:(?:https?://)?([^/?&#]+)"
def __init__(self, match):
OAuthBase.__init__(self, match)

@ -42,13 +42,13 @@ class PahealExtractor(SharedConfigMixin, Extractor):
class PahealTagExtractor(PahealExtractor):
"""Extractor for images from rule34.paheal.net by search-tags"""
subcategory = "tag"
directory_fmt = ["{category}", "{tags}"]
pattern = [r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net"
r"/post/list/([^/?&#]+)"]
test = [("https://rule34.paheal.net/post/list/k-on/1", {
directory_fmt = ("{category}", "{tags}")
pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net"
r"/post/list/([^/?&#]+)")
test = ("https://rule34.paheal.net/post/list/k-on/1", {
"url": "0f5a777cea524635760de32dd85a3de5ac5f3f43",
"keyword": "fddaa6329bae5b99e8a666eeeb1cb7721d21bf6d",
})]
})
per_page = 70
def __init__(self, match):
@ -92,13 +92,13 @@ class PahealTagExtractor(PahealExtractor):
class PahealPostExtractor(PahealExtractor):
"""Extractor for single images from rule34.paheal.net"""
subcategory = "post"
pattern = [r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net"
r"/post/view/(\d+)"]
test = [("https://rule34.paheal.net/post/view/481609", {
pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net"
r"/post/view/(\d+)")
test = ("https://rule34.paheal.net/post/view/481609", {
"url": "3aa2189c8d1fa952a4d3420def93fd2bd54d6741",
"keyword": "d7a0bd6d8b0a5bd8300857044ed2d53d481d37cf",
"content": "7b924bcf150b352ac75c9d281d061e174c851a11",
})]
})
def __init__(self, match):
PahealExtractor.__init__(self)

@ -18,35 +18,34 @@ class PhotobucketAlbumExtractor(Extractor):
"""Extractor for albums on photobucket.com"""
category = "photobucket"
subcategory = "album"
directory_fmt = ["{category}", "{username}", "{location}"]
directory_fmt = ("{category}", "{username}", "{location}")
filename_fmt = "{offset:>03}{pictureId:?_//}_{titleOrFilename}.{extension}"
archive_fmt = "{id}"
pattern = [r"(?:https?://)?((?:[^.]+\.)?photobucket\.com)"
r"/user/[^/?&#]+/library/[^?&#]*"]
test = [
pattern = (r"(?:https?://)?((?:[^.]+\.)?photobucket\.com)"
r"/user/[^/?&#]+/library/[^?&#]*")
test = (
("http://s258.photobucket.com/user/focolandia/library/", {
"pattern": r"http://i\d+.photobucket.com/albums/hh280/focolandia",
"count": ">= 39"
}),
# subalbums from main "directory"
# subalbums of main "directory"
("http://s271.photobucket.com/user/lakerfanryan/library/", {
"options": (("image-filter", "False"),),
"pattern": pattern[0],
"pattern": pattern,
"count": 1,
}),
# subalbums from subalbum without images
# subalbums of subalbum without images
("http://s271.photobucket.com/user/lakerfanryan/library/Basketball", {
"pattern": pattern[0],
"pattern": pattern,
"count": ">= 9",
}),
# private (missing JSON data)
("http://s1277.photobucket.com/user/sinisterkat44/library/", {
"count": 0,
}),
("http://s1110.photobucket.com/user/chndrmhn100/library/"
"Chandu%20is%20the%20King?sort=3&page=1", None),
]
"Chandu%20is%20the%20King?sort=3&page=1"),
)
def __init__(self, match):
Extractor.__init__(self)
@ -108,13 +107,13 @@ class PhotobucketImageExtractor(Extractor):
"""Extractor for individual images from photobucket.com"""
category = "photobucket"
subcategory = "image"
directory_fmt = ["{category}", "{username}"]
directory_fmt = ("{category}", "{username}")
filename_fmt = "{pictureId:?/_/}{titleOrFilename}.{extension}"
archive_fmt = "{username}_{id}"
pattern = [r"(?:https?://)?(?:[^.]+\.)?photobucket\.com"
pattern = (r"(?:https?://)?(?:[^.]+\.)?photobucket\.com"
r"(?:/gallery/user/([^/?&#]+)/media/([^/?&#]+)"
r"|/user/([^/?&#]+)/media/[^?&#]+\.html)"]
test = [
r"|/user/([^/?&#]+)/media/[^?&#]+\.html)")
test = (
(("http://s271.photobucket.com/user/lakerfanryan"
"/media/Untitled-3-1.jpg.html"), {
"url": "3b647deeaffc184cc48c89945f67574559c9051f",
@ -125,7 +124,7 @@ class PhotobucketImageExtractor(Extractor):
"url": "12c1890c09c9cdb8a88fba7eec13f324796a8d7b",
"keyword": "61200a223df6c06f45ac3d30c88b3f5b048ce9a8",
}),
]
)
def __init__(self, match):
Extractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -15,7 +15,7 @@ from .. import text
class PiczelExtractor(Extractor):
"""Base class for piczel extractors"""
category = "piczel"
directory_fmt = ["{category}", "{user[username]}"]
directory_fmt = ("{category}", "{user[username]}")
filename_fmt = "{category}_{id}_{title}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
root = "https://piczel.tv"
@ -57,10 +57,10 @@ class PiczelExtractor(Extractor):
class PiczelUserExtractor(PiczelExtractor):
"""Extractor for all images from a user's gallery"""
subcategory = "user"
pattern = [r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?&#]+)/?$"]
test = [("https://piczel.tv/gallery/Lulena", {
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?&#]+)/?$"
test = ("https://piczel.tv/gallery/Lulena", {
"count": ">= 13",
})]
})
def get_images(self):
url = "{}/api/users/{}/gallery".format(self.api_root, self.item_id)
@ -70,13 +70,13 @@ class PiczelUserExtractor(PiczelExtractor):
class PiczelFolderExtractor(PiczelExtractor):
"""Extractor for images inside a user's folder"""
subcategory = "folder"
directory_fmt = ["{category}", "{user[username]}", "{folder[name]}"]
directory_fmt = ("{category}", "{user[username]}", "{folder[name]}")
archive_fmt = "f{folder[id]}_{id}_{num}"
pattern = [r"(?:https?://)?(?:www\.)?piczel\.tv"
r"/gallery/(?!image)[^/?&#]+/(\d+)"]
test = [("https://piczel.tv/gallery/Lulena/1114", {
pattern = (r"(?:https?://)?(?:www\.)?piczel\.tv"
r"/gallery/(?!image)[^/?&#]+/(\d+)")
test = ("https://piczel.tv/gallery/Lulena/1114", {
"count": ">= 4",
})]
})
def get_images(self):
url = "{}/api/gallery/folder/{}".format(self.api_root, self.item_id)
@ -88,8 +88,8 @@ class PiczelFolderExtractor(PiczelExtractor):
class PiczelImageExtractor(PiczelExtractor):
"""Extractor for individual images"""
subcategory = "image"
pattern = [r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/image/(\d+)"]
test = [("https://piczel.tv/gallery/image/7807", {
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/image/(\d+)"
test = ("https://piczel.tv/gallery/image/7807", {
"url": "9b9e416b6ab7e58676fab84453d5028f306ece34",
"content": "df9a053a24234474a19bce2b7e27e0dec23bff87",
"keyword": {
@ -111,7 +111,7 @@ class PiczelImageExtractor(PiczelExtractor):
"user": dict,
"views": int,
},
})]
})
def get_images(self):
url = "{}/api/gallery/image/{}".format(self.api_root, self.item_id)

@ -56,8 +56,8 @@ class PinterestExtractor(Extractor):
class PinterestPinExtractor(PinterestExtractor):
"""Extractor for images from a single pin from pinterest.com"""
subcategory = "pin"
pattern = [BASE_PATTERN + r"/pin/([^/?#&]+)(?!.*#related$)"]
test = [
pattern = BASE_PATTERN + r"/pin/([^/?#&]+)(?!.*#related$)"
test = (
("https://www.pinterest.com/pin/858146903966145189/", {
"url": "afb3c26719e3a530bb0e871c480882a801a4e8a5",
# image version depends on CDN server used
@ -67,7 +67,7 @@ class PinterestPinExtractor(PinterestExtractor):
("https://www.pinterest.com/pin/858146903966145188/", {
"exception": exception.NotFoundError,
}),
]
)
def __init__(self, match):
PinterestExtractor.__init__(self)
@ -85,10 +85,10 @@ class PinterestPinExtractor(PinterestExtractor):
class PinterestBoardExtractor(PinterestExtractor):
"""Extractor for images from a board from pinterest.com"""
subcategory = "board"
directory_fmt = ["{category}", "{board[owner][username]}", "{board[name]}"]
directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
archive_fmt = "{board[id]}_{id}"
pattern = [BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)(?!.*#related$)"]
test = [
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)(?!.*#related$)"
test = (
("https://www.pinterest.com/g1952849/test-/", {
"pattern": r"https://i\.pinimg\.com/originals/",
"count": 2,
@ -96,7 +96,7 @@ class PinterestBoardExtractor(PinterestExtractor):
("https://www.pinterest.com/g1952848/test/", {
"exception": exception.GalleryDLException,
}),
]
)
def __init__(self, match):
PinterestExtractor.__init__(self)
@ -116,14 +116,12 @@ class PinterestBoardExtractor(PinterestExtractor):
class PinterestRelatedPinExtractor(PinterestPinExtractor):
"""Extractor for related pins of another pin from pinterest.com"""
subcategory = "related-pin"
directory_fmt = ["{category}", "related {original_pin[id]}"]
pattern = [BASE_PATTERN + r"/pin/([^/?#&]+).*#related$"]
test = [
("https://www.pinterest.com/pin/858146903966145189/#related", {
"range": "31-50",
"count": 20,
}),
]
directory_fmt = ("{category}", "related {original_pin[id]}")
pattern = BASE_PATTERN + r"/pin/([^/?#&]+).*#related$"
test = ("https://www.pinterest.com/pin/858146903966145189/#related", {
"range": "31-50",
"count": 20,
})
def metadata(self):
pin = self.api.pin(self.pin_id)
@ -136,15 +134,13 @@ class PinterestRelatedPinExtractor(PinterestPinExtractor):
class PinterestRelatedBoardExtractor(PinterestBoardExtractor):
"""Extractor for related pins of a board from pinterest.com"""
subcategory = "related-board"
directory_fmt = ["{category}", "{board[owner][username]}",
"{board[name]}", "related"]
pattern = [BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+).*#related$"]
test = [
("https://www.pinterest.com/g1952849/test-/#related", {
"range": "31-50",
"count": 20,
}),
]
directory_fmt = ("{category}", "{board[owner][username]}",
"{board[name]}", "related")
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+).*#related$"
test = ("https://www.pinterest.com/g1952849/test-/#related", {
"range": "31-50",
"count": 20,
})
def pins(self):
return self.api.board_related(self.board_id)
@ -153,16 +149,16 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor):
class PinterestPinitExtractor(PinterestExtractor):
"""Extractor for images from a pin.it URL"""
subcategory = "pinit"
pattern = [r"(?:https?://)?pin\.it/([^/?#&]+)"]
pattern = r"(?:https?://)?pin\.it/([^/?#&]+)"
test = [
test = (
("https://pin.it/Hvt8hgT", {
"url": "8daad8558382c68f0868bdbd17d05205184632fa",
}),
("https://pin.it/Hvt8hgS", {
"exception": exception.NotFoundError,
}),
]
)
def __init__(self, match):
PinterestExtractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2018 Mike Fährmann
# Copyright 2014-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -17,7 +17,7 @@ from datetime import datetime, timedelta
class PixivExtractor(Extractor):
"""Base class for pixiv extractors"""
category = "pixiv"
directory_fmt = ["{category}", "{user[id]} {user[account]}"]
directory_fmt = ("{category}", "{user[id]} {user[account]}")
filename_fmt = "{category}_{user[id]}_{id}{num}.{extension}"
archive_fmt = "{id}{num}.{extension}"
@ -82,10 +82,10 @@ class PixivExtractor(Extractor):
class PixivUserExtractor(PixivExtractor):
"""Extractor for works of a pixiv-user"""
subcategory = "user"
pattern = [r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/"
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/"
r"(?:member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"
r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))"]
test = [
r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))")
test = (
("http://www.pixiv.net/member_illust.php?id=173530", {
"url": "852c31ad83b6840bacbce824d85f2a997889efb7",
}),
@ -97,12 +97,12 @@ class PixivUserExtractor(PixivExtractor):
("http://www.pixiv.net/member_illust.php?id=173531", {
"exception": exception.NotFoundError,
}),
("https://www.pixiv.net/u/173530", None),
("https://www.pixiv.net/user/173530", None),
("https://www.pixiv.net/mypage.php#id=173530", None),
("https://www.pixiv.net/#id=173530", None),
("https://touch.pixiv.net/member_illust.php?id=173530", None),
]
("https://www.pixiv.net/u/173530"),
("https://www.pixiv.net/user/173530"),
("https://www.pixiv.net/mypage.php#id=173530"),
("https://www.pixiv.net/#id=173530"),
("https://touch.pixiv.net/member_illust.php?id=173530"),
)
def __init__(self, match):
PixivExtractor.__init__(self)
@ -125,15 +125,15 @@ class PixivUserExtractor(PixivExtractor):
class PixivMeExtractor(PixivExtractor):
"""Extractor for pixiv.me URLs"""
subcategory = "me"
pattern = [r"(?:https?://)?pixiv\.me/([^/?&#]+)"]
test = [
pattern = r"(?:https?://)?pixiv\.me/([^/?&#]+)"
test = (
("https://pixiv.me/del_shannon", {
"url": "0b1a18c3e3553c44ee6e0ccc36a7fd906c498e8f",
}),
("https://pixiv.me/del_shanno", {
"exception": exception.NotFoundError,
}),
]
)
def __init__(self, match):
PixivExtractor.__init__(self)
@ -152,12 +152,12 @@ class PixivMeExtractor(PixivExtractor):
class PixivWorkExtractor(PixivExtractor):
"""Extractor for a single pixiv work/illustration"""
subcategory = "work"
pattern = [r"(?:https?://)?(?:(?:www\.|touch\.)?pixiv\.net"
pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?pixiv\.net"
r"/member(?:_illust)?\.php\?(?:[^&]+&)*illust_id=(\d+)"
r"|(?:i(?:\d+\.pixiv|\.pximg)\.net"
r"/(?:(?:.*/)?img-[^/]+/img/\d{4}(?:/\d\d){5}|img\d+/img/[^/]+)"
r"|img\d*\.pixiv\.net/img/[^/]+|(?:www\.)?pixiv\.net/i)/(\d+))"]
test = [
r"|img\d*\.pixiv\.net/img/[^/]+|(?:www\.)?pixiv\.net/i)/(\d+))")
test = (
(("http://www.pixiv.net/member_illust.php"
"?mode=medium&illust_id=966412"), {
"url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
@ -173,14 +173,14 @@ class PixivWorkExtractor(PixivExtractor):
"url": "7267695a985c4db8759bebcf8d21dbdd2d2317ef",
"keywords": {"frames": list},
}),
(("http://i1.pixiv.net/c/600x600/img-master/"
"img/2008/06/13/00/29/13/966412_p0_master1200.jpg"), None),
(("https://i.pximg.net/img-original/"
"img/2017/04/25/07/33/29/62568267_p0.png"), None),
("https://www.pixiv.net/i/966412", None),
("http://img.pixiv.net/img/soundcross/42626136.jpg", None),
("http://i2.pixiv.net/img76/img/snailrin/42672235.jpg", None),
]
("http://i1.pixiv.net/c/600x600/img-master"
"/img/2008/06/13/00/29/13/966412_p0_master1200.jpg"),
("https://i.pximg.net/img-original"
"/img/2017/04/25/07/33/29/62568267_p0.png"),
("https://www.pixiv.net/i/966412"),
("http://img.pixiv.net/img/soundcross/42626136.jpg"),
("http://i2.pixiv.net/img76/img/snailrin/42672235.jpg"),
)
def __init__(self, match):
PixivExtractor.__init__(self)
@ -199,12 +199,12 @@ class PixivWorkExtractor(PixivExtractor):
class PixivFavoriteExtractor(PixivExtractor):
"""Extractor for all favorites/bookmarks of a pixiv-user"""
subcategory = "favorite"
directory_fmt = ["{category}", "bookmarks",
"{user_bookmark[id]} {user_bookmark[account]}"]
directory_fmt = ("{category}", "bookmarks",
"{user_bookmark[id]} {user_bookmark[account]}")
archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}"
pattern = [r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/bookmark\.php(?:\?([^#]*))?"]
test = [
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/bookmark\.php(?:\?([^#]*))?")
test = (
("https://www.pixiv.net/bookmark.php?id=173530", {
"url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
}),
@ -218,9 +218,9 @@ class PixivFavoriteExtractor(PixivExtractor):
"url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
}),
# touch URLs
("https://touch.pixiv.net/bookmark.php?id=173530", None),
("https://touch.pixiv.net/bookmark.php", None),
]
("https://touch.pixiv.net/bookmark.php?id=173530"),
("https://touch.pixiv.net/bookmark.php"),
)
def __init__(self, match):
PixivExtractor.__init__(self)
@ -254,15 +254,15 @@ class PixivRankingExtractor(PixivExtractor):
"""Extractor for pixiv ranking pages"""
subcategory = "ranking"
archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}"
directory_fmt = ["{category}", "rankings",
"{ranking[mode]}", "{ranking[date]}"]
pattern = [r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/ranking\.php(?:\?([^#]*))?"]
test = [
("https://www.pixiv.net/ranking.php?mode=daily&date=20170818", None),
("https://www.pixiv.net/ranking.php", None),
("https://touch.pixiv.net/ranking.php", None),
]
directory_fmt = ("{category}", "rankings",
"{ranking[mode]}", "{ranking[date]}")
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/ranking\.php(?:\?([^#]*))?")
test = (
("https://www.pixiv.net/ranking.php?mode=daily&date=20170818"),
("https://www.pixiv.net/ranking.php"),
("https://touch.pixiv.net/ranking.php"),
)
def __init__(self, match):
PixivExtractor.__init__(self)
@ -316,13 +316,13 @@ class PixivSearchExtractor(PixivExtractor):
"""Extractor for pixiv search results"""
subcategory = "search"
archive_fmt = "s_{search[word]}_{id}{num}.{extension}"
directory_fmt = ["{category}", "search", "{search[word]}"]
pattern = [r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/search\.php\?([^#]+)"]
test = [
("https://www.pixiv.net/search.php?s_mode=s_tag&word=Original", None),
("https://touch.pixiv.net/search.php?word=Original", None),
]
directory_fmt = ("{category}", "search", "{search[word]}")
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/search\.php\?([^#]+)")
test = (
("https://www.pixiv.net/search.php?s_mode=s_tag&word=Original"),
("https://touch.pixiv.net/search.php?word=Original"),
)
def __init__(self, match):
PixivExtractor.__init__(self)
@ -373,13 +373,13 @@ class PixivFollowExtractor(PixivExtractor):
"""Extractor for new illustrations from your followed artists"""
subcategory = "follow"
archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}"
directory_fmt = ["{category}", "following"]
pattern = [r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/bookmark_new_illust\.php"]
test = [
("https://www.pixiv.net/bookmark_new_illust.php", None),
("https://touch.pixiv.net/bookmark_new_illust.php", None),
]
directory_fmt = ("{category}", "following")
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/bookmark_new_illust\.php")
test = (
("https://www.pixiv.net/bookmark_new_illust.php"),
("https://touch.pixiv.net/bookmark_new_illust.php"),
)
def __init__(self, _):
PixivExtractor.__init__(self)

@ -22,7 +22,6 @@ BASE_PATTERN = r"(?:https?://)?([^/.]+\.reactor\.cc)"
class ReactorExtractor(SharedConfigMixin, Extractor):
"""Base class for *reactor.cc extractors"""
basecategory = "reactor"
directory_fmt = ["{category}"]
filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}"
archive_fmt = "{post_id}_{num}"
@ -144,10 +143,10 @@ class ReactorExtractor(SharedConfigMixin, Extractor):
class ReactorTagExtractor(ReactorExtractor):
"""Extractor for tag searches on *reactor.cc sites"""
subcategory = "tag"
directory_fmt = ["{category}", "{search_tags}"]
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "{search_tags}_{post_id}_{num}"
pattern = [BASE_PATTERN + r"/tag/([^/?&#]+)"]
test = [("http://anime.reactor.cc/tag/Anime+Art", None)]
pattern = BASE_PATTERN + r"/tag/([^/?&#]+)"
test = ("http://anime.reactor.cc/tag/Anime+Art",)
def __init__(self, match):
ReactorExtractor.__init__(self, match)
@ -160,18 +159,18 @@ class ReactorTagExtractor(ReactorExtractor):
class ReactorSearchExtractor(ReactorTagExtractor):
"""Extractor for search results on *reactor.cc sites"""
subcategory = "search"
directory_fmt = ["{category}", "search", "{search_tags}"]
directory_fmt = ("{category}", "search", "{search_tags}")
archive_fmt = "s_{search_tags}_{post_id}_{num}"
pattern = [BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
test = [("http://anime.reactor.cc/search?q=Art", None)]
pattern = BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"
test = ("http://anime.reactor.cc/search?q=Art",)
class ReactorUserExtractor(ReactorExtractor):
"""Extractor for all posts of a user on *reactor.cc sites"""
subcategory = "user"
directory_fmt = ["{category}", "user", "{user}"]
pattern = [BASE_PATTERN + r"/user/([^/?&#]+)"]
test = [("http://anime.reactor.cc/user/Shuster", None)]
directory_fmt = ("{category}", "user", "{user}")
pattern = BASE_PATTERN + r"/user/([^/?&#]+)"
test = ("http://anime.reactor.cc/user/Shuster",)
def __init__(self, match):
ReactorExtractor.__init__(self, match)
@ -184,8 +183,8 @@ class ReactorUserExtractor(ReactorExtractor):
class ReactorPostExtractor(ReactorExtractor):
"""Extractor for single posts on *reactor.cc sites"""
subcategory = "post"
pattern = [BASE_PATTERN + r"/post/(\d+)"]
test = [("http://anime.reactor.cc/post/3576250", None)]
pattern = BASE_PATTERN + r"/post/(\d+)"
test = ("http://anime.reactor.cc/post/3576250",)
def __init__(self, match):
ReactorExtractor.__init__(self, match)
@ -211,22 +210,22 @@ JR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(joyreactor\.c(?:c|om))"
class JoyreactorTagExtractor(ReactorTagExtractor):
"""Extractor for tag searches on joyreactor.cc"""
category = "joyreactor"
pattern = [JR_BASE_PATTERN + r"/tag/([^/?&#]+)"]
test = [
pattern = JR_BASE_PATTERN + r"/tag/([^/?&#]+)"
test = (
("http://joyreactor.cc/tag/Advent+Cirno", {
"count": ">= 17",
}),
("http://joyreactor.com/tag/Cirno", {
"url": "a81382a3146da50b647c475f87427a6ca1d737df",
}),
]
)
class JoyreactorSearchExtractor(ReactorSearchExtractor):
"""Extractor for search results on joyreactor.cc"""
category = "joyreactor"
pattern = [JR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
test = [
pattern = JR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"
test = (
("http://joyreactor.cc/search/Cirno+Gifs", {
"range": "1-25",
"count": ">= 20",
@ -234,26 +233,26 @@ class JoyreactorSearchExtractor(ReactorSearchExtractor):
("http://joyreactor.com/search?q=Cirno+Gifs", {
"count": 0, # no search results on joyreactor.com
}),
]
)
class JoyreactorUserExtractor(ReactorUserExtractor):
"""Extractor for all posts of a user on joyreactor.cc"""
category = "joyreactor"
pattern = [JR_BASE_PATTERN + r"/user/([^/?&#]+)"]
test = [
("http://joyreactor.cc/user/hemantic", None),
pattern = JR_BASE_PATTERN + r"/user/([^/?&#]+)"
test = (
("http://joyreactor.cc/user/hemantic"),
("http://joyreactor.com/user/Tacoman123", {
"url": "0444158f17c22f08515ad4e7abf69ad2f3a63b35",
}),
]
)
class JoyreactorPostExtractor(ReactorPostExtractor):
"""Extractor for single posts on joyreactor.cc"""
category = "joyreactor"
pattern = [JR_BASE_PATTERN + r"/post/(\d+)"]
test = [
pattern = JR_BASE_PATTERN + r"/post/(\d+)"
test = (
("http://joyreactor.com/post/3721876", { # single image
"url": "904779f6571436f3d5adbce30c2c272f6401e14a",
"keyword": "0d231f6ae36c5dca1f7eb71443bab3b2659fcacc",
@ -273,7 +272,7 @@ class JoyreactorPostExtractor(ReactorPostExtractor):
("http://joyreactor.cc/post/1299", { # "malformed" JSON
"url": "d45337fec926159afe11c59e32d259d793dd00b3",
}),
]
)
# --------------------------------------------------------------------
@ -285,48 +284,48 @@ PR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(pornreactor\.cc|fapreactor.com)"
class PornreactorTagExtractor(ReactorTagExtractor):
"""Extractor for tag searches on pornreactor.cc"""
category = "pornreactor"
pattern = [PR_BASE_PATTERN + r"/tag/([^/?&#]+)"]
test = [
pattern = PR_BASE_PATTERN + r"/tag/([^/?&#]+)"
test = (
("http://pornreactor.cc/tag/RiceGnat", {
"range": "1-25",
"count": ">= 25",
}),
("http://fapreactor.com/tag/RiceGnat", None),
]
("http://fapreactor.com/tag/RiceGnat"),
)
class PornreactorSearchExtractor(ReactorSearchExtractor):
"""Extractor for search results on pornreactor.cc"""
category = "pornreactor"
pattern = [PR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
test = [
pattern = PR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"
test = (
("http://pornreactor.cc/search?q=ecchi+hentai", {
"range": "1-25",
"count": ">= 25",
}),
("http://fapreactor.com/search/ecchi+hentai", None),
]
("http://fapreactor.com/search/ecchi+hentai"),
)
class PornreactorUserExtractor(ReactorUserExtractor):
"""Extractor for all posts of a user on pornreactor.cc"""
category = "pornreactor"
pattern = [PR_BASE_PATTERN + r"/user/([^/?&#]+)"]
test = [
pattern = PR_BASE_PATTERN + r"/user/([^/?&#]+)"
test = (
("http://pornreactor.cc/user/Disillusion", {
"range": "1-25",
"count": ">= 25",
}),
("http://fapreactor.com/user/Disillusion", None),
]
("http://fapreactor.com/user/Disillusion"),
)
class PornreactorPostExtractor(ReactorPostExtractor):
"""Extractor for single posts on pornreactor.cc"""
category = "pornreactor"
subcategory = "post"
pattern = [PR_BASE_PATTERN + r"/post/(\d+)"]
test = [
pattern = PR_BASE_PATTERN + r"/post/(\d+)"
test = (
("http://pornreactor.cc/post/863166", {
"url": "9e5f7b374605cbbd413f4f4babb9d1af6f95b843",
"content": "3e2a09f8b5e5ed7722f51c5f423ff4c9260fb23e",
@ -334,4 +333,4 @@ class PornreactorPostExtractor(ReactorPostExtractor):
("http://fapreactor.com/post/863166", {
"url": "83ff7c87741c05bcf1de6825e2b4739afeb87ed5",
}),
]
)

@ -16,7 +16,7 @@ import re
class ReadcomiconlineBase():
"""Base class for readcomiconline extractors"""
category = "readcomiconline"
directory_fmt = ["{category}", "{comic}", "{issue:>03}"]
directory_fmt = ("{category}", "{comic}", "{issue:>03}")
filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}"
archive_fmt = "{issue_id}_{page}"
root = "https://readcomiconline.to"
@ -27,9 +27,9 @@ class ReadcomiconlineBase():
class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
"""Extractor for comics from readcomiconline.to"""
subcategory = "comic"
pattern = [r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
r"(/Comic/[^/?&#]+/?)$"]
test = [
pattern = (r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
r"(/Comic/[^/?&#]+/?)$")
test = (
("https://readcomiconline.to/Comic/W-i-t-c-h", {
"url": "e231bc2a293edb465133c37a8e36a7e7d94cab14",
"keyword": "3986248e4458fa44a201ec073c3684917f48ee0c",
@ -38,7 +38,7 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
"url": "711674cb78ed10bd2557315f7a67552d01b33985",
"keyword": "f5ba5246cd787bb750924d9690cb1549199bd516",
}),
]
)
def __init__(self, match):
MangaExtractor.__init__(self, match, self.root + match.group(1))
@ -68,12 +68,12 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
"""Extractor for comic-issues from readcomiconline.to"""
subcategory = "issue"
pattern = [r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
r"(/Comic/[^/?&#]+/[^/?&#]+\?id=(\d+))"]
test = [("https://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
pattern = (r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
r"(/Comic/[^/?&#]+/[^/?&#]+\?id=(\d+))")
test = ("https://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
"url": "2bbab6ec4fbc05d269cca420a82a9b5acda28682",
"keyword": "c6de1c9c8a307dc4be56783c4ac6f1338ffac6fc",
})]
})
def __init__(self, match):
ChapterExtractor.__init__(self, self.root + match.group(1))

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -17,10 +17,10 @@ import re
class RecursiveExtractor(Extractor):
"""Extractor that fetches URLs from a remote or local source"""
category = "recursive"
pattern = [r"r(?:ecursive)?:(.+)"]
test = [("recursive:https://pastebin.com/raw/FLwrCYsT", {
pattern = r"r(?:ecursive)?:(.+)"
test = ("recursive:https://pastebin.com/raw/FLwrCYsT", {
"url": "eee86d65c346361b818e8f4b2b307d9429f136a2",
})]
})
def __init__(self, match):
Extractor.__init__(self)

@ -27,7 +27,7 @@ class RedditExtractor(Extractor):
self._visited = set()
def items(self):
subre = re.compile(RedditSubmissionExtractor.pattern[0])
subre = re.compile(RedditSubmissionExtractor.pattern)
submissions = self.submissions()
depth = 0
@ -79,16 +79,16 @@ class RedditExtractor(Extractor):
class RedditSubredditExtractor(RedditExtractor):
"""Extractor for images from subreddits on reddit.com"""
subcategory = "subreddit"
pattern = [r"(?:https?://)?(?:\w+\.)?reddit\.com/r/([^/?&#]+)"
pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/r/([^/?&#]+)"
r"(/[a-z]+)?/?"
r"(?:\?.*?(?:\bt=([a-z]+))?)?$"]
test = [
("https://www.reddit.com/r/lavaporn/", None),
("https://www.reddit.com/r/lavaporn/top/?sort=top&t=month", None),
("https://old.reddit.com/r/lavaporn/", None),
("https://np.reddit.com/r/lavaporn/", None),
("https://m.reddit.com/r/lavaporn/", None),
]
r"(?:\?.*?(?:\bt=([a-z]+))?)?$")
test = (
("https://www.reddit.com/r/lavaporn/"),
("https://www.reddit.com/r/lavaporn/top/?sort=top&t=month"),
("https://old.reddit.com/r/lavaporn/"),
("https://np.reddit.com/r/lavaporn/"),
("https://m.reddit.com/r/lavaporn/"),
)
def __init__(self, match):
RedditExtractor.__init__(self)
@ -103,19 +103,19 @@ class RedditSubredditExtractor(RedditExtractor):
class RedditSubmissionExtractor(RedditExtractor):
"""Extractor for images from a submission on reddit.com"""
subcategory = "submission"
pattern = [(r"(?:https?://)?(?:"
r"(?:\w+\.)?reddit\.com/r/[^/?&#]+/comments|"
r"redd\.it"
r")/([a-z0-9]+)")]
test = [
pattern = (r"(?:https?://)?(?:"
r"(?:\w+\.)?reddit\.com/r/[^/?&#]+/comments|"
r"redd\.it"
r")/([a-z0-9]+)")
test = (
("https://www.reddit.com/r/lavaporn/comments/2a00np/", {
"pattern": r"https?://i\.imgur\.com/AaAUCgy\.jpg",
}),
("https://old.reddit.com/r/lavaporn/comments/2a00np/", None),
("https://np.reddit.com/r/lavaporn/comments/2a00np/", None),
("https://m.reddit.com/r/lavaporn/comments/2a00np/", None),
("https://redd.it/2a00np/", None),
]
("https://old.reddit.com/r/lavaporn/comments/2a00np/"),
("https://np.reddit.com/r/lavaporn/comments/2a00np/"),
("https://m.reddit.com/r/lavaporn/comments/2a00np/"),
("https://redd.it/2a00np/"),
)
def __init__(self, match):
RedditExtractor.__init__(self)
@ -130,9 +130,9 @@ class RedditImageExtractor(Extractor):
category = "reddit"
subcategory = "image"
archive_fmt = "{name}"
pattern = [r"(?:https?://)?i\.redd(?:\.it|ituploads\.com)"
r"/[^/?&#]+(?:\?[^#]*)?"]
test = [
pattern = (r"(?:https?://)?i\.redd(?:\.it|ituploads\.com)"
r"/[^/?&#]+(?:\?[^#]*)?")
test = (
("https://i.redd.it/upjtjcx2npzz.jpg", {
"url": "0de614900feef103e580b632190458c0b62b641a",
"content": "cc9a68cf286708d5ce23c68e79cd9cf7826db6a3",
@ -142,7 +142,7 @@ class RedditImageExtractor(Extractor):
"url": "f24f25efcedaddeec802e46c60d77ef975dc52a5",
"content": "541dbcc3ad77aa01ee21ca49843c5e382371fae7",
}),
]
)
def __init__(self, match):
Extractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2018 Mike Fährmann
# Copyright 2016-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -28,29 +28,29 @@ class Rule34Extractor(booru.XmlParserMixin,
class Rule34TagExtractor(booru.TagMixin, Rule34Extractor):
"""Extractor for images from rule34.xxx based on search-tags"""
pattern = [r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
r"\?page=post&s=list&tags=(?P<tags>[^&#]+)"]
test = [("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
test = ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
"content": "a01768c6f86f32eb7ebbdeb87c30b0d9968d7f97",
"pattern": r"https?://([^.]+\.)?rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
"count": 2,
})]
})
class Rule34PoolExtractor(booru.GelbooruPoolMixin, Rule34Extractor):
"""Extractor for image-pools from rule34.xxx"""
pattern = [r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
r"\?page=pool&s=show&id=(?P<pool>\d+)"]
test = [("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
r"\?page=pool&s=show&id=(?P<pool>\d+)")
test = ("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
"count": 3,
})]
})
class Rule34PostExtractor(booru.PostMixin, Rule34Extractor):
"""Extractor for single images from rule34.xxx"""
pattern = [r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
r"\?page=post&s=view&id=(?P<post>\d+)"]
test = [("https://rule34.xxx/index.php?page=post&s=view&id=1974854", {
pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
r"\?page=post&s=view&id=(?P<post>\d+)")
test = ("https://rule34.xxx/index.php?page=post&s=view&id=1974854", {
"content": "fd2820df78fb937532da0a46f7af6cefc4dc94be",
"options": (("tags", True),),
"keyword": {
@ -59,4 +59,4 @@ class Rule34PostExtractor(booru.PostMixin, Rule34Extractor):
"tags_copyright": "phantasy_star phantasy_star_online_2",
"tags_general": str,
},
})]
})

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -27,28 +27,28 @@ class SafebooruExtractor(booru.XmlParserMixin,
class SafebooruTagExtractor(booru.TagMixin, SafebooruExtractor):
"""Extractor for images from safebooru.org based on search-tags"""
pattern = [r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
r"\?page=post&s=list&tags=(?P<tags>[^&#]+)"]
test = [("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
test = ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
"url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
"content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
})]
})
class SafebooruPoolExtractor(booru.GelbooruPoolMixin, SafebooruExtractor):
"""Extractor for image-pools from safebooru.org"""
pattern = [r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
r"\?page=pool&s=show&id=(?P<pool>\d+)"]
test = [("https://safebooru.org/index.php?page=pool&s=show&id=11", {
pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
r"\?page=pool&s=show&id=(?P<pool>\d+)")
test = ("https://safebooru.org/index.php?page=pool&s=show&id=11", {
"count": 5,
})]
})
class SafebooruPostExtractor(booru.PostMixin, SafebooruExtractor):
"""Extractor for single images from safebooru.org"""
pattern = [r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
r"\?page=post&s=view&id=(?P<post>\d+)"]
test = [("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
r"\?page=post&s=view&id=(?P<post>\d+)")
test = ("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
"url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
"content": "93b293b27dabd198afafabbaf87c49863ac82f27",
"options": (("tags", True),),
@ -58,4 +58,4 @@ class SafebooruPostExtractor(booru.PostMixin, SafebooruExtractor):
"tags_copyright": "dc_comics mcdonald's the_dark_knight",
"tags_general": str,
},
})]
})

@ -145,14 +145,14 @@ class SankakuExtractor(SharedConfigMixin, Extractor):
class SankakuTagExtractor(SankakuExtractor):
"""Extractor for images from chan.sankakucomplex.com by search-tags"""
subcategory = "tag"
directory_fmt = ["{category}", "{search_tags}"]
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/\?([^#]*)"]
test = [
pattern = r"(?:https?://)?chan\.sankakucomplex\.com/\?([^#]*)"
test = (
("https://chan.sankakucomplex.com/?tags=bonocho", {
"count": 5,
"pattern": (r"https://cs\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+"),
"pattern": r"https://cs\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
}),
# respect 'page' query parameter
("https://chan.sankakucomplex.com/?tags=bonocho&page=2", {
@ -168,9 +168,9 @@ class SankakuTagExtractor(SankakuExtractor):
"exception": exception.StopExtraction,
}),
# match arbitrary query parameters
(("https://chan.sankakucomplex.com/"
"?tags=marie_rose&page=98&next=3874906&commit=Search"), None),
]
("https://chan.sankakucomplex.com"
"/?tags=marie_rose&page=98&next=3874906&commit=Search"),
)
per_page = 20
def __init__(self, match):
@ -232,12 +232,12 @@ class SankakuTagExtractor(SankakuExtractor):
class SankakuPoolExtractor(SankakuExtractor):
"""Extractor for image-pools from chan.sankakucomplex.com"""
subcategory = "pool"
directory_fmt = ["{category}", "pool", "{pool}"]
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/pool/show/(\d+)"]
test = [("https://chan.sankakucomplex.com/pool/show/90", {
pattern = r"(?:https?://)?chan\.sankakucomplex\.com/pool/show/(\d+)"
test = ("https://chan.sankakucomplex.com/pool/show/90", {
"count": 5,
})]
})
per_page = 24
def __init__(self, match):
@ -272,8 +272,8 @@ class SankakuPostExtractor(SankakuExtractor):
"""Extractor for single images from chan.sankakucomplex.com"""
subcategory = "post"
archive_fmt = "{id}"
pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/post/show/(\d+)"]
test = [("https://chan.sankakucomplex.com/post/show/360451", {
pattern = r"(?:https?://)?chan\.sankakucomplex\.com/post/show/(\d+)"
test = ("https://chan.sankakucomplex.com/post/show/360451", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
"options": (("tags", True),),
"keyword": {
@ -284,7 +284,7 @@ class SankakuPostExtractor(SankakuExtractor):
"tags_character": str,
"tags_general": str,
},
})]
})
def __init__(self, match):
SankakuExtractor.__init__(self)

@ -69,11 +69,11 @@ class SeigaExtractor(Extractor):
class SeigaUserExtractor(SeigaExtractor):
"""Extractor for images of a user from seiga.nicovideo.jp"""
subcategory = "user"
directory_fmt = ["{category}", "{user[id]}"]
directory_fmt = ("{category}", "{user[id]}")
filename_fmt = "{category}_{user[id]}_{image_id}.{extension}"
pattern = [(r"(?:https?://)?(?:www\.|seiga\.)?nicovideo\.jp/"
r"user/illust/(\d+)(?:\?(?:[^&]+&)*sort=([^&#]+))?")]
test = [
pattern = (r"(?:https?://)?(?:www\.|seiga\.)?nicovideo\.jp/"
r"user/illust/(\d+)(?:\?(?:[^&]+&)*sort=([^&#]+))?")
test = (
("http://seiga.nicovideo.jp/user/illust/39537793", {
"pattern": r"https://lohas\.nicoseiga\.jp/priv/[0-9a-f]+/\d+/\d+",
"count": ">= 4",
@ -95,9 +95,9 @@ class SeigaUserExtractor(SeigaExtractor):
("http://seiga.nicovideo.jp/user/illust/79433", {
"exception": exception.NotFoundError,
}),
(("http://seiga.nicovideo.jp/user/illust/39537793"
"?sort=image_view&target=illust_all"), None),
]
("http://seiga.nicovideo.jp/user/illust/39537793"
"?sort=image_view&target=illust_all"),
)
def __init__(self, match):
SeigaExtractor.__init__(self)
@ -166,10 +166,10 @@ class SeigaImageExtractor(SeigaExtractor):
"""Extractor for single images from seiga.nicovideo.jp"""
subcategory = "image"
filename_fmt = "{category}_{image_id}.{extension}"
pattern = [r"(?:https?://)?(?:"
pattern = (r"(?:https?://)?(?:"
r"(?:www\.|seiga\.)?nicovideo\.jp/(?:seiga/im|image/source/)"
r"|lohas\.nicoseiga\.jp/(?:priv|o)/[^/]+/\d+/)(\d+)"]
test = [
r"|lohas\.nicoseiga\.jp/(?:priv|o)/[^/]+/\d+/)(\d+)")
test = (
("http://seiga.nicovideo.jp/seiga/im5977527", {
"keyword": "f66ba5de33d4ce2cb57f23bb37e1e847e0771c10",
"content": "d9202292012178374d57fb0126f6124387265297",
@ -177,7 +177,7 @@ class SeigaImageExtractor(SeigaExtractor):
("http://seiga.nicovideo.jp/seiga/im123", {
"exception": exception.NotFoundError,
}),
]
)
def __init__(self, match):
SeigaExtractor.__init__(self)

@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2018 Mike Fährmann
# Copyright 2016-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract manga-chapters from from http://raw.senmanga.com/"""
"""Extract manga-chapters from https://raw.senmanga.com/"""
from .common import Extractor, Message
from .. import text
@ -16,11 +16,11 @@ class SenmangaChapterExtractor(Extractor):
"""Extractor for manga-chapters from raw.senmanga.com"""
category = "senmanga"
subcategory = "chapter"
directory_fmt = ["{category}", "{manga}", "{chapter_string}"]
directory_fmt = ("{category}", "{manga}", "{chapter_string}")
filename_fmt = "{manga}_{chapter_string}_{page:>03}.{extension}"
archive_fmt = "{manga}_{chapter_string}_{page}"
pattern = [r"(?:https?://)?raw\.senmanga\.com/([^/]+/[^/]+)"]
test = [
pattern = r"(?:https?://)?raw\.senmanga\.com/([^/]+/[^/]+)"
test = (
("http://raw.senmanga.com/Bokura-wa-Minna-Kawaisou/37A/1", {
"url": "5f95140ff511d8497e2ec08fa7267c6bb231faec",
"keyword": "705d941a150765edb33cd2707074bd703a93788c",
@ -30,7 +30,7 @@ class SenmangaChapterExtractor(Extractor):
"url": "8347b9f00c14b864dd3c19a1f5ae52adb2ef00de",
"keyword": "8a8ab2529ba2edfc83a6b3a8bede1d6c580db7b4",
}),
]
)
root = "https://raw.senmanga.com"
def __init__(self, match):

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -16,13 +16,13 @@ class SimplyhentaiGalleryExtractor(ChapterExtractor):
"""Extractor for image galleries from simply-hentai.com"""
category = "simplyhentai"
subcategory = "gallery"
directory_fmt = ["{category}", "{gallery_id} {title}"]
directory_fmt = ("{category}", "{gallery_id} {title}")
filename_fmt = "{category}_{gallery_id}_{page:>03}.{extension}"
archive_fmt = "{image_id}"
pattern = [r"(?:https?://)?(?!videos\.)([\w-]+\.simply-hentai\.com"
pattern = (r"(?:https?://)?(?!videos\.)([\w-]+\.simply-hentai\.com"
r"(?!/(?:album|gifs?|images?|series)(?:/|$))"
r"(?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?&#]+)+)"]
test = [
r"(?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?&#]+)+)")
test = (
(("https://original-work.simply-hentai.com"
"/amazon-no-hiyaku-amazon-elixir"), {
"url": "258289249990502c3138719cb89e995a60861e49",
@ -32,10 +32,10 @@ class SimplyhentaiGalleryExtractor(ChapterExtractor):
"exception": exception.GalleryDLException,
}),
# custom subdomain
("https://pokemon.simply-hentai.com/mao-friends-9bc39", None),
("https://pokemon.simply-hentai.com/mao-friends-9bc39"),
# www subdomain, two path segments
("https://www.simply-hentai.com/vocaloid/black-magnet", None),
]
("https://www.simply-hentai.com/vocaloid/black-magnet"),
)
def __init__(self, match):
url = "https://" + match.group(1)
@ -81,12 +81,12 @@ class SimplyhentaiImageExtractor(Extractor):
"""Extractor for individual images from simply-hentai.com"""
category = "simplyhentai"
subcategory = "image"
directory_fmt = ["{category}", "{type}s"]
directory_fmt = ("{category}", "{type}s")
filename_fmt = "{category}_{token}{title:?_//}.{extension}"
archive_fmt = "{token}"
pattern = [r"(?:https?://)?(?:www\.)?(simply-hentai\.com"
r"/(image|gif)/[^/?&#]+)"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?(simply-hentai\.com"
r"/(image|gif)/[^/?&#]+)")
test = (
(("https://www.simply-hentai.com/image"
"/pheromomania-vol-1-kanzenban-isao-3949d8b3-400c-4b6"), {
"url": "0338eb137830ab6f81e5f410d3936ef785d063d9",
@ -96,7 +96,7 @@ class SimplyhentaiImageExtractor(Extractor):
"url": "11c060d7ec4dfd0bd105300b6e1fd454674a5af1",
"keyword": "fbfd5c418f3d9d7d0b0ba0cda0602240820da693",
}),
]
)
def __init__(self, match):
Extractor.__init__(self)
@ -127,11 +127,11 @@ class SimplyhentaiVideoExtractor(Extractor):
"""Extractor for hentai videos from simply-hentai.com"""
category = "simplyhentai"
subcategory = "video"
directory_fmt = ["{category}", "{type}s"]
directory_fmt = ("{category}", "{type}s")
filename_fmt = "{title}{episode:?_//>02}.{extension}"
archive_fmt = "{title}_{episode}"
pattern = [r"(?:https?://)?(videos\.simply-hentai\.com/[^/?&#]+)"]
test = [
pattern = r"(?:https?://)?(videos\.simply-hentai\.com/[^/?&#]+)"
test = (
("https://videos.simply-hentai.com/creamy-pie-episode-02", {
"pattern": r"https://www\.googleapis\.com/drive/v3/files"
r"/0B1ecQ8ZVLm3JcHZzQzBnVy1ZUmc\?alt=media&key=[\w-]+",
@ -143,7 +143,7 @@ class SimplyhentaiVideoExtractor(Extractor):
"url": "ad9a36ae06c601b6490e3c401834b4949d947eb0",
"keyword": "fef03513d5e1a9958d63e45a1d583e2f658b1168",
}),
]
)
def __init__(self, match):
Extractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2018 Mike Fährmann, Leonardo Taccari
# Copyright 2016-2019 Mike Fährmann, Leonardo Taccari
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -16,30 +16,28 @@ class SlidesharePresentationExtractor(Extractor):
"""Extractor for images from a presentation on slideshare.net"""
category = "slideshare"
subcategory = "presentation"
directory_fmt = ["{category}", "{user}"]
directory_fmt = ("{category}", "{user}")
filename_fmt = "{presentation}-{num:>02}.{extension}"
archive_fmt = "{presentation}_{num}"
pattern = [r"(?:https?://)?(?:www\.)?slideshare\.net"
r"/(?:mobile/)?([^/?&#]+)/([^/?&#]+)"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?slideshare\.net"
r"/(?:mobile/)?([^/?&#]+)/([^/?&#]+)")
test = (
(("https://www.slideshare.net"
"/Slideshare/get-started-with-slide-share"), {
"url": "23685fb9b94b32c77a547d45dc3a82fe7579ea18",
"content": "ee54e54898778e92696a7afec3ffabdbd98eb0cc",
}),
# long title
(("https://www.slideshare.net/pragmaticsolutions/warum-sie-nicht-ihren"
"-mitarbeitenden-ndern-sollten-sondern-ihr-managementsystem"), {
"url": "cf70ca99f57f61affab47ebf8583eb564b21e3a7",
}),
# mobile URL
(("https://www.slideshare.net"
"/mobile/uqudent/introduction-to-fixed-prosthodontics"), {
"url": "59993ad7b0cb93c73011547eedcd02c622649e9d",
}),
]
)
def __init__(self, match):
Extractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -51,10 +51,10 @@ class SmugmugExtractor(Extractor):
class SmugmugAlbumExtractor(SmugmugExtractor):
"""Extractor for smugmug albums"""
subcategory = "album"
directory_fmt = ["{category}", "{User[NickName]}", "{Album[Name]}"]
directory_fmt = ("{category}", "{User[NickName]}", "{Album[Name]}")
archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}"
pattern = [r"smugmug:album:([^:]+)$"]
test = [
pattern = r"smugmug:album:([^:]+)$"
test = (
("smugmug:album:ddvxpg", {
"url": "0429e9bf50ee600674e448934e3882ca1761ae7b",
}),
@ -66,7 +66,7 @@ class SmugmugAlbumExtractor(SmugmugExtractor):
("smugmug:album:6VRT8G", {
"url": "c4a0f4c4bfd514b93cbdeb02b3345bf7ef6604df",
}),
]
)
def __init__(self, match):
SmugmugExtractor.__init__(self)
@ -92,10 +92,10 @@ class SmugmugAlbumExtractor(SmugmugExtractor):
class SmugmugImageExtractor(SmugmugExtractor):
"""Extractor for individual smugmug images"""
subcategory = "image"
directory_fmt = ["{category}", "{User[NickName]}"]
directory_fmt = ("{category}", "{User[NickName]}")
archive_fmt = "{Image[ImageKey]}"
pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"]
test = [
pattern = BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"
test = (
("https://acapella.smugmug.com/Micro-Macro/Drops/i-g2Dmf9z", {
"url": "78f0bf3516b6d670b7319216bdeccb35942ca4cf",
"keyword": "8ebb25fb493d3cd5cfcb8f3a4601fa721afe1d83",
@ -106,7 +106,7 @@ class SmugmugImageExtractor(SmugmugExtractor):
"url": "d4047637947b35e4ef49e3c7cb70303cc224a3a0",
"keyword": "0a1b12efd789c42d9b061f01b2a1fcfd6af32003",
}),
]
)
def __init__(self, match):
SmugmugExtractor.__init__(self)
@ -129,8 +129,8 @@ class SmugmugImageExtractor(SmugmugExtractor):
class SmugmugPathExtractor(SmugmugExtractor):
"""Extractor for smugmug albums from URL paths and users"""
subcategory = "path"
pattern = [BASE_PATTERN + r"((?:/[^/?&#a-fh-mo-z][^/?&#]*)*)/?$"]
test = [
pattern = BASE_PATTERN + r"((?:/[^/?&#a-fh-mo-z][^/?&#]*)*)/?$"
test = (
("https://acapella.smugmug.com/Micro-Macro/Drops/", {
"pattern": "smugmug:album:ddvxpg$",
}),
@ -149,8 +149,8 @@ class SmugmugPathExtractor(SmugmugExtractor):
("smugmug:www.creativedogportraits.com/", {
"pattern": "smugmug:album:txWXzs$",
}),
("smugmug:https://www.creativedogportraits.com/", None),
]
("smugmug:https://www.creativedogportraits.com/"),
)
def __init__(self, match):
SmugmugExtractor.__init__(self)

@ -32,8 +32,12 @@ class TestExtractor(Extractor):
run all tests
"""
category = "test"
pattern = [r"t(?:est)?:([^:]*)(?::([^:]*)(?::(\*|[\d,]*))?)?$"]
test = ("test:pixiv", "test:pixiv:user,favorite:0", "test:")
pattern = r"t(?:est)?:([^:]*)(?::([^:]*)(?::(\*|[\d,]*))?)?$"
test = (
("test:pixiv"),
("test:pixiv:user,favorite:0"),
("test:"),
)
def __init__(self, match):
Extractor.__init__(self)

@ -44,12 +44,11 @@ class TsuminoGalleryExtractor(TsuminoBase, ChapterExtractor):
"""Extractor for image galleries on tsumino.com"""
subcategory = "gallery"
filename_fmt = "{category}_{gallery_id}_{page:>03}.{extension}"
directory_fmt = ["{category}", "{gallery_id} {title}"]
directory_fmt = ("{category}", "{gallery_id} {title}")
archive_fmt = "{gallery_id}_{page}"
pattern = [r"(?i)(?:https?://)?(?:www\.)?tsumino\.com"
r"/(?:Book/Info|Read/View)/(\d+)"]
test = [
pattern = (r"(?i)(?:https?://)?(?:www\.)?tsumino\.com"
r"/(?:Book/Info|Read/View)/(\d+)")
test = (
("https://www.tsumino.com/Book/Info/45834", {
"url": "ed3e39bc21221fbd21b9a2ba711e8decb6fdc6bc",
"keyword": {
@ -73,8 +72,8 @@ class TsuminoGalleryExtractor(TsuminoBase, ChapterExtractor):
"uploader": "NHNL1"
},
}),
("https://www.tsumino.com/Read/View/45834", None),
]
("https://www.tsumino.com/Read/View/45834"),
)
def __init__(self, match):
self.gallery_id = match.group(1)
@ -139,21 +138,21 @@ class TsuminoGalleryExtractor(TsuminoBase, ChapterExtractor):
class TsuminoSearchExtractor(TsuminoBase, Extractor):
"""Extractor for search results on tsumino.com"""
subcategory = "search"
pattern = [r"(?i)(?:https?://)?(?:www\.)?tsumino\.com"
r"/(?:Books/?)?#(.+)"]
test = [
pattern = (r"(?i)(?:https?://)?(?:www\.)?tsumino\.com"
r"/(?:Books/?)?#(.+)")
test = (
("https://www.tsumino.com/Books#?Character=Reimu+Hakurei", {
"pattern": TsuminoGalleryExtractor.pattern[0],
"pattern": TsuminoGalleryExtractor.pattern,
"range": "1-40",
"count": 40,
}),
(("http://www.tsumino.com/Books#~(Tags~(~"
"(Type~7~Text~'Reimu*20Hakurei~Exclude~false)~"
"(Type~'1~Text~'Pantyhose~Exclude~false)))#"), {
"pattern": TsuminoGalleryExtractor.pattern[0],
"pattern": TsuminoGalleryExtractor.pattern,
"count": ">= 3",
}),
]
)
def __init__(self, match):
Extractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2018 Mike Fährmann
# Copyright 2016-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -42,7 +42,7 @@ BASE_PATTERN = (
class TumblrExtractor(Extractor):
"""Base class for tumblr extractors"""
category = "tumblr"
directory_fmt = ["{category}", "{name}"]
directory_fmt = ("{category}", "{name}")
filename_fmt = "{category}_{blog_name}_{id}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
@ -191,8 +191,8 @@ class TumblrExtractor(Extractor):
class TumblrUserExtractor(TumblrExtractor):
"""Extractor for all images from a tumblr-user"""
subcategory = "user"
pattern = [BASE_PATTERN + r"(?:/page/\d+|/archive)?/?$"]
test = [
pattern = BASE_PATTERN + r"(?:/page/\d+|/archive)?/?$"
test = (
("http://demo.tumblr.com/", {
"pattern": r"https://\d+\.media\.tumblr\.com"
r"/tumblr_[^/_]+_\d+\.jpg",
@ -218,11 +218,11 @@ class TumblrUserExtractor(TumblrExtractor):
"count": 2,
"keyword": {"tags": ["test", "private", "hidden"]},
}),
("https://demo.tumblr.com/page/2", None),
("https://demo.tumblr.com/archive", None),
("tumblr:http://www.b-authentique.com/", None),
("tumblr:www.b-authentique.com", None),
]
("https://demo.tumblr.com/page/2"),
("https://demo.tumblr.com/archive"),
("tumblr:http://www.b-authentique.com/"),
("tumblr:www.b-authentique.com"),
)
def posts(self):
return self.api.posts(self.blog, {})
@ -231,8 +231,8 @@ class TumblrUserExtractor(TumblrExtractor):
class TumblrPostExtractor(TumblrExtractor):
"""Extractor for images from a single post on tumblr"""
subcategory = "post"
pattern = [BASE_PATTERN + r"/(?:post|image)/(\d+)"]
test = [
pattern = BASE_PATTERN + r"/(?:post|image)/(\d+)"
test = (
("http://demo.tumblr.com/post/459265350", {
"pattern": (r"https://\d+\.media\.tumblr\.com"
r"/tumblr_[^/_]+_1280.jpg"),
@ -262,8 +262,8 @@ class TumblrPostExtractor(TumblrExtractor):
("https://mikf123.tumblr.com/post/181022380064/chat-post", {
"count": 0,
}),
("http://demo.tumblr.com/image/459265350", None),
]
("http://demo.tumblr.com/image/459265350"),
)
def __init__(self, match):
TumblrExtractor.__init__(self, match)
@ -281,11 +281,11 @@ class TumblrPostExtractor(TumblrExtractor):
class TumblrTagExtractor(TumblrExtractor):
"""Extractor for images from a tumblr-user by tag"""
subcategory = "tag"
pattern = [BASE_PATTERN + r"/tagged/([^/?&#]+)"]
test = [("http://demo.tumblr.com/tagged/Times%20Square", {
pattern = BASE_PATTERN + r"/tagged/([^/?&#]+)"
test = ("http://demo.tumblr.com/tagged/Times%20Square", {
"pattern": (r"https://\d+\.media\.tumblr\.com/tumblr_[^/_]+_1280.jpg"),
"count": 1,
})]
})
def __init__(self, match):
TumblrExtractor.__init__(self, match)
@ -298,12 +298,12 @@ class TumblrTagExtractor(TumblrExtractor):
class TumblrLikesExtractor(TumblrExtractor):
"""Extractor for images from a tumblr-user's liked posts"""
subcategory = "likes"
directory_fmt = ["{category}", "{name}", "likes"]
directory_fmt = ("{category}", "{name}", "likes")
archive_fmt = "f_{blog[name]}_{id}_{num}"
pattern = [BASE_PATTERN + r"/likes"]
test = [("http://mikf123.tumblr.com/likes", {
pattern = BASE_PATTERN + r"/likes"
test = ("http://mikf123.tumblr.com/likes", {
"count": 1,
})]
})
def posts(self):
return self.api.likes(self.blog)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2018 Mike Fährmann
# Copyright 2016-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -15,7 +15,7 @@ from .. import text
class TwitterExtractor(Extractor):
"""Base class for twitter extractors"""
category = "twitter"
directory_fmt = ["{category}", "{user}"]
directory_fmt = ("{category}", "{user}")
filename_fmt = "{tweet_id}_{num}.{extension}"
archive_fmt = "{tweet_id}_{retweet_id}_{num}"
root = "https://twitter.com"
@ -100,13 +100,13 @@ class TwitterExtractor(Extractor):
class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for all images from a user's timeline"""
subcategory = "timeline"
pattern = [r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
r"/([^/?&#]+)/?$"]
test = [("https://twitter.com/PicturesEarth", {
pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
r"/([^/?&#]+)/?$")
test = ("https://twitter.com/PicturesEarth", {
"range": "1-40",
"url": "2f4d51cbba81e56c1c755677b3ad58fc167c9771",
"keyword": "cbae53b6f4ba133078bb13c95dbd3cbb4fa40b9f",
})]
})
def tweets(self):
url = "{}/i/profiles/show/{}/timeline/tweets".format(
@ -117,12 +117,12 @@ class TwitterTimelineExtractor(TwitterExtractor):
class TwitterMediaExtractor(TwitterExtractor):
"""Extractor for all images from a user's Media Tweets"""
subcategory = "media"
pattern = [r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
r"/([^/?&#]+)/media(?!\w)"]
test = [("https://twitter.com/PicturesEarth/media", {
pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
r"/([^/?&#]+)/media(?!\w)")
test = ("https://twitter.com/PicturesEarth/media", {
"range": "1-40",
"url": "2f4d51cbba81e56c1c755677b3ad58fc167c9771",
})]
})
def tweets(self):
url = "{}/i/profiles/show/{}/media_timeline".format(
@ -133,9 +133,9 @@ class TwitterMediaExtractor(TwitterExtractor):
class TwitterTweetExtractor(TwitterExtractor):
"""Extractor for images from individual tweets"""
subcategory = "tweet"
pattern = [r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
r"/([^/?&#]+)/status/(\d+)"]
test = [
pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
r"/([^/?&#]+)/status/(\d+)")
test = (
("https://twitter.com/PicturesEarth/status/672897688871018500", {
"url": "d9e68d41301d2fe382eb27711dea28366be03b1a",
"keyword": "46c8e739a892000848a8a2184da91346c9cbe4bf",
@ -145,7 +145,7 @@ class TwitterTweetExtractor(TwitterExtractor):
"url": "c8a262a9698cb733fb27870f5a8f75faf77d79f6",
"keyword": "7729cd3ff16a5647b0b5ffdec9d428c91eedafbe",
}),
]
)
def __init__(self, match):
TwitterExtractor.__init__(self, match)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -84,17 +84,17 @@ class WallhavenExtractor(Extractor):
class WallhavenSearchExtractor(WallhavenExtractor):
"""Extractor for search results on wallhaven.cc"""
subcategory = "search"
directory_fmt = ["{category}", "{search[q]}"]
directory_fmt = ("{category}", "{search[q]}")
archive_fmt = "s_{search[q]}_{id}"
pattern = [r"(?:https?://)?alpha\.wallhaven\.cc/search\?([^/?#]+)"]
test = [
("https://alpha.wallhaven.cc/search?q=touhou", None),
pattern = r"(?:https?://)?alpha\.wallhaven\.cc/search\?([^/?#]+)"
test = (
("https://alpha.wallhaven.cc/search?q=touhou"),
(("https://alpha.wallhaven.cc/search?q=id%3A87"
"&categories=111&purity=100&sorting=date_added&order=asc&page=3"), {
"url": "29b54803e3fae5e337fdd29d47d51302d78bec9a",
"range": "1-3",
}),
]
)
per_page = 24
def __init__(self, match):
@ -136,9 +136,9 @@ class WallhavenImageExtractor(WallhavenExtractor):
"""Extractor for individual wallpaper on wallhaven.cc"""
subcategory = "image"
archive_fmt = "{id}"
pattern = [r"(?:https?://)?(?:alpha\.wallhaven\.cc/wallpaper"
r"|whvn\.cc)/(\d+)"]
test = [
pattern = (r"(?:https?://)?(?:alpha\.wallhaven\.cc/wallpaper"
r"|whvn\.cc)/(\d+)")
test = (
("https://alpha.wallhaven.cc/wallpaper/8114", {
"pattern": "https://[^.]+.wallhaven.cc/[^/]+/full/[^-]+-8114.jpg",
"content": "497212679383a465da1e35bd75873240435085a2",
@ -160,8 +160,8 @@ class WallhavenImageExtractor(WallhavenExtractor):
("https://alpha.wallhaven.cc/wallpaper/8536", {
"url": "8431c6f1eec3a6f113980eeec9dfcb707de7ddcf",
}),
("https://whvn.cc/8114", None),
]
("https://whvn.cc/8114"),
)
def __init__(self, match):
WallhavenExtractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2018 Mike Fährmann
# Copyright 2017-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -16,11 +16,11 @@ class WarosuThreadExtractor(Extractor):
"""Extractor for images from threads on warosu.org"""
category = "warosu"
subcategory = "thread"
directory_fmt = ["{category}", "{board}", "{thread} - {title}"]
directory_fmt = ("{category}", "{board}", "{thread} - {title}")
filename_fmt = "{tim}-{filename}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
pattern = [r"(?:https?://)?(?:www\.)?warosu\.org/([^/]+)/thread/(\d+)"]
test = [
pattern = r"(?:https?://)?(?:www\.)?warosu\.org/([^/]+)/thread/(\d+)"
test = (
("https://warosu.org/jp/thread/16656025", {
"url": "889d57246ed67e491e5b8f7f124e50ea7991e770",
"keyword": "c00ea4c5460c5986994f17bb8416826d42ca57c0",
@ -30,7 +30,7 @@ class WarosuThreadExtractor(Extractor):
"keyword": "7534edf4ec51891dbf44d775b73fbbefd52eec71",
"content": "d48df0a701e6599312bfff8674f4aa5d4fb8db1c",
}),
]
)
root = "https://warosu.org"
def __init__(self, match):

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2018 Mike Fährmann
# Copyright 2017-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -27,12 +27,12 @@ class XvideosExtractor(Extractor):
class XvideosGalleryExtractor(XvideosExtractor):
"""Extractor for user profile galleries from xvideos.com"""
subcategory = "gallery"
directory_fmt = ["{category}", "{user[name]}", "{title}"]
directory_fmt = ("{category}", "{user[name]}", "{title}")
filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
archive_fmt = "{gallery_id}_{num}"
pattern = [r"(?:https?://)?(?:www\.)?xvideos\.com"
r"/profiles/([^/?&#]+)/photos/(\d+)"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com"
r"/profiles/([^/?&#]+)/photos/(\d+)")
test = (
(("https://www.xvideos.com/profiles"
"/pervertedcouple/photos/751031/random_stuff"), {
"url": "4f0d992e5dc39def2c3ac8e099d17bf09e76e3c7",
@ -41,7 +41,7 @@ class XvideosGalleryExtractor(XvideosExtractor):
("https://www.xvideos.com/profiles/pervertedcouple/photos/751032/", {
"exception": exception.NotFoundError,
}),
]
)
def __init__(self, match):
XvideosExtractor.__init__(self)
@ -94,9 +94,9 @@ class XvideosUserExtractor(XvideosExtractor):
"""Extractor for user profiles from xvideos.com"""
subcategory = "user"
categorytransfer = True
pattern = [r"(?:https?://)?(?:www\.)?xvideos\.com"
r"/profiles/([^/?&#]+)/?(?:#.*)?$"]
test = [
pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com"
r"/profiles/([^/?&#]+)/?(?:#.*)?$")
test = (
("https://www.xvideos.com/profiles/pervertedcouple", {
"url": "a413f3e60d6d3a2de79bd44fa3b7a9c03db4336e",
"keyword": "a796760d34732adc7ec52a8feb057515209a2ca6",
@ -104,8 +104,8 @@ class XvideosUserExtractor(XvideosExtractor):
("https://www.xvideos.com/profiles/niwehrwhernvh", {
"exception": exception.NotFoundError,
}),
("https://www.xvideos.com/profiles/pervertedcouple#_tabPhotos", None),
]
("https://www.xvideos.com/profiles/pervertedcouple#_tabPhotos"),
)
def __init__(self, match):
XvideosExtractor.__init__(self)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -20,25 +20,25 @@ class YandereExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
class YandereTagExtractor(booru.TagMixin, YandereExtractor):
"""Extractor for images from yande.re based on search-tags"""
pattern = [r"(?:https?://)?(?:www\.)?yande\.re"
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)"]
test = [("https://yande.re/post?tags=ouzoku+armor", {
pattern = (r"(?:https?://)?(?:www\.)?yande\.re"
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)")
test = ("https://yande.re/post?tags=ouzoku+armor", {
"content": "59201811c728096b2d95ce6896fd0009235fe683",
})]
})
class YanderePoolExtractor(booru.PoolMixin, YandereExtractor):
"""Extractor for image-pools from yande.re"""
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(?P<pool>\d+)"]
test = [("https://yande.re/pool/show/318", {
pattern = r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(?P<pool>\d+)"
test = ("https://yande.re/pool/show/318", {
"content": "2a35b9d6edecce11cc2918c6dce4de2198342b68",
})]
})
class YanderePostExtractor(booru.PostMixin, YandereExtractor):
"""Extractor for single images from yande.re"""
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(?P<post>\d+)"]
test = [("https://yande.re/post/show/51824", {
pattern = r"(?:https?://)?(?:www\.)?yande\.re/post/show/(?P<post>\d+)"
test = ("https://yande.re/post/show/51824", {
"content": "59201811c728096b2d95ce6896fd0009235fe683",
"options": (("tags", True),),
"keyword": {
@ -47,20 +47,20 @@ class YanderePostExtractor(booru.PostMixin, YandereExtractor):
"tags_copyright": "ouzoku",
"tags_general": str,
},
})]
})
class YanderePopularExtractor(booru.MoebooruPopularMixin, YandereExtractor):
"""Extractor for popular images from yande.re"""
pattern = [r"(?:https?://)?(?:www\.)?yande\.re"
pattern = (r"(?:https?://)?(?:www\.)?yande\.re"
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
r"(?:\?(?P<query>[^#]*))?"]
test = [
r"(?:\?(?P<query>[^#]*))?")
test = (
("https://yande.re/post/popular_by_month?month=6&year=2014", {
"count": 40,
}),
("https://yande.re/post/popular_recent", None),
]
("https://yande.re/post/popular_recent"),
)
def __init__(self, match):
super().__init__(match)

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -16,11 +16,11 @@ class YukiThreadExtractor(Extractor):
"""Extractor for images from threads on yuki.la"""
category = "yuki"
subcategory = "thread"
directory_fmt = ["{category}", "{board}", "{thread}{title:? - //}"]
directory_fmt = ("{category}", "{board}", "{thread}{title:? - //}")
filename_fmt = "{time}-{filename}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
pattern = [r"(?:https?://)?yuki\.la/([^/?&#]+)/(\d+)"]
test = [
pattern = r"(?:https?://)?yuki\.la/([^/?&#]+)/(\d+)"
test = (
("https://yuki.la/gd/309639", {
"url": "289e86c5caf673a2515ec5f5f521ac0ae7e189e9",
"keyword": "01cbe29ae207a5cb7556bcbd5ed481ecdaf32727",
@ -40,7 +40,7 @@ class YukiThreadExtractor(Extractor):
"url": "010560bf254bd485e48366c3531728bda4b22583",
"keyword": "7b736c41e307dcfcb84ef495f29299a6ddd06d67",
}),
]
)
root = "https://yuki.la"
def __init__(self, match):

@ -19,7 +19,7 @@ from gallery_dl.extractor.directlink import DirectlinkExtractor as DLExtractor
class FakeExtractor(Extractor):
category = "fake"
subcategory = "test"
pattern = ["fake:"]
pattern = "fake:"
def __init__(self, match=None):
Extractor.__init__(self)
@ -68,7 +68,7 @@ class TestExtractor(unittest.TestCase):
tuples = extractor.add_module(sys.modules[__name__])
self.assertEqual(len(tuples), 1)
self.assertEqual(tuples[0][0].pattern, FakeExtractor.pattern[0])
self.assertEqual(tuples[0][0].pattern, FakeExtractor.pattern)
self.assertEqual(tuples[0][1], FakeExtractor)
self.assertIsInstance(extractor.find(uri), FakeExtractor)

Loading…
Cancel
Save