gallery-dl/scripts/supportedsites.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Generate a reStructuredText document with all supported sites"""

import sys
import collections

import util
from gallery_dl import extractor


# Display names for extractor categories.
# Maps an extractor's 'category' value to the text shown for it;
# category_text() falls back to the capitalized category name
# for everything not listed here.
CATEGORY_MAP = {
    "2chan"          : "Futaba Channel",
    "35photo"        : "35PHOTO",
    "adultempire"    : "Adult Empire",
    "archivedmoe"    : "Archived.Moe",
    "archiveofsins"  : "Archive of Sins",
    "artstation"     : "ArtStation",
    "aryion"         : "Eka's Portal",
    "b4k"            : "arch.b4k.co",
    "baraag"         : "baraag",
    "bcy"            : "半次元",
    "bobx"           : "BobX",
    "deviantart"     : "DeviantArt",
    "dokireader"     : "Doki Reader",
    "dynastyscans"   : "Dynasty Reader",
    "e621"           : "e621",
    "e-hentai"       : "E-Hentai",
    "exhentai"       : "ExHentai",
    "fallenangels"   : "Fallen Angels Scans",
    "fashionnova"    : "Fashion Nova",
    "furaffinity"    : "Fur Affinity",
    "hbrowse"        : "HBrowse",
    "hentai2read"    : "Hentai2Read",
    "hentaicafe"     : "Hentai Cafe",
    "hentaifoundry"  : "Hentai Foundry",
    "hentaifox"      : "HentaiFox",
    "hentaihand"     : "HentaiHand",
    "hentaihere"     : "HentaiHere",
    "hitomi"         : "Hitomi.la",
    "idolcomplex"    : "Idol Complex",
    "imagebam"       : "ImageBam",
    "imagefap"       : "ImageFap",
    "imgbb"          : "ImgBB",
    "imgbox"         : "imgbox",
    "imagechest"     : "ImageChest",
    "imgth"          : "imgth",
    "imgur"          : "imgur",
    "jaiminisbox"    : "Jaimini's Box",
    "kabeuchi"       : "かべうち",
    "kireicake"      : "Kirei Cake",
    "kissmanga"      : "KissManga",
    "lineblog"       : "LINE BLOG",
    "livedoor"       : "livedoor Blog",
    "mangadex"       : "MangaDex",
    "mangafox"       : "Manga Fox",
    "mangahere"      : "Manga Here",
    "mangakakalot"   : "MangaKakalot",
    "mangapark"      : "MangaPark",
    "mangastream"    : "Manga Stream",
    "mastodon.social": "mastodon.social",
    "myhentaigallery": "My Hentai Gallery",
    "myportfolio"    : "Adobe Portfolio",
    "nhentai"        : "nhentai",
    "nijie"          : "nijie",
    "nozomi"         : "Nozomi.la",
    "nsfwalbum"      : "NSFWalbum.com",
    "nyafuu"         : "Nyafuu Archive",
    "paheal"         : "rule #34",
    "powermanga"     : "PowerManga",
    "readcomiconline": "Read Comic Online",
    "rbt"            : "RebeccaBlackTech",
    "redgifs"        : "RedGIFs",
    "rule34"         : "Rule 34",
    "sankaku"        : "Sankaku Channel",
    "sankakucomplex" : "Sankaku Complex",
    "seaotterscans"  : "Sea Otter Scans",
    "seiga"          : "Niconico Seiga",
    "senmanga"       : "Sen Manga",
    "sensescans"     : "Sense-Scans",
    "sexcom"         : "Sex.com",
    "simplyhentai"   : "Simply Hentai",
    "slickpic"       : "SlickPic",
    "slideshare"     : "SlideShare",
    "smugmug"        : "SmugMug",
    "speakerdeck"    : "Speaker Deck",
    "subscribestar"  : "SubscribeStar",
    "thebarchive"    : "The /b/ Archive",
    "vanillarock"    : "もえぴりあ",
    "vsco"           : "VSCO",
    "webtoons"       : "Webtoon",
    "wikiart"        : "WikiArt.org",
    "worldthree"     : "World Three",
    "xhamster"       : "xHamster",
    "xvideos"        : "XVideos",
    "yuki"           : "yuki.la 4chan archive",
}

# Display texts for extractor subcategories, read by subcategory_text().
# Two kinds of entries share this dict:
#   - 'subcategory -> text' strings that apply to every category
#   - 'category -> {subcategory: text}' dicts, which are consulted first
#     and therefore override the generic strings for that category
# An empty string keeps the subcategory out of the "Capabilities" column,
# since COLUMNS only joins non-empty texts.
SUBCATEGORY_MAP = {
    # generic texts
    "doujin" : "Doujin",
    "gallery": "Galleries",
    "image"  : "individual Images",
    "issue"  : "Comic Issues",
    "manga"  : "Manga",
    "popular": "Popular Images",
    "recent" : "Recent Images",
    "search" : "Search Results",
    "status" : "Images from Statuses",
    "tag"    : "Tag Searches",
    "user"   : "User Profiles",
    "following"    : "",
    "related-pin"  : "related Pins",
    "related-board": "",

    # per-category overrides
    "artstation": {
        "artwork": "Artwork Listings",
    },
    "deviantart": {
        "stash": "Sta.sh",
    },
    "hentaifoundry": {
        "story": "",
    },
    "instagram": {
        "posts": "",
        "saved": "Saved Posts",
    },
    "newgrounds": {
        "art"  : "Art",
        "audio": "Audio",
        "media": "Media Files",
    },
    "pinterest": {
        "board": "",
        "pinit": "pin.it Links",
    },
    "pixiv": {
        "me"  : "pixiv.me Links",
        "work": "individual Images",
    },
    "smugmug": {
        "path": "Images from Users and Folders",
    },
    "twitter": {
        "media": "Media Timelines",
        "list-members": "List Members",
    },
    "wikiart": {
        "artists": "Artist Listings",
    },
    "weasyl": {
        "journals"   : "",
        "submissions": "",
    },
}

# reStructuredText hyperlinks shared by several AUTH_MAP entries
_OAUTH = "`OAuth <https://github.com/mikf/gallery-dl#oauth>`__"
_COOKIES = "`Cookies <https://github.com/mikf/gallery-dl#cookies>`__"
_APIKEY_WH = "`API Key <configuration.rst#extractorwallhavenapi-key>`__"
_APIKEY_WY = "`API Key <configuration.rst#extractorweasylapi-key>`__"

# Text for the "Authentication" column, keyed by extractor category.
# Categories without an entry get an empty cell (see COLUMNS).
AUTH_MAP = {
    "aryion"         : "Supported",
    "baraag"         : _OAUTH,
    "danbooru"       : "Supported",
    "deviantart"     : _OAUTH,
    "e621"           : "Supported",
    "e-hentai"       : "Supported",
    "exhentai"       : "Supported",
    "flickr"         : _OAUTH,
    "furaffinity"    : _COOKIES,
    "idolcomplex"    : "Supported",
    "imgbb"          : "Supported",
    "inkbunny"       : "Supported",
    "instagram"      : "Supported",
    "mangoxo"        : "Supported",
    "mastodon.social": _OAUTH,
    "newgrounds"     : "Supported",
    "nijie"          : "Required",
    "patreon"        : _COOKIES,
    "pawoo"          : _OAUTH,
    "pinterest"      : "Supported",
    "pixiv"          : "Required",
    "reddit"         : _OAUTH,
    "sankaku"        : "Supported",
    "seiga"          : "Required",
    "smugmug"        : _OAUTH,
    "subscribestar"  : "Supported",
    "tsumino"        : "Supported",
    "tumblr"         : _OAUTH,
    "twitter"        : "Supported",
    "wallhaven"      : _APIKEY_WH,
    "weasyl"         : _APIKEY_WY,
}

# categories that build_extractor_list() leaves out of the document
IGNORE_LIST = ("directlink", "oauth", "recursive", "test")


def domain(cls):
    """Return the web-domain related to an extractor class

    The following candidates are tried in order:

    1. the last word of the docstring of the module defining 'cls',
       if that word starts with "http"
    2. the non-empty 'root' attribute of 'cls' plus a trailing slash
    3. for classes with an 'https' attribute: a URL built from that flag
       and the last word of the class docstring
    4. scheme and host of the URL in the first entry of 'cls._get_tests()'

    An empty string is returned if none of them yields a result.
    """
    # a missing or empty docstring only disqualifies this candidate
    module = sys.modules.get(cls.__module__)
    words = (getattr(module, "__doc__", None) or "").split()
    if words and words[-1].startswith("http"):
        return words[-1]

    if getattr(cls, "root", None):
        return cls.root + "/"

    if hasattr(cls, "https"):
        words = (cls.__doc__ or "").split()
        if words:
            scheme = "https" if cls.https else "http"
            return "{}://{}/".format(scheme, words[-1])

    test = next(cls._get_tests(), None)
    if test:
        url = test[0]
        # start searching at index 8 to skip the slashes of "https://"
        end = url.find("/", 8)
        # find() returns -1 for a URL without a path; use the whole URL
        # in that case instead of slicing it down to an empty string
        return url[:end+1] if end >= 0 else url + "/"

    return ""


def category_text(cls):
    """Map an extractor's category to its display name

    Falls back to the capitalized category name when CATEGORY_MAP
    has no non-empty entry for it.
    """
    name = cls.category
    label = CATEGORY_MAP.get(name)
    if label:
        return label
    return name.capitalize()


def subcategory_text(cls):
    """Return a human-readable representation of a subcategory

    Lookup order:

    1. the category-specific dict in SUBCATEGORY_MAP, if there is one
    2. the generic top-level string in SUBCATEGORY_MAP
    3. the capitalized subcategory name, with an "s" appended
       unless it already ends in one

    The result of 1. or 2. may be an empty string.
    """
    c, sc = cls.category, cls.subcategory

    # SUBCATEGORY_MAP stores generic strings next to per-category dicts.
    # Only treat the value found for a category as overrides if it is a
    # dict; a category named like a generic entry (e.g. "user") would
    # otherwise be indexed as a string and raise a TypeError.
    scm = SUBCATEGORY_MAP.get(c)
    if isinstance(scm, dict) and sc in scm:
        return scm[sc]

    # likewise, a subcategory named like a category must not
    # return that category's override dict
    text = SUBCATEGORY_MAP.get(sc)
    if isinstance(text, str):
        return text

    sc = sc.capitalize()
    return sc if sc.endswith("s") else sc + "s"


def category_key(cls):
    """Build the key used to sort extractors by category

    The key is the lowercased display name of the category; classes from
    a module whose name ends in ".imagehosts" get a "zz" prefix, which
    moves them towards the end of the sorted list.
    """
    text = category_text(cls).lower()
    prefix = "zz" if cls.__module__.endswith(".imagehosts") else ""
    return prefix + text


def subcategory_key(cls):
    """Build the key used to sort extractors within one category

    The "issue" subcategory is keyed as "A";
    every other subcategory is keyed by its own name.
    """
    name = cls.subcategory
    return "A" if name == "issue" else name


def build_extractor_list():
    """Generate a sorted list of lists of extractor classes

    Extractor classes are grouped by category, skipping classes with an
    empty category or one listed in IGNORE_LIST. Each group is sorted
    with subcategory_key(), and the groups themselves are sorted with
    category_key() applied to their first member.
    """
    extractors = collections.defaultdict(list)

    # get lists of extractor classes grouped by category
    for extr in extractor.extractors():
        if not extr.category or extr.category in IGNORE_LIST:
            continue
        extractors[extr.category].append(extr)

    # sort extractor lists with the same category
    for extrlist in extractors.values():
        extrlist.sort(key=subcategory_key)

    # ugly hack to add e-hentai.org
    # Use get() instead of subscripting: indexing the defaultdict would
    # insert an empty "exhentai" list when there are no such extractors,
    # and an empty list breaks the 'lst[0]' lookup of the final sort.
    eh = []
    for extr in extractors.get("exhentai", ()):
        class eh_extr(extr):
            category = "e-hentai"
            root = "https://e-hentai.org"
        eh.append(eh_extr)
    if eh:
        extractors["e-hentai"] = eh

    # sort lists by category
    return sorted(
        extractors.values(),
        key=lambda lst: category_key(lst[0]),
    )


# table layout: one (title, width, text-function) entry per column;
# each text-function receives the list of extractor classes of one category
COLUMNS = (
    ("Site", 20, lambda extrs: category_text(extrs[0])),
    ("URL" , 35, lambda extrs: domain(extrs[0])),
    # subcategories with an empty text are left out
    ("Capabilities", 50,
     lambda extrs: ", ".join(filter(None, map(subcategory_text, extrs)))),
    ("Authentication", 16,
     lambda extrs: AUTH_MAP.get(extrs[0].category, "")),
)


def write_output(fobj, columns, extractors):
    """Write the supported-sites table as reStructuredText

    fobj:       object whose write() method receives the document
    columns:    sequence of (title, width, text-function) entries
    extractors: iterable of lists of extractor classes, one list per row

    A cell text longer than its column is replaced with a
    '|<category>-<first letter of the column title>|' substitution
    reference, whose definition is written below the table.
    Columns with a title starting with "A" are exempt from this.
    """
    emit = fobj.write
    replacements = []

    def fit(value, column, category=None):
        width = column[1]
        # accept ready-made text or anything the text-function can render
        if isinstance(value, str):
            text = value
        else:
            text = column[2](value)

        if len(text) > width and column[0][0] != "A":
            ref = "|{}-{}|".format(category, column[0][0])
            replacements.append((ref, text))
            text = ref

        # left-align; text wider than the column is returned unchanged
        return text.ljust(width)

    # caption
    emit("Supported Sites\n")
    emit("===============\n")
    emit("Unless otherwise known, assume all sites to be NSFW\n\n")

    # table head
    rule = " ".join(["=" * column[1] for column in columns]) + "\n"
    emit(rule)
    titles = [fit(column[0], column) for column in columns]
    emit(" ".join(titles).strip() + "\n")
    emit(rule)

    # table body
    for group in extractors:
        cells = [
            fit(column[2](group), column, group[0].category)
            for column in columns
        ]
        emit(" ".join(cells).strip())
        emit("\n")

    # table bottom
    emit(rule)
    emit("\n")

    # substitutions
    for ref, text in replacements:
        emit(".. {} replace:: {}\n".format(ref, text))


# Write the document to 'docs/<first command-line argument>',
# or to 'docs/supportedsites.rst' if no argument is given.
# The encoding is set explicitly because CATEGORY_MAP contains non-ASCII
# site names, which the platform's default encoding may fail to encode.
outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.rst"
with open(util.path("docs", outfile), "w", encoding="utf-8") as file:
    write_output(file, COLUMNS, build_extractor_list())
-												update/cleanup Python dev scripts

- put common code in its own util.py file
- same Python3 shebang for all scripts
- add file docstrings
- fix format string replacement fields in man page template

											
										
										
											6 years ago
+								#!/usr/bin/env python3
 								# -*- coding: utf-8 -*-
 								"""Generate a reStructuredText document with all supported sites"""
-												add a script to automatically build a list of supported sites

											
										
										
											8 years ago
 								import sys
-												simplify build_supportedsites.py

											
										
										
											6 years ago
+								import collections
-												add a script to automatically build a list of supported sites

											
										
										
											8 years ago
-												update/cleanup Python dev scripts

- put common code in its own util.py file
- same Python3 shebang for all scripts
- add file docstrings
- fix format string replacement fields in man page template

											
										
										
											6 years ago
+								import util
 								from gallery_dl import extractor
-												add a script to automatically build a list of supported sites

											
										
										
											8 years ago
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								CATEGORY_MAP = {
-												update supportedsites.rst

											
										
										
											7 years ago
+								    "2chan"          : "Futaba Channel",
-												[35photo] add user-, genre-, and image-extractors (#162)

											
										
										
											6 years ago
+								    "35photo"        : "35PHOTO",
-												[adultempire] add gallery extractor (closes #340)

											
										
										
											5 years ago
+								    "adultempire"    : "Adult Empire",
-												update supportedsites.rst

											
										
										
											7 years ago
+								    "archivedmoe"    : "Archived.Moe",
 								    "archiveofsins"  : "Archive of Sins",
-												check supportedsites.rst in release script

											
										
										
											7 years ago
+								    "artstation"     : "ArtStation",
-												[aryion] add gallery and post extractors (#390, #673)

											
										
										
											5 years ago
+								    "aryion"         : "Eka's Portal",
-												[foolfuuka] add support for more sites (#18)

- https://arch.b4k.co
- https://archive.whatisthisimnotgoodwithcomputers.com
- https://archive.yeet.net

Notes:
- The name "whatisthisimnotgoodwithcomputers" is way too long ...
- archive.yeet.net is out of date and also blocked by 4chan servers
  - newest threads are 2 weeks old
  - using "https://archive.yeet.net" as Referer header results in
    "403 Forbidden" when accessing 4chan

											
										
										
											7 years ago
+								    "b4k"            : "arch.b4k.co",
-												[mastodon] add access tokens for mastodon.social and baraag.net

(closes #665)

											
										
										
											5 years ago
+								    "baraag"         : "baraag",
-												[bcy] add user and post extractors (#592)

											
										
										
											5 years ago
+								    "bcy"            : "半次元",
-												[bobx] add gallery and model extractors

											
										
										
											6 years ago
+								    "bobx"           : "BobX",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "deviantart"     : "DeviantArt",
 								    "dokireader"     : "Doki Reader",
 								    "dynastyscans"   : "Dynasty Reader",
 								    "e621"           : "e621",
-												have e-hentai and exhentai on supportedsites.rst (#365)

											
										
										
											5 years ago
+								    "e-hentai"       : "E-Hentai",
 								    "exhentai"       : "ExHentai",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "fallenangels"   : "Fallen Angels Scans",
-												[shopify] add generic collection and product extractors (#175)

with fashionnova.com  as a default domain

											
										
										
											6 years ago
+								    "fashionnova"    : "Fashion Nova",
-												[furaffinity] add extractors (#284)

											
										
										
											5 years ago
+								    "furaffinity"    : "Fur Affinity",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "hbrowse"        : "HBrowse",
 								    "hentai2read"    : "Hentai2Read",
-												[hentaicafe] add chapter and manga extractors (#101)

											
										
										
											6 years ago
+								    "hentaicafe"     : "Hentai Cafe",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "hentaifoundry"  : "Hentai Foundry",
-												[hentaifox] add chapter extractor (#160)

											
										
										
											6 years ago
+								    "hentaifox"      : "HentaiFox",
-												[hentaihand] add extractors (closes #605)

											
										
										
											5 years ago
+								    "hentaihand"     : "HentaiHand",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "hentaihere"     : "HentaiHere",
 								    "hitomi"         : "Hitomi.la",
-												[idolcomplex] add support for idol.sankakucomplex.com

											
										
										
											7 years ago
+								    "idolcomplex"    : "Idol Complex",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "imagebam"       : "ImageBam",
 								    "imagefap"       : "ImageFap",
-												[imgbb] add album extractor (#361)

											
										
										
											5 years ago
+								    "imgbb"          : "ImgBB",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "imgbox"         : "imgbox",
-												[imagechest] Add new extractor for ImageChest (#750)

* [imagechest] Add new extractor for ImageChest

* [imagechest] Fix flake8 compliance issues
											
										
										
											4 years ago
+								    "imagechest"     : "ImageChest",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "imgth"          : "imgth",
 								    "imgur"          : "imgur",
 								    "jaiminisbox"    : "Jaimini's Box",
-												[kabeuchi] add 'user' extractor (closes #561)

											
										
										
											5 years ago
+								    "kabeuchi"       : "かべうち",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "kireicake"      : "Kirei Cake",
 								    "kissmanga"      : "KissManga",
-												[lineblog] add blog and post extractors (closes #404)

											
										
										
											5 years ago
+								    "lineblog"       : "LINE BLOG",
-												[livedoor] add blog- and post-extractors (#190)

											
										
										
											6 years ago
+								    "livedoor"       : "livedoor Blog",
-												[mangadex] general improvements

- support >100 chapter entries per manga
- custom archive ID format
- detect non-existing chapters

											
										
										
											7 years ago
+								    "mangadex"       : "MangaDex",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "mangafox"       : "Manga Fox",
 								    "mangahere"      : "Manga Here",
-												[mangakakalot] Added extractors for MangaKakalot (#876)


											
										
										
											4 years ago
+								    "mangakakalot"   : "MangaKakalot",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "mangapark"      : "MangaPark",
 								    "mangastream"    : "Manga Stream",
-												[mastodon] add access tokens for mastodon.social and baraag.net

(closes #665)

											
										
										
											5 years ago
+								    "mastodon.social": "mastodon.social",
-												[myhentaigallery] update and fix extraction (#1001)

- extract more metadata
- match "/show/" URLs
- complete test results
- fix missing images for lines starting with " <img"
- fix missing comma in supportedsites.py

											
										
										
											4 years ago
+								    "myhentaigallery": "My Hentai Gallery",
-												[myportfolio] add user and gallery extractors (#95)

											
										
										
											6 years ago
+								    "myportfolio"    : "Adobe Portfolio",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "nhentai"        : "nhentai",
 								    "nijie"          : "nijie",
-												[nozomi] add post and tag extractors (#388)

											
										
										
											5 years ago
+								    "nozomi"         : "Nozomi.la",
-												[nsfwalbum] add album extractor (closes #287)

											
										
										
											5 years ago
+								    "nsfwalbum"      : "NSFWalbum.com",
-												update supportedsites.rst

											
										
										
											7 years ago
+								    "nyafuu"         : "Nyafuu Archive",
-												[paheal] add tag- and post-extractors (closes #69)

											
										
										
											7 years ago
+								    "paheal"         : "rule #34",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "powermanga"     : "PowerManga",
 								    "readcomiconline": "Read Comic Online",
-												update supportedsites.rst

											
										
										
											7 years ago
+								    "rbt"            : "RebeccaBlackTech",
-												[redgifs] add image extractor (#724)

											
										
										
											4 years ago
+								    "redgifs"        : "RedGIFs",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "rule34"         : "Rule 34",
 								    "sankaku"        : "Sankaku Channel",
-												[sankakucomplex] move article extractor to its own module (#258)

											
										
										
											5 years ago
+								    "sankakucomplex" : "Sankaku Complex",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "seaotterscans"  : "Sea Otter Scans",
 								    "seiga"          : "Niconico Seiga",
 								    "senmanga"       : "Sen Manga",
 								    "sensescans"     : "Sense-Scans",
-												[sexcom] add pin and board extractors (#147)

											
										
										
											5 years ago
+								    "sexcom"         : "Sex.com",
-												[simplyhentai] add gallery extractor (#89)

											
										
										
											6 years ago
+								    "simplyhentai"   : "Simply Hentai",
-												[slickpic] add album extractor (#249)

											
										
										
											5 years ago
+								    "slickpic"       : "SlickPic",
-												[slideshare] improve metadata; flake8

- added 'views' and 'published' keywords
- fixed longer titles and descriptions

											
										
										
											7 years ago
+								    "slideshare"     : "SlideShare",
-												[smugmug] added image and album extractor

just some initial code that still requires a lot of work ...

TODO:
- folders
- old-style albums (which are nearly all of them ...)
- images from users
- OAuth

It could also happen that the API credentials used will become invalid
whenever my 14 day trial period ends (7 days remaining), but that
would just require users to supply their own.

											
										
										
											6 years ago
+								    "smugmug"        : "SmugMug",
-												[speakerdeck] Add a new extractor for speakerdeck.com (#726)


											
										
										
											4 years ago
+								    "speakerdeck"    : "Speaker Deck",
-												update supportedsites.py (#889, #893)

- mention optional auth access for more sites
- link to OAuth and Cookies sections in README

											
										
										
											4 years ago
+								    "subscribestar"  : "SubscribeStar",
-												update supportedsites.rst

											
										
										
											7 years ago
+								    "thebarchive"    : "The /b/ Archive",
-												[vanillarock] add post and tag extractors (closes #254)

											
										
										
											5 years ago
+								    "vanillarock"    : "もえぴりあ",
-												[vsco] add user extractor (#331)

											
										
										
											5 years ago
+								    "vsco"           : "VSCO",
-												[webtoons] Add a new extractor for webtoons.com (#761)

The webtoons extractor can extract episode and entire comic (all
episodes) from webtoons.com.

All the logic of the extractors should be trivial except for a couple
of kludges needed:

 - `ageGatePass' cookie is always set to avoid possible redirect and stop of
    extraction, especially in the comic extractor
 - The image URLs returned by the episode extractor could not be fetched
   directly and the `Referer:' HTTP header needs to be passed to fetch them

Close #593.
											
										
										
											4 years ago
+								    "webtoons"       : "Webtoon",
-												[wikiart] add extractors (#179)

for
- artists:          https://www.wikiart.org/en/thomas-cole
- artist-listings:  https://www.wikiart.org/en/artists-by-century/12
- artwork-listings: https://www.wikiart.org/en/paintings-by-media/grisaille

											
										
										
											6 years ago
+								    "wikiart"        : "WikiArt.org",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "worldthree"     : "World Three",
-												[xhamster] add gallery & user extractor (#281)

											
										
										
											5 years ago
+								    "xhamster"       : "xHamster",
-												[xvideos] add user profile extractor (#45)

											
										
										
											7 years ago
+								    "xvideos"        : "XVideos",
-												[yuki] add thread extractor (closes #111)

											
										
										
											6 years ago
+								    "yuki"           : "yuki.la 4chan archive",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								}
 								SUBCATEGORY_MAP = {
-												[nijie] add favorites extractor

adds support for 'https://nijie.info/user_like_illust_view.php?id=...'

											
										
										
											7 years ago
+								    "doujin" : "Doujin",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "gallery": "Galleries",
 								    "image"  : "individual Images",
-												remove dashes from subcategory names in supportedsites.rst

											
										
										
											5 years ago
+								    "issue"  : "Comic Issues",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "manga"  : "Manga",
-												[booru] add extractors for "Popular" images

											
										
										
											7 years ago
+								    "popular": "Popular Images",
-												[hentaifoundry] add 'popular' and 'recent' extractors

for "Popular Pictures" and "Recent Pictures" listings

											
										
										
											6 years ago
+								    "recent" : "Recent Images",
-												add 'Authentication' column to supportedsites.rst

											
										
										
											7 years ago
+								    "search" : "Search Results",
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								    "status" : "Images from Statuses",
-												remove dashes from subcategory names in supportedsites.rst

											
										
										
											5 years ago
+								    "tag"    : "Tag Searches",
-												change text representation of user extractors to "User Profiles"

											
										
										
											5 years ago
+								    "user"   : "User Profiles",
-												fix auto-generation of supportedsites.rst

											
										
										
											5 years ago
+								    "following"    : "",
-												[pinterest] add extractors for related pins

Related pins can not be accessed by adding a "#related" fragment
to the end of a Pinterest URL, for example:
- https://www.pinterest.com/pin/858146903966145189/#related
- https://www.pinterest.com/g1952849/test-/#related

There are no explicit real URLs for related pins,
using an option to enable them results in "clunky" code,
and a custom "related:<URL>" scheme doesn't feel right either.

											
										
										
											6 years ago
+								    "related-pin"  : "related Pins",
 								    "related-board": "",
-												fix auto-generation of supportedsites.rst

											
										
										
											5 years ago
 								    "artstation": {
 								        "artwork": "Artwork Listings",
 								    },
 								    "deviantart": {
 								        "stash": "Sta.sh",
 								    },
-												[hentaifoundry] add support for stories (closes #734)

											
										
										
											4 years ago
+								    "hentaifoundry": {
 								        "story": "",
 								    },
-												[instagram] use 'itertools.chain()'

											
										
										
											5 years ago
+								    "instagram": {
-												[instagram] add 'include' option (closes #1180)

Split the functionality of the old 'user' extractor into separate
'posts' and 'highlights' extractors, which respond to virtual URLs
('/<user>/posts' and '/<user>/highlights')

											
										
										
											4 years ago
+								        "posts": "",
-												[instagram] use 'itertools.chain()'

											
										
										
											5 years ago
+								        "saved": "Saved Posts",
 								    },
-												fix auto-generation of supportedsites.rst

											
										
										
											5 years ago
+								    "newgrounds": {
 								        "art"  : "Art",
 								        "audio": "Audio",
 								        "media": "Media Files",
 								    },
 								    "pinterest": {
-												[pinterest] add support for getting all boards of a user

(#1205)

											
										
										
											4 years ago
+								        "board": "",
-												fix auto-generation of supportedsites.rst

											
										
										
											5 years ago
+								        "pinit": "pin.it Links",
 								    },
 								    "pixiv": {
 								        "me"  : "pixiv.me Links",
 								        "work": "individual Images",
 								    },
 								    "smugmug": {
 								        "path": "Images from Users and Folders",
 								    },
 								    "twitter": {
 								        "media": "Media Timelines",
-												[twitter] add 'list-members' extractor (closes #1096)

											
										
										
											4 years ago
+								        "list-members": "List Members",
-												fix auto-generation of supportedsites.rst

											
										
										
											5 years ago
+								    },
 								    "wikiart": {
 								        "artists": "Artist Listings",
 								    },
-												[weasyl] update and simplify

- simplify 'pattern' regexps
- parse 'posted_at' as 'date'
- use unaltered 'title' ({title!l:R /_/} to lowercase and replace spaces)

											
										
										
											4 years ago
+								    "weasyl": {
 								        "journals"   : "",
 								        "submissions": "",
 								    },
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
+								}
-												rename 'Authentication' entries in supportedsites.rst

- change 'Optional' to 'Supported'
- use 'OAuth' and 'Cookies' in their own
- add link to weasyl API key option

											
										
										
											4 years ago
# reStructuredText hyperlinks used as values in AUTH_MAP below
_OAUTH = "`OAuth <https://github.com/mikf/gallery-dl#oauth>`__"
_COOKIES = "`Cookies <https://github.com/mikf/gallery-dl#cookies>`__"
_APIKEY_WH = "`API Key <configuration.rst#extractorwallhavenapi-key>`__"
_APIKEY_WY = "`API Key <configuration.rst#extractorweasylapi-key>`__"


# Text for the "Authentication" table column, keyed by extractor category.
# Categories without an entry here get an empty cell.
AUTH_MAP = {
    "aryion"         : "Supported",
    "baraag"         : _OAUTH,
    "danbooru"       : "Supported",
    "deviantart"     : _OAUTH,
    "e621"           : "Supported",
    "e-hentai"       : "Supported",
    "exhentai"       : "Supported",
    "flickr"         : _OAUTH,
    "furaffinity"    : _COOKIES,
    "idolcomplex"    : "Supported",
    "imgbb"          : "Supported",
    "inkbunny"       : "Supported",
    "instagram"      : "Supported",
    "mangoxo"        : "Supported",
    "mastodon.social": _OAUTH,
    "newgrounds"     : "Supported",
    "nijie"          : "Required",
    "patreon"        : _COOKIES,
    "pawoo"          : _OAUTH,
    "pinterest"      : "Supported",
    "pixiv"          : "Required",
    "reddit"         : _OAUTH,
    "sankaku"        : "Supported",
    "seiga"          : "Required",
    "smugmug"        : _OAUTH,
    "subscribestar"  : "Supported",
    "tsumino"        : "Supported",
    "tumblr"         : _OAUTH,
    "twitter"        : "Supported",
    "wallhaven"      : _APIKEY_WH,
    "weasyl"         : _APIKEY_WY,
}

# Extractor categories that are left out of the generated list
IGNORE_LIST = (
    "directlink",
    "oauth",
    "recursive",
    "test",
)
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
-												simplify build_supportedsites.py

											
										
										
											6 years ago
def domain(cls):
    """Return the web-domain related to an extractor class

    The following sources are tried in order and the first usable one wins:
      1. the last word of the defining module's docstring, if it starts
         with "http"
      2. the class' non-empty 'root' attribute, with a "/" appended
      3. the last word of the class docstring, prefixed with a scheme
         chosen by the class' 'https' attribute
      4. scheme and host of the first URL yielded by cls._get_tests()

    An empty string is returned if none of them yields a result.
    Missing or empty docstrings are skipped instead of raising.
    """
    # 1. module docstring ending in a URL
    module = sys.modules.get(cls.__module__)
    words = (getattr(module, "__doc__", None) or "").split()
    if words and words[-1].startswith("http"):
        return words[-1]

    # 2. explicit root URL
    root = getattr(cls, "root", None)
    if root:
        return root + "/"

    # 3. host name taken from the class docstring
    if hasattr(cls, "https"):
        words = (cls.__doc__ or "").split()
        if words:
            scheme = "https" if cls.https else "http"
            return "{}://{}/".format(scheme, words[-1])

    # 4. first test URL, cut off after the host part
    test = next(cls._get_tests(), None)
    if test:
        url = test[0]
        # start searching at index 8 to skip the "//" of the scheme
        end = url.find("/", 8)
        if end < 0:
            # URL consists of scheme and host only
            return url + "/"
        return url[:end+1]

    return ""
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
-												simplify build_supportedsites.py

											
										
										
											6 years ago
def category_text(cls):
    """Return the display name of an extractor class' category"""
    name = cls.category
    # explicit display names take precedence over the capitalized fallback
    display = CATEGORY_MAP.get(name)
    if display:
        return display
    return name.capitalize()
-												update build_supportedsites.py script

											
										
										
											6 years ago
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
-												simplify build_supportedsites.py

											
										
										
											6 years ago
def subcategory_text(cls):
    """Return the display name of an extractor class' subcategory"""
    category, subcategory = cls.category, cls.subcategory

    # category-specific entries are consulted first
    overrides = SUBCATEGORY_MAP.get(category, ())
    if subcategory in overrides:
        return overrides[subcategory]

    # then the entries shared by all categories
    if subcategory in SUBCATEGORY_MAP:
        return SUBCATEGORY_MAP[subcategory]

    # fallback: capitalize and make sure the name ends with "s"
    text = subcategory.capitalize()
    if not text.endswith("s"):
        text += "s"
    return text
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
-												simplify build_supportedsites.py

											
										
										
											6 years ago
def category_key(cls):
    """Return the key used to order extractor classes by category"""
    # classes from a "*.imagehosts" module get a "zz" prefix
    prefix = "zz" if cls.__module__.endswith(".imagehosts") else ""
    return prefix + category_text(cls).lower()
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
-												simplify build_supportedsites.py

											
										
										
											6 years ago
def subcategory_key(cls):
    """Return the key used to order extractor classes by subcategory"""
    # "issue" is mapped to "A", which compares lower than any lowercase name
    return "A" if cls.subcategory == "issue" else cls.subcategory
-												update supportedsites.rst

											
										
										
											7 years ago
-												simplify build_supportedsites.py

											
										
										
											6 years ago
def build_extractor_list():
    """Generate a sorted list of lists of extractor classes

    Extractor classes are grouped by category; each group is ordered with
    subcategory_key() and the groups themselves with category_key().
    Classes without a category and categories in IGNORE_LIST are left out.
    """
    extractors = collections.defaultdict(list)

    # get lists of extractor classes grouped by category
    for extr in extractor.extractors():
        if not extr.category or extr.category in IGNORE_LIST:
            continue
        extractors[extr.category].append(extr)

    # sort extractor lists with the same category
    for extrlist in extractors.values():
        extrlist.sort(key=subcategory_key)

    # ugly hack to add e-hentai.org: derive one class per exhentai extractor
    # Use .get() here: indexing the defaultdict would insert an empty
    # "exhentai" list when that category is absent, and empty lists make
    # the 'lst[0]' lookup of the final sort fail.
    exhentai = extractors.get("exhentai")
    if exhentai:
        eh = []
        for extr in exhentai:
            class eh_extr(extr):
                category = "e-hentai"
                root = "https://e-hentai.org"
            eh.append(eh_extr)
        extractors["e-hentai"] = eh

    # sort lists by category
    return sorted(
        extractors.values(),
        key=lambda lst: category_key(lst[0]),
    )
 								# define table columns
 								COLUMNS = (
 								    ("Site", 20,
 								     lambda x: category_text(x[0])),
 								    ("URL" , 35,
 								     lambda x: domain(x[0])),
 								    ("Capabilities", 50,
 								     lambda x: ", ".join(subcategory_text(extr) for extr in x
 								                         if subcategory_text(extr))),
 								    ("Authentication", 16,
 								     lambda x: AUTH_MAP.get(x[0].category, "")),
 								)
-												update build_supportedsites.py

											
										
										
											6 years ago
-												simplify build_supportedsites.py

											
										
										
											6 years ago
def write_output(fobj, columns, extractors):
    """Write a reStructuredText table of supported sites to 'fobj'

    'columns' is a sequence of (heading, width, renderer) triples and
    'extractors' an iterable of lists of extractor classes, one list per
    table row. Cell texts wider than their column are replaced by a
    "|<category>-<first letter of heading>|" substitution, whose definition
    is written below the table; columns whose heading starts with "A"
    are exempt from this.
    """
    substitutions = []

    def pad(output, col, category=None):
        """Return the cell text for 'col', padded to the column width"""
        width = col[1]
        text = output if isinstance(output, str) else col[2](output)

        if len(text) > width:
            initial = col[0][0]
            if initial != "A":
                placeholder = "|{}-{}|".format(category, initial)
                substitutions.append((placeholder, text))
                text = placeholder

        return text.ljust(width)

    write = fobj.write

    # caption
    write("Supported Sites\n")
    write("===============\n")
    write("Unless otherwise known, assume all sites to be NSFW\n\n")

    # table head
    rule = " ".join(["=" * col[1] for col in columns]) + "\n"
    heading = " ".join([pad(col[0], col) for col in columns])
    write(rule)
    write(heading.strip() + "\n")
    write(rule)

    # table body
    for group in extractors:
        cells = [
            pad(col[2](group), col, group[0].category)
            for col in columns
        ]
        write(" ".join(cells).strip())
        write("\n")

    # table bottom
    write(rule)
    write("\n")

    # substitutions
    for placeholder, text in substitutions:
        write(".. {} replace:: {}\n".format(placeholder, text))
-												improve supportedsites.rst and build script

											
										
										
											8 years ago
-												add a script to automatically build a list of supported sites

											
										
										
											8 years ago
 								outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.rst"
-												update/cleanup Python dev scripts

- put common code in its own util.py file
- same Python3 shebang for all scripts
- add file docstrings
- fix format string replacement fields in man page template

											
										
										
											6 years ago
+								with open(util.path("docs", outfile), "w") as file:
-												simplify build_supportedsites.py

											
										
										
											6 years ago
+								    write_output(file, COLUMNS, build_extractor_list())