From cc36f8858689bec4e00eee0979136fce6b929e81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Fri, 20 Apr 2018 14:53:21 +0200
Subject: [PATCH] rename safe_int to parse_int; move parse_* to text module

---
 gallery_dl/downloader/http.py           |  6 ++--
 gallery_dl/extractor/artstation.py      |  4 +--
 gallery_dl/extractor/deviantart.py      |  4 +--
 gallery_dl/extractor/dynastyscans.py    |  4 +--
 gallery_dl/extractor/exhentai.py        | 18 +++++------
 gallery_dl/extractor/fallenangels.py    |  4 +--
 gallery_dl/extractor/foolslide.py       |  8 ++---
 gallery_dl/extractor/gelbooru.py        |  4 +--
 gallery_dl/extractor/hbrowse.py         | 12 +++----
 gallery_dl/extractor/hentai2read.py     | 15 ++++-----
 gallery_dl/extractor/hentaifoundry.py   |  8 ++---
 gallery_dl/extractor/hentaihere.py      | 14 ++++-----
 gallery_dl/extractor/hitomi.py          |  2 +-
 gallery_dl/extractor/imagefap.py        |  4 +--
 gallery_dl/extractor/kissmanga.py       | 10 +++---
 gallery_dl/extractor/komikcast.py       |  8 ++---
 gallery_dl/extractor/mangadex.py        | 20 ++++++------
 gallery_dl/extractor/mangafox.py        |  4 +--
 gallery_dl/extractor/mangahere.py       | 14 ++++-----
 gallery_dl/extractor/mangapark.py       | 12 +++----
 gallery_dl/extractor/mangareader.py     | 12 +++----
 gallery_dl/extractor/mangastream.py     |  6 ++--
 gallery_dl/extractor/nhentai.py         |  4 +--
 gallery_dl/extractor/nijie.py           |  8 ++---
 gallery_dl/extractor/paheal.py          |  2 +-
 gallery_dl/extractor/pinterest.py       | 10 +++---
 gallery_dl/extractor/readcomiconline.py |  6 ++--
 gallery_dl/extractor/sankaku.py         | 14 ++++-----
 gallery_dl/extractor/seiga.py           |  8 ++---
 gallery_dl/extractor/senmanga.py        |  4 +--
 gallery_dl/extractor/slideshare.py      |  4 +--
 gallery_dl/extractor/spectrumnexus.py   |  8 ++---
 gallery_dl/extractor/xvideos.py         | 10 +++---
 gallery_dl/text.py                      | 32 ++++++++++++++++++-
 gallery_dl/util.py                      | 28 +----------------
 test/test_text.py                       | 42 +++++++++++++++++++++++++
 test/test_util.py                       | 26 ---------------
 37 files changed, 210 insertions(+), 189 deletions(-)

diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index bf461ae2..b590485f 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -12,7 +12,7 @@ import time
 import mimetypes
 from requests.exceptions import ConnectionError, Timeout
 from .common import DownloaderBase
-from .. import util, exception
+from .. import text, exception


 class Downloader(DownloaderBase):
@@ -28,7 +28,7 @@ class Downloader(DownloaderBase):
         self.chunk_size = 16384

         if self.rate:
-            self.rate = util.parse_bytes(self.rate)
+            self.rate = text.parse_bytes(self.rate)
             if not self.rate:
                 self.log.warning("Invalid rate limit specified")
             elif self.rate < self.chunk_size:
@@ -61,7 +61,7 @@ class Downloader(DownloaderBase):
         else:
             self.response.raise_for_status()

-        return offset, util.safe_int(size)
+        return offset, text.parse_int(size)

     def receive(self, file):
         if self.rate:
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index fbea9595..6f8dbd53 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -158,7 +158,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):

     def __init__(self, match):
         ArtstationExtractor.__init__(self, match)
-        self.album_id = util.safe_int(match.group(2))
+        self.album_id = text.parse_int(match.group(2))

     def metadata(self):
         userinfo = self.get_user_info(self.user)
@@ -256,7 +256,7 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
     def _id_from_url(url):
         """Get an image's submission ID from its URL"""
         parts = url.split("/")
-        return util.safe_int("".join(parts[7:10]))
+        return text.parse_int("".join(parts[7:10]))


 class ArtstationSearchExtractor(ArtstationExtractor):
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 19f7a58d..ee156342 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -9,7 +9,7 @@
 """Extract images from https://www.deviantart.com/"""

 from .common import Extractor, Message
-from .. import text, util, exception
+from .. import text, exception
 from ..cache import cache, memcache
 import itertools
 import datetime
@@ -62,7 +62,7 @@ class DeviantartExtractor(Extractor):

             if "videos" in deviation:
                 video = max(deviation["videos"],
-                            key=lambda x: util.safe_int(x["quality"][:-1]))
+                            key=lambda x: text.parse_int(x["quality"][:-1]))
                 yield self.commit(deviation, video)

             if "flash" in deviation:
diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py
index bd9107ac..d63ddc0a 100644
--- a/gallery_dl/extractor/dynastyscans.py
+++ b/gallery_dl/extractor/dynastyscans.py
@@ -9,7 +9,7 @@
 """Extract manga-chapters from https://dynasty-scans.com/"""

 from .common import ChapterExtractor
-from .. import text, util
+from .. import text
 import re
 import json

@@ -53,7 +53,7 @@ class DynastyscansChapterExtractor(ChapterExtractor):

         return {
             "manga": text.unescape(match.group(1)),
-            "chapter": util.safe_int(match.group(2)),
+            "chapter": text.parse_int(match.group(2)),
             "chapter_minor": match.group(3) or "",
             "title": text.unescape(match.group(4) or ""),
             "author": text.remove_html(author),
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 41eaeca1..2af58110 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -120,7 +120,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
         self.key = {}
         self.count = 0
         self.version, self.gid, self.token = match.groups()
-        self.gid = util.safe_int(self.gid)
+        self.gid = text.parse_int(self.gid)

     def items(self):
         self.login()
@@ -163,7 +163,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
         data["lang"] = util.language_to_code(data["language"])
         data["title"] = text.unescape(data["title"])
         data["title_jp"] = text.unescape(data["title_jp"])
-        data["count"] = util.safe_int(data["count"])
+        data["count"] = text.parse_int(data["count"])
         data["gallery_size"] = util.parse_bytes(
             data["gallery_size"].rstrip("Bb"))
         return data
@@ -245,17 +245,17 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
     def _parse_image_info(url):
         parts = url.split("/")[4].split("-")
         return {
-            "width": util.safe_int(parts[2]),
-            "height": util.safe_int(parts[3]),
-            "size": util.safe_int(parts[1]),
+            "width": text.parse_int(parts[2]),
+            "height": text.parse_int(parts[3]),
+            "size": text.parse_int(parts[1]),
         }

     @staticmethod
     def _parse_original_info(info):
         parts = info.lstrip().split(" ")
         return {
-            "width": util.safe_int(parts[0]),
-            "height": util.safe_int(parts[2]),
+            "width": text.parse_int(parts[0]),
+            "height": text.parse_int(parts[2]),
             "size": util.parse_bytes(parts[3] + parts[4][0]),
         }

@@ -274,7 +274,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
     def __init__(self, match):
         ExhentaiExtractor.__init__(self)
         self.params = text.parse_query(match.group(1) or "")
-        self.params["page"] = util.safe_int(self.params.get("page"))
+        self.params["page"] = text.parse_int(self.params.get("page"))
         self.url = self.root

     def items(self):
@@ -308,7 +308,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
         return Message.Queue, url, {
             "type": gtype,
             "date": date,
-            "gallery_id": util.safe_int(parts[1]),
+            "gallery_id": text.parse_int(parts[1]),
             "gallery_token": parts[2],
             "title": text.unescape(title),
             key: last,
diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py
index a4ea6f58..3cd3f7a2 100644
--- a/gallery_dl/extractor/fallenangels.py
+++ b/gallery_dl/extractor/fallenangels.py
@@ -98,8 +98,8 @@ class FallenangelsMangaExtractor(MangaExtractor):
             chapter, dot, minor = chapter.partition(".")
             results.append((url, {
                 "manga": manga, "title": title,
-                "volume": util.safe_int(volume),
-                "chapter": util.safe_int(chapter),
+                "volume": text.parse_int(volume),
+                "chapter": text.parse_int(chapter),
                 "chapter_minor": dot + minor,
                 "lang": self.lang, "language": language,
             }))
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index 117c3bdc..cf92b3cf 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -50,8 +50,8 @@ class FoolslideExtractor(SharedConfigExtractor):
         lang = info[1].partition("-")[0]
         data["lang"] = lang
         data["language"] = util.code_to_language(lang)
-        data["volume"] = util.safe_int(info[2])
-        data["chapter"] = util.safe_int(info[3])
+        data["volume"] = text.parse_int(info[2])
+        data["chapter"] = text.parse_int(info[3])
         data["chapter_minor"] = "." + info[4] if len(info) >= 5 else ""
         return data

@@ -75,7 +75,7 @@ class FoolslideChapterExtractor(FoolslideExtractor):

         imgs = self.get_images(page)
         data["count"] = len(imgs)
-        data["chapter_id"] = util.safe_int(imgs[0]["chapter_id"])
+        data["chapter_id"] = text.parse_int(imgs[0]["chapter_id"])

         yield Message.Version, 1
         yield Message.Directory, data
@@ -88,7 +88,7 @@ class FoolslideChapterExtractor(FoolslideExtractor):
             except KeyError:
                 pass
             for key in ("height", "id", "size", "width"):
-                image[key] = util.safe_int(image[key])
+                image[key] = text.parse_int(image[key])
             data.update(image)
             text.nameext_from_url(data["filename"], data)
             yield Message.Url, url, data
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 33abdbd4..110160a6 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -37,7 +37,7 @@ class GelbooruExtractor(SharedConfigExtractor):
             if isinstance(post, str):
                 post = self.get_post_data(post)
             for key in ("id", "width", "height", "score", "change"):
-                post[key] = util.safe_int(post[key])
+                post[key] = text.parse_int(post[key])
             url = post["file_url"]
             post.update(data)
             yield Message.Url, url, text.nameext_from_url(url, post)
@@ -174,7 +174,7 @@ class GelbooruPoolExtractor(GelbooruExtractor):
             raise exception.NotFoundError("pool")

         return {
-            "pool": util.safe_int(self.pool_id),
+            "pool": text.parse_int(self.pool_id),
             "pool_name": text.unescape(name),
             "count": len(self.posts),
         }
diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py
index 03232799..fde517ac 100644
--- a/gallery_dl/extractor/hbrowse.py
+++ b/gallery_dl/extractor/hbrowse.py
@@ -9,7 +9,7 @@
 """Extract images from http://www.hbrowse.com/"""

 from .common import ChapterExtractor, MangaExtractor
-from .. import text, util
+from .. import text
 from urllib.parse import urljoin
 import json

@@ -30,7 +30,7 @@ class HbrowseExtractor():
         ), values=data)

         data["manga"] = text.unescape(data["manga"])
-        data["total"] = util.safe_int(data["total"])
+        data["total"] = text.parse_int(data["total"])
         data["artist"] = text.remove_html(data["artist"])
         data["origin"] = text.remove_html(data["origin"])
         return data
@@ -48,7 +48,7 @@ class HbrowseMangaExtractor(HbrowseExtractor, MangaExtractor):
     def chapters(self, page):
         results = []
         data = self.parse_page(page, {
-            "manga_id": util.safe_int(
+            "manga_id": text.parse_int(
                 self.url.rstrip("/").rpartition("/")[2])
         })

@@ -59,7 +59,7 @@ class HbrowseMangaExtractor(HbrowseExtractor, MangaExtractor):
             if not url:
                 return results
             title, pos = text.extract(page, '>View ', '<', pos)
-            data["chapter"] = util.safe_int(url.rpartition("/")[2][1:])
+            data["chapter"] = text.parse_int(url.rpartition("/")[2][1:])
             data["title"] = title
             results.append((urljoin(self.root, url), data.copy()))

@@ -84,8 +84,8 @@ class HbrowseChapterExtractor(HbrowseExtractor, ChapterExtractor):

     def get_metadata(self, page):
         return self.parse_page(page, {
-            "manga_id": util.safe_int(self.gid),
-            "chapter": util.safe_int(self.chapter)
+            "manga_id": text.parse_int(self.gid),
+            "chapter": text.parse_int(self.chapter)
         })

     def get_images(self, page):
diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py
index 6d2cd75f..34a7749c 100644
--- a/gallery_dl/extractor/hentai2read.py
+++ b/gallery_dl/extractor/hentai2read.py
@@ -9,7 +9,7 @@
 """Extract hentai-manga from https://hentai2read.com/"""

 from .common import ChapterExtractor, MangaExtractor
-from .. import text, util
+from .. import text
 import re
 import json

@@ -36,7 +36,8 @@ class Hentai2readMangaExtractor(MangaExtractor):
             page, '', '')
         mtype, pos = text.extract(
             page, '[', ']', pos)
-        manga_id = util.safe_int(text.extract(page, 'data-mid="', '"', pos)[0])
+        manga_id = text.parse_int(text.extract(
+            page, 'data-mid="', '"', pos)[0])

         while True:
             chapter_id, pos = text.extract(page, ' data-cid="', '"', pos)
@@ -49,8 +50,8 @@ class Hentai2readMangaExtractor(MangaExtractor):
             chapter, _, title = text.unescape(chapter).strip().partition(" - ")
             results.append((url, {
                 "manga_id": manga_id, "manga": manga, "type": mtype,
-                "chapter_id": util.safe_int(chapter_id),
-                "chapter": util.safe_int(chapter),
+                "chapter_id": text.parse_int(chapter_id),
+                "chapter": text.parse_int(chapter),
                 "title": title, "lang": "en", "language": "English",
             }))

@@ -78,9 +79,9 @@ class Hentai2readChapterExtractor(ChapterExtractor):
             r"(\d+): (.+) . Page 1 ", title)
         return {
             "manga": match.group(1),
-            "manga_id": util.safe_int(manga_id),
-            "chapter": util.safe_int(self.chapter),
-            "chapter_id": util.safe_int(chapter_id),
+            "manga_id": text.parse_int(manga_id),
+            "chapter": text.parse_int(self.chapter),
+            "chapter_id": text.parse_int(chapter_id),
             "type": match.group(2),
             "author": match.group(3),
             "title": match.group(5),
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index 2fe4daa7..d3d2e8a4 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -9,7 +9,7 @@
 """Extract images from https://www.hentai-foundry.com/"""

 from .common import Extractor, Message
-from .. import text, util, exception
+from .. import text, exception


 class HentaifoundryUserExtractor(Extractor):
@@ -69,7 +69,7 @@ class HentaifoundryUserExtractor(Extractor):
         page = response.text
         token, pos = text.extract(page, 'hidden" value="', '"')
         count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos)
-        return {"artist": self.artist, "count": util.safe_int(count)}, token
+        return {"artist": self.artist, "count": text.parse_int(count)}, token

     def get_image_metadata(self, url):
         """Collect metadata for an image"""
@@ -79,7 +79,7 @@ class HentaifoundryUserExtractor(Extractor):
             page, 'Pictures » ', '<')
         part, pos = text.extract(
             page, '//pictures.hentai-foundry.com', '"', pos)
-        data = {"index": util.safe_int(index), "title": text.unescape(title)}
+        data = {"index": text.parse_int(index), "title": text.unescape(title)}
         text.nameext_from_url(part, data)
         return "https://pictures.hentai-foundry.com" + part, data

@@ -161,7 +161,7 @@ class HentaifoundryImageExtractor(Extractor):
         url , pos = extr(page, '//pictures.hentai-foundry.com', '"', pos)
         data = {
             "artist": artist,
-            "index": util.safe_int(self.index),
+            "index": text.parse_int(self.index),
             "title": text.unescape(title),
         }
         text.nameext_from_url(url, data)
diff --git a/gallery_dl/extractor/hentaihere.py b/gallery_dl/extractor/hentaihere.py
index d1c5c932..50150cb8 100644
--- a/gallery_dl/extractor/hentaihere.py
+++ b/gallery_dl/extractor/hentaihere.py
@@ -9,7 +9,7 @@
 """Extract hentai-manga from https://hentaihere.com/"""

 from .common import ChapterExtractor, MangaExtractor
-from .. import text, util
+from .. import text
 import re
 import json

@@ -32,7 +32,7 @@ class HentaihereMangaExtractor(MangaExtractor):

     def chapters(self, page):
         results = []
-        manga_id = util.safe_int(
+        manga_id = text.parse_int(
             self.url.rstrip("/").rpartition("/")[2][1:])
         manga, pos = text.extract(
             page, '', '')
@@ -50,8 +50,8 @@ class HentaihereMangaExtractor(MangaExtractor):
             chapter, _, title = text.unescape(chapter).strip().partition(" - ")
             results.append((url, {
                 "manga_id": manga_id, "manga": manga, "type": mtype,
-                "chapter_id": util.safe_int(chapter_id),
-                "chapter": util.safe_int(chapter),
+                "chapter_id": text.parse_int(chapter_id),
+                "chapter": text.parse_int(chapter),
                 "title": title, "lang": "en", "language": "English",
             }))

@@ -79,9 +79,9 @@ class HentaihereChapterExtractor(ChapterExtractor):
         match = re.match(pattern, title)
         return {
             "manga": match.group(1),
-            "manga_id": util.safe_int(self.manga_id),
-            "chapter": util.safe_int(self.chapter),
-            "chapter_id": util.safe_int(chapter_id),
+            "manga_id": text.parse_int(self.manga_id),
+            "chapter": text.parse_int(self.chapter),
+            "chapter_id": text.parse_int(chapter_id),
             "type": match.group(2),
             "title": match.group(3),
             "author": match.group(4),
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 64ae1e12..60d91a3f 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -30,7 +30,7 @@ class HitomiGalleryExtractor(ChapterExtractor):
     ]

     def __init__(self, match):
-        self.gid = util.safe_int(match.group(1))
+        self.gid = text.parse_int(match.group(1))
         url = "https://hitomi.la/galleries/{}.html".format(self.gid)
         ChapterExtractor.__init__(self, url)

diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 97d8cb68..75f2e623 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -9,7 +9,7 @@
 """Extract images from http://imagefap.com/"""

 from .common import Extractor, Message
-from .. import text, util
+from .. import text
 import json


@@ -159,7 +159,7 @@ class ImagefapUserExtractor(ImagefapExtractor):
         yield Message.Version, 1
         for gid, name in self.get_gallery_data():
             url = "http://www.imagefap.com/gallery/" + gid
-            data = {"gallery_id": util.safe_int(gid), "title": name}
+            data = {"gallery_id": text.parse_int(gid), "title": name}
             yield Message.Queue, url, data

     def get_gallery_data(self):
diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py
index 36e1f02f..534c36bf 100644
--- a/gallery_dl/extractor/kissmanga.py
+++ b/gallery_dl/extractor/kissmanga.py
@@ -9,7 +9,7 @@
 """Extract manga-chapters and entire manga from http://kissmanga.com/"""

 from .common import ChapterExtractor, MangaExtractor
-from .. import text, util, cloudflare, aes, exception
+from .. import text, cloudflare, aes, exception
 from ..cache import cache
 import re
 import hashlib
@@ -56,8 +56,8 @@ class KissmangaBase():
         ), data["chapter_string"])

         volume, chapter, minor, title = match.groups()
-        data["volume"] = util.safe_int(volume)
-        data["chapter"] = util.safe_int(chapter)
+        data["volume"] = text.parse_int(volume)
+        data["chapter"] = text.parse_int(chapter)
         data["chapter_minor"] = "." + minor if minor else ""
         data["title"] = title if title and title != "Read Online" else ""
         return data
@@ -89,7 +89,7 @@ class KissmangaMangaExtractor(KissmangaBase, MangaExtractor):
             url, _, chapter = item.partition(needle)
             data = {
                 "manga": manga, "chapter_string": chapter,
-                "chapter_id": util.safe_int(url.rpartition("=")[2]),
+                "chapter_id": text.parse_int(url.rpartition("=")[2]),
                 "lang": "en", "language": "English",
             }
             self.parse_chapter_string(data)
@@ -128,7 +128,7 @@ class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
         data = {
             "manga": manga.strip(),
             "chapter_string": cinfo.strip(),
-            "chapter_id": util.safe_int(self.chapter_id),
+            "chapter_id": text.parse_int(self.chapter_id),
             "lang": "en", "language": "English",
         }

diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py
index 04805001..9270fdef 100644
--- a/gallery_dl/extractor/komikcast.py
+++ b/gallery_dl/extractor/komikcast.py
@@ -9,7 +9,7 @@
 """Extract manga-chapters and entire manga from https://komikcast.com/"""

 from .common import ChapterExtractor, MangaExtractor
-from .. import text, util, cloudflare
+from .. import text, cloudflare
 import re


@@ -39,7 +39,7 @@ class KomikcastBase():
             data["title"] = title.strip()
         else:
             data["title"] = ""
-        data["chapter"] = util.safe_int(chapter)
+        data["chapter"] = text.parse_int(chapter)
         data["lang"] = "id"
         data["language"] = "Indonesian"

@@ -75,8 +75,8 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
             page, '', '