diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py index f5125eea..92ea6caf 100644 --- a/gallery_dl/extractor/2chan.py +++ b/gallery_dl/extractor/2chan.py @@ -60,8 +60,8 @@ class _2chanThreadExtractor(Extractor): def metadata(self, page): """Collect metadata for extractor-job""" - title = text.extract(page, "", "")[0] - title, _, boardname = title.rpartition(" - ") + title, _, boardname = text.extr( + page, "", "").rpartition(" - ") return { "server": self.server, "title": title, @@ -72,8 +72,8 @@ class _2chanThreadExtractor(Extractor): def posts(self, page): """Build a list of all post-objects""" - page = text.extract( - page, '
')[0] + page = text.extr( + page, '
') return [ self.parse(post) for post in page.split('') @@ -84,7 +84,7 @@ class _2chanThreadExtractor(Extractor): data = self._extract_post(post) if data["name"]: data["name"] = data["name"].strip() - path = text.extract(post, '', '')[0]) + 'id="ractive-public" type="text/plain">', '')) images = data.get("pictures") if images: diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index 14d1e6b9..da2d8f2a 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -41,8 +41,8 @@ class ArtstationExtractor(Extractor): if adict["has_embedded_player"] and self.external: player = adict["player_embedded"] - url = text.extract(player, 'src="', '"')[0] or \ - text.extract(player, "src='", "'")[0] + url = (text.extr(player, 'src="', '"') or + text.extr(player, "src='", "'")) if url and not url.startswith(self.root): asset["extension"] = None yield Message.Url, "ytdl:" + url, asset diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py index fa590b90..6f015728 100644 --- a/gallery_dl/extractor/aryion.py +++ b/gallery_dl/extractor/aryion.py @@ -128,8 +128,7 @@ class AryionExtractor(Extractor): # get filename from 'Content-Disposition' header cdis = headers["content-disposition"] - fname, _, ext = text.extract( - cdis, 'filename="', '"')[0].rpartition(".") + fname, _, ext = text.extr(cdis, 'filename="', '"').rpartition(".") if not fname: fname, ext = ext, fname diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py index 17b5f528..1b49d6a5 100644 --- a/gallery_dl/extractor/bbc.py +++ b/gallery_dl/extractor/bbc.py @@ -38,8 +38,8 @@ class BbcGalleryExtractor(GalleryExtractor): ) def metadata(self, page): - data = json.loads(text.extract( - page, '')[0]) + data = json.loads(text.extr( + page, '')) return { "programme": self.gallery_url.split("/")[4], "path": list(util.unique_sequence( diff --git a/gallery_dl/extractor/bcy.py b/gallery_dl/extractor/bcy.py index 79828819..4eb446dc 100644 --- a/gallery_dl/extractor/bcy.py +++ b/gallery_dl/extractor/bcy.py @@ -97,7 +97,7 @@ class BcyExtractor(Extractor): url = "{}/item/detail/{}".format(self.root, post_id) page = self.request(url, notfound="post").text return json.loads( - text.extract(page, 'JSON.parse("', '");')[0] + text.extr(page, 'JSON.parse("', '");') .replace('\\\\u002F', '/') .replace('\\"', '"') )["detail"] diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index 994a7016..cf332acb 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -119,8 +119,8 @@ class BehanceGalleryExtractor(BehanceExtractor): } page = self.request(url, cookies=cookies).text - data = json.loads(text.extract( - page, 'id="beconfig-store_state">', '')[0]) + data = json.loads(text.extr( + page, 'id="beconfig-store_state">', '')) return self._update(data["project"]["project"]) def get_images(self, data): @@ -137,7 +137,7 @@ class BehanceGalleryExtractor(BehanceExtractor): elif mtype == "video": page = self.request(module["src"]).text - url = text.extract(page, '', '<')[0]) + 'id="__NEXT_DATA__" type="application/json">', '<')) album = data["props"]["pageProps"]["album"] files = album["files"] except Exception as exc: diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 12218438..45beddf3 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -603,22 +603,22 @@ class DeviantartStashExtractor(DeviantartExtractor): page = self._limited_request(url).text if stash_id[0] == "0": - uuid = text.extract(page, '//deviation/', '"')[0] + uuid = text.extr(page, '//deviation/', '"') if uuid: deviation = self.api.deviation(uuid) - deviation["index"] = text.parse_int(text.extract( - page, 'gmi-deviationid="', '"')[0]) + deviation["index"] = text.parse_int(text.extr( + page, 'gmi-deviationid="', '"')) yield deviation return for item in text.extract_iter( page, 'class="stash-thumb-container', ''): - url = text.extract(item, '", "") url = extr(' src="', '"') - src = text.extract(src, 'href="', '"')[0] if "Source<" in src else "" + src = text.extr(src, 'href="', '"') if "Source<" in src else "" return { "url" : self.root + url, @@ -75,7 +75,7 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor): "title" : text.unescape(match.group(4) or ""), "author" : text.remove_html(author), "group" : (text.remove_html(group) or - text.extract(group, ' alt="', '"')[0] or ""), + text.extr(group, ' alt="', '"')), "date" : text.parse_datetime(extr( '"icon-calendar"> ', '<'), "%b %d, %Y"), "lang" : "en", @@ -83,7 +83,7 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor): } def images(self, page): - data = text.extract(page, "var pages = ", ";\n")[0] + data = text.extr(page, "var pages = ", ";\n") return [ (self.root + img["image"], None) for img in json.loads(data) diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py index 992db972..b4dadc7e 100644 --- a/gallery_dl/extractor/erome.py +++ b/gallery_dl/extractor/erome.py @@ -55,8 +55,8 @@ class EromeExtractor(Extractor): yield Message.Directory, data groups = page.split('
", "<")[0]) + data["uploader"] = text.unescape(text.extr( + data["uploader"], ">", "<")) f = data["favorites"][0] if f == "N": @@ -400,7 +400,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): } page = self.request(url, cookies=cookies).text - current = text.extract(page, "", "")[0] + current = text.extr(page, "", "") self.log.debug("Image Limits: %s/%s", current, self.limits) self._remaining = self.limits - text.parse_int(current) diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py index ab0e0c52..57587b60 100644 --- a/gallery_dl/extractor/fallenangels.py +++ b/gallery_dl/extractor/fallenangels.py @@ -57,7 +57,7 @@ class FallenangelsChapterExtractor(ChapterExtractor): return [ (img["page_image"], None) for img in json.loads( - text.extract(page, "var pages = ", ";")[0] + text.extr(page, "var pages = ", ";") ) ] diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 5e6da5b6..4f9a6bf5 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -56,7 +56,7 @@ class FoolfuukaExtractor(BaseExtractor): """Resolve a remote media link""" needle = '", "")[0].strip() + title = text.extr(page, "", "").strip() title, _, gallery_id = title.rpartition("#") return { @@ -104,7 +104,7 @@ class FuskatorSearchExtractor(Extractor): page, 'class="pic_pad">', '>>><')[0] + pages = text.extr(page, 'class="pages">', '>>><') if not pages: return url = self.root + text.rextract(pages, 'href="', '"')[0] diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index 63450c04..d8109e12 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -69,7 +69,7 @@ class GelbooruBase(): yield "https://img1.gelbooru.com" + path def _notes(self, post, page): - notes_data = text.extract(page, '
')[0] + notes_data = text.extr(page, '
') if not notes_data: return diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index 9bf367be..3abac798 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -98,8 +98,8 @@ class GelbooruV02Extractor(booru.BooruExtractor): self.root, post["id"])).text def _tags(self, post, page): - tag_container = (text.extract(page, '