From 70c7fbe89a0ff81db1e1542ab9f23a1daa925c89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 11 Nov 2022 20:21:41 +0100 Subject: [PATCH] [instagram] add 'guide' extractor (#3192) --- docs/supportedsites.md | 2 +- gallery_dl/extractor/instagram.py | 51 +++++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7d10d969..2d0d983b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -394,7 +394,7 @@ Consider all sites to be NSFW unless otherwise known. Instagram https://www.instagram.com/ - Avatars, Collections, Highlights, Posts, Reels, Saved Posts, Stories, Tag Searches, Tagged Posts, User Profiles + Avatars, Collections, Guides, Highlights, Posts, Reels, Saved Posts, Stories, Tag Searches, Tagged Posts, User Profiles Cookies diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 04f510fd..78c9de1e 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -447,6 +447,27 @@ class InstagramTaggedExtractor(InstagramExtractor): return self.api.user_tagged(self.user_id) +class InstagramGuideExtractor(InstagramExtractor): + """Extractor for an Instagram guide""" + subcategory = "guide" + pattern = USER_PATTERN + r"/guide/[^/?#]+/(\d+)" + test = (("https://www.instagram.com/kadakaofficial/guide" + "/knit-i-need-collection/18131821684305217/"), { + "range": "1-16", + "count": ">= 16", + }) + + def __init__(self, match): + InstagramExtractor.__init__(self, match) + self.guide_id = match.group(2) + + def metadata(self): + return {"guide": self.api.guide(self.guide_id)} + + def posts(self): + return self.api.guide_media(self.guide_id) + + class InstagramSavedExtractor(InstagramExtractor): """Extractor for an Instagram user's saved media""" subcategory = "saved" @@ -707,6 +728,15 @@ class InstagramRestAPI(): def __init__(self, extractor): self.extractor = extractor + def guide(self, guide_id): + endpoint = "/v1/guides/web_info/" + params = {"guide_id": guide_id} + return self._call(endpoint, params=params) + + def guide_media(self, guide_id): + endpoint = "/v1/guides/guide/{}/".format(guide_id) + return self._pagination_guides(endpoint) + def highlights_media(self, user_id): chunk_size = 5 reel_ids = [hl["id"] for hl in self.highlights_tray(user_id)] @@ -858,13 +888,28 @@ class InstagramRestAPI(): params["page"] = info["next_page"] params["max_id"] = extr._update_cursor(info["next_max_id"]) + def _pagination_guides(self, endpoint): + extr = self.extractor + params = {"max_id": extr._init_cursor()} + + while True: + data = self._call(endpoint, params=params) + + for item in data["items"]: + yield from item["media_items"] + + if "next_max_id" not in data: + return + params["max_id"] = extr._update_cursor(data["next_max_id"]) + class InstagramGraphqlAPI(): def __init__(self, extractor): self.extractor = extractor self.user_collection = self.user_saved = self.reels_media = \ - self.highlights_media = self._login_required + self.highlights_media = self.guide = self.guide_media = \ + self._unsupported self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode api = InstagramRestAPI(extractor) @@ -873,8 +918,8 @@ class InstagramGraphqlAPI(): self.user_id = api.user_id @staticmethod - def _login_required(_=None): - raise exception.AuthorizationError("Login required") + def _unsupported(_=None): + raise exception.StopExtraction("Unsupported with GraphQL API") def highlights_tray(self, user_id): query_hash = "d4d88dc1500312af6f937f7b804c68c3"