diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 2b1d4281..bc58803d 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -12,6 +12,7 @@ from .common import Extractor, Message
 from .. import text, util, exception
 from ..cache import cache
+import itertools
 import json
 import time
 import re
 
@@ -40,7 +41,11 @@ class InstagramExtractor(Extractor):
         videos = self.config("videos", True)
 
         for post in self.posts():
-            post = self._parse_post(post)
+
+            if post["__typename"] == "GraphHighlightReel":
+                post = self._parse_reel("highlight:" + post["id"])
+            else:
+                post = self._parse_post(post)
             post.update(data)
             files = post.pop("_files")
 
@@ -70,6 +75,21 @@ class InstagramExtractor(Extractor):
             self.www_claim = www_claim
         return response
 
+    def _api_request(self, endpoint, params):
+        """Call an i.instagram.com API endpoint and return its JSON"""
+        url = "https://i.instagram.com/api/" + endpoint
+        headers = {
+            "X-CSRFToken"   : self.csrf_token,
+            "X-IG-App-ID"   : "936619743392459",
+            "X-IG-WWW-Claim": self.www_claim,
+        }
+        cookies = {
+            "csrftoken": self.csrf_token,
+        }
+        return self.request(
+            url, params=params, headers=headers, cookies=cookies,
+        ).json()
+
     def _graphql_request(self, query_hash, variables):
         url = self.root + "/graphql/query/"
         params = {
@@ -201,6 +221,58 @@ class InstagramExtractor(Extractor):
 
         return data
 
+    def _parse_reel(self, reel_id):
+        """Return metadata and media files of the reel 'reel_id'"""
+        params = {"reel_ids": reel_id}
+        data = self._api_request("v1/feed/reels_media/", params)
+        # a missing or empty 'reels_media' list means there is no such reel
+        if not data.get("reels_media"):
+            raise exception.NotFoundError("reel")
+        reel = data["reels_media"][0]
+
+        # keep only the part after the last ':' ("highlight:<id>" -> "<id>")
+        reel_id = reel_id.rpartition(":")[2]
+        owner = reel["user"]
+
+        data = {
+            "date"          : text.parse_timestamp(reel["created_at"]),
+            "owner_id"      : owner["pk"],
+            "username"      : owner.get("username"),
+            "fullname"      : owner.get("full_name"),
+            "post_id"       : reel_id,
+            "post_shortcode": self._shortcode_from_id(reel_id),
+        }
+
+        data["_files"] = files = []
+        for num, item in enumerate(reel["items"], 1):
+
+            image = item["image_versions2"]["candidates"][0]
+
+            if "video_versions" in item:
+                # pick the largest available video rendition
+                video = max(
+                    item["video_versions"],
+                    key=lambda x: (x["width"], x["height"], x["type"]),
+                )
+                media = video
+            else:
+                video = None
+                media = image
+
+            files.append({
+                "num"              : num,
+                "media_id"         : item["pk"],
+                "shortcode"        : item["code"],
+                "display_url"      : image["url"],
+                "video_url"        : video["url"] if video else None,
+                "width"            : media["width"],
+                "height"           : media["height"],
+                "sidecar_media_id" : reel_id,
+                "sidecar_shortcode": data["post_shortcode"],
+            })
+
+        return data
+
     @staticmethod
     def _shortcode_from_id(post_id):
         return util.bencode(
@@ -290,9 +362,30 @@ class InstagramUserExtractor(InstagramExtractor):
         user = self._extract_profile_page(url)
         edge = user["edge_owner_to_timeline_media"]
 
+        if user.get("highlight_reel_count") and self.config("highlights"):
+            query_hash = "d4d88dc1500312af6f937f7b804c68c3"
+            variables = {
+                "user_id": user["id"],
+                "include_chaining": False,
+                "include_reel": True,
+                "include_suggested_users": False,
+                "include_logged_out_extras": False,
+                "include_highlight_reels": True,
+                "include_live_status": True,
+            }
+            data = self._graphql_request(query_hash, variables)
+            highlights = [
+                edge["node"]
+                for edge in data["user"]["edge_highlight_reels"]["edges"]
+            ]
+        else:
+            highlights = None
+
         query_hash = "003056d32c2554def87228bc3fd9668a"
         variables = {"id": user["id"], "first": 12}
-        return self._pagination(query_hash, variables, edge)
+        posts = self._pagination(query_hash, variables, edge)
+
+        return itertools.chain(highlights, posts) if highlights else posts
 
 
 class InstagramChannelExtractor(InstagramExtractor):
@@ -504,3 +597,22 @@ class InstagramPostExtractor(InstagramExtractor):
         }
         data = self._graphql_request(query_hash, variables)
         return (data["shortcode_media"],)
+
+
+class InstagramHighlightExtractor(InstagramExtractor):
+    """Extractor for Instagram story highlights"""
+    subcategory = "highlight"
+    pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
+               r"/stories/highlights/(\d+)")
+    test = ("https://www.instagram.com/stories/highlights/18042509488170095/",)
+
+    def __init__(self, match):
+        InstagramExtractor.__init__(self, match)
+        self.highlight_id = match.group(1)
+
+    def posts(self):
+        """Return a single stub post identifying the highlight reel"""
+        return ({
+            "__typename": "GraphHighlightReel",
+            "id"        : self.highlight_id,
+        },)