# -*- coding: utf-8 -*-
# Copyright 2018-2020 Leonardo Taccari
# Copyright 2018-2023 Mike Fährmann
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for"""
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
import binascii
import json
import re
# Regex prefix matching the host part of a URL; the scheme and a leading
# "www." are both optional.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?instagram\.com"
# BASE_PATTERN followed by one path segment, captured as group 1.  The
# negative lookahead rejects the segments p, tv, reel, explore and stories.
USER_PATTERN = BASE_PATTERN + r"/(?!(?:p|tv|reel|explore|stories)/)([^/?#]+)"
# Shared base for every extractor class in this module; subclasses override
# metadata() and posts() (or items()).
class InstagramExtractor(Extractor):
"""Base class for instagram extractors"""
category = "instagram"
# Format strings keyed on fields produced by the _parse_post_* methods
# ("username", "media_id", "sidecar_media_id").
directory_fmt = ("{category}", "{username}")
filename_fmt = "{sidecar_media_id:?/_/}{media_id}.{extension}"
archive_fmt = "{media_id}"
# NOTE(review): root and cookies_domain are empty strings here, although
# root is used below to build post/location URLs and the Referer header.
# Confirm the intended values.
root = ""
cookies_domain = ""
# Cookie names passed to cookies_check() in login().
cookies_names = ("sessionid",)
# presumably a (min, max) delay in seconds between requests; verify
# against the Extractor base class
request_interval = (6.0, 12.0)
def __init__(self, match):
    """Initialize the extractor from a URL pattern match.

    Stores the first capture group as `self.item`; subclasses use it as
    the user name, "id:<n>" reference or tag name.
    """
    Extractor.__init__(self, match)
    # NOTE(review): the right-hand side was missing.  Group 1 is assumed
    # because USER_PATTERN and the tag pattern capture exactly one
    # identifying path segment.
    self.item = match.group(1)
def _init(self):
    """Set up per-run request state and select the API backend.

    Resets the www-claim value, generates a fresh CSRF token, compiles the
    hashtag finder and picks `InstagramGraphqlAPI` when the "api" option
    equals "graphql", `InstagramRestAPI` otherwise.
    """
    self.www_claim = "0"
    self.csrf_token = util.generate_token()
    self._find_tags = re.compile(r"#\w+").findall
    self._logged_in = True
    self._cursor = None
    self._user = None

    # NOTE(review): the head of this call was missing; only its arguments
    # survived.  `self.cookies.set` is assumed -- confirm.
    self.cookies.set(
        "csrftoken", self.csrf_token, domain=self.cookies_domain)

    if self.config("api") == "graphql":
        self.api = InstagramGraphqlAPI(self)
    else:
        # without this `else`, the REST API always replaced the GraphQL one
        self.api = InstagramRestAPI(self)
def items(self):
    """Yield Directory and Url messages for every post and its files.

    Each post from `posts()` is normalized by the GraphQL or REST parser
    (chosen by the presence of "__typename"), merged with `metadata()`,
    and emitted as one Directory message followed by one Url message per
    downloadable file.

    Options read: "videos" (download video files), "previews" (also
    download the preview image of a video), "order-files" (a value
    starting with "r" or "d" reverses the file order).
    """
    # NOTE(review): login() is defined in this class but has no caller in
    # this file; invoking it here is an assumption -- confirm.
    self.login()

    data = self.metadata()
    videos = self.config("videos", True)
    previews = self.config("previews", False)
    video_headers = {"User-Agent": "Mozilla/5.0"}

    order = self.config("order-files")
    reverse = order[0] in ("r", "d") if order else False

    for post in self.posts():
        if "__typename" in post:
            post = self._parse_post_graphql(post)
        else:
            post = self._parse_post_rest(post)
        if self._user:
            post["user"] = self._user
        post.update(data)
        files = post.pop("_files")

        post["count"] = len(files)
        yield Message.Directory, post

        # drop the post-level date so it cannot overwrite per-file dates
        # when the post metadata is merged into each file below
        if "date" in post:
            del post["date"]
        if reverse:
            files.reverse()

        for file in files:
            file.update(post)

            url = file.get("video_url")
            if url:
                if videos:
                    file["_http_headers"] = video_headers
                    text.nameext_from_url(url, file)
                    yield Message.Url, url, file
                if previews:
                    # distinct media_id so the preview does not collide
                    # with the video in filename/archive formats
                    file["media_id"] += "p"
                else:
                    continue

            url = file["display_url"]
            yield Message.Url, url, text.nameext_from_url(url, file)
def metadata(self):
    """Return extra metadata for every post; empty in the base class."""
    return tuple()
def posts(self):
    """Return the posts to process; empty in the base class."""
    return tuple()
def finalize(self):
    """Log how to resume when a pagination cursor is still set."""
    if self._cursor:
        # NOTE(review): the call head was missing; only the message and
        # its argument survived.  Log level `info` is assumed.
        self.log.info("Use '-o cursor=%s' to continue downloading "
                      "from the current position", self._cursor)
def request(self, url, **kwargs):
    """Send a request and keep the session's claim/CSRF values current.

    Raises:
        exception.StopExtraction: when the response was redirected to a
            login or challenge page.

    Returns the response object from `Extractor.request`.
    """
    response = Extractor.request(self, url, **kwargs)

    if response.history:
        # a redirect happened; find out whether it went to a page that
        # makes further extraction pointless
        url = response.url
        if "/accounts/login/" in url:
            page = "login"
        elif "/challenge/" in url:
            page = "challenge"
        else:
            # without this `else`, `page` was always reset to None and
            # the check below could never fire
            page = None

        if page:
            raise exception.StopExtraction("HTTP redirect to %s page (%s)",
                                           page, url.partition("?")[0])

    www_claim = response.headers.get("x-ig-set-www-claim")
    if www_claim is not None:
        self.www_claim = www_claim

    csrf_token = response.cookies.get("csrftoken")
    if csrf_token:
        self.csrf_token = csrf_token

    return response
def login(self):
    """Establish the logged-in state for this run.

    Does nothing when the cookies named in `cookies_names` are already
    present.  Otherwise, configured credentials are passed to
    `_login_impl`; when no user name is configured, `_logged_in` is set
    to False.
    """
    if self.cookies_check(self.cookies_names):
        # session cookies available: nothing to do
        return

    username, password = self._get_auth_info()
    if username:
        return self.cookies_update(_login_impl(self, username, password))

    self._logged_in = False
# Convert one REST API object (a reel with "items", or a regular post) into
# the metadata dict `data`, which is returned.  data["_files"] is bound to
# the list `files` meant to hold one dict per media item.
# NOTE(review): this block is not valid as written -- several dict/list
# literals are never closed, `except` has no `try`, and some expressions end
# in a dangling `or` / `(`.  Code appears to be missing; recover it from
# version control before relying on these comments.
def _parse_post_rest(self, post):
if "items" in post: # story or highlight
items = post["items"]
# keep only the part of the id after the last ":"
reel_id = str(post["id"]).rpartition(":")[2]
data = {
"expires": text.parse_timestamp(post.get("expiring_at")),
"post_id": reel_id,
"post_shortcode": shortcode_from_id(reel_id),
# NOTE(review): closing "}" of `data` is missing here
if "title" in post:
data["highlight_title"] = post["title"]
if "created_at" in post:
data["date"] = text.parse_timestamp(post.get("created_at"))
else: # regular image/video post
data = {
"post_id" : post["pk"],
"post_shortcode": post["code"],
"likes": post.get("like_count", 0),
"pinned": post.get("timeline_pinned_user_ids", ()),
"date": text.parse_timestamp(post.get("taken_at")),
"liked": post.get("has_liked", False),
# NOTE(review): closing "}" of `data` is missing here
caption = post["caption"]
data["description"] = caption["text"] if caption else ""
# hashtags found in the description, de-duplicated and sorted
tags = self._find_tags(data["description"])
if tags:
data["tags"] = sorted(set(tags))
location = post.get("location")
if location:
slug = location["short_name"].replace(" ", "-").lower()
data["location_id"] = location["pk"]
data["location_slug"] = slug
data["location_url"] = "{}/explore/locations/{}/{}/".format(
self.root, location["pk"], slug)
coauthors = post.get("coauthor_producers")
if coauthors:
data["coauthors"] = [
{"id" : user["pk"],
"username" : user["username"],
"full_name": user["full_name"]}
for user in coauthors
# NOTE(review): closing "]" of the comprehension is missing here
if "carousel_media" in post:
items = post["carousel_media"]
data["sidecar_media_id"] = data["post_id"]
data["sidecar_shortcode"] = data["post_shortcode"]
# NOTE(review): presumably an `else:` preceded the next line (single-media
# post); as written, `items` is always overwritten -- confirm
items = (post,)
owner = post["user"]
data["owner_id"] = owner["pk"]
data["username"] = owner.get("username")
data["fullname"] = owner.get("full_name")
data["post_url"] = "{}/p/{}/".format(self.root, data["post_shortcode"])
data["_files"] = files = []
for num, item in enumerate(items, 1):
# NOTE(review): the `try:` belonging to the `except` below is missing
image = item["image_versions2"]["candidates"][0]
except Exception:
# NOTE(review): the warning's format argument and whatever followed it
# are missing
self.log.warning("Missing media in post %s",
video_versions = item.get("video_versions")
if video_versions:
# picks the entry with the greatest (width, height, type) key
# NOTE(review): the iterable argument of max() is missing
video = max(
key=lambda x: (x["width"], x["height"], x["type"]),
media = video
# NOTE(review): presumably an `else:` preceded the next two lines -- confirm
video = None
media = image
media = {
"num" : num,
# NOTE(review): the last operand of this `or` chain is missing
"date" : text.parse_timestamp(item.get("taken_at") or
media.get("taken_at") or
"media_id" : item["pk"],
# NOTE(review): the fallback operand after `or` is missing
"shortcode" : (item.get("code") or
"display_url": image["url"],
"video_url" : video["url"] if video else None,
"width" : media["width"],
"height" : media["height"],
# NOTE(review): closing "}" of `media` is missing here
if "expiring_at" in item:
media["expires"] = text.parse_timestamp(post["expiring_at"])
self._extract_tagged_users(item, media)
# NOTE(review): nothing visible appends `media` to `files` -- confirm
return data
# Convert one GraphQL post node into the metadata dict `data`, which is
# returned.  data["_files"] is bound to the list `files` meant to hold one
# dict per media item (sidecar children, or the post itself).
# NOTE(review): this block is not valid as written -- two assignments have
# a garbled right-hand side, several literals are never closed and some
# expressions end in a dangling `or` / `(`.  Code appears to be missing;
# recover it from version control before relying on these comments.
def _parse_post_graphql(self, post):
typename = post["__typename"]
if self._logged_in:
# the listing node lacks details in these two cases, so the post is
# apparently re-fetched by id
if post.get("is_video") and "video_url" not in post:
# NOTE(review): the call in front of `["id"])[0]` is missing
post =["id"])[0]
elif typename == "GraphSidecar" and \
"edge_sidecar_to_children" not in post:
# NOTE(review): the call in front of `["id"])[0]` is missing
post =["id"])[0]
pinned = post.get("pinned_for_users", ())
if pinned:
# replace each user object by its integer id, in place
for index, user in enumerate(pinned):
pinned[index] = int(user["id"])
owner = post["owner"]
data = {
"typename" : typename,
"date" : text.parse_timestamp(post["taken_at_timestamp"]),
"likes" : post["edge_media_preview_like"]["count"],
"liked" : post.get("viewer_has_liked", False),
"pinned" : pinned,
"owner_id" : owner["id"],
"username" : owner.get("username"),
"fullname" : owner.get("full_name"),
"post_id" : post["id"],
"post_shortcode": post["shortcode"],
"post_url" : "{}/p/{}/".format(self.root, post["shortcode"]),
# NOTE(review): the expression generated per caption edge, and the
# closing brackets of this entry and of `data`, are missing
"description": text.parse_unicode_escapes("\n".join(
for edge in post["edge_media_to_caption"]["edges"]
# hashtags found in the description, de-duplicated and sorted
tags = self._find_tags(data["description"])
if tags:
data["tags"] = sorted(set(tags))
location = post.get("location")
if location:
data["location_id"] = location["id"]
data["location_slug"] = location["slug"]
data["location_url"] = "{}/explore/locations/{}/{}/".format(
self.root, location["id"], location["slug"])
coauthors = post.get("coauthor_producers")
if coauthors:
data["coauthors"] = [
{"id" : user["id"],
"username": user["username"]}
for user in coauthors
# NOTE(review): closing "]" of the comprehension is missing here
data["_files"] = files = []
if "edge_sidecar_to_children" in post:
# multi-media post: one file dict per child node, numbered from 1
for num, edge in enumerate(
post["edge_sidecar_to_children"]["edges"], 1):
node = edge["node"]
dimensions = node["dimensions"]
media = {
"num": num,
"media_id" : node["id"],
# NOTE(review): the fallback operand after `or` is missing
"shortcode" : (node.get("shortcode") or
"display_url": node["display_url"],
"video_url" : node.get("video_url"),
"width" : dimensions["width"],
"height" : dimensions["height"],
"sidecar_media_id" : post["id"],
"sidecar_shortcode": post["shortcode"],
# NOTE(review): closing "}" of `media` is missing here
self._extract_tagged_users(node, media)
# NOTE(review): presumably an `else:` (single-media post) preceded the
# following lines, and nothing visible appends `media` to `files` -- confirm
dimensions = post["dimensions"]
media = {
"media_id" : post["id"],
"shortcode" : post["shortcode"],
"display_url": post["display_url"],
"video_url" : post.get("video_url"),
"width" : dimensions["width"],
"height" : dimensions["height"],
# NOTE(review): closing "}" of `media` is missing here
self._extract_tagged_users(post, media)
return data
@staticmethod
def _extract_tagged_users(src, dest):
    """Collect every user tagged or mentioned in `src` into `dest`.

    Declared static because callers invoke it as
    `self._extract_tagged_users(item, media)` while the signature has no
    `self` parameter.

    Args:
        src: media/post dict; may contain "edge_media_to_tagged_user",
            "usertags", "reel_mentions" and "story_bloks_stickers".
        dest: dict that receives a new "tagged_users" list whose entries
            have the keys "id", "username" and "full_name".
    """
    dest["tagged_users"] = tagged_users = []

    def add(user, user_id):
        tagged_users.append({"id"       : user_id,
                             "username" : user["username"],
                             "full_name": user["full_name"]})

    edges = src.get("edge_media_to_tagged_user")
    if edges:
        for edge in edges["edges"]:
            user = edge["node"]["user"]
            add(user, user["id"])

    usertags = src.get("usertags")
    if usertags:
        for tag in usertags["in"]:
            user = tag["user"]
            add(user, user["pk"])

    mentions = src.get("reel_mentions")
    if mentions:
        for mention in mentions:
            user = mention["user"]
            # "pk" is optional for reel mentions
            add(user, user.get("pk"))

    stickers = src.get("story_bloks_stickers")
    if stickers:
        for sticker in stickers:
            sticker = sticker["bloks_sticker"]
            if sticker["bloks_sticker_type"] == "mention":
                user = sticker["sticker_data"]["ig_mention"]
                add(user, user["account_id"])
def _init_cursor(self):
    """Return the configured "cursor" option, or None when it is falsy."""
    configured = self.config("cursor")
    if configured:
        return configured
    return None
def _update_cursor(self, cursor):
    """Remember `cursor` as the current position and return it.

    The value is debug-logged first, then stored in `self._cursor`.
    """
    log_debug = self.log.debug
    log_debug("Cursor: %s", cursor)
    self._cursor = cursor
    return cursor
def _assign_user(self, user):
    """Store `user` as the current user and flatten its edge counters.

    Each "edge_*" entry is removed from `user` and its "count" value is
    stored under a "count_*" key; a missing or malformed entry yields 0.
    `user` is modified in place.
    """
    self._user = user

    for key, old in (
        ("count_media"     , "edge_owner_to_timeline_media"),
        ("count_video"     , "edge_felix_video_timeline"),
        ("count_saved"     , "edge_saved_media"),
        ("count_mutual"    , "edge_mutual_followed_by"),
        ("count_follow"    , "edge_follow"),
        ("count_followed"  , "edge_followed_by"),
        ("count_collection", "edge_media_collections"),
    ):
        try:
            user[key] = user.pop(old)["count"]
        except Exception:
            # deliberate best-effort: absent key, None value, etc.
            user[key] = 0
class InstagramUserExtractor(InstagramExtractor):
    """Extractor for an Instagram user profile"""
    subcategory = "user"
    pattern = USER_PATTERN + r"/?(?:$|[?#])"
    example = ""

    def initialize(self):
        """Do nothing; this extractor only dispatches to others."""
        # NOTE(review): the body was missing; a no-op is assumed
        pass

    def finalize(self):
        """Do nothing; overrides the base class cursor hint."""
        # NOTE(review): the body was missing; a no-op is assumed
        pass

    def items(self):
        """Dispatch to the per-section extractors of this user.

        The last argument `("posts",)` is passed to
        `_dispatch_extractors` unchanged; presumably it names the
        default selection -- verify against the Extractor base class.
        """
        base = "{}/{}/".format(self.root, self.item)
        stories = "{}/stories/{}/".format(self.root, self.item)
        return self._dispatch_extractors((
            (InstagramAvatarExtractor    , base + "avatar/"),
            (InstagramStoriesExtractor   , stories),
            (InstagramHighlightsExtractor, base + "highlights/"),
            (InstagramPostsExtractor     , base + "posts/"),
            (InstagramReelsExtractor     , base + "reels/"),
            (InstagramTaggedExtractor    , base + "tagged/"),
        ), ("posts",))
class InstagramPostsExtractor(InstagramExtractor):
    """Extractor for an Instagram user's posts"""
    subcategory = "posts"
    pattern = USER_PATTERN + r"/posts"
    example = ""

    def posts(self):
        """Return the feed of the user identified by `self.item`."""
        api = self.api
        user_id = api.user_id(self.item)
        return api.user_feed(user_id)
class InstagramReelsExtractor(InstagramExtractor):
    """Extractor for an Instagram user's reels"""
    subcategory = "reels"
    pattern = USER_PATTERN + r"/reels"
    example = ""

    def posts(self):
        """Return the clips of the user identified by `self.item`."""
        api = self.api
        user_id = api.user_id(self.item)
        return api.user_clips(user_id)
class InstagramTaggedExtractor(InstagramExtractor):
    """Extractor for an Instagram user's tagged posts"""
    subcategory = "tagged"
    pattern = USER_PATTERN + r"/tagged"
    example = ""

    def metadata(self):
        """Resolve the target user and return "tagged_*" metadata.

        Sets `self.user_id` as a side effect; `posts()` relies on it.
        For an "id:<n>" item only the owner id is known, so no profile
        lookup is made.
        """
        if self.item.startswith("id:"):
            self.user_id = self.item[3:]
            return {"tagged_owner_id": self.user_id}

        self.user_id = self.api.user_id(self.item)
        user = self.api.user_by_name(self.item)

        return {
            "tagged_owner_id"  : user["id"],
            "tagged_username"  : user["username"],
            "tagged_full_name" : user["full_name"],
        }

    def posts(self):
        """Return the posts the resolved user is tagged in."""
        return self.api.user_tagged(self.user_id)
class InstagramGuideExtractor(InstagramExtractor):
    """Extractor for an Instagram guide"""
    subcategory = "guide"
    pattern = USER_PATTERN + r"/guide/[^/?#]+/(\d+)"
    example = ""

    def __init__(self, match):
        InstagramExtractor.__init__(self, match)
        # NOTE(review): the right-hand side was missing.  Group 2 is
        # assumed: it is the only numeric group in `pattern`.
        self.guide_id = match.group(2)

    def metadata(self):
        """Return the guide's info under the "guide" key."""
        # NOTE(review): the dict value was missing.  `api.guide()` is
        # assumed: it is the only guide-info call the REST API defines.
        return {"guide": self.api.guide(self.guide_id)}

    def posts(self):
        """Return the media items contained in the guide."""
        return self.api.guide_media(self.guide_id)
class InstagramSavedExtractor(InstagramExtractor):
    """Extractor for an Instagram user's saved media"""
    subcategory = "saved"
    pattern = USER_PATTERN + r"/saved(?:/all-posts)?/?$"
    example = ""

    def posts(self):
        """Return the saved posts as provided by the active API."""
        saved = self.api.user_saved()
        return saved
class InstagramCollectionExtractor(InstagramExtractor):
    """Extractor for Instagram collection"""
    subcategory = "collection"
    pattern = USER_PATTERN + r"/saved/([^/?#]+)/([^/?#]+)"
    example = ""

    def __init__(self, match):
        InstagramExtractor.__init__(self, match)
        # groups: user name, collection name, collection id
        self.user, self.collection_name, self.collection_id = match.groups()

    def metadata(self):
        """Return the collection id and its unescaped name."""
        return {
            "collection_id"  : self.collection_id,
            "collection_name": text.unescape(self.collection_name),
        }

    def posts(self):
        """Return the posts of the collection."""
        return self.api.user_collection(self.collection_id)
# Extractor for a user's stories or a single highlight reel, optionally
# narrowed to one media item.
# NOTE(review): this class is not valid as written -- the `pattern` literal
# is never closed and defines no capture groups although __init__ unpacks
# five, and the `highlight_id` expression ends in a dangling `else`.  Code
# appears to be missing.
class InstagramStoriesExtractor(InstagramExtractor):
"""Extractor for Instagram stories"""
subcategory = "stories"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
example = ""
def __init__(self, match):
# five capture groups are expected from `pattern`
h1, self.user, m1, h2, m2 = match.groups()
if self.user:
self.highlight_id = None
# NOTE(review): presumably an `else:` preceded the next lines (URL refers
# to a highlight instead of a user) -- confirm
self.subcategory = InstagramHighlightsExtractor.subcategory
# NOTE(review): the alternative used when h1 is empty is missing
self.highlight_id = ("highlight:" + h1 if h1 else
self.media_id = m1 or m2
InstagramExtractor.__init__(self, match)
def posts(self):
# a highlight id takes precedence; otherwise the user's id names the reel
reel_id = self.highlight_id or self.api.user_id(self.user)
reels = self.api.reels_media(reel_id)
if self.media_id and reels:
# narrow the first reel down to the single requested item
reel = reels[0]
for item in reel["items"]:
if item["pk"] == self.media_id:
reel["items"] = (item,)
# NOTE(review): as written this raise is unconditional; presumably a
# `break` and a loop `else:` are missing -- confirm
raise exception.NotFoundError("story")
return reels
class InstagramHighlightsExtractor(InstagramExtractor):
    """Extractor for an Instagram user's story highlights"""
    subcategory = "highlights"
    pattern = USER_PATTERN + r"/highlights"
    example = ""

    def posts(self):
        """Return the highlight reels of the user in `self.item`."""
        api = self.api
        user_id = api.user_id(self.item)
        return api.highlights_media(user_id)
class InstagramFollowingExtractor(InstagramExtractor):
    """Extractor for an Instagram user's followed users"""
    subcategory = "following"
    pattern = USER_PATTERN + r"/following"
    example = ""

    def items(self):
        """Yield one Queue message per followed user.

        Each user dict is tagged with `InstagramUserExtractor` under the
        "_extractor" key and paired with that user's profile URL.
        """
        user_id = self.api.user_id(self.item)
        for followed in self.api.user_following(user_id):
            followed["_extractor"] = InstagramUserExtractor
            profile_url = "{}/{}".format(self.root, followed["username"])
            yield Message.Queue, profile_url, followed
class InstagramTagExtractor(InstagramExtractor):
    """Extractor for Instagram tags"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{subcategory}", "{tag}")
    pattern = BASE_PATTERN + r"/explore/tags/([^/?#]+)"
    example = ""

    def metadata(self):
        """Return the URL-unquoted tag name under the "tag" key."""
        tag_name = text.unquote(self.item)
        return {"tag": tag_name}

    def posts(self):
        """Return the media for the tag, using the raw `self.item`."""
        return self.api.tags_media(self.item)
class InstagramProfileExtractor(InstagramExtractor):
    """Extractor for an Instagram user's profile data"""
    subcategory = "profile"
    pattern = USER_PATTERN + r"/profile"
    example = ""

    def items(self):
        """Return an iterator over one Directory message with the user.

        An "id:<n>" item is looked up by id, anything else by name.
        """
        screen_name = self.item
        if screen_name.startswith("id:"):
            user = self.api.user_by_id(screen_name[3:])
        else:
            # without this `else`, the by-id result was always discarded
            # and "id:<n>" was looked up as a user name
            user = self.api.user_by_name(screen_name)
        return iter(((Message.Directory, user),))
# Extractor that presents a user's profile picture as a single pseudo-post
# shaped like a REST post ("pk", "code", "user", "caption", "like_count",
# "image_versions2").
# NOTE(review): this class is not valid as written -- the `avatar`
# expression ends in a dangling `or`, the returned literal is never closed
# and several `else:` lines appear to be missing.
class InstagramAvatarExtractor(InstagramExtractor):
"""Extractor for an Instagram user's avatar"""
subcategory = "avatar"
pattern = USER_PATTERN + r"/avatar"
example = ""
def posts(self):
if self._logged_in:
# the private-profile warning is disabled for this lookup
user_id = self.api.user_id(self.item, check_private=False)
user = self.api.user_by_id(user_id)
# NOTE(review): the fallback operand after `or` is missing
avatar = (user.get("hd_profile_pic_url_info") or
# NOTE(review): presumably an `else:` (not logged in) preceded the next lines
user = self.item
if user.startswith("id:"):
user = self.api.user_by_id(user[3:])
# NOTE(review): presumably an `else:` preceded the next two lines
user = self.api.user_by_name(user)
# mirror "id" as "pk", which is read further below
user["pk"] = user["id"]
url = user.get("profile_pic_url_hd") or user["profile_pic_url"]
# dimensions are not known on this path, hence 0
avatar = {"url": url, "width": 0, "height": 0}
pk = user.get("profile_pic_id")
if pk:
# keep the part before the first "_"
pk = pk.partition("_")[0]
code = shortcode_from_id(pk)
# NOTE(review): presumably an `else:` (no profile_pic_id) preceded the next
# line; as written it always overwrites pk and code -- confirm
pk = code = "avatar:" + str(user["pk"])
return ({
"pk" : pk,
"code" : code,
"user" : user,
"caption" : None,
"like_count": 0,
"image_versions2": {"candidates": (avatar,)},
# NOTE(review): the closing of the dict and of the enclosing parentheses
# is missing
# Extractor for a single post.
# NOTE(review): this class is not valid as written -- the `pattern` literal
# is never closed and `posts()` has no body.  Code appears to be missing.
class InstagramPostExtractor(InstagramExtractor):
"""Extractor for an Instagram post"""
subcategory = "post"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
example = ""
# NOTE(review): body missing; both API classes define media(shortcode),
# which is presumably what this calls -- confirm
def posts(self):
class InstagramRestAPI():
    """Client for the "/v1/..." REST endpoints used by the extractors."""

    def __init__(self, extractor):
        """Keep a reference to the extractor that issues the requests."""
        self.extractor = extractor
def guide(self, guide_id):
    """Return the web info of one guide.

    Calls "/v1/guides/web_info/" with `guide_id` as query parameter and
    returns the decoded response unchanged.
    """
    query = {"guide_id": guide_id}
    return self._call("/v1/guides/web_info/", params=query)
def guide_media(self, guide_id):
    """Return a paginated iterator over the media items of a guide."""
    return self._pagination_guides(
        "/v1/guides/guide/{}/".format(guide_id))
def highlights_media(self, user_id, chunk_size=5):
    """Yield the highlight reels of a user, fetched in chunks.

    Args:
        user_id: id of the user whose highlights tray is read.
        chunk_size: number of reel ids sent per `reels_media` call.

    The "order-posts" option controls the order: "desc"/"reverse" flips
    the tray order, "id"/"id_asc" and "id_desc" sort by the number that
    follows the first 10 characters of each reel id, "asc" (or no
    value) keeps tray order.  Any other value logs a warning and keeps
    tray order.
    """
    reel_ids = [hl["id"] for hl in self.highlights_tray(user_id)]

    order = self.extractor.config("order-posts")
    if order:
        if order in ("desc", "reverse"):
            # this branch had no body; reversing is what the option
            # values name
            reel_ids.reverse()
        elif order in ("id", "id_asc"):
            reel_ids.sort(key=lambda r: int(r[10:]))
        elif order == "id_desc":
            reel_ids.sort(key=lambda r: int(r[10:]), reverse=True)
        elif order != "asc":
            self.extractor.log.warning("Unknown posts order '%s'", order)

    for offset in range(0, len(reel_ids), chunk_size):
        yield from self.reels_media(
            reel_ids[offset : offset+chunk_size])
def highlights_tray(self, user_id):
    """Return the "tray" entry of a user's highlights tray response."""
    response = self._call(
        "/v1/highlights/{}/highlights_tray/".format(user_id))
    return response["tray"]
def media(self, shortcode):
    """Return a paginated iterator over the info of one post.

    A shortcode longer than 28 characters has its last 28 characters
    removed before it is converted to a numeric id.
    """
    trimmed = shortcode[:-28] if len(shortcode) > 28 else shortcode
    media_id = id_from_shortcode(trimmed)
    return self._pagination("/v1/media/{}/info/".format(media_id))
def reels_media(self, reel_ids):
    """Return the "reels_media" list for the given reel id(s).

    Raises:
        exception.AuthorizationError: when the response has no
            "reels_media" key.
    """
    endpoint = "/v1/feed/reels_media/"
    params = {"reel_ids": reel_ids}

    try:
        return self._call(endpoint, params=params)["reels_media"]
    except KeyError:
        raise exception.AuthorizationError("Login required")
def tags_media(self, tag):
    """Yield the "media" object of every entry in every tag section."""
    for section in self.tags_sections(tag):
        entries = section["layout_content"]["medias"]
        yield from (entry["media"] for entry in entries)
def tags_sections(self, tag):
    """Return a paginated iterator over the "recent" sections of a tag.

    "max_id" and "page" start as None; `_pagination_sections` fills them
    in from one response to the next.
    """
    endpoint = "/v1/tags/{}/sections/".format(tag)
    data = {
        "include_persistent": "0",
        "max_id" : None,
        "page"   : None,
        "surface": "grid",
        "tab"    : "recent",
    }
    return self._pagination_sections(endpoint, data)
def user_by_name(self, screen_name):
    """Return the profile object for a user name.

    Passes notfound="user" to `_call` and returns the ["data"]["user"]
    part of the response.
    """
    response = self._call(
        "/v1/users/web_profile_info/",
        params={"username": screen_name},
        notfound="user",
    )
    return response["data"]["user"]
def user_by_id(self, user_id):
    """Return the "user" object of the info endpoint for a user id."""
    response = self._call("/v1/users/{}/info/".format(user_id))
    return response["user"]
def user_id(self, screen_name, check_private=True):
    """Resolve a user name or "id:<n>" reference to a user id.

    Args:
        screen_name: user name, or "id:<n>" to use <n> directly.
        check_private: warn when the profile is private and not
            followed by the viewer.

    Raises:
        exception.AuthorizationError: when the profile lookup returns
            None.

    For "id:<n>" the profile is only fetched (and stored on the
    extractor) when the "metadata" option is set.
    """
    if screen_name.startswith("id:"):
        if self.extractor.config("metadata"):
            self.extractor._user = self.user_by_id(screen_name[3:])
        return screen_name[3:]

    user = self.user_by_name(screen_name)
    if user is None:
        raise exception.AuthorizationError(
            "Login required to access this profile")
    if check_private and user["is_private"] and \
            not user["followed_by_viewer"]:
        name = user["username"]
        s = "" if name.endswith("s") else "s"
        self.extractor.log.warning("%s'%s posts are private", name, s)
    # NOTE(review): `_assign_user` has no caller anywhere in this file, so
    # the extractor's `_user` was never filled for name lookups.  Calling
    # it here is an assumption -- confirm.
    self.extractor._assign_user(user)
    return user["id"]
def user_clips(self, user_id):
    """Return a paginated iterator over the clips of a user.

    "max_id" starts as None; `_pagination_post` sets it per page.
    """
    endpoint = "/v1/clips/user/"
    data = {
        "target_user_id": user_id,
        "page_size": "50",
        "max_id": None,
        "include_feed_video": "true",
    }
    return self._pagination_post(endpoint, data)
def user_collection(self, collection_id):
    """Return a paginated iterator over the posts of a collection.

    Requests 50 entries per page; `media=True` makes the paginator
    yield each entry's "media" object.
    """
    return self._pagination(
        "/v1/feed/collection/{}/posts/".format(collection_id),
        {"count": 50},
        media=True,
    )
def user_feed(self, user_id):
    """Return a paginated iterator over a user's feed, 30 per page."""
    return self._pagination(
        "/v1/feed/user/{}/".format(user_id), {"count": 30})
def user_following(self, user_id):
    """Return a paginated iterator over the users followed by a user.

    Requests 12 users per page.
    """
    return self._pagination_following(
        "/v1/friendships/{}/following/".format(user_id), {"count": 12})
def user_saved(self):
    """Return a paginated iterator over the saved posts.

    Requests 50 entries per page; `media=True` makes the paginator
    yield each entry's "media" object.
    """
    return self._pagination(
        "/v1/feed/saved/posts/", {"count": 50}, media=True)
def user_tagged(self, user_id):
    """Return a paginated iterator over posts a user is tagged in.

    Requests 20 entries per page.
    """
    return self._pagination(
        "/v1/usertags/{}/feed/".format(user_id), {"count": 20})
def _call(self, endpoint, **kwargs):
    """Request a REST endpoint and return the decoded JSON body.

    Args:
        endpoint: path appended to the base URL.
        **kwargs: forwarded to the extractor's `request()`; any
            "headers" entry is replaced by the headers built here.
    """
    extr = self.extractor

    # NOTE(review): the base URL literal is empty, so `url` is only the
    # endpoint path.  Confirm the intended prefix.
    url = "" + endpoint
    kwargs["headers"] = {
        "Accept"          : "*/*",
        "X-CSRFToken"     : extr.csrf_token,
        "X-IG-App-ID"     : "936619743392459",
        "X-ASBD-ID"       : "129477",
        "X-IG-WWW-Claim"  : extr.www_claim,
        "X-Requested-With": "XMLHttpRequest",
        "Connection"      : "keep-alive",
        "Referer"         : extr.root + "/",
        "Sec-Fetch-Dest"  : "empty",
        "Sec-Fetch-Mode"  : "cors",
        "Sec-Fetch-Site"  : "same-origin",
    }
    return extr.request(url, **kwargs).json()
def _pagination(self, endpoint, params=None, media=False):
    """Yield the items of a "max_id"-paginated GET endpoint.

    Args:
        endpoint: API path passed to `_call`.
        params: query parameters; "max_id" is set and updated in place.
        media: when true, yield each item's "media" object instead of
            the item itself.

    Starts from the extractor's initial cursor and reports every new
    cursor (None once the last page is reached) back to the extractor.
    """
    if params is None:
        params = {}
    extr = self.extractor
    params["max_id"] = extr._init_cursor()

    while True:
        data = self._call(endpoint, params=params)

        if media:
            for item in data["items"]:
                yield item["media"]
        else:
            # without this `else`, media=True also yielded every raw item
            yield from data["items"]

        if not data.get("more_available"):
            return extr._update_cursor(None)
        params["max_id"] = extr._update_cursor(data["next_max_id"])
def _pagination_post(self, endpoint, params):
    """Yield the "media" objects of a POST endpoint, page by page.

    `params` is sent as form data; its "max_id" entry is set from the
    extractor's initial cursor and then from each response's
    "paging_info".  Every new cursor (None at the end) is reported to
    the extractor.
    """
    extractor = self.extractor
    params["max_id"] = extractor._init_cursor()

    while True:
        page = self._call(endpoint, method="POST", data=params)
        for entry in page["items"]:
            yield entry["media"]

        paging = page["paging_info"]
        if paging.get("more_available"):
            params["max_id"] = extractor._update_cursor(paging["max_id"])
            continue
        return extractor._update_cursor(None)
def _pagination_sections(self, endpoint, params):
    """Yield the "sections" of a POST endpoint, page by page.

    `params` is sent as form data; "page" and "max_id" are updated in
    place from each response.  Every new cursor (None at the end) is
    reported to the extractor.
    """
    extractor = self.extractor
    params["max_id"] = extractor._init_cursor()

    while True:
        page = self._call(endpoint, method="POST", data=params)
        yield from page["sections"]

        if page.get("more_available"):
            params["page"] = page["next_page"]
            params["max_id"] = extractor._update_cursor(
                page["next_max_id"])
        else:
            return extractor._update_cursor(None)
def _pagination_guides(self, endpoint):
    """Yield the "media_items" of every item of a guide endpoint.

    Pagination continues as long as a response contains "next_max_id".
    Every new cursor (None at the end) is reported to the extractor.
    """
    extractor = self.extractor
    query = {"max_id": extractor._init_cursor()}

    while True:
        page = self._call(endpoint, params=query)
        for entry in page["items"]:
            yield from entry["media_items"]

        if "next_max_id" in page:
            query["max_id"] = extractor._update_cursor(page["next_max_id"])
        else:
            return extractor._update_cursor(None)
def _pagination_following(self, endpoint, params):
    """Yield the "users" of an offset-paginated endpoint.

    "max_id" is a numeric offset here: it starts at the integer value of
    the extractor's initial cursor and grows by params["count"] per
    page.  Pagination stops at the first page with fewer users than
    "count".  Every new cursor (None at the end) is reported to the
    extractor.
    """
    extractor = self.extractor
    params["max_id"] = text.parse_int(extractor._init_cursor())

    while True:
        page = self._call(endpoint, params=params)
        users = page["users"]
        yield from users

        page_size = params["count"]
        if len(page["users"]) < page_size:
            return extractor._update_cursor(None)
        next_offset = params["max_id"] + params["count"]
        params["max_id"] = extractor._update_cursor(next_offset)
class InstagramGraphqlAPI():
    """Client for the GraphQL query interface used by the extractors."""

    def __init__(self, extractor):
        """Bind the extractor and wire up unsupported/delegated calls.

        Operations without a GraphQL implementation are pointed at
        `_unsupported`; user lookups are delegated to a REST API
        instance.
        """
        self.extractor = extractor
        # NOTE(review): a name was missing between the two "=" signs and
        # the assigned value was cut off.  `guide` is assumed (the only
        # REST operation otherwise absent here) and `_unsupported` is
        # assumed as the value -- confirm.
        self.user_collection = self.user_saved = self.reels_media = \
            self.highlights_media = self.guide = self.guide_media = \
            self._unsupported
        # compact JSON encoder for the "variables" query parameter
        self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode

        api = InstagramRestAPI(extractor)
        self.user_by_name = api.user_by_name
        self.user_by_id = api.user_by_id
        self.user_id = api.user_id
@staticmethod
def _unsupported(_=None):
    """Stand-in for operations the GraphQL API does not provide.

    Declared static because the signature has no `self`, while the
    stand-in is meant to be called with zero or one argument.

    Raises:
        exception.StopExtraction: always.
    """
    raise exception.StopExtraction("Unsupported with GraphQL API")
# Return the nodes of a user's highlight reels via a GraphQL query.
# NOTE(review): this block is not valid as written -- the `variables` dict
# is never closed and the `edges` expression stops after ["user"].  Code
# appears to be missing.
def highlights_tray(self, user_id):
query_hash = "d4d88dc1500312af6f937f7b804c68c3"
# only "include_logged_out_extras" and "include_highlight_reels" are enabled
variables = {
"user_id": user_id,
"include_chaining": False,
"include_reel": False,
"include_suggested_users": False,
"include_logged_out_extras": True,
"include_highlight_reels": True,
"include_live_status": False,
# NOTE(review): closing "}" missing; the path from ["user"] down to the
# edge list is missing as well
edges = (self._call(query_hash, variables)["user"]
return [edge["node"] for edge in edges]
def media(self, shortcode):
    """Return a 1-tuple with the post for `shortcode`, or () if absent.

    The result is a tuple so callers can iterate it like any other list
    of posts.
    """
    query_hash = "9f8827793ef34641b2fb195d4d41151c"
    variables = {
        "shortcode": shortcode,
        "child_comment_count": 3,
        "fetch_comment_count": 40,
        "parent_comment_count": 24,
        "has_threaded_comments": True,
    }
    media = self._call(query_hash, variables).get("shortcode_media")
    return (media,) if media else ()
def tags_media(self, tag):
    """Return a paginated iterator over the media of a hashtag.

    The tag is unescaped before it is sent; results are read from the
    "hashtag" / "edge_hashtag_to_media" part of each response, 50 per
    page.
    """
    variables = {"tag_name": text.unescape(tag), "first": 50}
    return self._pagination(
        "9b498c08113f1e09617a1703c22b2f32",
        variables,
        "hashtag",
        "edge_hashtag_to_media",
    )
def user_clips(self, user_id):
    """Return a paginated iterator over a user's clips, 50 per page."""
    return self._pagination(
        "bc78b344a68ed16dd5d7f264681c4c76",
        {"id": user_id, "first": 50},
    )
def user_feed(self, user_id):
    """Return a paginated iterator over a user's feed, 50 per page."""
    return self._pagination(
        "69cba40317214236af40e7efa697781d",
        {"id": user_id, "first": 50},
    )
def user_tagged(self, user_id):
    """Return a paginated iterator over posts a user is tagged in.

    Requests 50 entries per page.
    """
    return self._pagination(
        "be13233562af2d229b008d2976b998b5",
        {"id": user_id, "first": 50},
    )
def _call(self, query_hash, variables):
    """Run one GraphQL query and return the "data" part of the reply.

    Args:
        query_hash: identifier of the stored query to run.
        variables: dict serialized as compact JSON into the "variables"
            query parameter.
    """
    extr = self.extractor

    # NOTE(review): the URL literal is empty -- confirm the intended
    # query endpoint.
    url = ""
    params = {
        "query_hash": query_hash,
        "variables" : self._json_dumps(variables),
    }
    headers = {
        "Accept"          : "*/*",
        "X-CSRFToken"     : extr.csrf_token,
        "X-Instagram-AJAX": "1006267176",
        "X-IG-App-ID"     : "936619743392459",
        "X-ASBD-ID"       : "198387",
        "X-IG-WWW-Claim"  : extr.www_claim,
        "X-Requested-With": "XMLHttpRequest",
        "Referer"         : extr.root + "/",
    }
    return extr.request(url, params=params, headers=headers).json()["data"]
def _pagination(self, query_hash, variables,
                key_data="user", key_edge=None):
    """Yield the nodes of a cursor-paginated GraphQL connection.

    Args:
        query_hash: identifier of the stored query to run.
        variables: query variables; "after" is set and updated in place.
        key_data: top-level key of the reply holding the connection.
        key_edge: key of the connection inside `key_data`; when omitted,
            the first value of that object is used.

    Raises:
        exception.StopExtraction: when a page reports further pages but
            contains no edges.

    Every new cursor (None at the end) is reported to the extractor.
    """
    extr = self.extractor
    variables["after"] = extr._init_cursor()

    while True:
        data = self._call(query_hash, variables)[key_data]
        data = data[key_edge] if key_edge else next(iter(data.values()))

        for edge in data["edges"]:
            yield edge["node"]

        info = data["page_info"]
        if not info["has_next_page"]:
            return extr._update_cursor(None)
        elif not data["edges"]:
            # the requested item lives on the extractor; this API object
            # has no `item` attribute of its own
            item = extr.item
            s = "" if item.endswith("s") else "s"
            raise exception.StopExtraction(
                "%s'%s posts are private", item, s)

        variables["after"] = extr._update_cursor(info["end_cursor"])
@cache(maxage=90*86400, keyarg=1)
def _login_impl(extr, username, password):
    """Report that password login is unavailable; return no cookies.

    Logs an error through `extr.log` and returns an empty dict, which
    the caller hands to `cookies_update()`.
    """
    message = ("Login with username & password is no longer supported. "
               "Use browser cookies instead.")
    extr.log.error(message)
    return {}
def id_from_shortcode(shortcode):
    """Decode a shortcode with `util.bdecode` over `_ALPHABET`.

    NOTE(review): `_ALPHABET` is not defined in the visible part of this
    file -- confirm it exists at module level.
    """
    decoded = util.bdecode(shortcode, _ALPHABET)
    return decoded
def shortcode_from_id(post_id):
    """Encode a post id with `util.bencode` over `_ALPHABET`.

    `post_id` is converted with `int()` first, so numeric strings are
    accepted.

    NOTE(review): `_ALPHABET` is not defined in the visible part of this
    file -- confirm it exists at module level.
    """
    numeric_id = int(post_id)
    return util.bencode(numeric_id, _ALPHABET)