diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 56802401..94fc492a 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -778,7 +778,7 @@ Consider all sites to be NSFW unless otherwise known.
VK |
https://vk.com/ |
- Photos |
+ Albums, Photos |
|
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 9dd2d472..9724c4b7 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -12,18 +12,67 @@ from .common import Extractor, Message
from .. import text
import re
+BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
-class VkPhotosExtractor(Extractor):
- """Extractor for photos from a vk user"""
+
+class VkExtractor(Extractor):
+ """Base class for vk extractors"""
category = "vk"
- subcategory = "photos"
directory_fmt = ("{category}", "{user[name]|user[id]}")
filename_fmt = "{id}.{extension}"
archive_fmt = "{id}"
root = "https://vk.com"
request_interval = 1.0
- pattern = (r"(?:https://)?(?:www\.|m\.)?vk\.com/(?:"
- r"(?:albums|photos|id)(-?\d+)|([^/?#]+))")
+
+ def items(self):  # yields one Directory message, then one Url message per photo
+ data = self.metadata()  # metadata() and photos() are not defined in this class; VkPhotosExtractor provides both
+ yield Message.Directory, data
+ for photo in self.photos():
+ photo.update(data)  # copy the shared metadata into this photo's entry
+ yield Message.Url, photo["url"], photo
+
+ def _pagination(self, photos_url, user_id):  # generator: POSTs to photos_url repeatedly and yields one entry per photo found
+ sub = re.compile(r"/imp[fg]/").sub  # used below to replace "/impf/" or "/impg/" in a URL with "/"
+ needle = 'data-id="{}_'.format(user_id)  # start marker handed to text.extract_iter for each photo
+ cnt = 0  # NOTE(review): only assigned by the for loop below, so a page with no matches keeps the previous value
+
+ headers = {
+ "X-Requested-With": "XMLHttpRequest",
+ "Origin" : self.root,
+ "Referer" : photos_url,
+ }
+ params = {
+ "al" : "1",
+ "al_ad" : "0",
+ "offset": 0,  # overwritten with the offset taken from each response
+ "part" : "1",
+ }
+
+ while True:
+ payload = self.request(
+ photos_url, method="POST", headers=headers, data=params
+ ).json()["payload"][1]
+
+ offset = payload[0]  # compared with the offset that was just sent to detect lack of progress
+ html = payload[1]  # text that is searched for photo entries
+
+ for cnt, photo in enumerate(text.extract_iter(html, needle, ')')):  # presumably yields the text between needle and ')' - TODO confirm
+ pid = photo[:photo.find('"')]  # everything before the first double quote
+ url = photo[photo.rindex("(")+1:]  # everything after the last "("
+ url = sub("/", url.partition("?")[0])  # drop the query string, then apply the /imp[fg]/ replacement
+ yield text.nameext_from_url(url, {"url": url, "id": pid})
+
+ if cnt <= 20 or offset == params["offset"]:  # stop when the last index seen is <= 20 or the offset did not change
+ return
+ params["offset"] = offset  # the next request uses the offset from this response
+
+
+class VkPhotosExtractor(VkExtractor):
+ """Extractor for photos from a vk user"""
+ subcategory = "photos"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"(?:albums|photos|id)(-?\d+)"
+ r"|(?!album-?\d+_)([^/?#]+))")
test = (
("https://vk.com/id398982326", {
"pattern": r"https://sun\d+-\d+\.userapi\.com/c\d+/v\d+"
@@ -58,10 +107,14 @@ class VkPhotosExtractor(Extractor):
)
def __init__(self, match):
- Extractor.__init__(self, match)
+ VkExtractor.__init__(self, match)  # initialise through the VkExtractor base rather than Extractor directly
self.user_id, self.user_name = match.groups()
- def items(self):
+ def photos(self):  # returns the _pagination() generator for this user's "<root>/photos<user_id>" URL
+ url = "{}/photos{}".format(self.root, self.user_id)  # NOTE(review): metadata() assigns self.user_id and items() calls it before photos()
+ return self._pagination(url, self.user_id)  # the same id is used to build the data-id search marker
+
+ def metadata(self):
if self.user_id:
user_id = self.user_id
prefix = "public" if user_id[0] == "-" else "id"
@@ -70,40 +123,8 @@ class VkPhotosExtractor(Extractor):
else:
url = "{}/{}".format(self.root, self.user_name)
data = self._extract_profile(url)
- user_id = data["user"]["id"]
-
- photos_url = "{}/photos{}".format(self.root, user_id)
- headers = {
- "X-Requested-With": "XMLHttpRequest",
- "Origin" : self.root,
- "Referer" : photos_url,
- }
- params = {
- "al" : "1",
- "al_ad" : "0",
- "offset": 0,
- "part" : "1",
- }
-
- yield Message.Directory, data
- sub = re.compile(r"/imp[fg]/").sub
- needle = 'data-id="{}_'.format(user_id)
- cnt = 0
-
- while True:
- offset, html = self.request(
- photos_url, method="POST", headers=headers, data=params
- ).json()["payload"][1]
-
- for cnt, photo in enumerate(text.extract_iter(html, needle, ')')):
- data["id"] = photo[:photo.find('"')]
- url = photo[photo.rindex("(")+1:]
- url = sub("/", url.partition("?")[0])
- yield Message.Url, url, text.nameext_from_url(url, data)
-
- if cnt <= 40 or offset == params["offset"]:
- return
- params["offset"] = offset
+ self.user_id = data["user"]["id"]
+ return data
def _extract_profile(self, url):
extr = text.extract_from(self.request(url).text)
@@ -116,3 +137,32 @@ class VkPhotosExtractor(Extractor):
'', '