[e621] implement 'notes' and 'pools' metadata extraction

(#3425)
pull/3656/head
Mike Fährmann 2 years ago
parent 925b467496
commit bbf0911a46
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -1103,8 +1103,21 @@ Description
follow the ``source`` and download from there if possible.
extractor.danbooru.metadata
---------------------------
extractor.danbooru.ugoira
-------------------------
Type
``bool``
Default
``false``
Description
Controls the download target for Ugoira posts.
* ``true``: Original ZIP archives
* ``false``: Converted video files
extractor.[Danbooru].metadata
-----------------------------
Type
* ``bool``
* ``string``
@ -1125,8 +1138,8 @@ Description
Note: This requires 1 additional HTTP request per post.
extractor.danbooru.threshold
----------------------------
extractor.[Danbooru].threshold
------------------------------
Type
* ``string``
* ``integer``
@ -1135,27 +1148,13 @@ Default
Description
Stop paginating over API results if the length of a batch of returned
posts is less than the specified number. Defaults to the per-page limit
of the current instance, which is 320 for ``e621`` and 200 for
everything else.
of the current instance, which is 200.
Note: Changing this setting is normally not necessary. When the value is
greater than the per-page limit, gallery-dl will stop after the first
batch. The value cannot be less than 1.
extractor.danbooru.ugoira
-------------------------
Type
``bool``
Default
``false``
Description
Controls the download target for Ugoira posts.
* ``true``: Original ZIP archives
* ``false``: Converted video files
extractor.derpibooru.api-key
----------------------------
Type
@ -1388,6 +1387,40 @@ Description
Minimum wait time in seconds before API requests.
extractor.[E621].metadata
-------------------------
Type
* ``bool``
* ``string``
* ``list`` of ``strings``
Default
``false``
Example
* ``notes,pools``
* ``["notes", "pools"]``
Description
Extract additional metadata (notes, pool metadata) if available.
Note: This requires 0-2 additional HTTP requests per post.
extractor.[E621].threshold
--------------------------
Type
* ``string``
* ``integer``
Default
``"auto"``
Description
Stop paginating over API results if the length of a batch of returned
posts is less than the specified number. Defaults to the per-page limit
of the current instance, which is 320.
Note: Changing this setting is normally not necessary. When the value is
greater than the per-page limit, gallery-dl will stop after the first
batch. The value cannot be less than 1.
extractor.exhentai.domain
-------------------------
Type

@ -26,16 +26,6 @@ class DanbooruExtractor(BaseExtractor):
self.ugoira = self.config("ugoira", False)
self.external = self.config("external", False)
metadata = self.config("metadata", False)
if metadata:
if isinstance(metadata, (list, tuple)):
metadata = ",".join(metadata)
elif not isinstance(metadata, str):
metadata = "artist_commentary,children,notes,parent,uploader"
self.metadata_includes = metadata
else:
self.metadata_includes = None
threshold = self.config("threshold")
if isinstance(threshold, int):
self.threshold = 1 if threshold < 1 else threshold
@ -55,6 +45,13 @@ class DanbooruExtractor(BaseExtractor):
return pages * self.per_page
def items(self):
includes = self.config("metadata")
if includes:
if isinstance(includes, (list, tuple)):
includes = ",".join(includes)
elif not isinstance(includes, str):
includes = "artist_commentary,children,notes,parent,uploader"
data = self.metadata()
for post in self.posts():
@ -77,9 +74,9 @@ class DanbooruExtractor(BaseExtractor):
url = post["large_file_url"]
post["extension"] = "webm"
if self.metadata_includes:
if includes:
meta_url = "{}/posts/{}.json?only={}".format(
self.root, post["id"], self.metadata_includes)
self.root, post["id"], includes)
post.update(self.request(meta_url).json())
if url[0] == "/":

@ -29,6 +29,16 @@ class E621Extractor(danbooru.DanbooruExtractor):
self.headers = {"User-Agent": "gallery-dl/{} (by mikf)".format(
version.__version__)}
includes = self.config("metadata") or ()
if includes:
if isinstance(includes, str):
includes = includes.split(",")
elif not isinstance(includes, (list, tuple)):
includes = ("notes", "pools")
notes = ("notes" in includes)
pools = ("pools" in includes)
data = self.metadata()
for post in self.posts():
file = post["file"]
@ -38,6 +48,18 @@ class E621Extractor(danbooru.DanbooruExtractor):
file["url"] = "https://static1.{}/data/{}/{}/{}.{}".format(
self.root[8:], md5[0:2], md5[2:4], md5, file["ext"])
if notes and post.get("has_notes"):
url = "{}/notes.json?search[post_id]={}".format(
self.root, post["id"])
post["notes"] = self.request(url).json()
if pools and post["pools"]:
url = "{}/pools.json?search[id]={}".format(
self.root, ",".join(map(str, post["pools"])))
post["pools"] = _pools = self.request(url).json()
for pool in _pools:
pool["name"] = pool["name"].replace("_", " ")
post["filename"] = file["md5"]
post["extension"] = file["ext"]
@ -124,6 +146,47 @@ class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
"url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
"content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
}),
("https://e621.net/posts/3181052", {
"options": (("metadata", "notes,pools"),),
"pattern": r"https://static\d\.e621\.net/data/c6/8c"
r"/c68cca0643890b615f75fb2719589bff\.png",
"keyword": {
"notes": [
{
"body": "Little Legends 2",
"created_at": "2022-05-16T13:58:38.877-04:00",
"creator_id": 517450,
"creator_name": "EeveeCuddler69",
"height": 475,
"id": 321296,
"is_active": True,
"post_id": 3181052,
"updated_at": "2022-05-16T13:59:02.050-04:00",
"version": 3,
"width": 809,
"x": 83,
"y": 117,
},
],
"pools": [
{
"category": "series",
"created_at": "2022-02-17T00:29:22.669-05:00",
"creator_id": 1077440,
"creator_name": "Yeetus90",
"description": "* \"Little Legends\":/pools/27971\r\n"
"* Little Legends 2\r\n"
"* \"Little Legends 3\":/pools/27481",
"id": 27492,
"is_active": False,
"name": "Little Legends 2",
"post_count": 39,
"post_ids": list,
"updated_at": "2022-03-27T06:30:03.382-04:00"
},
],
},
}),
("https://e621.net/post/show/535"),
("https://e926.net/posts/535", {

Loading…
Cancel
Save