From 34d13bc9061558a4d1688de3be471249dc70a6d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Tue, 30 Dec 2014 21:34:55 +0100
Subject: [PATCH] added extractor 'danbooru' + split BooruExtractor to handle
 XML and JSON

---
 gallery_dl/extractor/danbooru.py | 62 ++++++++++++++++++++++++++++++++
 gallery_dl/extractor/e621.py     |  8 ++---
 gallery_dl/extractor/gelbooru.py | 41 ++-------------------
 3 files changed, 69 insertions(+), 42 deletions(-)
 create mode 100644 gallery_dl/extractor/danbooru.py

diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
new file mode 100644
index 00000000..9579dea2
--- /dev/null
+++ b/gallery_dl/extractor/danbooru.py
@@ -0,0 +1,62 @@
+from .common import AsyncExtractor
+from ..util import filename_from_url
+import xml.etree.ElementTree as ET
+import json
+import urllib.parse
+
+class BooruExtractor(AsyncExtractor):  # shared base: tag query + page counter
+
+    def __init__(self, match, config):
+        AsyncExtractor.__init__(self, config)
+        self.tags      = urllib.parse.unquote(match.group(1))  # URL-decoded group 1
+        self.category  = "booru"  # default; subclasses in this patch overwrite it
+        self.params    = {"tags": self.tags}  # passed as 'params' to self.request()
+        self.page      = "page"  # key in self.params that holds the page number
+        self.directory = self.tags.replace("/", "_")  # tags with "/" replaced by "_"
+
+    def update_page(self, reset=False):
+        # Increment the page parameter when 'reset' is exactly False; any other
+        # value sets it to 1. Override in derived classes or change 'self.page'.
+        if reset is False:
+            self.params[self.page] += 1
+        else:
+            self.params[self.page]  = 1
+
+class JSONBooruExtractor(BooruExtractor):  # pages through JSON API responses
+
+    def images(self):
+        self.update_page(reset=True)  # reset the page parameter before the first request
+        while True:
+            images = json.loads(
+                self.request(self.api_url, verify=True, params=self.params).text
+            )
+            if len(images) == 0:  # empty result: stop the generator
+                return
+            for img in images:
+                url  = urllib.parse.urljoin(self.api_url, img["file_url"])  # resolve against api_url
+                name = "{}_{}".format(self.category, filename_from_url(url))  # "<category>_<file name>"
+                yield url, name
+            self.update_page()  # move on to the next page
+
+class XMLBooruExtractor(BooruExtractor):  # pages through XML API responses
+
+    def images(self):
+        self.update_page(reset=True)  # reset the page parameter before the first request
+        while True:
+            root = ET.fromstring(
+                self.request(self.api_url, verify=True, params=self.params).text
+            )
+            if len(root) == 0:  # root element has no children: stop the generator
+                return
+            for item in root:
+                url  = item.attrib["file_url"]  # used as-is, not joined with api_url
+                name = "{}_{}".format(self.category, filename_from_url(url))  # "<category>_<file name>"
+                yield url, name
+            self.update_page()  # move on to the next page
+
+class Extractor(JSONBooruExtractor):  # danbooru extractor built on the JSON base class
+
+    def __init__(self, match, config):
+        JSONBooruExtractor.__init__(self, match, config)
+        self.category = "danbooru"  # replaces the base class default "booru"
+        self.api_url  = "https://danbooru.donmai.us/posts.json"  # URL requested by images()
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index 7128d86a..81e2a2c6 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -1,8 +1,8 @@
-from .gelbooru import BooruExtractor
+from .danbooru import JSONBooruExtractor
 
-class Extractor(BooruExtractor):
+class Extractor(JSONBooruExtractor):
 
     def __init__(self, match, config):
-        BooruExtractor.__init__(self, match, config)
+        JSONBooruExtractor.__init__(self, match, config)
         self.category = "e621"
-        self.api_url  = "https://e621.net/post/index.xml"
+        self.api_url  = "https://e621.net/post/index.json"
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 5295f1d5..43a9ffcd 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -1,44 +1,9 @@
-from .common import AsyncExtractor
-from ..util import filename_from_url
-import xml.etree.ElementTree as ET
-import urllib.parse
+from .danbooru import XMLBooruExtractor
 
-class BooruExtractor(AsyncExtractor):
+class Extractor(XMLBooruExtractor):
 
     def __init__(self, match, config):
-        AsyncExtractor.__init__(self, config)
-        self.tags      = urllib.parse.unquote(match.group(1))
-        self.category  = "booru"
-        self.params    = {"tags": self.tags}
-        self.page      = "page"
-        self.directory = self.tags.replace("/", "_")
-
-    def images(self):
-        self.update_page(reset=True)
-        while True:
-            root = ET.fromstring(
-                self.request(self.api_url, verify=True, params=self.params).text
-            )
-            if len(root) == 0:
-                return
-            for item in root:
-                url  = item.attrib["file_url"]
-                name = "{}_{}".format(self.category, filename_from_url(url))
-                yield url, name
-            self.update_page()
-
-    def update_page(self, reset=False):
-        # Override this method in derived classes if necessary.
-        # It is usually enough to adjust the 'page' attribute
-        if reset is False:
-            self.params[self.page] += 1
-        else:
-            self.params[self.page]  = 1
-
-class Extractor(BooruExtractor):
-
-    def __init__(self, match, config):
-        BooruExtractor.__init__(self, match, config)
+        XMLBooruExtractor.__init__(self, match, config)
         self.category = "gelbooru"
         self.api_url  = "http://gelbooru.com/"
         self.params   = {"page":"dapi", "s":"post", "q":"index", "tags":self.tags}