add `--write-unsupported` option (#15)

7 years ago · 25bcdc8aa9
parent bf452a8516
commit 25bcdc8aa9
3 changed files with 22 additions and 12 deletions
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -100,8 +100,14 @@ def main():
                        file = open(args.inputfile)
                    import itertools
                    urls = itertools.chain(urls, sanatize_input(file))
-                except OSError as err:
-                    log.error(err)
+                except OSError as exc:
+                    log.warning("input-file: %s", exc)
+
+            if args.unsupportedfile:
+                try:
+                    job.Job.ufile = open(args.unsupportedfile, "w")
+                except OSError as exc:
+                    log.warning("unsupported-URL file: %s", exc)

            for url in urls:
                try:
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -15,6 +15,7 @@ from .extractor.message import Message

 class Job():
    """Base class for Job-types"""
+    ufile = None

    def __init__(self, url):
        self.url = url
@@ -111,6 +112,10 @@ class Job():
        kwdict["category"] = self.extractor.category
        kwdict["subcategory"] = self.extractor.subcategory

+    def _write_unsupported(self, url):
+        if self.ufile:
+            print(url, file=self.ufile, flush=True)
+

 class DownloadJob(Job):
    """Download images into appropriate directory/filename locations"""
@@ -138,7 +143,7 @@ class DownloadJob(Job):
        try:
            DownloadJob(url).run()
        except exception.NoExtractorError:
-            pass
+            self._write_unsupported(url)

    def handle_headers(self, headers):
        self.get_downloader("http:").set_headers(headers)
@@ -205,7 +210,7 @@ class UrlJob(Job):
        Job.__init__(self, url)
        self.depth = depth
        if depth == self.maxdepth:
-            self.handle_queue = self._print
+            self.handle_queue = print

    @staticmethod
    def handle_url(url, _):
@@ -215,13 +220,7 @@ class UrlJob(Job):
        try:
            UrlJob(url, self.depth + 1).run()
        except exception.NoExtractorError:
-            pass
-
-    @staticmethod
-    def _print(url):
-        if url.startswith("nofollow:"):
-            url = url[9:]
-        print(url)
+            self._write_unsupported(url)


 class TestJob(DownloadJob):
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -67,7 +67,7 @@ def build_parser():
    )
    parser.add_argument(
        "-g", "--get-urls", dest="list_urls", action="count",
-        help="Print download urls",
+        help="Print URLs instead of downloading",
    )
    parser.add_argument(
        "-j", "--dump-json", dest="list_data", action="store_true",
@@ -139,6 +139,11 @@ def build_parser():
        metavar="OPT", action=ParseAction, dest="options", default=[],
        help="Additional '<key>=<value>' option values",
    )
+    parser.add_argument(
+        "--write-unsupported", metavar="FILE", dest="unsupportedfile",
+        help=("Write URLs, which get emitted by other extractors but cannot "
+              "be handled, to FILE"),
+    )
    parser.add_argument(
        "--list-extractors", dest="list_extractors", action="store_true",
        help=("Print a list of extractor classes "