[recursive] simplify

pull/4539/head
Mike Fährmann 1 year ago
parent 899df8f237
commit 9f75713e00
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -9,7 +9,6 @@
"""Recursive extractor""" """Recursive extractor"""
from .common import Extractor, Message from .common import Extractor, Message
import requests
import re import re
@ -20,29 +19,13 @@ class RecursiveExtractor(Extractor):
example = "recursive:https://pastebin.com/raw/FLwrCYsT" example = "recursive:https://pastebin.com/raw/FLwrCYsT"
def items(self): def items(self):
self.session.mount("file://", FileAdapter()) url = self.url.partition(":")[2]
page = self.request(self.url.partition(":")[2]).text
del self.session.adapters["file://"]
for match in re.finditer(r"https?://[^\s\"']+", page): if url.startswith("file://"):
yield Message.Queue, match.group(0), {} with open(url[7:]) as fp:
page = fp.read()
class FileAdapter(requests.adapters.BaseAdapter):
"""Requests adapter for local files"""
def send(self, request, **kwargs):
response = requests.Response()
try:
response.raw = open(request.url[7:], "rb")
except OSError:
import io
response.raw = io.BytesIO()
response.status_code = requests.codes.bad_request
else: else:
response.raw.release_conn = response.raw.close page = self.request(url).text
response.status_code = requests.codes.ok
return response
def close(self): for match in re.finditer(r"https?://[^\s\"']+", page):
pass yield Message.Queue, match.group(0), {}

Loading…
Cancel
Save