fix/improve various things

pull/13/head
Mike Fährmann 8 years ago
parent abfe7456d6
commit ed94d9b92d
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -23,7 +23,7 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
pattern = [(r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts" pattern = [(r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts"
r"\?(?:utf8=%E2%9C%93&)?tags=([^&]+)")] r"\?(?:utf8=%E2%9C%93&)?tags=([^&]+)")]
test = [("https://danbooru.donmai.us/posts?tags=bonocho", { test = [("https://danbooru.donmai.us/posts?tags=bonocho", {
"url": "fb730af3f5e15650e5d924ffcda54b9ef232b89b", "url": "f94774bcb5169e943efb4d7bb51c47ae786b05f3",
"content": "b196fb9f1668109d7774a0a82efea3ffdda07746", "content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
})] })]

@@ -53,16 +53,18 @@ class Job():
def dispatch(self, msg): def dispatch(self, msg):
"""Call the appropriate message handler""" """Call the appropriate message handler"""
if msg[0] == Message.Url and self.pred_url: if msg[0] == Message.Url:
self.update_kwdict(msg[2]) if self.pred_url:
self.handle_url(msg[1], msg[2]) self.update_kwdict(msg[2])
self.handle_url(msg[1], msg[2])
elif msg[0] == Message.Directory: elif msg[0] == Message.Directory:
self.update_kwdict(msg[1]) self.update_kwdict(msg[1])
self.handle_directory(msg[1]) self.handle_directory(msg[1])
elif msg[0] == Message.Queue and self.pred_queue: elif msg[0] == Message.Queue:
self.handle_queue(msg[1]) if self.pred_queue:
self.handle_queue(msg[1])
elif msg[0] == Message.Headers: elif msg[0] == Message.Headers:
self.handle_headers(msg[1]) self.handle_headers(msg[1])

@@ -15,15 +15,14 @@ import html
import urllib.parse import urllib.parse
INVALID_XML_CHARS = [ INVALID_XML_CHARS = (1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 14, 15, 16, 17, 18,
chr(c) for c in (1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31) 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
]
def clean_xml(xmldata, repl=""): def clean_xml(xmldata, repl=""):
"""Replace/Remove invalid control characters in XML data""" """Replace/Remove invalid control characters in XML data"""
for char in INVALID_XML_CHARS: for char in INVALID_XML_CHARS:
char = chr(char)
if char in xmldata: if char in xmldata:
xmldata = xmldata.replace(char, repl) xmldata = xmldata.replace(char, repl)
return xmldata return xmldata

@@ -38,11 +38,11 @@ def main():
config.load() config.load()
for url in urls: for url in urls:
hjob = job.HashJob(url, content=args.content) tjob = job.TestJob(url, content=args.content)
hjob.run() tjob.run()
print(hjob.extractor.__class__.__name__) print(tjob.extractor.__class__.__name__)
print(TESTDATA_FMT.format(url, hjob.hash_url.hexdigest(), print(TESTDATA_FMT.format(url, tjob.hash_url.hexdigest(),
hjob.hash_keyword.hexdigest(), hjob.hash_content.hexdigest())) tjob.hash_keyword.hexdigest(), tjob.hash_content.hexdigest()))
if __name__ == '__main__': if __name__ == '__main__':
main() main()

Loading…
Cancel
Save