diff --git a/example-bot.cfg b/example-bot.cfg index 27d2ad8..503c8a5 100644 --- a/example-bot.cfg +++ b/example-bot.cfg @@ -5,6 +5,7 @@ token=[token here] cw=[content warning here; make "none" for no cw] [markov] +exclude_links=false min_notes=5000 max_notes=10000 includeReplies=true diff --git a/roboduck.py b/roboduck.py index 836f265..72844d4 100755 --- a/roboduck.py +++ b/roboduck.py @@ -84,6 +84,11 @@ def get_notes(**kwargs): except (TypeError, ValueError, configparser.NoOptionError): exclude_nsfw = True + try: + exclude_links = check_str_to_bool(config.get("markov", "exclude_links")) + except (TypeError, ValueError, configparser.NoOptionError): + exclude_links = False + run = True oldnote = "" @@ -146,6 +151,9 @@ def get_notes(**kwargs): content = content.replace("::", ": :") # Break long emoji chains content = content.replace("@", "@" + chr(8203)) + if exclude_links: + content = regex.sub(r"(http|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-])", "", content) + note_dict = {"id": element["id"], "text": content, "timestamp": lastTimestamp, "user_id": userid} return_list.append(note_dict)