From 2420a83c42cf23251d9f5507c4d4583f5f2b3892 Mon Sep 17 00:00:00 2001 From: ente Date: Tue, 14 Jun 2022 20:22:34 +0200 Subject: [PATCH 01/10] small readme.md changes --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1009b4a..b402bd7 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ to install `MiPA`please use the following commands: `python3.10 -m pip install git+https://github.com/yupix/MiPA.git` `python3.10 -m pip install git+https://github.com/yupix/MiPAC.git` -For the bot to run you also need two additional packages +For the bot to run you also need a few additional packages ``` markovify configparser @@ -41,7 +41,7 @@ configure it according to the configuration section below. The best way to run it would be a `systemd` unit file and run it as a daemon. Just to test it you can use `nohup python3.10 rdbot.py &` in the directory the bot is located in. -### Docker (To be checked!) +### Docker (Will be updated later, so might be broken at the moment!) To host this image with docker, copy the `docker-compose.yml` file to the directory that you want to host it from. From 0fb821b34e5ba2a7826ab483dc23eaf4828c9c95 Mon Sep 17 00:00:00 2001 From: shibao Date: Wed, 15 Jun 2022 19:00:01 -0400 Subject: [PATCH 02/10] fix docker --- Dockerfile | 12 ++++++------ README.md | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index abc4c3c..5ebd700 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3-alpine +FROM python:3.10-alpine RUN apk add --no-cache \ py3-pip \ @@ -10,10 +10,10 @@ RUN apk add --no-cache \ WORKDIR /usr/src/app COPY requirements.txt ./ -RUN pip install --upgrade pip -RUN pip install --no-cache-dir -r requirements.txt -#RUN pip install git+https://github.com/yupix/Mi.py.git@v3.9.91 - +RUN pip3.10 install --upgrade pip +RUN pip3.10 install --no-cache-dir -r requirements.txt +RUN pip3.10 install git+https://github.com/yupix/MiPA.git +RUN pip3.10 install git+https://github.com/yupix/MiPAC.git COPY . . -CMD [ "python", "-u", "./rdbot.py" ] +CMD [ "python3.10", "-u", "rdbot" ] diff --git a/README.md b/README.md index b402bd7..f108af2 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ configure it according to the configuration section below. The best way to run it would be a `systemd` unit file and run it as a daemon. Just to test it you can use `nohup python3.10 rdbot.py &` in the directory the bot is located in. -### Docker (Will be updated later, so might be broken at the moment!) +### Docker To host this image with docker, copy the `docker-compose.yml` file to the directory that you want to host it from. @@ -90,4 +90,4 @@ If an option is missing from the `misskey` part of the config file, the default [Nullobsi](https://github.com/nullobsi) - Added multi-user support
[ThatOneCalculator](https://github.com/ThatOneCalculator) - Option to CW the posts
-Thank you very much! Without your this project wouldn't be on this level! \ No newline at end of file +Thank you very much! Without your this project wouldn't be on this level! From cbbd75b9efe699f64cc86fc0791a76763aea9fab Mon Sep 17 00:00:00 2001 From: shibao Date: Thu, 16 Jun 2022 17:45:56 -0400 Subject: [PATCH 03/10] fix docker exec command --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 5ebd700..6eeb566 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,4 +16,4 @@ RUN pip3.10 install git+https://github.com/yupix/MiPA.git RUN pip3.10 install git+https://github.com/yupix/MiPAC.git COPY . . -CMD [ "python3.10", "-u", "rdbot" ] +CMD [ "python3.10", "-u", "rdbot.py" ] From f5b10257897d9a4e3015d0428aece9da1784b55d Mon Sep 17 00:00:00 2001 From: shibao Date: Thu, 16 Jun 2022 17:46:05 -0400 Subject: [PATCH 04/10] fix method call --- rdbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdbot.py b/rdbot.py index 1964b2d..11a2cc2 100755 --- a/rdbot.py +++ b/rdbot.py @@ -62,7 +62,7 @@ if __name__ == "__main__": databasepath = roboduck.Path(__file__).parent.joinpath('roboduck.db') if not (roboduck.os.path.exists(databasepath) and roboduck.os.stat(databasepath).st_size != 0): - init_bot() + roboduck.init_bot() bot = MyBot() asyncio.run(bot.start(url, token)) From 0b1e73de9dfb40e232ef5f8193520b68407989c8 Mon Sep 17 00:00:00 2001 From: ente Date: Sat, 18 Jun 2022 10:23:49 +0200 Subject: [PATCH 05/10] refactor names of existing Misskey functions --- .idea/misc.xml | 2 +- roboduck.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.idea/misc.xml b/.idea/misc.xml index 36599bc..4cb40cc 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ - + \ No newline at end of file diff --git a/roboduck.py b/roboduck.py index 72844d4..05a9f3b 100755 --- a/roboduck.py +++ b/roboduck.py @@ -20,7 +20,7 @@ def check_str_to_bool(text) -> bool: return True -def get_user_id(username, instance): +def misskey_get_user_id(username, instance): url = "https://" + instance + "/api/users/show" try: req = requests.post(url, json={"username": username, "host": instance}) @@ -31,7 +31,7 @@ def get_user_id(username, instance): return req.json()["id"] -def get_notes(**kwargs): +def misskey_get_notes(**kwargs): note_id = "k" sinceid = "" min_notes = 0 @@ -66,7 +66,7 @@ def get_notes(**kwargs): config.read(os.path.join(os.path.dirname(__file__), 'bot.cfg')) # print(os.path.join(os.path.dirname(__file__), 'bot.cfg')) - userid = get_user_id(username, instance) + userid = misskey_get_user_id(username, instance) # Read & Sanitize Inputs from Config File try: @@ -218,7 +218,7 @@ def clean_database(): for user in config.get("misskey", "users").split(";"): username = user.split("@")[1] instance = user.split("@")[2] - userid = get_user_id(username, instance) + userid = misskey_get_user_id(username, instance) data = database.cursor() data.execute( "DELETE FROM notes WHERE user_id=:user_id AND id NOT IN (SELECT id FROM notes WHERE user_id=:user_id ORDER BY timestamp DESC LIMIT :max );", @@ -333,7 +333,7 @@ def update(): for user in config.get("misskey", "users").split(";"): username = user.split("@")[1] instance = user.split("@")[2] - userid = get_user_id(username, instance) + userid = misskey_get_user_id(username, instance) data = database.cursor() data.execute( "SELECT id FROM notes WHERE timestamp = (SELECT MAX(timestamp) FROM notes WHERE user_id=:user_id) AND user_id=:user_id;", @@ -341,7 +341,7 @@ def update(): sinceNote = data.fetchone()[0] - notesList.extend(get_notes(lastnote=sinceNote, username=username, instance=instance)) + notesList.extend(misskey_get_notes(lastnote=sinceNote, username=username, instance=instance)) if notesList == 0: database.close() @@ -399,7 +399,7 @@ def init_bot(): for user in config.get("misskey", "users").split(";"): print("Try reading first " + str(initnotes) + " notes for " + user + ".") - notesList = get_notes(min_notes=initnotes, username=user.split("@")[1], instance=user.split("@")[2]) + notesList = misskey_get_notes(min_notes=initnotes, username=user.split("@")[1], instance=user.split("@")[2]) print("Writing notes into database...") From 7d20ad0d841c5217537f741692b307f7278fe90d Mon Sep 17 00:00:00 2001 From: ente Date: Sat, 18 Jun 2022 14:36:45 +0200 Subject: [PATCH 06/10] Get `id` from Pleroma and Mastodon --- roboduck.py | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/roboduck.py b/roboduck.py index 05a9f3b..7e822b4 100755 --- a/roboduck.py +++ b/roboduck.py @@ -19,15 +19,35 @@ def check_str_to_bool(text) -> bool: else: return True +def get_endpoint(instance:str) -> str: + #Try Misskey + url = "https://" + instance + "/api/ping" + req = requests.post(url) + if req.status_code == 200: + return "Misskey" + + #Try Mastodon and Pleroma + url = "https://" + instance + "/api/v1/instance" #Pleroma uses the same API as Mastodon + req = requests.get(url) + if req.status_code == 200: + version = req.json()["version"] + + if version.find("(compatible; Pleroma") > 0: #String only available in Pleroma instances. Mastodon will return '-1' + return "Pleroma" + else: + return "Mastodon" + + return "unknown" -def misskey_get_user_id(username, instance): + +def misskey_get_user_id(username:str, instance:str) -> str: url = "https://" + instance + "/api/users/show" try: req = requests.post(url, json={"username": username, "host": instance}) req.raise_for_status() except requests.exceptions.HTTPError as err: print("Couldn't get Username! " + str(err)) - sys.exit(1) + return None return req.json()["id"] @@ -159,6 +179,26 @@ def misskey_get_notes(**kwargs): return return_list +def mastodon_get_user_id(username:str, instance:str) -> str: + url = "https://" + instance + "/api/v1/accounts/lookup?acct=" + username + + try: + req = requests.get(url) + req.raise_for_status() + except requests.exceptions.HTTPError as err: + print("Couldn't get Username! " + str(err)) + return None + return req.json()["id"] + +def mastodon_get_notes(): + print("MASTODON'T NOTES!") #TODO Write routine to get Mastodon notes (check for limiting commands!) + +def pleroma_get_user_id(username:str, instance:str) -> str: + #Pleroma uses the Mastodon API so as a shortcut I just reuse the Mastodon function + return mastodon_get_user_id(username, instance) + +def pleroma_get_notes(): + print("Pleroma notes!") #TODO Write routine to get leroma notes (check for limiting commands) def calculate_markov_chain(): text = "" From 879878638ef9f69647ea6be14561a17e8bdf7cf7 Mon Sep 17 00:00:00 2001 From: ente Date: Sat, 18 Jun 2022 15:25:51 +0200 Subject: [PATCH 07/10] refactor variables and fixed typos --- roboduck.py | 74 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/roboduck.py b/roboduck.py index 7e822b4..85aa964 100755 --- a/roboduck.py +++ b/roboduck.py @@ -19,41 +19,42 @@ def check_str_to_bool(text) -> bool: else: return True -def get_endpoint(instance:str) -> str: - #Try Misskey + +def get_endpoint(instance: str) -> str: + # Try Misskey url = "https://" + instance + "/api/ping" req = requests.post(url) if req.status_code == 200: return "Misskey" - #Try Mastodon and Pleroma - url = "https://" + instance + "/api/v1/instance" #Pleroma uses the same API as Mastodon + # Try Mastodon and Pleroma + url = "https://" + instance + "/api/v1/instance" # Pleroma uses the same API as Mastodon req = requests.get(url) if req.status_code == 200: version = req.json()["version"] - if version.find("(compatible; Pleroma") > 0: #String only available in Pleroma instances. Mastodon will return '-1' - return "Pleroma" + if version.find("(compatible; Pleroma") > 0: # String only available in Pleroma instances. Mastodon will + return "Pleroma" else: return "Mastodon" return "unknown" -def misskey_get_user_id(username:str, instance:str) -> str: +def misskey_get_user_id(username: str, instance: str) -> str: url = "https://" + instance + "/api/users/show" try: req = requests.post(url, json={"username": username, "host": instance}) req.raise_for_status() except requests.exceptions.HTTPError as err: print("Couldn't get Username! " + str(err)) - return None + return "" return req.json()["id"] def misskey_get_notes(**kwargs): note_id = "k" - sinceid = "" + since_id = "" min_notes = 0 notes_list = [] return_list = [] @@ -70,7 +71,7 @@ def misskey_get_notes(**kwargs): elif "lastnote" in kwargs: # print("Lastnote found!") init = False - sinceid = kwargs["lastnote"] + since_id = kwargs["lastnote"] else: print("Wrong arguments given!") @@ -117,7 +118,8 @@ def misskey_get_notes(**kwargs): if (init and len(notes_list) >= min_notes) or (oldnote == note_id): break - if not init: # sinceid should only be used when updating the database so the json object has to be parsed every time + if not init: # sinceid should only be used when updating the database so the json object has to be parsed + # every time api_json = { "userId": userid, "includeReplies": include_replies, @@ -126,7 +128,7 @@ def misskey_get_notes(**kwargs): "withFiles": False, "excludeNsfw": exclude_nsfw, "untilId": note_id, - "sinceId": sinceid} + "sinceId": since_id} else: api_json = { "userId": userid, @@ -159,7 +161,7 @@ def misskey_get_notes(**kwargs): for element in notes_list: last_time = element["createdAt"] - lastTimestamp = int(datetime.timestamp(datetime.strptime(last_time, '%Y-%m-%dT%H:%M:%S.%f%z')) * 1000) + last_timestamp = int(datetime.timestamp(datetime.strptime(last_time, '%Y-%m-%dT%H:%M:%S.%f%z')) * 1000) content = element["text"] @@ -172,14 +174,16 @@ def misskey_get_notes(**kwargs): content = content.replace("@", "@" + chr(8203)) if exclude_links: - content = regex.sub(r"(http|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-]))", "", content) + content = regex.sub(r"(http|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-]))", + "", content) - note_dict = {"id": element["id"], "text": content, "timestamp": lastTimestamp, "user_id": userid} + note_dict = {"id": element["id"], "text": content, "timestamp": last_timestamp, "user_id": userid} return_list.append(note_dict) return return_list -def mastodon_get_user_id(username:str, instance:str) -> str: + +def mastodon_get_user_id(username: str, instance: str) -> str: url = "https://" + instance + "/api/v1/accounts/lookup?acct=" + username try: @@ -187,18 +191,22 @@ def mastodon_get_user_id(username:str, instance:str) -> str: req.raise_for_status() except requests.exceptions.HTTPError as err: print("Couldn't get Username! " + str(err)) - return None + return "" return req.json()["id"] + def mastodon_get_notes(): - print("MASTODON'T NOTES!") #TODO Write routine to get Mastodon notes (check for limiting commands!) + print("MASTODON'T NOTES!") # TODO Write routine to get Mastodon notes (check for limiting commands!) -def pleroma_get_user_id(username:str, instance:str) -> str: - #Pleroma uses the Mastodon API so as a shortcut I just reuse the Mastodon function + +def pleroma_get_user_id(username: str, instance: str) -> str: + # Pleroma uses the Mastodon API so as a shortcut I just reuse the Mastodon function return mastodon_get_user_id(username, instance) + def pleroma_get_notes(): - print("Pleroma notes!") #TODO Write routine to get leroma notes (check for limiting commands) + print("Pleroma notes!") # TODO Write routine to get Pleroma notes (check for limiting commands) + def calculate_markov_chain(): text = "" @@ -261,7 +269,8 @@ def clean_database(): userid = misskey_get_user_id(username, instance) data = database.cursor() data.execute( - "DELETE FROM notes WHERE user_id=:user_id AND id NOT IN (SELECT id FROM notes WHERE user_id=:user_id ORDER BY timestamp DESC LIMIT :max );", + "DELETE FROM notes WHERE user_id=:user_id AND id NOT IN (SELECT id FROM notes WHERE user_id=:user_id " + "ORDER BY timestamp DESC LIMIT :max );", {"user_id": userid, "max": int(max_notes)}) database.commit() @@ -331,7 +340,7 @@ def create_sentence(): min_words = None """ - #Debug section to rpint the used values + #Debug section to print the used values print("These values are used:") print("test_output: " + str(test_output)) print("tries: " + str(tries)) @@ -357,7 +366,7 @@ def create_sentence(): def update(): - notesList = [] + notes_list = [] databasepath = Path(__file__).parent.joinpath('roboduck.db') if not (os.path.exists(databasepath) and os.stat(databasepath).st_size != 0): print("No database found!") @@ -376,20 +385,21 @@ def update(): userid = misskey_get_user_id(username, instance) data = database.cursor() data.execute( - "SELECT id FROM notes WHERE timestamp = (SELECT MAX(timestamp) FROM notes WHERE user_id=:user_id) AND user_id=:user_id;", + "SELECT id FROM notes WHERE timestamp = (SELECT MAX(timestamp) FROM notes WHERE user_id=:user_id) AND " + "user_id=:user_id;", {"user_id": userid}) - sinceNote = data.fetchone()[0] + since_note = data.fetchone()[0] - notesList.extend(misskey_get_notes(lastnote=sinceNote, username=username, instance=instance)) + notes_list.extend(misskey_get_notes(lastnote=since_note, username=username, instance=instance)) - if notesList == 0: + if notes_list == 0: database.close() return print("Insert new notes to database...") database.executemany("INSERT OR IGNORE INTO notes (id, text, timestamp, user_id) VALUES(?, ?, ?, ?)", - [(note["id"], note["text"], note["timestamp"], note["user_id"]) for note in notesList]) + [(note["id"], note["text"], note["timestamp"], note["user_id"]) for note in notes_list]) database.commit() print("Notes updated!") @@ -420,7 +430,7 @@ def init_bot(): with open(databasepath, "w+", encoding="utf-8"): database = sqlite3.connect(databasepath) - print("Connected to roboduck.db succesfull...") + print("Connected to roboduck.db successful...") print("Creating Table...") database.execute("CREATE TABLE notes (id CHAR(10) PRIMARY KEY, text CHAR(5000), timestamp INT, user_id CHAR(10));") @@ -439,12 +449,12 @@ def init_bot(): for user in config.get("misskey", "users").split(";"): print("Try reading first " + str(initnotes) + " notes for " + user + ".") - notesList = misskey_get_notes(min_notes=initnotes, username=user.split("@")[1], instance=user.split("@")[2]) + notes_list = misskey_get_notes(min_notes=initnotes, username=user.split("@")[1], instance=user.split("@")[2]) print("Writing notes into database...") database.executemany("INSERT INTO notes (id, text, timestamp, user_id) VALUES(?, ?, ?, ?)", - [(note["id"], note["text"], note["timestamp"], note["user_id"]) for note in notesList]) + [(note["id"], note["text"], note["timestamp"], note["user_id"]) for note in notes_list]) database.commit() database.close() From 424ebe43f6c5f664873c90ce8db023ef23f89fd3 Mon Sep 17 00:00:00 2001 From: ente Date: Sun, 19 Jun 2022 12:13:15 +0200 Subject: [PATCH 08/10] Changed occurrence of `misskey_get_user_id()` to `get_user_id()` so the right one will be thrown back Minor typos fixed --- roboduck.py | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/roboduck.py b/roboduck.py index 85aa964..a337227 100755 --- a/roboduck.py +++ b/roboduck.py @@ -85,9 +85,9 @@ def misskey_get_notes(**kwargs): # Load configuration config = configparser.ConfigParser() config.read(os.path.join(os.path.dirname(__file__), 'bot.cfg')) - # print(os.path.join(os.path.dirname(__file__), 'bot.cfg')) - userid = misskey_get_user_id(username, instance) + userid = misskey_get_user_id(username, instance) # Here are only Misskey ID is necessary so no need to check + # endpoint again # Read & Sanitize Inputs from Config File try: @@ -208,6 +208,22 @@ def pleroma_get_notes(): print("Pleroma notes!") # TODO Write routine to get Pleroma notes (check for limiting commands) +def get_user_id(username: str, instance: str) -> str: + # Determine API endpoint + api = get_endpoint(instance) + + # Determine how to get User ID on used Software + if api == "Misskey": + return misskey_get_user_id(username, instance) + elif api == "Mastodon": + return mastodon_get_user_id(username, instance) + elif api == "Pleroma": + return pleroma_get_user_id(username, instance) + else: + print("Domain isn't Misskey, Pleroma or Mastodon!\nCheck spelling of the domain!") + sys.exit(1) + + def calculate_markov_chain(): text = "" # Load configuration @@ -215,7 +231,7 @@ def calculate_markov_chain(): config.read(Path(__file__).parent.joinpath('bot.cfg')) try: max_notes = config.get("markov", "max_notes") - except (TypeError, ValueError): + except (TypeError, ValueError, configparser.NoOptionError): max_notes = "10000" databasepath = Path(__file__).parent.joinpath('roboduck.db') @@ -266,7 +282,9 @@ def clean_database(): for user in config.get("misskey", "users").split(";"): username = user.split("@")[1] instance = user.split("@")[2] - userid = misskey_get_user_id(username, instance) + + userid = get_user_id(username, instance) + data = database.cursor() data.execute( "DELETE FROM notes WHERE user_id=:user_id AND id NOT IN (SELECT id FROM notes WHERE user_id=:user_id " @@ -286,42 +304,42 @@ def create_sentence(): # Reading config file bot.cfg with config parser config = configparser.ConfigParser() config.read(Path(__file__).parent.joinpath('bot.cfg')) - # print((Path(__file__).parent).joinpath('bot.cfg')) + # Read & Sanitize Inputs try: test_output = check_str_to_bool(config.get("markov", "test_output")) - except (TypeError, ValueError): + except (TypeError, ValueError, configparser.NoOptionError): # print("test_output: " + str(err)) test_output = True if test_output: try: tries = int(config.get("markov", "tries")) - except (TypeError, ValueError): + except (TypeError, ValueError, configparser.NoOptionError): # print("tries: " + str(err)) tries = 250 try: max_overlap_ratio = float(config.get("markov", "max_overlap_ratio")) - except (TypeError, ValueError): + except (TypeError, ValueError, configparser.NoOptionError): # print("max_overlap_ratio: " + str(err)) max_overlap_ratio = 0.7 try: max_overlap_total = int(config.get("markov", "max_overlap_total")) - except (TypeError, ValueError): + except (TypeError, ValueError, configparser.NoOptionError): # print("max_overlap_total: " + str(err)) max_overlap_total = 10 try: max_words = int(config.get("markov", "max_words")) - except (TypeError, ValueError): + except (TypeError, ValueError, configparser.NoOptionError): # print("max_words: " + str(err)) max_words = None try: min_words = int(config.get("markov", "min_words")) - except (TypeError, ValueError): + except (TypeError, ValueError, configparser.NoOptionError): # print("min_words: " + str(err)) min_words = None @@ -382,7 +400,7 @@ def update(): for user in config.get("misskey", "users").split(";"): username = user.split("@")[1] instance = user.split("@")[2] - userid = misskey_get_user_id(username, instance) + userid = get_user_id(username, instance) data = database.cursor() data.execute( "SELECT id FROM notes WHERE timestamp = (SELECT MAX(timestamp) FROM notes WHERE user_id=:user_id) AND " @@ -433,7 +451,7 @@ def init_bot(): print("Connected to roboduck.db successful...") print("Creating Table...") - database.execute("CREATE TABLE notes (id CHAR(10) PRIMARY KEY, text CHAR(5000), timestamp INT, user_id CHAR(10));") + database.execute("CREATE TABLE notes (id CHAR(20) PRIMARY KEY, text CHAR(5000), timestamp INT, user_id CHAR(10));") print("Table NOTES created...") From b9868b7fba75425181c1c2015d2c416913570494 Mon Sep 17 00:00:00 2001 From: ente Date: Sun, 19 Jun 2022 16:27:33 +0200 Subject: [PATCH 09/10] Implemented reading from Mastodon & Pleroma --- .gitignore | 3 +- roboduck.py | 200 ++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 189 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 2f6867d..7749c17 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ test-rd.py bot.cfg __pycache__ mia-markov-chain -.venv \ No newline at end of file +.venv +notes.txt \ No newline at end of file diff --git a/roboduck.py b/roboduck.py index a337227..6aca6d3 100755 --- a/roboduck.py +++ b/roboduck.py @@ -24,7 +24,7 @@ def get_endpoint(instance: str) -> str: # Try Misskey url = "https://" + instance + "/api/ping" req = requests.post(url) - if req.status_code == 200: + if req.status_code == 200 and ("pong" in req.json()): return "Misskey" # Try Mastodon and Pleroma @@ -195,8 +195,142 @@ def mastodon_get_user_id(username: str, instance: str) -> str: return req.json()["id"] -def mastodon_get_notes(): - print("MASTODON'T NOTES!") # TODO Write routine to get Mastodon notes (check for limiting commands!) +def mastodon_get_notes(**kwargs): + note_id = "k" + since_id = "" + min_notes = 0 + notes_list = [] + return_list = [] + username = kwargs["username"] + instance = kwargs["instance"] + + print("Reading notes for @" + username + "@" + instance + ".") + if kwargs: + if "min_notes" in kwargs: + # print("min_notes found!") + init = True + min_notes = kwargs["min_notes"] + + elif "lastnote" in kwargs: + # print("Lastnote found!") + init = False + since_id = kwargs["lastnote"] + + else: + print("Wrong arguments given!") + print("Exiting routine!") + return + else: + print("No arguments given!") + print("Exiting routine") + return None + + # Load configuration + config = configparser.ConfigParser() + config.read(os.path.join(os.path.dirname(__file__), 'bot.cfg')) + + userid = mastodon_get_user_id(username, instance) # Here are only Mastodon ID is necessary so no need to check + # endpoint again + + # Read & Sanitize Inputs from Config File + try: + include_replies = check_str_to_bool(config.get("markov", "includeReplies")) + except (TypeError, ValueError, configparser.NoOptionError): + include_replies = True + + try: + include_my_renotes = check_str_to_bool(config.get("markov", "includeMyRenotes")) + except (TypeError, ValueError, configparser.NoOptionError): + include_my_renotes = False + + try: + exclude_nsfw = check_str_to_bool(config.get("markov", "excludeNsfw")) + except (TypeError, ValueError, configparser.NoOptionError): + exclude_nsfw = True + + try: + exclude_links = check_str_to_bool(config.get("markov", "exclude_links")) + except (TypeError, ValueError, configparser.NoOptionError): + exclude_links = False + + run = True + oldnote = "" + + base_url = "https://" + instance + "/api/v1/accounts/" + userid + "/statuses?limit=20&exclude_replies="\ + + str(not include_replies) + + if init: + url = base_url + else: + url = base_url + "&since_id=" + since_id + + while run: + + if (init and len(notes_list) >= min_notes) or (oldnote == note_id): + break + + try: + req = requests.get(url) + req.raise_for_status() + except requests.exceptions.HTTPError as err: + print("Couldn't get Posts! " + str(err)) + sys.exit(1) + + for jsonObj in req.json(): + notes_list.append(jsonObj) + if len(notes_list) == 0: + print("No new notes to load!") + return [] + + oldnote = note_id + + note_id = notes_list[len(notes_list)-1]["id"] + + if init: + url = base_url + "&max_id=" + note_id + else: + url = base_url + "&since_id=" + since_id + "&max_id=" + note_id + + print(str(len(notes_list)) + " Notes read.") + print("Processing notes...") + + for element in notes_list: + last_time = element["created_at"] + last_timestamp = int(datetime.timestamp(datetime.strptime(last_time, '%Y-%m-%dT%H:%M:%S.%f%z')) * 1000) + + content = element["content"] + + if content == "" and element["reblog"] is None: # Skips empty notes + continue + elif content == "" and element["reblog"] is not None: + if include_my_renotes: # Add Renotes to Database (if wanted) + content = element["reblog"]["content"] + content = content.replace(chr(8203), "") + else: + continue + + if element["spoiler_text"] != "" and exclude_nsfw: + continue + else: + content = element["spoiler_text"] + " " + content + + content = regex.sub(r"<[^>]+>", '', content) # Remove HTML tags in Note + + content = regex.sub(r"([.,!?])", r"\1 ", content) # Add spaces behind punctuation mark + content = regex.sub(r"\s{2,}", " ", content) # Remove double spaces + content = regex.sub(r"(?>@(?>[\w\-])+)(?>@(?>[\w\-\.])+)?", '', content) # Remove instance name with regular + # expression + content = content.replace("::", ": :") # Break long emoji chains + content = content.replace("@", "@" + chr(8203)) # Add no-length-space behind @ + + if exclude_links: + content = regex.sub(r"(http|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-]))", + "", content) + + note_dict = {"id": element["id"], "text": content, "timestamp": last_timestamp, "user_id": userid} + return_list.append(note_dict) + + return return_list def pleroma_get_user_id(username: str, instance: str) -> str: @@ -204,9 +338,26 @@ def pleroma_get_user_id(username: str, instance: str) -> str: return mastodon_get_user_id(username, instance) -def pleroma_get_notes(): - print("Pleroma notes!") # TODO Write routine to get Pleroma notes (check for limiting commands) +def pleroma_get_notes(**kwargs): + return_list = [] + username = kwargs["username"] + instance = kwargs["instance"] + if kwargs: + if "min_notes" in kwargs: + return_list = mastodon_get_notes(username=username, instance=instance, min_notes=kwargs["min_notes"]) + elif "lastnote" in kwargs: + return_list = mastodon_get_notes(username=username, instance=instance, lastnote=kwargs["lastnote"]) + else: + print("Wrong arguments given!") + print("Exiting routine!") + return + else: + print("No arguments given!") + print("Exiting routine") + return None + + return return_list def get_user_id(username: str, instance: str) -> str: # Determine API endpoint @@ -393,7 +544,7 @@ def update(): with open(databasepath, "a", encoding="utf-8"): database = sqlite3.connect(databasepath) - print("Connected to roboduck.db succesfull...") + print("Connected to roboduck.db successful...") config = configparser.ConfigParser() config.read(Path(__file__).parent.joinpath('bot.cfg')) @@ -409,7 +560,16 @@ def update(): since_note = data.fetchone()[0] - notes_list.extend(misskey_get_notes(lastnote=since_note, username=username, instance=instance)) + api = get_endpoint(instance) + + if api == "Misskey": + notes_list.extend(misskey_get_notes(lastnote=since_note, username=username, instance=instance)) + elif api == "Mastodon": + notes_list.extend(mastodon_get_notes(lastnote=since_note, username=username, instance=instance)) + elif api == "Pleroma": + notes_list.extend(pleroma_get_notes(lastnote=since_note, username=username, instance=instance)) + else: + print("BIG ERROR!") if notes_list == 0: database.close() @@ -427,7 +587,7 @@ def update(): clean_database() print("Database cleaned!") - print("Short sleep to prevent file collison...") + print("Short sleep to prevent file collision...") sleep(10) print("Calculating new Markov Chain...") @@ -451,7 +611,7 @@ def init_bot(): print("Connected to roboduck.db successful...") print("Creating Table...") - database.execute("CREATE TABLE notes (id CHAR(20) PRIMARY KEY, text CHAR(5000), timestamp INT, user_id CHAR(10));") + database.execute("CREATE TABLE notes (id CHAR(20) PRIMARY KEY, text TEXT, timestamp INT, user_id CHAR(20));") print("Table NOTES created...") @@ -459,15 +619,29 @@ def init_bot(): config = configparser.ConfigParser() config.read(Path(__file__).parent.joinpath('bot.cfg')) try: - initnotes = int(config.get("markov", "min_notes")) + init_notes = int(config.get("markov", "min_notes")) except (TypeError, ValueError): # print(err) - initnotes = 1000 + init_notes = 1000 for user in config.get("misskey", "users").split(";"): - print("Try reading first " + str(initnotes) + " notes for " + user + ".") + print("Try reading first " + str(init_notes) + " notes for " + user + ".") + + username = user.split("@")[1] + instance = user.split("@")[2] + + api = get_endpoint(instance) - notes_list = misskey_get_notes(min_notes=initnotes, username=user.split("@")[1], instance=user.split("@")[2]) + print(instance + " is a " + api + " instance.") + + if api == "Misskey": + notes_list = misskey_get_notes(min_notes=init_notes, username=username, instance=instance) + elif api == "Mastodon": + notes_list = mastodon_get_notes(min_notes=init_notes, username=username, instance=instance) + elif api == "Pleroma": + notes_list = pleroma_get_notes(min_notes=init_notes, username=username, instance=instance) + else: + print("BIG ERROR!") print("Writing notes into database...") From 2805f518a4e521d4bbbe0cfb2306ef5e55e667ce Mon Sep 17 00:00:00 2001 From: ente Date: Sun, 19 Jun 2022 16:29:27 +0200 Subject: [PATCH 10/10] changed README.md to reflect recent changes --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f108af2..5617a60 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Misskey eBooks Bot with Markov Chain ## Introduction This small python script is a Markov Chain eBooks bot based on the framework of [MiPA](https://github.com/yupix/MiPA.git) -It can only read and write from and to Misskey. Reading from Mastodon or Pleroma is not (yet) implemented. +It can only write to Misskey. It is possible to read from Misskey, Pleroma or Mastodon instances. It posts every hour on his own and reacts to mention. Every 12 hours the bot reloads the notes and recalculates the Markov Chain.