# SPDX-License-Identifier: MPL-2.0

import sqlite3
import markovify
from random import randint, choice


def make_sentence(cfg, keywords):
    """Generate one Markov-chain sentence from posts stored in the database.

    Up to 10000 randomly selected rows are read from the ``posts`` table of
    the SQLite database at ``cfg["db_path"]`` and used to build a
    ``markovify`` newline-delimited model, from which a sentence of at most
    500 characters is generated.

    Args:
        cfg: Mapping of settings. Keys read here:
            ``db_path`` - path of the SQLite database;
            ``learn_from_cw`` - when truthy, posts with a summary are also
            used unless the summary is listed in ``ignored_cws``;
            ``ignored_cws`` - sequence of summaries to exclude (only read
            when ``learn_from_cw`` is truthy);
            ``overlap_ratio_enabled`` / ``overlap_ratio`` - whether to use
            the stock ``markovify.NewlineText`` and which
            ``max_overlap_ratio`` to pass to it;
            ``limit_length``, ``length_lower_limit``,
            ``length_upper_limit`` - when enabled, a random ``max_words``
            is drawn from the inclusive range;
            ``keywords_from_reply`` - whether ``keywords`` may seed the
            sentence.
        keywords: Text whose words may be used as the sentence start, or a
            falsy value for no seeding.

    Returns:
        The generated sentence.

    Raises:
        ValueError: If the query returns no posts, or if no acceptable
            sentence is produced after 10 rounds of attempts.
    """

    class nlt_fixed(markovify.NewlineText):
        # modified version of NewlineText that never rejects sentences
        def test_sentence_input(self, sentence):
            return True  # all sentences are valid <3

    db = sqlite3.connect(cfg["db_path"])
    try:
        db.text_factory = str
        c = db.cursor()
        if cfg['learn_from_cw']:
            # One "?" placeholder per ignored content warning so the values
            # are bound by sqlite3 rather than interpolated into the SQL.
            ignored_cws_query_params = "(" + ",".join("?" * len(cfg["ignored_cws"])) + ")"
            toots = c.execute(
                f"""
                SELECT content
                FROM posts
                WHERE
                    summary IS NULL
                    OR summary NOT IN {ignored_cws_query_params}
                ORDER BY RANDOM() LIMIT 10000
                """,
                cfg["ignored_cws"],
            ).fetchall()
        else:
            toots = c.execute(
                """
                SELECT content
                FROM posts
                WHERE summary IS NULL
                ORDER BY RANDOM()
                LIMIT 10000
                """,
            ).fetchall()
    finally:
        # fetchall() has already materialised the rows, so the connection can
        # be released here - including when the query raises.
        db.close()

    if not toots:
        raise ValueError("Database is empty! Try running main.py.")

    nlt = markovify.NewlineText if cfg['overlap_ratio_enabled'] else nlt_fixed

    # TODO support replicating \n in output posts instead of squashing them together
    model = nlt("\n".join(toot[0].replace('\n', ' ') for toot in toots))

    max_words = None
    if cfg['limit_length']:
        max_words = randint(cfg['length_lower_limit'], cfg['length_upper_limit'])

    def make_short_sentence_with_keyword(max_chars, min_chars=0, keywords=None, **kwargs):
        """Return a sentence within the length bounds, or None if none is found.

        ``kwargs`` (including ``tries``) are forwarded unchanged to the model;
        ``tries`` additionally bounds the number of attempts made here.
        """
        tries = kwargs.get("tries")
        for _ in range(tries):
            if keywords:
                try:
                    keyword = choice(model.word_split(keywords))
                    sentence = model.make_sentence_with_start(keyword, strict=False, **kwargs)
                except Exception:
                    # The keyword could not seed a sentence (or there were no
                    # words to choose from): fall back to an unseeded one.
                    # Deliberately not a bare except, so KeyboardInterrupt and
                    # SystemExit still propagate.
                    sentence = model.make_sentence(**kwargs)
            else:
                sentence = model.make_sentence(**kwargs)
            if sentence and min_chars <= len(sentence) <= max_chars:
                return sentence
        return None

    seed_keywords = keywords if cfg['keywords_from_reply'] else None
    max_overlap_ratio = cfg['overlap_ratio'] if cfg['overlap_ratio_enabled'] else 0.7

    for _ in range(10):
        sentence = make_short_sentence_with_keyword(
            keywords=seed_keywords,
            max_chars=500,
            tries=10000,
            max_overlap_ratio=max_overlap_ratio,
            max_words=max_words,
        )
        if sentence is not None:
            return sentence

    raise ValueError("Failed 10 times to produce a sentence!")