Support for CW partial regexp match by word. Update documentation with 'How to Use'

2023-07-20 00:10:54 -04:00 · 2023-07-20 00:10:54 -04:00 · c03c025773
commit c03c025773
parent 59e9efe118
2 changed files with 18 additions and 6 deletions
--- a/generators/markov.py
+++ b/generators/markov.py
@@ -2,6 +2,7 @@

 import sqlite3
 import markovify
+import regex

 def make_sentence(cfg):
 	class nlt_fixed(markovify.NewlineText):  # modified version of NewlineText that never rejects sentences
@@ -10,19 +11,21 @@ def make_sentence(cfg):

 	db = sqlite3.connect(cfg["db_path"])
 	db.text_factory = str
+	def cw_regexp(x):
+		p = regex.compile(r"\L<words>", words=cfg["ignored_cws"],flags=regex.IGNORECASE)
+		return 1 if p.search(x) else 0
+	db.create_function('cwregexp', 1, cw_regexp)
 	c = db.cursor()
 	if cfg['learn_from_cw']:
-		ignored_cws_query_params = "(" + ",".join("?" * len(cfg["ignored_cws"])) + ")"
 		toots = c.execute(
 			f"""
 			SELECT content
 			FROM posts
 			WHERE
 				summary IS NULL
-				OR summary NOT IN {ignored_cws_query_params}
+				OR NOT CWREGEXP(summary)
 			ORDER BY RANDOM() LIMIT 10000
 			""",
-			cfg["ignored_cws"],
 		).fetchall()
 	else:
 		toots = c.execute(