Add support for partial regexp matching of CWs by word. Update documentation with 'How to Use'

This commit is contained in:
smitten 2023-07-20 00:10:54 -04:00
parent 59e9efe118
commit c03c025773
2 changed files with 18 additions and 6 deletions

View file

@ -2,6 +2,7 @@
import sqlite3
import markovify
import regex
def make_sentence(cfg):
class nlt_fixed(markovify.NewlineText): # modified version of NewlineText that never rejects sentences
@ -10,19 +11,21 @@ def make_sentence(cfg):
db = sqlite3.connect(cfg["db_path"])
db.text_factory = str
def cw_regexp(x):
    """Return 1 if *x* contains any of the ignored CW strings, else 0.

    Meant to be registered as a SQLite user-defined function, hence the
    integer (1/0) result rather than a bool.

    Args:
        x: The text to test. May be None when SQLite passes a NULL value.

    Returns:
        1 when any entry of cfg["ignored_cws"] occurs anywhere in *x*
        (case-insensitively), otherwise 0. None never matches.
    """
    # A NULL column value arrives as None; searching it would raise
    # TypeError inside the SQL callback, so treat it as "no match".
    if x is None:
        return 0
    # \L<words> is the regex module's named-list syntax: it matches any one
    # of the literal strings supplied through the `words` keyword.
    p = regex.compile(
        r"\L<words>",
        words=cfg["ignored_cws"],
        flags=regex.IGNORECASE,
    )
    return 1 if p.search(x) else 0
db.create_function('cwregexp', 1, cw_regexp)
c = db.cursor()
if cfg['learn_from_cw']:
ignored_cws_query_params = "(" + ",".join("?" * len(cfg["ignored_cws"])) + ")"
toots = c.execute(
f"""
SELECT content
FROM posts
WHERE
summary IS NULL
OR summary NOT IN {ignored_cws_query_params}
OR NOT CWREGEXP(summary)
ORDER BY RANDOM() LIMIT 10000
""",
cfg["ignored_cws"],
).fetchall()
else:
toots = c.execute(