Add support for partial, word-based regexp matching of content warnings (CWs). Update the documentation with a 'How to Use' section.
This commit is contained in:
parent
59e9efe118
commit
c03c025773
2 changed files with 18 additions and 6 deletions
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
import sqlite3
|
||||
import markovify
|
||||
import regex
|
||||
|
||||
def make_sentence(cfg):
|
||||
class nlt_fixed(markovify.NewlineText): # modified version of NewlineText that never rejects sentences
|
||||
|
|
@ -10,19 +11,21 @@ def make_sentence(cfg):
|
|||
|
||||
db = sqlite3.connect(cfg["db_path"])
|
||||
db.text_factory = str
|
||||
def cw_regexp(x):
    """Return 1 if *x* contains any of the ignored CW words, else 0.

    Intended to be registered as a SQLite user-defined function (this file
    registers it under the name ``cwregexp``), so the result is an integer
    flag rather than a bool.

    Args:
        x: The text to test (the post's ``summary`` in the query), or
            ``None`` when SQLite passes a NULL value.

    Returns:
        1 when any word from ``cfg["ignored_cws"]`` occurs anywhere in *x*
        (case-insensitive, partial matches count), otherwise 0.
    """
    # SQLite hands NULL to Python callbacks as None. A missing summary cannot
    # contain an ignored word, and searching None would raise TypeError,
    # which SQLite would surface as a failed query.
    if x is None:
        return 0
    # \L<words> is the regex module's named-list syntax: it matches any one
    # of the strings supplied through the ``words`` keyword argument.
    # ``cfg`` comes from the surrounding scope.
    p = regex.compile(r"\L<words>", words=cfg["ignored_cws"], flags=regex.IGNORECASE)
    return 1 if p.search(x) else 0
|
||||
db.create_function('cwregexp', 1, cw_regexp)
|
||||
c = db.cursor()
|
||||
if cfg['learn_from_cw']:
|
||||
ignored_cws_query_params = "(" + ",".join("?" * len(cfg["ignored_cws"])) + ")"
|
||||
toots = c.execute(
|
||||
f"""
|
||||
SELECT content
|
||||
FROM posts
|
||||
WHERE
|
||||
summary IS NULL
|
||||
OR summary NOT IN {ignored_cws_query_params}
|
||||
OR NOT CWREGEXP(summary)
|
||||
ORDER BY RANDOM() LIMIT 10000
|
||||
""",
|
||||
cfg["ignored_cws"],
|
||||
).fetchall()
|
||||
else:
|
||||
toots = c.execute(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue