add ability to ignore CWs
This commit is contained in:
parent
a904587b32
commit
71dbf59796
3 changed files with 9 additions and 41 deletions
|
@ -6,6 +6,7 @@ This version makes quite a few changes from [the original](https://github.com/Je
|
|||
- Non-Markov stuff
|
||||
- Stores toots in a sqlite database rather than a text file
|
||||
- Doesn't unnecessarily redownload all toots every time
|
||||
- Ability to ignore specific CWs
|
||||
|
||||
## FediBooks
|
||||
Before you use mstdn-ebooks to create your own ebooks bot, I recommend checking out [FediBooks](https://fedibooks.com). Compared to mstdn-ebooks, FediBooks offers a few advantages:
|
||||
|
@ -54,6 +55,7 @@ Configuring mstdn-ebooks is accomplished by editing `config.json`. If you want t
|
|||
| cw | null | The content warning (aka subject) mstdn-ebooks will apply to non-error posts. |
|
||||
| instance_blacklist | ["bofa.lol", "witches.town", "knzk.me"] | If your bot is following someone from a blacklisted instance, it will skip over them and not download their posts. This is useful for ensuring that mstdn-ebooks doesn't waste time trying to download posts from dead instances, without you having to unfollow the user(s) from them. |
|
||||
| learn_from_cw | false | If true, mstdn-ebooks will learn from CW'd posts. |
|
||||
| ignored_cws | [] | If `learn_from_cw` is true, do not learn from posts with these CWs. |
|
||||
| mention_handling | 1 | 0: Never use mentions. 1: Only generate fake mentions in the middle of posts, never at the start. 2: Use mentions as normal (old behaviour). |
|
||||
| max_thread_length | 15 | The maximum number of bot posts in a thread before it stops replying. A thread can be 10 or 10000 posts long, but the bot will stop after it has posted `max_thread_length` times. |
|
||||
| strip_paired_punctuation | false | If true, mstdn-ebooks will remove punctuation that commonly appears in pairs, like " and (). This avoids the issue of posts that open a bracket (or quote) without closing it. |
|
||||
|
|
|
@ -19,9 +19,10 @@ def make_sentence(output, cfg):
|
|||
db.text_factory = str
|
||||
c = db.cursor()
|
||||
if cfg['learn_from_cw']:
|
||||
toots = c.execute("SELECT content FROM `toots` ORDER BY RANDOM() LIMIT 10000").fetchall()
|
||||
ignored_cws_query_params = "(" + ",".join("?" * len(cfg["ignored_cws"])) + ")"
|
||||
toots = c.execute(f"SELECT content FROM `toots` WHERE cw NOT IN {ignored_cws_query_params} ORDER BY RANDOM() LIMIT 10000", cfg["ignored_cws"]).fetchall()
|
||||
else:
|
||||
toots = c.execute("SELECT content FROM `toots` WHERE cw = 0 ORDER BY RANDOM() LIMIT 10000").fetchall()
|
||||
toots = c.execute("SELECT content FROM `toots` WHERE cw IS NULL ORDER BY RANDOM() LIMIT 10000").fetchall()
|
||||
|
||||
if len(toots) == 0:
|
||||
output.send("Database is empty! Try running main.py.")
|
||||
|
|
43
main.py
43
main.py
|
@ -31,7 +31,8 @@ cfg = {
|
|||
"length_lower_limit": 5,
|
||||
"length_upper_limit": 50,
|
||||
"overlap_ratio_enabled": False,
|
||||
"overlap_ratio": 0.7
|
||||
"overlap_ratio": 0.7,
|
||||
"ignored_cws": [],
|
||||
}
|
||||
|
||||
try:
|
||||
|
@ -94,46 +95,10 @@ following = client.account_following(me.id)
|
|||
db = sqlite3.connect("toots.db")
|
||||
db.text_factory = str
|
||||
c = db.cursor()
|
||||
c.execute("CREATE TABLE IF NOT EXISTS `toots` (sortid INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT, id VARCHAR NOT NULL, cw INT NOT NULL DEFAULT 0, userid VARCHAR NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL)")
|
||||
c.execute("CREATE TABLE IF NOT EXISTS `toots` (sortid INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT, id VARCHAR NOT NULL, cw VARCHAR, userid VARCHAR NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL)")
|
||||
c.execute("CREATE TRIGGER IF NOT EXISTS `dedup` AFTER INSERT ON toots FOR EACH ROW BEGIN DELETE FROM toots WHERE rowid NOT IN (SELECT MIN(sortid) FROM toots GROUP BY uri ); END; ")
|
||||
db.commit()
|
||||
|
||||
tableinfo = c.execute("PRAGMA table_info(`toots`)").fetchall()
|
||||
found = False
|
||||
columns = []
|
||||
for entry in tableinfo:
|
||||
if entry[1] == "sortid":
|
||||
found = True
|
||||
break
|
||||
columns.append(entry[1])
|
||||
|
||||
if not found:
|
||||
print("Migrating to new database format. Please wait...")
|
||||
print("WARNING: If any of the accounts your bot is following are Pleroma users, please delete toots.db and run main.py again to create it anew.")
|
||||
try:
|
||||
c.execute("DROP TABLE `toots_temp`")
|
||||
except:
|
||||
pass
|
||||
|
||||
c.execute("CREATE TABLE `toots_temp` (sortid INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT, id VARCHAR NOT NULL, cw INT NOT NULL DEFAULT 0, userid VARCHAR NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL)")
|
||||
for f in following:
|
||||
user_toots = c.execute("SELECT * FROM `toots` WHERE userid LIKE ? ORDER BY id", (f.id,)).fetchall()
|
||||
if user_toots is None:
|
||||
continue
|
||||
|
||||
if columns[-1] == "cw":
|
||||
for toot in user_toots:
|
||||
c.execute("INSERT INTO `toots_temp` (id, userid, uri, content, cw) VALUES (?, ?, ?, ?, ?)", toot)
|
||||
else:
|
||||
for toot in user_toots:
|
||||
c.execute("INSERT INTO `toots_temp` (id, cw, userid, uri, content) VALUES (?, ?, ?, ?, ?)", toot)
|
||||
|
||||
c.execute("DROP TABLE `toots`")
|
||||
c.execute("ALTER TABLE `toots_temp` RENAME TO `toots`")
|
||||
c.execute("CREATE TRIGGER IF NOT EXISTS `dedup` AFTER INSERT ON toots FOR EACH ROW BEGIN DELETE FROM toots WHERE rowid NOT IN (SELECT MIN(sortid) FROM toots GROUP BY uri ); END; ")
|
||||
|
||||
db.commit()
|
||||
|
||||
|
||||
def handleCtrlC(signal, frame):
|
||||
print("\nPREMATURE EVACUATION - Saving chunks")
|
||||
|
@ -155,7 +120,7 @@ def insert_toot(oii, acc, post, cursor): # extracted to prevent duplication
|
|||
pid = patterns["pid"].search(oii['object']['id']).group(0)
|
||||
cursor.execute("REPLACE INTO toots (id, cw, userid, uri, content) VALUES (?, ?, ?, ?, ?)", (
|
||||
pid,
|
||||
1 if (oii['object']['summary'] is not None and oii['object']['summary'] != "") else 0,
|
||||
oii['object']['summary'] or None,
|
||||
acc.id,
|
||||
oii['object']['id'],
|
||||
post
|
||||
|
|
Loading…
Reference in a new issue