From 71dbf5979632161df7b98ed475bad4795c51fb28 Mon Sep 17 00:00:00 2001 From: io Date: Fri, 11 Jun 2021 21:29:51 +0000 Subject: [PATCH] add ability to ignore CWs --- README.md | 2 ++ functions.py | 5 +++-- main.py | 43 ++++--------------------------------------- 3 files changed, 9 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index a4c4d86..5ac8ba6 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ This version makes quite a few changes from [the original](https://github.com/Je - Non-Markov stuff - Stores toots in a sqlite database rather than a text file - Doesn't unnecessarily redownload all toots every time +- Ability to ignore specific CWs ## FediBooks Before you use mstdn-ebooks to create your own ebooks bot, I recommend checking out [FediBooks](https://fedibooks.com). Compared to mstdn-ebooks, FediBooks offers a few advantages: @@ -54,6 +55,7 @@ Configuring mstdn-ebooks is accomplished by editing `config.json`. If you want t | cw | null | The content warning (aka subject) mstdn-ebooks will apply to non-error posts. | | instance_blacklist | ["bofa.lol", "witches.town", "knzk.me"] | If your bot is following someone from a blacklisted instance, it will skip over them and not download their posts. This is useful for ensuring that mstdn-ebooks doesn't waste time trying to download posts from dead instances, without you having to unfollow the user(s) from them. | | learn_from_cw | false | If true, mstdn-ebooks will learn from CW'd posts. | +| ignored_cws | [] | If `learn_from_cw` is true, do not learn from posts with these CWs. | mention_handling | 1 | 0: Never use mentions. 1: Only generate fake mentions in the middle of posts, never at the start. 2: Use mentions as normal (old behaviour). | | max_thread_length | 15 | The maximum number of bot posts in a thread before it stops replying. A thread can be 10 or 10000 posts long, but the bot will stop after it has posted `max_thread_length` times. | | strip_paired_punctuation | false | If true, mstdn-ebooks will remove punctuation that commonly appears in pairs, like " and (). This avoids the issue of posts that open a bracket (or quote) without closing it. | diff --git a/functions.py b/functions.py index d9e38cb..36cbfdb 100755 --- a/functions.py +++ b/functions.py @@ -19,9 +19,10 @@ def make_sentence(output, cfg): db.text_factory = str c = db.cursor() if cfg['learn_from_cw']: - toots = c.execute("SELECT content FROM `toots` ORDER BY RANDOM() LIMIT 10000").fetchall() + ignored_cws_query_params = "(" + ",".join("?" * len(cfg["ignored_cws"])) + ")" + toots = c.execute(f"SELECT content FROM `toots` WHERE cw NOT IN {ignored_cws_query_params} ORDER BY RANDOM() LIMIT 10000", cfg["ignored_cws"]).fetchall() else: - toots = c.execute("SELECT content FROM `toots` WHERE cw = 0 ORDER BY RANDOM() LIMIT 10000").fetchall() + toots = c.execute("SELECT content FROM `toots` WHERE cw IS NULL ORDER BY RANDOM() LIMIT 10000").fetchall() if len(toots) == 0: output.send("Database is empty! Try running main.py.") diff --git a/main.py b/main.py index d67d377..eb2fb58 100755 --- a/main.py +++ b/main.py @@ -31,7 +31,8 @@ cfg = { "length_lower_limit": 5, "length_upper_limit": 50, "overlap_ratio_enabled": False, - "overlap_ratio": 0.7 + "overlap_ratio": 0.7, + "ignored_cws": [], } try: @@ -94,46 +95,10 @@ following = client.account_following(me.id) db = sqlite3.connect("toots.db") db.text_factory = str c = db.cursor() -c.execute("CREATE TABLE IF NOT EXISTS `toots` (sortid INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT, id VARCHAR NOT NULL, cw INT NOT NULL DEFAULT 0, userid VARCHAR NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL)") +c.execute("CREATE TABLE IF NOT EXISTS `toots` (sortid INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT, id VARCHAR NOT NULL, cw VARCHAR, userid VARCHAR NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL)") c.execute("CREATE TRIGGER IF NOT EXISTS `dedup` AFTER INSERT ON toots FOR EACH ROW BEGIN DELETE FROM toots WHERE rowid NOT IN (SELECT MIN(sortid) FROM toots GROUP BY uri ); END; ") db.commit() -tableinfo = c.execute("PRAGMA table_info(`toots`)").fetchall() -found = False -columns = [] -for entry in tableinfo: - if entry[1] == "sortid": - found = True - break - columns.append(entry[1]) - -if not found: - print("Migrating to new database format. Please wait...") - print("WARNING: If any of the accounts your bot is following are Pleroma users, please delete toots.db and run main.py again to create it anew.") - try: - c.execute("DROP TABLE `toots_temp`") - except: - pass - - c.execute("CREATE TABLE `toots_temp` (sortid INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT, id VARCHAR NOT NULL, cw INT NOT NULL DEFAULT 0, userid VARCHAR NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL)") - for f in following: - user_toots = c.execute("SELECT * FROM `toots` WHERE userid LIKE ? ORDER BY id", (f.id,)).fetchall() - if user_toots is None: - continue - - if columns[-1] == "cw": - for toot in user_toots: - c.execute("INSERT INTO `toots_temp` (id, userid, uri, content, cw) VALUES (?, ?, ?, ?, ?)", toot) - else: - for toot in user_toots: - c.execute("INSERT INTO `toots_temp` (id, cw, userid, uri, content) VALUES (?, ?, ?, ?, ?)", toot) - - c.execute("DROP TABLE `toots`") - c.execute("ALTER TABLE `toots_temp` RENAME TO `toots`") - c.execute("CREATE TRIGGER IF NOT EXISTS `dedup` AFTER INSERT ON toots FOR EACH ROW BEGIN DELETE FROM toots WHERE rowid NOT IN (SELECT MIN(sortid) FROM toots GROUP BY uri ); END; ") - -db.commit() - def handleCtrlC(signal, frame): print("\nPREMATURE EVACUATION - Saving chunks") @@ -155,7 +120,7 @@ def insert_toot(oii, acc, post, cursor): # extracted to prevent duplication pid = patterns["pid"].search(oii['object']['id']).group(0) cursor.execute("REPLACE INTO toots (id, cw, userid, uri, content) VALUES (?, ?, ?, ?, ?)", ( pid, - 1 if (oii['object']['summary'] is not None and oii['object']['summary'] != "") else 0, + oii['object']['summary'] or None, acc.id, oii['object']['id'], post