Support for CW partial regexp match by word. Update documentation with 'How to Use'
This commit is contained in:
parent
59e9efe118
commit
c03c025773
2 changed files with 18 additions and 6 deletions
15
README.md
15
README.md
|
@ -10,6 +10,15 @@ It's like [@AgathaSorceress's mstdn-ebooks] but it supports Pleroma better.
|
||||||
## Secure Fetch
|
## Secure Fetch
|
||||||
Secure fetch (aka authorised fetches, authenticated fetches, secure mode...) is *not* supported by pleroma-ebooks, and will fail to download any posts from users on instances with secure fetch enabled. For more information, see [this wiki page](https://github.com/Lynnesbian/mstdn-ebooks/wiki/Secure-fetch).
|
Secure fetch (aka authorised fetches, authenticated fetches, secure mode...) is *not* supported by pleroma-ebooks, and will fail to download any posts from users on instances with secure fetch enabled. For more information, see [this wiki page](https://github.com/Lynnesbian/mstdn-ebooks/wiki/Secure-fetch).
|
||||||
|
|
||||||
|
|
||||||
|
## How to Use
|
||||||
|
1. Create your bot account on the server.
|
||||||
|
2. Follow the user(s) you want to base the model on.
|
||||||
|
3. Get an access token for your bot. See [mastodon-bot](https://tinysubversions.com/notes/mastodon-bot/) for details.
|
||||||
|
4. Copy `config.defaults.json` to `config.json` and set the token from the previous step as `access_token`. Make any other config tweaks you'd like.
|
||||||
|
5. Run `fetch_posts.py` to collect the posts from the followed user(s).
|
||||||
|
6. Run `gen.py` to generate a sentence and post it to the server.
|
||||||
|
|
||||||
## Compatibility
|
## Compatibility
|
||||||
| Software | Downloading statuses | Posting | Replying |
|
| Software | Downloading statuses | Posting | Replying |
|
||||||
|-----------|-------------------------------------------------------------------|---------|-------------------------------------------------------------|
|
|-----------|-------------------------------------------------------------------|---------|-------------------------------------------------------------|
|
||||||
|
@ -31,11 +40,11 @@ Configuring pleroma-ebooks is accomplished by editing `config.json`. If you want
|
||||||
| site | https://botsin.space | The instance your bot will log in to and post from. This must start with `https://` or `http://` (preferably the former) |
|
| site | https://botsin.space | The instance your bot will log in to and post from. This must start with `https://` or `http://` (preferably the former) |
|
||||||
| cw | null | The content warning (aka subject) pleroma-ebooks will apply to non-error posts. |
|
| cw | null | The content warning (aka subject) pleroma-ebooks will apply to non-error posts. |
|
||||||
| learn_from_cw | false | If true, pleroma-ebooks will learn from CW'd posts. |
|
| learn_from_cw | false | If true, pleroma-ebooks will learn from CW'd posts. |
|
||||||
| ignored_cws | [] | If `learn_from_cw` is true, do not learn from posts with these CWs.
|
| ignored_cws | [] | If `learn_from_cw` is true, do not learn from posts whose CW contains any of these words. Matching is case-insensitive and by word. |
|
||||||
| mention_handling | 1 | 0: Never use mentions. 1: Only generate fake mentions in the middle of posts, never at the start. 2: Use mentions as normal (old behaviour). |
|
| mention_handling | 1 | 0: Never use mentions. 1: Only generate fake mentions in the middle of posts, never at the start. 2: Use mentions as normal (old behaviour). |
|
||||||
| max_thread_length | 15 | The maximum number of bot posts in a thread before it stops replying. A thread can be 10 or 10000 posts long, but the bot will stop after it has posted `max_thread_length` times. |
|
| max_thread_length | 15 | The maximum number of bot posts in a thread before it stops replying. A thread can be 10 or 10000 posts long, but the bot will stop after it has posted `max_thread_length` times. |
|
||||||
| strip_paired_punctuation | false | If true, pleroma-ebooks will remove punctuation that commonly appears in pairs, like " and (). This avoids the issue of posts that open a bracket (or quote) without closing it. |
|
| strip_paired_punctuation | false | If true, pleroma-ebooks will remove punctuation that commonly appears in pairs, like " and (). This avoids the issue of posts that open a bracket (or quote) without closing it. |
|
||||||
| limit_length | false | If true, the sentence length will be random between `length_lower_limit` and `length_upper_limit` |
|
| limit_length | false | If true, the sentence length (in words) will be chosen at random between `length_lower_limit` and `length_upper_limit`. |
|
||||||
| length_lower_limit | 5 | The lower bound in the random number range above. Only matters if `limit_length` is true. |
|
| length_lower_limit | 5 | The lower bound in the random number range above. Only matters if `limit_length` is true. |
|
||||||
| length_upper_limit | 50 | The upper bound in the random number range above. Can be the same as `length_lower_limit` to disable randomness. Only matters if `limit_length` is true. |
|
| length_upper_limit | 50 | The upper bound in the random number range above. Can be the same as `length_lower_limit` to disable randomness. Only matters if `limit_length` is true. |
|
||||||
| overlap_ratio_enabled | false | If true, checks the output's similarity to the original posts. |
|
| overlap_ratio_enabled | false | If true, checks the output's similarity to the original posts. |
|
||||||
|
@ -51,4 +60,4 @@ Please don't feel obligated to donate at all.
|
||||||
|
|
||||||
This is released under the AGPLv3 (only) license, and based on Lynnesbian's fork which is under the MPL 2.0 license. See LICENSE-AGPL.md and LICENSE-MPL for details.
|
This is released under the AGPLv3 (only) license, and based on Lynnesbian's fork which is under the MPL 2.0 license. See LICENSE-AGPL.md and LICENSE-MPL for details.
|
||||||
|
|
||||||
**This means you must publish the source code of any ebooks bot you make with this.** A link back to this repository on your bot's profile page or profile metadata will suffice. If you make changes to the code you need to link to your fork/repo instead
|
**This means you must publish the source code of any ebooks bot you make with this.** A link back to this repository on your bot's profile page or profile metadata will suffice. If you make changes to the code you need to link to your fork/repo instead.
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import markovify
|
import markovify
|
||||||
|
import regex
|
||||||
|
|
||||||
def make_sentence(cfg):
|
def make_sentence(cfg):
|
||||||
class nlt_fixed(markovify.NewlineText): # modified version of NewlineText that never rejects sentences
|
class nlt_fixed(markovify.NewlineText): # modified version of NewlineText that never rejects sentences
|
||||||
|
@ -10,19 +11,21 @@ def make_sentence(cfg):
|
||||||
|
|
||||||
db = sqlite3.connect(cfg["db_path"])
|
db = sqlite3.connect(cfg["db_path"])
|
||||||
db.text_factory = str
|
db.text_factory = str
|
||||||
|
def cw_regexp(x):
|
||||||
|
p = regex.compile(r"\L<words>", words=cfg["ignored_cws"],flags=regex.IGNORECASE)
|
||||||
|
return 1 if p.search(x) else 0
|
||||||
|
db.create_function('cwregexp', 1, cw_regexp)
|
||||||
c = db.cursor()
|
c = db.cursor()
|
||||||
if cfg['learn_from_cw']:
|
if cfg['learn_from_cw']:
|
||||||
ignored_cws_query_params = "(" + ",".join("?" * len(cfg["ignored_cws"])) + ")"
|
|
||||||
toots = c.execute(
|
toots = c.execute(
|
||||||
f"""
|
f"""
|
||||||
SELECT content
|
SELECT content
|
||||||
FROM posts
|
FROM posts
|
||||||
WHERE
|
WHERE
|
||||||
summary IS NULL
|
summary IS NULL
|
||||||
OR summary NOT IN {ignored_cws_query_params}
|
OR NOT CWREGEXP(summary)
|
||||||
ORDER BY RANDOM() LIMIT 10000
|
ORDER BY RANDOM() LIMIT 10000
|
||||||
""",
|
""",
|
||||||
cfg["ignored_cws"],
|
|
||||||
).fetchall()
|
).fetchall()
|
||||||
else:
|
else:
|
||||||
toots = c.execute(
|
toots = c.execute(
|
||||||
|
|
Loading…
Reference in a new issue