From 4492f6685c74430f56c32356e90de815c4bfc4cb Mon Sep 17 00:00:00 2001 From: io Date: Mon, 27 Sep 2021 11:10:36 +0000 Subject: [PATCH 01/10] remove repeated call to extract_post_content (#2) fortunately, extract_post_content returns the same thing when called repeatedly, so this wasn't a big deal anyway --- fetch_posts.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fetch_posts.py b/fetch_posts.py index 611604d..784d753 100755 --- a/fetch_posts.py +++ b/fetch_posts.py @@ -137,7 +137,6 @@ class PostFetcher: obj = activity['object'] - content = extract_post_content(obj['content']) await self._db.execute( """ INSERT INTO posts (post_id, summary, content, published_at) From 94949289f07e7f915f640b71179b193356fc1d5b Mon Sep 17 00:00:00 2001 From: io Date: Mon, 27 Sep 2021 11:13:03 +0000 Subject: [PATCH 02/10] normalize Pleroman posts without cws (fix #3) --- fetch_posts.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fetch_posts.py b/fetch_posts.py index 784d753..c7e8c9b 100755 --- a/fetch_posts.py +++ b/fetch_posts.py @@ -144,7 +144,9 @@ class PostFetcher: """, ( obj['id'], - obj['summary'], + # Pleroma returns an empty string here for posts without a CW, + # which is semantically incorrect IMO + obj['summary'] or None, extract_post_content(obj['content']), pendulum.parse(obj['published']).astimezone(pendulum.timezone('UTC')).timestamp(), ), From 766b60c09c8ff955049f508eeb7a56f924866ea5 Mon Sep 17 00:00:00 2001 From: io Date: Mon, 27 Sep 2021 11:15:18 +0000 Subject: [PATCH 03/10] accept pleroma-style empty string CWs (fix #3) --- pleroma.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pleroma.py b/pleroma.py index dda9cd0..91ed8e6 100644 --- a/pleroma.py +++ b/pleroma.py @@ -86,7 +86,10 @@ class Pleroma: data['in_reply_to_id'] = in_reply_to_id if visibility is not None: data['visibility'] = visibility - if cw is not None: + # normally, this would be a check against None. + # however, apparently Pleroma serializes posts without CWs as posts with an empty string + # as a CW, so per the robustness principle we'll accept that too. + if cw: data['spoiler_text'] = cw return await self.request('POST', '/api/v1/statuses', data=data) @@ -103,7 +106,7 @@ class Pleroma: content = ''.join('@' + x + ' ' for x in mentioned_accounts.values()) + content visibility = 'unlisted' if to_status['visibility'] == 'public' else to_status['visibility'] - if cw is None and 'spoiler_text' in to_status and to_status['spoiler_text']: + if not cw and 'spoiler_text' in to_status and to_status['spoiler_text']: cw = 're: ' + to_status['spoiler_text'] return await self.post(content, in_reply_to_id=to_status['id'], cw=cw, visibility=visibility) From 6523a28e1194f955f43e7e769a3e907ef94c1863 Mon Sep 17 00:00:00 2001 From: Joel Beckmeyer Date: Wed, 28 Dec 2022 20:22:24 -0500 Subject: [PATCH 04/10] fix some pleroma errors with async and 500 errors (#4) * fix some pleroma errors with async and 500 errors * add better recovery/handling of HTTP 500 * remove unnecessary else --- pleroma.py | 5 +++++ reply.py | 20 ++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pleroma.py b/pleroma.py index 91ed8e6..c726d0d 100644 --- a/pleroma.py +++ b/pleroma.py @@ -21,6 +21,9 @@ def http_session_factory(headers={}): class BadRequest(Exception): pass +class BadResponse(Exception): + pass + class Pleroma: def __init__(self, *, api_base_url, access_token): self.api_base_url = api_base_url.rstrip('/') @@ -50,6 +53,8 @@ class Pleroma: async with self._session.request(method, self.api_base_url + path, **kwargs) as resp: if resp.status == HTTPStatus.BAD_REQUEST: raise BadRequest((await resp.json())['error']) + if resp.status == HTTPStatus.INTERNAL_SERVER_ERROR: + raise BadResponse((await resp.json())) #resp.raise_for_status() return await resp.json() diff --git a/reply.py b/reply.py index c54df27..57fe2a6 100755 --- a/reply.py +++ b/reply.py @@ -22,10 +22,22 @@ class ReplyBot: async for notification in self.pleroma.stream_mentions(): await self.process_notification(notification) - async def process_notification(self, notification): + async def process_notification(self, notification, retry_count=0): acct = "@" + notification['account']['acct'] # get the account's @ post_id = notification['status']['id'] - context = await self.pleroma.status_context(post_id) + + # catch HTTP 500 and backoff on requests + retry_count = retry_count + 1 + try: + context = await self.pleroma.status_context(post_id) + except pleroma.BadResponse as exc: + if retry_count < 3: + await anyio.sleep(2**retry_count) + await self.process_notification(notification, retry_count) + else: + # failed too many times in a row, logging + print(f"Received HTTP 500 {retry_count} times in a row, aborting reply attempt.") + return # check if we've already been participating in this thread if self.check_thread_length(context): @@ -69,12 +81,12 @@ class ReplyBot: await self.pleroma.react(post_id, '✅') async def reply(self, notification): - toot = utils.make_toot(self.cfg) # generate a toot + toot = await utils.make_post(self.cfg) # generate a toot await self.pleroma.reply(notification['status'], toot, cw=self.cfg['cw']) @staticmethod def extract_toot(toot): - text = utils.extract_toot(toot) + text = utils.extract_post_content(toot) text = re.sub(r"^@\S+\s", r"", text) # remove the initial mention text = text.lower() # treat text as lowercase for easier keyword matching (if this bot uses it) return text From 82320560a139d95558d135da11bfefd8e9782471 Mon Sep 17 00:00:00 2001 From: lucdev Date: Wed, 28 Dec 2022 11:09:37 -0300 Subject: [PATCH 05/10] Rate limiter: fix datetime parser (fixes #5) --- utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils.py b/utils.py index 8bb1daa..4a802d6 100644 --- a/utils.py +++ b/utils.py @@ -4,6 +4,7 @@ import anyio import contextlib from functools import wraps from datetime import datetime, timezone +from dateutil.parser import parse as parsedate def as_corofunc(f): @wraps(f) @@ -60,7 +61,7 @@ class _RateLimitContextManager(contextlib.AbstractAsyncContextManager): if resp.headers.get('X-RateLimit-Remaining') not in {'0', '1'}: return resp - await sleep_until(datetime.fromisoformat(resp.headers['X-RateLimit-Reset'])) + await sleep_until(parsedate(resp.headers['X-RateLimit-Reset'])) await self._request_cm.__aexit__(*(None,)*3) return await self.__aenter__() From 03305c4a55715a8ae7d75971624b76d842e2c0c2 Mon Sep 17 00:00:00 2001 From: io Date: Wed, 11 Jan 2023 06:23:42 +0000 Subject: [PATCH 06/10] use external pleroma.py --- fetch_posts.py | 17 ++--- pleroma.py | 164 ------------------------------------------ requirements/base.txt | 1 + utils.py | 46 ++++-------- 4 files changed, 18 insertions(+), 210 deletions(-) delete mode 100644 pleroma.py diff --git a/fetch_posts.py b/fetch_posts.py index c7e8c9b..48859d8 100755 --- a/fetch_posts.py +++ b/fetch_posts.py @@ -10,19 +10,13 @@ import operator import aiosqlite import contextlib from yarl import URL -from pleroma import Pleroma +from pleroma import Pleroma, HandleRateLimits from bs4 import BeautifulSoup from functools import partial from typing import Iterable, NewType -from utils import shield, HandleRateLimits, suppress +from utils import shield, suppress, http_session_factory from third_party.utils import extract_post_content -USER_AGENT = ( - 'pleroma-ebooks; ' - f'{aiohttp.__version__}; ' - f'{platform.python_implementation()}/{platform.python_version()}' -) - UTC = pendulum.timezone('UTC') JSON_CONTENT_TYPE = 'application/json' ACTIVITYPUB_CONTENT_TYPE = 'application/activity+json' @@ -40,11 +34,8 @@ class PostFetcher: Pleroma(api_base_url=self.config['site'], access_token=self.config['access_token']), ) self._http = await stack.enter_async_context( - aiohttp.ClientSession( - headers={ - 'User-Agent': USER_AGENT, - 'Accept': ', '.join([JSON_CONTENT_TYPE, ACTIVITYPUB_CONTENT_TYPE]), - }, + http_session_factory( + headers={'Accept': ', '.join([JSON_CONTENT_TYPE, ACTIVITYPUB_CONTENT_TYPE])}, trust_env=True, raise_for_status=True, ), diff --git a/pleroma.py b/pleroma.py deleted file mode 100644 index c726d0d..0000000 --- a/pleroma.py +++ /dev/null @@ -1,164 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-only - -import sys -import yarl -import json -import hashlib -import aiohttp -from http import HTTPStatus - -def http_session_factory(headers={}): - py_version = '.'.join(map(str, sys.version_info)) - user_agent = ( - 'pleroma-ebooks (https://github.com/ioistired/pleroma-ebooks); ' - 'aiohttp/{aiohttp.__version__}; ' - 'python/{py_version}' - ) - return aiohttp.ClientSession( - headers={'User-Agent': user_agent, **headers}, - ) - -class BadRequest(Exception): - pass - -class BadResponse(Exception): - pass - -class Pleroma: - def __init__(self, *, api_base_url, access_token): - self.api_base_url = api_base_url.rstrip('/') - self.access_token = access_token - self._session = http_session_factory({'Authorization': 'Bearer ' + access_token}) - self._logged_in_id = None - - async def __aenter__(self): - self._session = await self._session.__aenter__() - return self - - async def __aexit__(self, *excinfo): - return await self._session.__aexit__(*excinfo) - - async def request(self, method, path, **kwargs): - # blocklist of some horrible instances - if hashlib.sha256( - yarl.URL(self.api_base_url).host.encode() - + bytes.fromhex('d590e3c48d599db6776e89dfc8ebaf53c8cd84866a76305049d8d8c5d4126ce1') - ).hexdigest() in { - '56704d4d95b882e81c8e7765e9079be0afc4e353925ba9add8fd65976f52db83', - '1932431fa41a0baaccce7815115b01e40e0237035bb155713712075b887f5a19', - 'a42191105a9f3514a1d5131969c07a95e06d0fdf0058f18e478823bf299881c9', - }: - raise RuntimeError('stop being a chud') - - async with self._session.request(method, self.api_base_url + path, **kwargs) as resp: - if resp.status == HTTPStatus.BAD_REQUEST: - raise BadRequest((await resp.json())['error']) - if resp.status == HTTPStatus.INTERNAL_SERVER_ERROR: - raise BadResponse((await resp.json())) - #resp.raise_for_status() - return await resp.json() - - async def verify_credentials(self): - return await self.request('GET', '/api/v1/accounts/verify_credentials') - - me = verify_credentials - - async def _get_logged_in_id(self): - if self._logged_in_id is None: - self._logged_in_id = (await self.me())['id'] - return self._logged_in_id - - async def following(self, account_id=None): - account_id = account_id or await self._get_logged_in_id() - return await self.request('GET', f'/api/v1/accounts/{account_id}/following') - - @staticmethod - def _unpack_id(obj): - if isinstance(obj, dict) and 'id' in obj: - return obj['id'] - return obj - - async def status_context(self, id): - id = self._unpack_id(id) - return await self.request('GET', f'/api/v1/statuses/{id}/context') - - async def post(self, content, *, in_reply_to_id=None, cw=None, visibility=None): - if visibility not in {None, 'private', 'public', 'unlisted', 'direct'}: - raise ValueError('invalid visibility', visibility) - - data = dict(status=content) - if in_reply_to_id := self._unpack_id(in_reply_to_id): - data['in_reply_to_id'] = in_reply_to_id - if visibility is not None: - data['visibility'] = visibility - # normally, this would be a check against None. - # however, apparently Pleroma serializes posts without CWs as posts with an empty string - # as a CW, so per the robustness principle we'll accept that too. - if cw: - data['spoiler_text'] = cw - - return await self.request('POST', '/api/v1/statuses', data=data) - - async def reply(self, to_status, content, *, cw=None): - user_id = await self._get_logged_in_id() - - mentioned_accounts = {} - mentioned_accounts[to_status['account']['id']] = to_status['account']['acct'] - for account in to_status['mentions']: - if account['id'] != user_id and account['id'] not in mentioned_accounts: - mentioned_accounts[account['id']] = account['acct'] - - content = ''.join('@' + x + ' ' for x in mentioned_accounts.values()) + content - - visibility = 'unlisted' if to_status['visibility'] == 'public' else to_status['visibility'] - if not cw and 'spoiler_text' in to_status and to_status['spoiler_text']: - cw = 're: ' + to_status['spoiler_text'] - - return await self.post(content, in_reply_to_id=to_status['id'], cw=cw, visibility=visibility) - - async def favorite(self, id): - id = self._unpack_id(id) - return await self.request('POST', f'/api/v1/statuses/{id}/favourite') - - async def unfavorite(self, id): - id = self._unpack_id(id) - return await self.request('POST', f'/api/v1/statuses/{id}/unfavourite') - - async def react(self, id, reaction): - id = self._unpack_id(id) - return await self.request('PUT', f'/api/v1/pleroma/statuses/{id}/reactions/{reaction}') - - async def remove_reaction(self, id, reaction): - id = self._unpack_id(id) - return await self.request('DELETE', f'/api/v1/pleroma/statuses/{id}/reactions/{reaction}') - - async def pin(self, id): - id = self._unpack_id(id) - return await self.request('POST', f'/api/v1/statuses/{id}/pin') - - async def unpin(self, id): - id = self._unpack_id(id) - return await self.request('POST', f'/api/v1/statuses/{id}/unpin') - - async def stream(self, stream_name, *, target_event_type=None): - async with self._session.ws_connect( - self.api_base_url + f'/api/v1/streaming?stream={stream_name}&access_token={self.access_token}' - ) as ws: - async for msg in ws: - if msg.type == aiohttp.WSMsgType.TEXT: - event = msg.json() - # the only event type that doesn't define `payload` is `filters_changed` - if event['event'] == 'filters_changed': - yield event - elif target_event_type is None or event['event'] == target_event_type: - # don't ask me why the payload is also JSON encoded smh - yield json.loads(event['payload']) - - async def stream_notifications(self): - async for notif in self.stream('user:notification', target_event_type='notification'): - yield notif - - async def stream_mentions(self): - async for notif in self.stream_notifications(): - if notif['type'] == 'mention': - yield notif diff --git a/requirements/base.txt b/requirements/base.txt index 290f67b..fd9f720 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,4 +1,5 @@ beautifulsoup4 ~= 4.9 +pleroma.py ~= 0.0.1 aiohttp ~= 3.0 json5 ~= 0.9.5 anyio ~= 3.0 diff --git a/utils.py b/utils.py index 4a802d6..0bdf8d2 100644 --- a/utils.py +++ b/utils.py @@ -1,10 +1,10 @@ # SPDX-License-Identifier: AGPL-3.0-only +import sys import anyio +import aiohttp import contextlib from functools import wraps -from datetime import datetime, timezone -from dateutil.parser import parse as parsedate def as_corofunc(f): @wraps(f) @@ -36,34 +36,14 @@ def removeprefix(s, prefix): # compatibility for pre-3.9 return s[len(prefix):] if s.startswith(prefix) else s -async def sleep_until(dt): - await anyio.sleep((dt - datetime.now(timezone.utc)).total_seconds()) - -class HandleRateLimits: - def __init__(self, http): - self.http = http - - def request(self, *args, **kwargs): - return _RateLimitContextManager(self.http, args, kwargs) - -class _RateLimitContextManager(contextlib.AbstractAsyncContextManager): - def __init__(self, http, args, kwargs): - self.http = http - self.args = args - self.kwargs = kwargs - - async def __aenter__(self): - self._request_cm = self.http.request(*self.args, **self.kwargs) - return await self._do_enter() - - async def _do_enter(self): - resp = await self._request_cm.__aenter__() - if resp.headers.get('X-RateLimit-Remaining') not in {'0', '1'}: - return resp - - await sleep_until(parsedate(resp.headers['X-RateLimit-Reset'])) - await self._request_cm.__aexit__(*(None,)*3) - return await self.__aenter__() - - async def __aexit__(self, *excinfo): - return await self._request_cm.__aexit__(*excinfo) +def http_session_factory(headers={}, **kwargs): + py_version = '.'.join(map(str, sys.version_info)) + user_agent = ( + 'pleroma-ebooks (https://github.com/ioistired/pleroma-ebooks); ' + f'aiohttp/{aiohttp.__version__}; ' + f'python/{py_version}' + ) + return aiohttp.ClientSession( + headers={'User-Agent': user_agent, **headers}, + **kwargs, + ) From 59e9efe1189f30c4cefb0cc267f6f731cdb86ac8 Mon Sep 17 00:00:00 2001 From: autumn Date: Sun, 5 Mar 2023 21:33:59 +0000 Subject: [PATCH 07/10] remove any generated @'s in replies to prevent accidentally mentioning somebody else on the same instance --- README.md | 3 +++ reply.py | 1 + 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index fda1675..f736ca4 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ # pleroma-ebooks +this is a very slightly modified version of +current changes: removes any generated @'s in replies to prevent accidentally mentioning somebody else on the same instance + It's like [@AgathaSorceress's mstdn-ebooks] but it supports Pleroma better. [@AgathaSorceress's mstdn-ebooks]: https://github.com/AgathaSorceress/mstdn-ebooks diff --git a/reply.py b/reply.py index 57fe2a6..960067d 100755 --- a/reply.py +++ b/reply.py @@ -82,6 +82,7 @@ class ReplyBot: async def reply(self, notification): toot = await utils.make_post(self.cfg) # generate a toot + toot = re.sub(r"@\S+\s", r"", toot) # remove any generated @'s await self.pleroma.reply(notification['status'], toot, cw=self.cfg['cw']) @staticmethod From c03c0257737ff92657c0b3312404675b90f7327a Mon Sep 17 00:00:00 2001 From: smitten Date: Thu, 20 Jul 2023 00:10:54 -0400 Subject: [PATCH 08/10] Support for CW partial regexp match by word. Update documentation with 'How to Use' --- README.md | 15 ++++++++++++--- generators/markov.py | 9 ++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f736ca4..bcb07f0 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,15 @@ It's like [@AgathaSorceress's mstdn-ebooks] but it supports Pleroma better. ## Secure Fetch Secure fetch (aka authorised fetches, authenticated fetches, secure mode...) is *not* supported by pleroma-ebooks, and will fail to download any posts from users on instances with secure fetch enabled. For more information, see [this wiki page](https://github.com/Lynnesbian/mstdn-ebooks/wiki/Secure-fetch). + +## How to Use +1. Create your bot account on the server. +2. Follow the user(s) you want to base the model on. +3. Get an access token for your bot. See [mastodon-bot](https://tinysubversions.com/notes/mastodon-bot/) for details. +4. Copy `config.defaults.json` to `config.json` and set as `access_token`. Make any other config tweaks you'd like. +5. Run `fetch_posts.py` to collect the posts from the followed user(s). +6. Run `gen.py` to generate the sentence and write it to the server. + ## Compatibility | Software | Downloading statuses | Posting | Replying | |-----------|-------------------------------------------------------------------|---------|-------------------------------------------------------------| @@ -31,11 +40,11 @@ Configuring pleroma-ebooks is accomplished by editing `config.json`. If you want | site | https://botsin.space | The instance your bot will log in to and post from. This must start with `https://` or `http://` (preferably the latter) | | cw | null | The content warning (aka subject) pleroma-ebooks will apply to non-error posts. | | learn_from_cw | false | If true, pleroma-ebooks will learn from CW'd posts. | -| ignored_cws | [] | If `learn_from_cw` is true, do not learn from posts with these CWs. +| ignored_cws | [] | If `learn_from_cw` is true, do not learn from posts with these CW words. matches case-insensitive and by word. | mention_handling | 1 | 0: Never use mentions. 1: Only generate fake mentions in the middle of posts, never at the start. 2: Use mentions as normal (old behaviour). | | max_thread_length | 15 | The maximum number of bot posts in a thread before it stops replying. A thread can be 10 or 10000 posts long, but the bot will stop after it has posted `max_thread_length` times. | | strip_paired_punctuation | false | If true, pleroma-ebooks will remove punctuation that commonly appears in pairs, like " and (). This avoids the issue of posts that open a bracket (or quote) without closing it. | -| limit_length | false | If true, the sentence length will be random between `length_lower_limit` and `length_upper_limit` | +| limit_length | false | If true, the sentence word length will be random between `length_lower_limit` and `length_upper_limit` | | length_lower_limit | 5 | The lower bound in the random number range above. Only matters if `limit_length` is true. | | length_upper_limit | 50 | The upper bound in the random number range above. Can be the same as `length_lower_limit` to disable randomness. Only matters if `limit_length` is true. | | overlap_ratio_enabled | false | If true, checks the output's similarity to the original posts. | @@ -51,4 +60,4 @@ Please don't feel obligated to donate at all. This is released under the AGPLv3 (only) license, and based on Lynnesbian's fork which is under the MPL 2.0 license. See LICENSE-AGPL.md and LICENSE-MPL for details. -**This means you must publish the source code of any ebooks bot you make with this.** A link back to this repository on your bot's profile page or profile metadata will suffice. If you make changes to the code you need to link to your fork/repo instead +**This means you must publish the source code of any ebooks bot you make with this.** A link back to this repository on your bot's profile page or profile metadata will suffice. If you make changes to the code you need to link to your fork/repo instead. diff --git a/generators/markov.py b/generators/markov.py index 5bf7a0e..4352c5e 100644 --- a/generators/markov.py +++ b/generators/markov.py @@ -2,6 +2,7 @@ import sqlite3 import markovify +import regex def make_sentence(cfg): class nlt_fixed(markovify.NewlineText): # modified version of NewlineText that never rejects sentences @@ -10,19 +11,21 @@ def make_sentence(cfg): db = sqlite3.connect(cfg["db_path"]) db.text_factory = str + def cw_regexp(x): + p = regex.compile(r"\L", words=cfg["ignored_cws"],flags=regex.IGNORECASE) + return 1 if p.search(x) else 0 + db.create_function('cwregexp', 1, cw_regexp) c = db.cursor() if cfg['learn_from_cw']: - ignored_cws_query_params = "(" + ",".join("?" * len(cfg["ignored_cws"])) + ")" toots = c.execute( f""" SELECT content FROM posts WHERE summary IS NULL - OR summary NOT IN {ignored_cws_query_params} + OR NOT CWREGEXP(summary) ORDER BY RANDOM() LIMIT 10000 """, - cfg["ignored_cws"], ).fetchall() else: toots = c.execute( From d08f1f94ec79728964f0b3e96497c2fac2e0d3c6 Mon Sep 17 00:00:00 2001 From: smitten Date: Thu, 20 Jul 2023 00:24:18 -0400 Subject: [PATCH 09/10] Update docs --- README.md | 15 +++++++++------ generators/markov.py | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index bcb07f0..47d7bcf 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,15 @@ # pleroma-ebooks -this is a very slightly modified version of -current changes: removes any generated @'s in replies to prevent accidentally mentioning somebody else on the same instance +this is a very slightly modified version of which is modified from + +_autumn's changes_: removes any generated @'s in replies to prevent accidentally mentioning somebody else on the same instance + +_smitten's changes_: adjusts CW filtering to be word-based regexp instead of exact match It's like [@AgathaSorceress's mstdn-ebooks] but it supports Pleroma better. [@AgathaSorceress's mstdn-ebooks]: https://github.com/AgathaSorceress/mstdn-ebooks -## Secure Fetch -Secure fetch (aka authorised fetches, authenticated fetches, secure mode...) is *not* supported by pleroma-ebooks, and will fail to download any posts from users on instances with secure fetch enabled. For more information, see [this wiki page](https://github.com/Lynnesbian/mstdn-ebooks/wiki/Secure-fetch). - - ## How to Use 1. Create your bot account on the server. 2. Follow the user(s) you want to base the model on. @@ -18,6 +17,10 @@ Secure fetch (aka authorised fetches, authenticated fetches, secure mode...) is 4. Copy `config.defaults.json` to `config.json` and set as `access_token`. Make any other config tweaks you'd like. 5. Run `fetch_posts.py` to collect the posts from the followed user(s). 6. Run `gen.py` to generate the sentence and write it to the server. +7. (optional) set up cron or other periodic auto-run. run `reply.py` in the background. + +## Secure Fetch +Secure fetch (aka authorised fetches, authenticated fetches, secure mode...) is *not* supported by pleroma-ebooks, and will fail to download any posts from users on instances with secure fetch enabled. For more information, see [this wiki page](https://github.com/Lynnesbian/mstdn-ebooks/wiki/Secure-fetch). ## Compatibility | Software | Downloading statuses | Posting | Replying | diff --git a/generators/markov.py b/generators/markov.py index 4352c5e..a777f90 100644 --- a/generators/markov.py +++ b/generators/markov.py @@ -11,8 +11,8 @@ def make_sentence(cfg): db = sqlite3.connect(cfg["db_path"]) db.text_factory = str + p = regex.compile(r"\L", words=cfg["ignored_cws"],flags=regex.IGNORECASE) def cw_regexp(x): - p = regex.compile(r"\L", words=cfg["ignored_cws"],flags=regex.IGNORECASE) return 1 if p.search(x) else 0 db.create_function('cwregexp', 1, cw_regexp) c = db.cursor() From eacfe97d9813dd9c728991463910d6dbd328a981 Mon Sep 17 00:00:00 2001 From: Yuki Date: Thu, 21 Mar 2024 00:51:37 +0100 Subject: [PATCH 10/10] fix some things for sharkey support --- .gitignore | 5 +++++ fetch_posts.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/.gitignore b/.gitignore index 7b8cac2..b8a7f3f 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,8 @@ __pycache__/ !*.defaults.json venv/ .venv/ + +bin +lib +lib64 +pyenv.cfg diff --git a/fetch_posts.py b/fetch_posts.py index 48859d8..d5eaa2f 100755 --- a/fetch_posts.py +++ b/fetch_posts.py @@ -128,6 +128,11 @@ class PostFetcher: obj = activity['object'] + try: + obj['summary'] + except KeyError: + obj['summary'] = None + await self._db.execute( """ INSERT INTO posts (post_id, summary, content, published_at)