From 4492f6685c74430f56c32356e90de815c4bfc4cb Mon Sep 17 00:00:00 2001
From: io <gie9ohbeixah@paperboats.net>
Date: Mon, 27 Sep 2021 11:10:36 +0000
Subject: [PATCH 01/10] remove repeated call to extract_post_content (#2)

fortunately, extract_post_content returns the same thing when called repeatedly,
so this wasn't a big deal anyway
---
 fetch_posts.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fetch_posts.py b/fetch_posts.py
index 611604d..784d753 100755
--- a/fetch_posts.py
+++ b/fetch_posts.py
@@ -137,7 +137,6 @@ class PostFetcher:
 
 		obj = activity['object']
 
-		content = extract_post_content(obj['content'])
 		await self._db.execute(
 			"""
 			INSERT INTO posts (post_id, summary, content, published_at)

From 94949289f07e7f915f640b71179b193356fc1d5b Mon Sep 17 00:00:00 2001
From: io <gie9ohbeixah@paperboats.net>
Date: Mon, 27 Sep 2021 11:13:03 +0000
Subject: [PATCH 02/10] normalize Pleroman posts without cws (fix #3)

---
 fetch_posts.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fetch_posts.py b/fetch_posts.py
index 784d753..c7e8c9b 100755
--- a/fetch_posts.py
+++ b/fetch_posts.py
@@ -144,7 +144,9 @@ class PostFetcher:
 			""",
 			(
 				obj['id'],
-				obj['summary'],
+				# Pleroma returns an empty string here for posts without a CW.
+				# Store NULL instead so these posts are treated as having no CW.
+				obj['summary'] or None,
 				extract_post_content(obj['content']),
 				pendulum.parse(obj['published']).astimezone(pendulum.timezone('UTC')).timestamp(),
 			),

From 766b60c09c8ff955049f508eeb7a56f924866ea5 Mon Sep 17 00:00:00 2001
From: io <gie9ohbeixah@paperboats.net>
Date: Mon, 27 Sep 2021 11:15:18 +0000
Subject: [PATCH 03/10] accept pleroma-style empty string CWs (fix #3)

---
 pleroma.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pleroma.py b/pleroma.py
index dda9cd0..91ed8e6 100644
--- a/pleroma.py
+++ b/pleroma.py
@@ -86,7 +86,10 @@ class Pleroma:
 			data['in_reply_to_id'] = in_reply_to_id
 		if visibility is not None:
 			data['visibility'] = visibility
-		if cw is not None:
+		# normally, this would be a check against None.
+		# however, apparently Pleroma serializes posts without CWs as posts with an empty string
+		# as a CW, so per the robustness principle we'll accept that too.
+		if cw:
 			data['spoiler_text'] = cw
 
 		return await self.request('POST', '/api/v1/statuses', data=data)
@@ -103,7 +106,7 @@ class Pleroma:
 		content = ''.join('@' + x + ' ' for x in mentioned_accounts.values()) + content
 
 		visibility = 'unlisted' if to_status['visibility'] == 'public' else to_status['visibility']
-		if cw is None and 'spoiler_text' in to_status and to_status['spoiler_text']:
+		if not cw and 'spoiler_text' in to_status and to_status['spoiler_text']:
 			cw = 're: ' + to_status['spoiler_text']
 
 		return await self.post(content, in_reply_to_id=to_status['id'], cw=cw, visibility=visibility)

From 6523a28e1194f955f43e7e769a3e907ef94c1863 Mon Sep 17 00:00:00 2001
From: Joel Beckmeyer <joel@beckmeyer.us>
Date: Wed, 28 Dec 2022 20:22:24 -0500
Subject: [PATCH 04/10] fix some pleroma errors with async and 500 errors (#4)

* fix some pleroma errors with async and 500 errors

* add better recovery/handling of HTTP 500

* remove unnecessary else
---
 pleroma.py |  5 +++++
 reply.py   | 20 ++++++++++++++++----
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/pleroma.py b/pleroma.py
index 91ed8e6..c726d0d 100644
--- a/pleroma.py
+++ b/pleroma.py
@@ -21,6 +21,9 @@ def http_session_factory(headers={}):
 class BadRequest(Exception):
 	pass
 
+class BadResponse(Exception):
+	pass
+
 class Pleroma:
 	def __init__(self, *, api_base_url, access_token):
 		self.api_base_url = api_base_url.rstrip('/')
@@ -50,6 +53,8 @@ class Pleroma:
 		async with self._session.request(method, self.api_base_url + path, **kwargs) as resp:
 			if resp.status == HTTPStatus.BAD_REQUEST:
 				raise BadRequest((await resp.json())['error'])
+			if resp.status == HTTPStatus.INTERNAL_SERVER_ERROR:
+			    raise BadResponse((await resp.json()))
 			#resp.raise_for_status()
 			return await resp.json()
 
diff --git a/reply.py b/reply.py
index c54df27..57fe2a6 100755
--- a/reply.py
+++ b/reply.py
@@ -22,10 +22,22 @@ class ReplyBot:
 			async for notification in self.pleroma.stream_mentions():
 				await self.process_notification(notification)
 
-	async def process_notification(self, notification):
+	async def process_notification(self, notification, retry_count=0):
 		acct = "@" + notification['account']['acct']  # get the account's @
 		post_id = notification['status']['id']
-		context = await self.pleroma.status_context(post_id)
+
+		# on HTTP 500, retry with exponential backoff; give up after the third failed attempt
+		retry_count = retry_count + 1
+		try:
+			context = await self.pleroma.status_context(post_id)
+		except pleroma.BadResponse as exc:
+			if retry_count < 3:
+				await anyio.sleep(2**retry_count)
+				await self.process_notification(notification, retry_count)
+			else:
+				# failed too many times in a row, logging
+				print(f"Received HTTP 500 {retry_count} times in a row, aborting reply attempt.")
+			return
 
 		# check if we've already been participating in this thread
 		if self.check_thread_length(context):
@@ -69,12 +81,12 @@ class ReplyBot:
 			await self.pleroma.react(post_id, '✅')
 
 	async def reply(self, notification):
-		toot = utils.make_toot(self.cfg)  # generate a toot
+		toot = await utils.make_post(self.cfg)  # generate a toot
 		await self.pleroma.reply(notification['status'], toot, cw=self.cfg['cw'])
 
 	@staticmethod
 	def extract_toot(toot):
-		text = utils.extract_toot(toot)
+		text = utils.extract_post_content(toot)
 		text = re.sub(r"^@\S+\s", r"", text)  # remove the initial mention
 		text = text.lower()  # treat text as lowercase for easier keyword matching (if this bot uses it)
 		return text

From 82320560a139d95558d135da11bfefd8e9782471 Mon Sep 17 00:00:00 2001
From: lucdev <me@lucdev.net>
Date: Wed, 28 Dec 2022 11:09:37 -0300
Subject: [PATCH 05/10] Rate limiter: fix datetime parser (fixes #5)

---
 utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/utils.py b/utils.py
index 8bb1daa..4a802d6 100644
--- a/utils.py
+++ b/utils.py
@@ -4,6 +4,7 @@ import anyio
 import contextlib
 from functools import wraps
 from datetime import datetime, timezone
+from dateutil.parser import parse as parsedate
 
 def as_corofunc(f):
 	@wraps(f)
@@ -60,7 +61,7 @@ class _RateLimitContextManager(contextlib.AbstractAsyncContextManager):
 		if resp.headers.get('X-RateLimit-Remaining') not in {'0', '1'}:
 			return resp
 
-		await sleep_until(datetime.fromisoformat(resp.headers['X-RateLimit-Reset']))
+		await sleep_until(parsedate(resp.headers['X-RateLimit-Reset']))
 		await self._request_cm.__aexit__(*(None,)*3)
 		return await self.__aenter__()
 

From 03305c4a55715a8ae7d75971624b76d842e2c0c2 Mon Sep 17 00:00:00 2001
From: io <gie9ohbeixah@paperboats.net>
Date: Wed, 11 Jan 2023 06:23:42 +0000
Subject: [PATCH 06/10] use external pleroma.py

---
 fetch_posts.py        |  17 ++---
 pleroma.py            | 164 ------------------------------------------
 requirements/base.txt |   1 +
 utils.py              |  46 ++++--------
 4 files changed, 18 insertions(+), 210 deletions(-)
 delete mode 100644 pleroma.py

diff --git a/fetch_posts.py b/fetch_posts.py
index c7e8c9b..48859d8 100755
--- a/fetch_posts.py
+++ b/fetch_posts.py
@@ -10,19 +10,13 @@ import operator
 import aiosqlite
 import contextlib
 from yarl import URL
-from pleroma import Pleroma
+from pleroma import Pleroma, HandleRateLimits
 from bs4 import BeautifulSoup
 from functools import partial
 from typing import Iterable, NewType
-from utils import shield, HandleRateLimits, suppress
+from utils import shield, suppress, http_session_factory
 from third_party.utils import extract_post_content
 
-USER_AGENT = (
-	'pleroma-ebooks; '
-	f'{aiohttp.__version__}; '
-	f'{platform.python_implementation()}/{platform.python_version()}'
-)
-
 UTC = pendulum.timezone('UTC')
 JSON_CONTENT_TYPE = 'application/json'
 ACTIVITYPUB_CONTENT_TYPE = 'application/activity+json'
@@ -40,11 +34,8 @@ class PostFetcher:
 			Pleroma(api_base_url=self.config['site'], access_token=self.config['access_token']),
 		)
 		self._http = await stack.enter_async_context(
-			aiohttp.ClientSession(
-				headers={
-					'User-Agent': USER_AGENT,
-					'Accept': ', '.join([JSON_CONTENT_TYPE, ACTIVITYPUB_CONTENT_TYPE]),
-				},
+			http_session_factory(
+				headers={'Accept': ', '.join([JSON_CONTENT_TYPE, ACTIVITYPUB_CONTENT_TYPE])},
 				trust_env=True,
 				raise_for_status=True,
 			),
diff --git a/pleroma.py b/pleroma.py
deleted file mode 100644
index c726d0d..0000000
--- a/pleroma.py
+++ /dev/null
@@ -1,164 +0,0 @@
-# SPDX-License-Identifier: AGPL-3.0-only
-
-import sys
-import yarl
-import json
-import hashlib
-import aiohttp
-from http import HTTPStatus
-
-def http_session_factory(headers={}):
-	py_version = '.'.join(map(str, sys.version_info))
-	user_agent = (
-		'pleroma-ebooks (https://github.com/ioistired/pleroma-ebooks); '
-		'aiohttp/{aiohttp.__version__}; '
-		'python/{py_version}'
-	)
-	return aiohttp.ClientSession(
-		headers={'User-Agent': user_agent, **headers},
-	)
-
-class BadRequest(Exception):
-	pass
-
-class BadResponse(Exception):
-	pass
-
-class Pleroma:
-	def __init__(self, *, api_base_url, access_token):
-		self.api_base_url = api_base_url.rstrip('/')
-		self.access_token = access_token
-		self._session = http_session_factory({'Authorization': 'Bearer ' + access_token})
-		self._logged_in_id = None
-
-	async def __aenter__(self):
-		self._session = await self._session.__aenter__()
-		return self
-
-	async def __aexit__(self, *excinfo):
-		return await self._session.__aexit__(*excinfo)
-
-	async def request(self, method, path, **kwargs):
-		# blocklist of some horrible instances
-		if hashlib.sha256(
-			yarl.URL(self.api_base_url).host.encode()
-			+ bytes.fromhex('d590e3c48d599db6776e89dfc8ebaf53c8cd84866a76305049d8d8c5d4126ce1')
-		).hexdigest() in {
-			'56704d4d95b882e81c8e7765e9079be0afc4e353925ba9add8fd65976f52db83',
-			'1932431fa41a0baaccce7815115b01e40e0237035bb155713712075b887f5a19',
-			'a42191105a9f3514a1d5131969c07a95e06d0fdf0058f18e478823bf299881c9',
-		}:
-			raise RuntimeError('stop being a chud')
-
-		async with self._session.request(method, self.api_base_url + path, **kwargs) as resp:
-			if resp.status == HTTPStatus.BAD_REQUEST:
-				raise BadRequest((await resp.json())['error'])
-			if resp.status == HTTPStatus.INTERNAL_SERVER_ERROR:
-			    raise BadResponse((await resp.json()))
-			#resp.raise_for_status()
-			return await resp.json()
-
-	async def verify_credentials(self):
-		return await self.request('GET', '/api/v1/accounts/verify_credentials')
-
-	me = verify_credentials
-
-	async def _get_logged_in_id(self):
-		if self._logged_in_id is None:
-			self._logged_in_id = (await self.me())['id']
-		return self._logged_in_id
-
-	async def following(self, account_id=None):
-		account_id = account_id or await self._get_logged_in_id()
-		return await self.request('GET', f'/api/v1/accounts/{account_id}/following')
-
-	@staticmethod
-	def _unpack_id(obj):
-		if isinstance(obj, dict) and 'id' in obj:
-			return obj['id']
-		return obj
-
-	async def status_context(self, id):
-		id = self._unpack_id(id)
-		return await self.request('GET', f'/api/v1/statuses/{id}/context')
-
-	async def post(self, content, *, in_reply_to_id=None, cw=None, visibility=None):
-		if visibility not in {None, 'private', 'public', 'unlisted', 'direct'}:
-			raise ValueError('invalid visibility', visibility)
-
-		data = dict(status=content)
-		if in_reply_to_id := self._unpack_id(in_reply_to_id):
-			data['in_reply_to_id'] = in_reply_to_id
-		if visibility is not None:
-			data['visibility'] = visibility
-		# normally, this would be a check against None.
-		# however, apparently Pleroma serializes posts without CWs as posts with an empty string
-		# as a CW, so per the robustness principle we'll accept that too.
-		if cw:
-			data['spoiler_text'] = cw
-
-		return await self.request('POST', '/api/v1/statuses', data=data)
-
-	async def reply(self, to_status, content, *, cw=None):
-		user_id = await self._get_logged_in_id()
-
-		mentioned_accounts = {}
-		mentioned_accounts[to_status['account']['id']] = to_status['account']['acct']
-		for account in to_status['mentions']:
-			if account['id'] != user_id and account['id'] not in mentioned_accounts:
-				mentioned_accounts[account['id']] = account['acct']
-
-		content = ''.join('@' + x + ' ' for x in mentioned_accounts.values()) + content
-
-		visibility = 'unlisted' if to_status['visibility'] == 'public' else to_status['visibility']
-		if not cw and 'spoiler_text' in to_status and to_status['spoiler_text']:
-			cw = 're: ' + to_status['spoiler_text']
-
-		return await self.post(content, in_reply_to_id=to_status['id'], cw=cw, visibility=visibility)
-
-	async def favorite(self, id):
-		id = self._unpack_id(id)
-		return await self.request('POST', f'/api/v1/statuses/{id}/favourite')
-
-	async def unfavorite(self, id):
-		id = self._unpack_id(id)
-		return await self.request('POST', f'/api/v1/statuses/{id}/unfavourite')
-
-	async def react(self, id, reaction):
-		id = self._unpack_id(id)
-		return await self.request('PUT', f'/api/v1/pleroma/statuses/{id}/reactions/{reaction}')
-
-	async def remove_reaction(self, id, reaction):
-		id = self._unpack_id(id)
-		return await self.request('DELETE', f'/api/v1/pleroma/statuses/{id}/reactions/{reaction}')
-
-	async def pin(self, id):
-		id = self._unpack_id(id)
-		return await self.request('POST', f'/api/v1/statuses/{id}/pin')
-
-	async def unpin(self, id):
-		id = self._unpack_id(id)
-		return await self.request('POST', f'/api/v1/statuses/{id}/unpin')
-
-	async def stream(self, stream_name, *, target_event_type=None):
-		async with self._session.ws_connect(
-			self.api_base_url + f'/api/v1/streaming?stream={stream_name}&access_token={self.access_token}'
-		) as ws:
-			async for msg in ws:
-				if msg.type == aiohttp.WSMsgType.TEXT:
-					event = msg.json()
-					# the only event type that doesn't define `payload` is `filters_changed`
-					if event['event'] == 'filters_changed':
-						yield event
-					elif target_event_type is None or event['event'] == target_event_type:
-						# don't ask me why the payload is also JSON encoded smh
-						yield json.loads(event['payload'])
-
-	async def stream_notifications(self):
-		async for notif in self.stream('user:notification', target_event_type='notification'):
-			yield notif
-
-	async def stream_mentions(self):
-		async for notif in self.stream_notifications():
-			if notif['type'] == 'mention':
-				yield notif
diff --git a/requirements/base.txt b/requirements/base.txt
index 290f67b..fd9f720 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -1,4 +1,5 @@
 beautifulsoup4 ~= 4.9
+pleroma.py ~= 0.0.1
 aiohttp ~= 3.0
 json5 ~= 0.9.5
 anyio ~= 3.0
diff --git a/utils.py b/utils.py
index 4a802d6..0bdf8d2 100644
--- a/utils.py
+++ b/utils.py
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: AGPL-3.0-only
 
+import sys
 import anyio
+import aiohttp
 import contextlib
 from functools import wraps
-from datetime import datetime, timezone
-from dateutil.parser import parse as parsedate
 
 def as_corofunc(f):
 	@wraps(f)
@@ -36,34 +36,14 @@ def removeprefix(s, prefix):
 		# compatibility for pre-3.9
 		return s[len(prefix):] if s.startswith(prefix) else s
 
-async def sleep_until(dt):
-	await anyio.sleep((dt - datetime.now(timezone.utc)).total_seconds())
-
-class HandleRateLimits:
-	def __init__(self, http):
-		self.http = http
-
-	def request(self, *args, **kwargs):
-		return _RateLimitContextManager(self.http, args, kwargs)
-
-class _RateLimitContextManager(contextlib.AbstractAsyncContextManager):
-	def __init__(self, http, args, kwargs):
-		self.http = http
-		self.args = args
-		self.kwargs = kwargs
-
-	async def __aenter__(self):
-		self._request_cm = self.http.request(*self.args, **self.kwargs)
-		return await self._do_enter()
-
-	async def _do_enter(self):
-		resp = await self._request_cm.__aenter__()
-		if resp.headers.get('X-RateLimit-Remaining') not in {'0', '1'}:
-			return resp
-
-		await sleep_until(parsedate(resp.headers['X-RateLimit-Reset']))
-		await self._request_cm.__aexit__(*(None,)*3)
-		return await self.__aenter__()
-
-	async def __aexit__(self, *excinfo):
-		return await self._request_cm.__aexit__(*excinfo)
+def http_session_factory(headers={}, **kwargs):
+	py_version = '.'.join(map(str, sys.version_info))
+	user_agent = (
+		'pleroma-ebooks (https://github.com/ioistired/pleroma-ebooks); '
+		f'aiohttp/{aiohttp.__version__}; '
+		f'python/{py_version}'
+	)
+	return aiohttp.ClientSession(
+		headers={'User-Agent': user_agent, **headers},
+		**kwargs,
+	)

From 59e9efe1189f30c4cefb0cc267f6f731cdb86ac8 Mon Sep 17 00:00:00 2001
From: autumn <code@autumn.is>
Date: Sun, 5 Mar 2023 21:33:59 +0000
Subject: [PATCH 07/10] remove any generated @'s in replies to prevent
 accidentally mentioning somebody else on the same instance

---
 README.md | 3 +++
 reply.py  | 1 +
 2 files changed, 4 insertions(+)

diff --git a/README.md b/README.md
index fda1675..f736ca4 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,8 @@
 # pleroma-ebooks
 
+this is a very slightly modified version of <https://github.com/ioistired/pleroma-ebooks>  
+current changes: removes any generated @'s in replies to prevent accidentally mentioning somebody else on the same instance
+
 It's like [@AgathaSorceress's mstdn-ebooks] but it supports Pleroma better.
 
 [@AgathaSorceress's mstdn-ebooks]: https://github.com/AgathaSorceress/mstdn-ebooks
diff --git a/reply.py b/reply.py
index 57fe2a6..960067d 100755
--- a/reply.py
+++ b/reply.py
@@ -82,6 +82,7 @@ class ReplyBot:
 
 	async def reply(self, notification):
 		toot = await utils.make_post(self.cfg)  # generate a toot
+		toot = re.sub(r"@\S+\s", r"", toot) # remove any generated @'s
 		await self.pleroma.reply(notification['status'], toot, cw=self.cfg['cw'])
 
 	@staticmethod

From c03c0257737ff92657c0b3312404675b90f7327a Mon Sep 17 00:00:00 2001
From: smitten <everything-cozy@pm.me>
Date: Thu, 20 Jul 2023 00:10:54 -0400
Subject: [PATCH 08/10] Support for CW partial regexp match by word. Update
 documentation with 'How to Use'

---
 README.md            | 15 ++++++++++++---
 generators/markov.py |  9 ++++++---
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index f736ca4..bcb07f0 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,15 @@ It's like [@AgathaSorceress's mstdn-ebooks] but it supports Pleroma better.
 ## Secure Fetch
 Secure fetch (aka authorised fetches, authenticated fetches, secure mode...) is *not* supported by pleroma-ebooks, and will fail to download any posts from users on instances with secure fetch enabled. For more information, see [this wiki page](https://github.com/Lynnesbian/mstdn-ebooks/wiki/Secure-fetch).
 
+
+## How to Use
+1. Create your bot account on the server.
+2. Follow the user(s) you want to base the model on.
+3. Get an access token for your bot. See [mastodon-bot](https://tinysubversions.com/notes/mastodon-bot/) for details.
+4. Copy `config.defaults.json` to `config.json` and set as `access_token`. Make any other config tweaks you'd like.
+5. Run `fetch_posts.py` to collect the posts from the followed user(s).
+6. Run `gen.py` to generate the sentence and write it to the server.
+
 ## Compatibility
 | Software  | Downloading statuses                                              | Posting | Replying                                                    |
 |-----------|-------------------------------------------------------------------|---------|-------------------------------------------------------------|
@@ -31,11 +40,11 @@ Configuring pleroma-ebooks is accomplished by editing `config.json`. If you want
 | site                     | https://botsin.space                    | The instance your bot will log in to and post from. This must start with `https://` or `http://` (preferably the latter)                                                                                                                                                                |
 | cw                       | null                                    | The content warning (aka subject) pleroma-ebooks will apply to non-error posts.                                                                                                                                                                                                           |
 | learn_from_cw            | false                                   | If true, pleroma-ebooks will learn from CW'd posts.                                                                                                                                                                                                                                       |
-| ignored_cws              | []                                      | If `learn_from_cw` is true, do not learn from posts with these CWs.
+| ignored_cws              | []                                      | If `learn_from_cw` is true, do not learn from posts whose CW contains any of these words. Each entry is matched case-insensitively anywhere within the CW, so a partial match is enough to exclude a post. |
 | mention_handling         | 1                                       | 0: Never use mentions. 1: Only generate fake mentions in the middle of posts, never at the start. 2: Use mentions as normal (old behaviour).                                                                                                                                            |
 | max_thread_length        | 15                                      | The maximum number of bot posts in a thread before it stops replying. A thread can be 10 or 10000 posts long, but the bot will stop after it has posted `max_thread_length` times.                                                                                                      |
 | strip_paired_punctuation | false                                   | If true, pleroma-ebooks will remove punctuation that commonly appears in pairs, like " and (). This avoids the issue of posts that open a bracket (or quote) without closing it.                                                                                                          |
-| limit_length             | false                                   | If true, the sentence length will be random between `length_lower_limit` and `length_upper_limit`                                                                                                                                                                                       |
+| limit_length             | false                                   | If true, the sentence word length will be random between `length_lower_limit` and `length_upper_limit`                                                                                                                                                                                       |
 | length_lower_limit       | 5                                       | The lower bound in the random number range above. Only matters if `limit_length` is true.                                                                                                                                                                                               |
 | length_upper_limit       | 50                                      | The upper bound in the random number range above. Can be the same as `length_lower_limit` to disable randomness. Only matters if `limit_length` is true.                                                                                                                                |
 | overlap_ratio_enabled    | false                                   | If true, checks the output's similarity to the original posts.                                                                                                                                                                                                                          |
@@ -51,4 +60,4 @@ Please don't feel obligated to donate at all.
 
 This is released under the AGPLv3 (only) license, and based on Lynnesbian's fork which is under the MPL 2.0 license. See LICENSE-AGPL.md and LICENSE-MPL for details.
 
-**This means you must publish the source code of any ebooks bot you make with this.** A link back to this repository on your bot's profile page or profile metadata will suffice. If you make changes to the code you need to link to your fork/repo instead
+**This means you must publish the source code of any ebooks bot you make with this.** A link back to this repository on your bot's profile page or profile metadata will suffice. If you make changes to the code you need to link to your fork/repo instead.
diff --git a/generators/markov.py b/generators/markov.py
index 5bf7a0e..4352c5e 100644
--- a/generators/markov.py
+++ b/generators/markov.py
@@ -2,6 +2,7 @@
 
 import sqlite3
 import markovify
+import regex
 
 def make_sentence(cfg):
 	class nlt_fixed(markovify.NewlineText):  # modified version of NewlineText that never rejects sentences
@@ -10,19 +11,21 @@ def make_sentence(cfg):
 
 	db = sqlite3.connect(cfg["db_path"])
 	db.text_factory = str
+	def cw_regexp(x):
+		p = regex.compile(r"\L<words>", words=cfg["ignored_cws"],flags=regex.IGNORECASE)
+		return 1 if p.search(x) else 0
+	db.create_function('cwregexp', 1, cw_regexp)
 	c = db.cursor()
 	if cfg['learn_from_cw']:
-		ignored_cws_query_params = "(" + ",".join("?" * len(cfg["ignored_cws"])) + ")"
 		toots = c.execute(
 			f"""
 			SELECT content
 			FROM posts
 			WHERE
 				summary IS NULL
-				OR summary NOT IN {ignored_cws_query_params}
+				OR NOT CWREGEXP(summary)
 			ORDER BY RANDOM() LIMIT 10000
 			""",
-			cfg["ignored_cws"],
 		).fetchall()
 	else:
 		toots = c.execute(

From d08f1f94ec79728964f0b3e96497c2fac2e0d3c6 Mon Sep 17 00:00:00 2001
From: smitten <everything-cozy@pm.me>
Date: Thu, 20 Jul 2023 00:24:18 -0400
Subject: [PATCH 09/10] Update docs

---
 README.md            | 15 +++++++++------
 generators/markov.py |  2 +-
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index bcb07f0..47d7bcf 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,15 @@
 # pleroma-ebooks
 
-this is a very slightly modified version of <https://github.com/ioistired/pleroma-ebooks>  
-current changes: removes any generated @'s in replies to prevent accidentally mentioning somebody else on the same instance
+this is a very slightly modified version of <https://codeberg.org/autumn/pleroma-ebooks> which is modified from <https://github.com/ioistired/pleroma-ebooks>  
+
+_autumn's changes_: removes any generated @'s in replies to prevent accidentally mentioning somebody else on the same instance
+
+_smitten's changes_: adjusts CW filtering to be word-based regexp instead of exact match
 
 It's like [@AgathaSorceress's mstdn-ebooks] but it supports Pleroma better.
 
 [@AgathaSorceress's mstdn-ebooks]: https://github.com/AgathaSorceress/mstdn-ebooks
 
-## Secure Fetch
-Secure fetch (aka authorised fetches, authenticated fetches, secure mode...) is *not* supported by pleroma-ebooks, and will fail to download any posts from users on instances with secure fetch enabled. For more information, see [this wiki page](https://github.com/Lynnesbian/mstdn-ebooks/wiki/Secure-fetch).
-
-
 ## How to Use
 1. Create your bot account on the server.
 2. Follow the user(s) you want to base the model on.
@@ -18,6 +17,10 @@ Secure fetch (aka authorised fetches, authenticated fetches, secure mode...) is
 4. Copy `config.defaults.json` to `config.json` and set as `access_token`. Make any other config tweaks you'd like.
 5. Run `fetch_posts.py` to collect the posts from the followed user(s).
 6. Run `gen.py` to generate the sentence and write it to the server.
+7. (Optional) Set up cron (or another scheduler) for periodic runs, and run `reply.py` in the background.
+
+## Secure Fetch
+Secure fetch (aka authorised fetches, authenticated fetches, secure mode...) is *not* supported by pleroma-ebooks, and will fail to download any posts from users on instances with secure fetch enabled. For more information, see [this wiki page](https://github.com/Lynnesbian/mstdn-ebooks/wiki/Secure-fetch).
 
 ## Compatibility
 | Software  | Downloading statuses                                              | Posting | Replying                                                    |
diff --git a/generators/markov.py b/generators/markov.py
index 4352c5e..a777f90 100644
--- a/generators/markov.py
+++ b/generators/markov.py
@@ -11,8 +11,8 @@ def make_sentence(cfg):
 
 	db = sqlite3.connect(cfg["db_path"])
 	db.text_factory = str
+	p = regex.compile(r"\L<words>", words=cfg["ignored_cws"],flags=regex.IGNORECASE)
 	def cw_regexp(x):
-		p = regex.compile(r"\L<words>", words=cfg["ignored_cws"],flags=regex.IGNORECASE)
 		return 1 if p.search(x) else 0
 	db.create_function('cwregexp', 1, cw_regexp)
 	c = db.cursor()

From eacfe97d9813dd9c728991463910d6dbd328a981 Mon Sep 17 00:00:00 2001
From: Yuki <yuki@possum.city>
Date: Thu, 21 Mar 2024 00:51:37 +0100
Subject: [PATCH 10/10] fix some things for sharkey support

---
 .gitignore     | 5 +++++
 fetch_posts.py | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/.gitignore b/.gitignore
index 7b8cac2..b8a7f3f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,3 +19,8 @@ __pycache__/
 !*.defaults.json
 venv/
 .venv/
+
+bin
+lib
+lib64
+pyenv.cfg
diff --git a/fetch_posts.py b/fetch_posts.py
index 48859d8..d5eaa2f 100755
--- a/fetch_posts.py
+++ b/fetch_posts.py
@@ -128,6 +128,11 @@ class PostFetcher:
 
 		obj = activity['object']
 
+		try:
+			obj['summary']
+		except KeyError:
+			obj['summary'] = None
+
 		await self._db.execute(
 			"""
 			INSERT INTO posts (post_id, summary, content, published_at)