# SPDX-License-Identifier: MPL-2.0

import random
import sqlite3

import markovify
import regex

def _fetch_posts(cfg):
	"""Return up to 10000 randomly chosen rows of post content to learn from.

	Reads ``cfg["db_path"]`` and ``cfg["learn_from_cw"]``; ``cfg["ignored_cws"]``
	is read only when ``learn_from_cw`` is truthy. Each returned row is a
	1-tuple ``(content,)``. The connection is always closed before returning,
	including when the query raises.
	"""
	db = sqlite3.connect(cfg["db_path"])
	try:
		db.text_factory = str
		if cfg['learn_from_cw']:
			# Posts with a content warning are allowed unless the warning
			# matches one of the ignored words (case-insensitive substring).
			ignored = regex.compile(r"\L<words>", words=cfg["ignored_cws"], flags=regex.IGNORECASE)

			def cw_regexp(summary):
				# Guard against NULL in case SQLite evaluates this operand
				# for rows that already satisfied `summary IS NULL`.
				return 1 if summary is not None and ignored.search(summary) else 0

			db.create_function('cwregexp', 1, cw_regexp)
			query = """
				SELECT content
				FROM posts
				WHERE
					summary IS NULL
					OR NOT CWREGEXP(summary)
				ORDER BY RANDOM() LIMIT 10000
				"""
		else:
			# Only learn from posts that carry no content warning at all.
			query = """
				SELECT content
				FROM posts
				WHERE summary IS NULL
				ORDER BY RANDOM()
				LIMIT 10000
				"""
		return db.cursor().execute(query).fetchall()
	finally:
		db.close()


def make_sentence(cfg):
	"""Generate one sentence from a Markov model built over stored posts.

	Config keys read from ``cfg``:
		db_path: path of the SQLite database holding the ``posts`` table.
		learn_from_cw: if truthy, also learn from posts whose content warning
			does not match any entry of ``ignored_cws``.
		ignored_cws: words to match against content warnings (only read when
			``learn_from_cw`` is truthy).
		overlap_ratio_enabled: if truthy, use ``markovify.NewlineText`` as-is
			and pass ``overlap_ratio`` as ``max_overlap_ratio``; otherwise use
			the subclass that accepts every input sentence, with 0.7.
		overlap_ratio: only read when ``overlap_ratio_enabled`` is truthy.
		limit_length: if truthy, cap the word count at a random value between
			``length_lower_limit`` and ``length_upper_limit`` (inclusive).

	Returns:
		The generated sentence (at most 500 characters).

	Raises:
		ValueError: if the query selects no posts, or if no sentence could be
			produced in 10 attempts.
	"""
	toots = _fetch_posts(cfg)
	if not toots:
		raise ValueError("Database is empty! Try running main.py.")

	class nlt_fixed(markovify.NewlineText):  # modified version of NewlineText that never rejects sentences
		def test_sentence_input(self, sentence):
			return True  # all sentences are valid <3

	nlt = markovify.NewlineText if cfg['overlap_ratio_enabled'] else nlt_fixed

	# TODO support replicating \n in output posts instead of squashing them together
	model = nlt("\n".join(toot[0].replace('\n', ' ') for toot in toots))

	# These do not change between attempts, so compute them once up front.
	max_words = None
	if cfg['limit_length']:
		max_words = random.randint(cfg['length_lower_limit'], cfg['length_upper_limit'])
	max_overlap_ratio = cfg['overlap_ratio'] if cfg['overlap_ratio_enabled'] else 0.7

	for _ in range(10):
		sentence = model.make_short_sentence(
			max_chars=500,
			tries=10000,
			max_overlap_ratio=max_overlap_ratio,
			max_words=max_words,
		)
		if sentence is not None:
			return sentence

	raise ValueError("Failed 10 times to produce a sentence!")