possumbot/lib/fedimbed.js
2024-08-27 15:30:14 +00:00

580 lines
18 KiB
JavaScript

import httpSignature from "@peertube/http-signature";
import path from "node:path";
import fs from "node:fs";
// User-Agent header value sent with the bot's outgoing HTTP requests.
const FRIENDLY_USERAGENT = "PossumBot/1.0 (+https://bot.possum.city/)";

// Global pattern that locates http(s) URLs inside a message.
//   group 1: optional leading "||" (plus whitespace)
//   group 2: the URL itself, never ending in trailing punctuation
//   group 3: optional trailing "||" or closing ")"
const URLS_REGEX = /(?:\s|^|\]\()(\|\|\s*)?(https?:\/\/[^\s<]+[^<.,:;"'\]|)\s])(\s*\)?\|\||\s*[\S]*?\))?/g;

// URL pathname shapes that identify a post, keyed by the software they are named after.
// Every pattern is anchored at the start of the pathname only.
const PATH_REGEX = {
  // "/@<user>/<numeric id>"
  mastodon: /^\/@(.+?)\/(\d+)\/?/,
  // "/<anything>/statuses/<numeric id>"
  mastodon2: /^\/(.+?)\/statuses\/\d+\/?/,
  // "/objects/<uuid>"
  pleroma: /^\/objects\/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\/?/,
  // "/notice/<alphanumeric id>"
  pleroma2: /^\/notice\/[A-Za-z0-9]+\/?/,
  // "/notes/<lowercase alphanumeric id>"
  misskey: /^\/notes\/[a-z0-9]+\/?/,
  // "/@<user>/statuses/<uppercase alphanumeric id>"
  gotosocial: /^\/@(.+?)\/statuses\/[0-9A-Z]+\/?/,
  // "/post/<numeric id>"
  lemmy: /^\/post\/\d+\/?/,
  // "/u/<user>/h/<id>"
  honk: /^\/u\/(.+?)\/h\/(.+?)\/?/,
  // "/<project>/post/<numeric id>-<slug>"
  cohost: /^\/[A-Za-z0-9]+\/post\/\d+-[A-Za-z0-9-]+\/?/,
};

// hostname -> software name (or null when detection failed), filled in lazily.
// cohost.org is seeded by hand: no nodeinfo
const domainCache = new Map([["cohost.org", "cohost"]]);
/**
 * Determine which server software runs on the host of `url` using NodeInfo.
 *
 * The discovery document at `/.well-known/nodeinfo` is fetched first, then the
 * last document it links to is fetched and its `software.name` is returned.
 * Results are remembered per hostname in `domainCache`, including failures
 * (stored as `null`), so each host is probed at most once.
 *
 * @param {string} url - Any absolute URL on the host to inspect.
 * @returns {Promise<string|null>} The software name, or `null` when the host
 *   exposes no usable nodeinfo.
 * @throws {TypeError} When `url` is not a valid absolute URL. Network errors
 *   from `fetch`, and an unparsable nodeinfo document, also propagate.
 */
async function resolvePlatform(url) {
  const {hostname, origin} = new URL(url);
  if(domainCache.has(hostname)) return domainCache.get(hostname);

  // Shared failure path: log, remember the negative result, report "unknown".
  const rememberFailure = (message) => {
    console.error("[fedimbed]", message);
    domainCache.set(hostname, null);
    return null;
  };
  const requestInit = () => ({headers: {"User-Agent": FRIENDLY_USERAGENT}});

  const discoveryRes = await fetch(`${origin}/.well-known/nodeinfo`, requestInit());
  const discoveryText = await discoveryRes.text();

  let probe;
  if(discoveryText.startsWith("{")) {
    try {
      probe = JSON.parse(discoveryText);
    } catch (err) {
      // A body that merely starts with "{" is not necessarily valid JSON.
      console.error("[fedimbed]", `Failed to decode nodeinfo discovery for "${hostname}": ${err}`);
    }
  }

  // `links` may be missing, empty, or not an array at all; none of those may crash us.
  const links = Array.isArray(probe?.links) ? probe.links : [];
  const nodeinfoHref = links[links.length - 1]?.href;
  if(!nodeinfoHref) {
    return rememberFailure(`No nodeinfo for "${hostname}"???`);
  }

  const nodeinfoRes = await fetch(nodeinfoHref, requestInit());
  const nodeinfo = await nodeinfoRes.json();
  const softwareName = nodeinfo?.software?.name;
  if(!softwareName) {
    return rememberFailure(`Got nodeinfo for "${hostname}", but missing software name.`);
  }

  domainCache.set(hostname, softwareName);
  return softwareName;
}
// Identifier of the signing key, handed to httpSignature.sign() as `keyId`.
// Built from the AP_FETCH_DOMAIN environment variable.
const keyId = `https://${process.env.AP_FETCH_DOMAIN}/actor#main-key`;
// Private key material, handed to httpSignature.sign() as `key`.
// Read synchronously while the module loads; a missing file throws here.
const privKey = fs.readFileSync("data/private.pem");
/**
 * `fetch` a URL with an HTTP Signature attached.
 *
 * A request-like shim is handed to `httpSignature.sign`, which signs the
 * `(request-target)`, `host` and `date` headers with the module's key and
 * writes the result into a `signature` header. Those headers are then merged
 * under any caller-supplied headers (caller values win) and sent with `fetch`.
 *
 * @param {string|URL} url - Absolute URL to request.
 * @param {object} [options] - `fetch` init options. Not modified.
 * @returns {Promise<Response>} The `fetch` response.
 * @throws {TypeError} When `url` is not a valid absolute URL. Errors from
 *   signing or from `fetch` propagate.
 */
async function signedFetch(url, options = {}) {
  const urlObj = new URL(url);
  const signedHeaders = {
    host: urlObj.host,
    date: new Date().toUTCString(),
  };
  const headerNames = ["(request-target)", "host", "date"];

  httpSignature.sign(
    {
      // Header names are case-insensitive, so both accessors normalise to lower case.
      getHeader: (name) => signedHeaders[name.toLowerCase()],
      setHeader: (name, value) => {
        signedHeaders[name.toLowerCase()] = value;
      },
      method: options.method ?? "GET",
      // The request target is the path *and* query string; signing only the
      // pathname makes the signature invalid for URLs that carry a query.
      path: urlObj.pathname + urlObj.search,
    },
    {
      keyId,
      key: privKey,
      headers: headerNames,
      authorizationHeaderName: "signature",
    }
  );

  // Build a fresh init object instead of mutating the caller's `options`.
  return await fetch(url, {
    ...options,
    headers: {...signedHeaders, ...options.headers},
  });
}
/**
 * Turn a nodeinfo software name into the label shown in the embed footer.
 *
 * @param {string} platform - Software name (or the "<no nodeinfo>" placeholder).
 * @returns {string} Display name with the first character upper-cased, except
 *   for the special-cased names handled below.
 */
function formatPlatformName(platform) {
  return platform
    .replace("gotosocial", "GoToSocial")
    .replace("birdsitelive", '"Twitter" (BirdsiteLive)')
    .replace(/^(.)/, (_, c) => c.toUpperCase())
    .replace("Cohost", "cohost");
}

/**
 * Issue a HEAD request and report the Content-Type header of the target.
 *
 * @param {string} fileUrl - URL of the remote file.
 * @returns {Promise<string|null>} Header value, or null when the header is absent.
 */
async function fetchContentType(fileUrl) {
  const res = await fetch(fileUrl, {method: "HEAD"});
  return res.headers.get("Content-Type");
}

/**
 * Fetch `target` as text, first with an HTTP signature and, when that fails or
 * yields an empty body, again with a plain unsigned `fetch`.
 *
 * @param {string} target - URL to request.
 * @param {object} init - `fetch` init options; copied for each attempt so the
 *   unsigned retry never carries headers added for the signed attempt.
 * @param {string} label - How to name the request in log messages.
 * @returns {Promise<string|undefined>} Response body, or undefined when both attempts failed.
 */
async function fetchTextSignedFirst(target, init, label) {
  const freshInit = () => ({...init, headers: {...init.headers}});
  let text;
  try {
    const res = await signedFetch(target, freshInit());
    text = await res.text();
  } catch (err) {
    console.error("[fedimbed]", `Failed to signed fetch ${label}, retrying unsigned: ${err}`);
  }
  if(!text) {
    try {
      const res = await fetch(target, freshInit());
      text = await res.text();
    } catch (err) {
      console.error("[fedimbed]", `Failed to fetch ${label}: ${err}`);
    }
  }
  return text;
}

/**
 * Build a `files` entry for an attachment that does not declare a media type
 * inline. The remote Content-Type decides when available; otherwise the
 * attachment's own lower-cased `type` is used.
 *
 * @param {object} attachment - Attachment object with at least a `url`.
 * @param {string|undefined} desc - Description / alt text to attach.
 * @returns {Promise<{url: string, desc: (string|undefined), type: string}|null>}
 *   The entry, or null when the attachment is not image, video or audio.
 */
async function describeProbedAttachment(attachment, desc) {
  const contentType = await fetchContentType(attachment.url);
  if(contentType) {
    const isMedia =
      contentType.startsWith("image/") || contentType.startsWith("video/") || contentType.startsWith("audio/");
    return isMedia ? {url: attachment.url, desc, type: contentType} : null;
  }

  // No Content-Type from the server: fall back on what the attachment says about itself.
  const type = typeof attachment.type === "string" ? attachment.type.toLowerCase() : undefined;
  if(type === undefined) return null;
  const isMedia = type.startsWith("image") || type.startsWith("video") || type.startsWith("audio");
  return isMedia ? {url: attachment.url, desc, type} : null;
}

/**
 * Fetch a fediverse post and convert it into embed data.
 *
 * The post is requested as ActivityStreams JSON first (signed, then unsigned).
 * When that yields no JSON, the instance's client API is tried instead
 * (Mastodon-style `/api/v1/statuses/<id>` or Misskey `/api/notes/show/`).
 *
 * @param {string} url - URL of the post.
 * @returns {Promise<object>} One of:
 *   - `{}` when the URL is invalid or no author could be determined;
 *   - the raw remote JSON when it carries an `error` property;
 *   - `{content, embeds, files, emotes}` on success.
 */
async function processUrl(url) {
  let urlObj;
  try {
    urlObj = new URL(url);
  } catch(err) {
    console.error("[fedimbed]", err);
    return {};
  }

  // some lemmy instances have old reddit frontend subdomains
  // but these frontends are just frontends and dont actually expose the API
  if(urlObj.hostname.startsWith("old.")) {
    urlObj.hostname = urlObj.hostname.replace("old.", "");
    url = urlObj.href;
  }

  let platform = (await resolvePlatform(url)) ?? "<no nodeinfo>";
  let platformName = formatPlatformName(platform);

  const ACTIVITY_ACCEPT = 'application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"';

  const files = [];
  let content;
  let cw;
  let author;
  let timestamp;
  let title;
  let poll;
  let emotes = [];
  let sensitive = false;
  let spoiler = false;

  // Fetch post
  const rawPostData = await fetchTextSignedFirst(
    url,
    {headers: {"User-Agent": FRIENDLY_USERAGENT, Accept: ACTIVITY_ACCEPT}},
    `"${url}"`
  );

  let postData;
  if(rawPostData?.startsWith("{")) {
    try {
      postData = JSON.parse(rawPostData);
    } catch (err) {
      console.error("[fedimbed]", `Failed to decode JSON for "${url}": ${err}\n "${rawPostData}"`);
    }
  } else {
    console.warn("[fedimbed]", `Got non-JSON for "${url}": ${rawPostData}`);
  }

  if(postData?.error) {
    console.error("[fedimbed]", `Received error for "${url}": ${postData.error}`);
    console.error("[fedimbed]", postData);
    // The remote error object is handed back to the caller unchanged.
    return postData;
  }

  if(!postData) {
    // We failed to get post.
    // Assume it was due to AFM or forced HTTP signatures and use MastoAPI

    // Follow redirect from /object since we need the ID from /notice
    if(PATH_REGEX.pleroma.test(urlObj.pathname)) {
      const location = await signedFetch(url, {
        method: "HEAD",
        headers: {
          "User-Agent": FRIENDLY_USERAGENT,
        },
        redirect: "manual",
      }).then((res) => res.headers.get("location"));

      if(location) {
        url = location.startsWith("/") ? urlObj.origin + location : location;
        urlObj = new URL(url);
      } else {
        // No redirect target: keep the original URL and let the lookup below report the miss.
        console.warn("[fedimbed]", `No redirect location for "${url}", cannot resolve notice ID.`);
      }
    }

    let redirUrl;
    const options = {};
    const headers = {};
    if(PATH_REGEX.pleroma2.test(urlObj.pathname)) {
      // Rewrite the path only, so a hostname containing "notice" is never touched.
      redirUrl = urlObj.origin + urlObj.pathname.replace(/^\/notice\//, "/api/v1/statuses/") + urlObj.search;
    } else if(PATH_REGEX.mastodon.test(urlObj.pathname)) {
      const postId = urlObj.pathname.match(PATH_REGEX.mastodon)?.[2];
      redirUrl = urlObj.origin + "/api/v1/statuses/" + postId;
    } else if(PATH_REGEX.mastodon2.test(urlObj.pathname)) {
      // The pattern is anchored at the start of the *path*, so it must run on the pathname.
      redirUrl = urlObj.origin + urlObj.pathname.replace(/^\/(.+?)\/statuses/, "/api/v1/statuses") + urlObj.search;
    } else if(PATH_REGEX.misskey.test(urlObj.pathname)) {
      // pathname carries neither query nor fragment, so one split on "/" isolates the ID.
      const noteId = urlObj.pathname.split("/notes/")[1].split("/")[0];
      console.log("[fedimbed]", "Misskey post ID: " + noteId);
      redirUrl = urlObj.origin + "/api/notes/show/";
      options.method = "POST";
      options.body = JSON.stringify({noteId});
      headers["Content-Type"] = "application/json";
    } else {
      console.error("[fedimbed]", `Missing MastoAPI replacement for "${platform}"`);
    }

    if(redirUrl) {
      console.log(
        "[fedimbed]",
        `Redirecting "${url}" to "${redirUrl}": ${JSON.stringify(options)}, ${JSON.stringify(headers)}`
      );

      // Signed attempt first, then a genuinely unsigned retry.
      const rawPostData2 = await fetchTextSignedFirst(
        redirUrl,
        {...options, headers: {...headers, "User-Agent": FRIENDLY_USERAGENT}},
        `"${url}" via MastoAPI`
      );

      let postData2;
      if(rawPostData2?.startsWith("{")) {
        try {
          postData2 = JSON.parse(rawPostData2);
        } catch (err) {
          console.error("[fedimbed]", `Failed to decode JSON for "${url}" via MastoAPI: ${err}`);
        }
      } else {
        console.warn("[fedimbed]", `Got non-JSON for "${url}" via MastoAPI: ${rawPostData2}`);
      }

      if(!postData2) {
        console.warn("[fedimbed]", `Bailing trying to re-embed "${url}": Failed to get post from normal and MastoAPI.`);
      } else if(postData2.error) {
        console.error(
          "[fedimbed]",
          `Bailing trying to re-embed "${url}", MastoAPI gave us error: ${JSON.stringify(postData2.error)}`
        );
      } else {
        cw = postData2.spoiler_warning ?? postData2.spoiler_text ?? postData2.cw;
        content =
          postData2.akkoma?.source?.content ??
          postData2.pleroma?.content?.["text/plain"] ??
          postData2.text ??
          postData2.content;

        const username = postData2.account?.username ?? postData2.user?.username;
        author = {
          name:
            postData2.account?.display_name ??
            postData2.account?.username ??
            postData2.user?.name ??
            postData2.user?.username,
          handle: postData2.account?.fqn ?? `${username}@${urlObj.hostname}`,
          url: postData2.account?.url ?? `${urlObj.origin}/@${username}`,
          avatar: postData2.account?.avatar ?? postData2.user?.avatarUrl,
        };
        timestamp = postData2.created_at ?? postData2.createdAt;

        // `emojis` is not guaranteed to be present or to be an array.
        if(Array.isArray(postData2.emojis)) {
          emotes = postData2.emojis
            .filter((x) => !x.name.endsWith("@."))
            .map((x) => ({name: `:${x.name}:`, url: x.url}));
        }
        sensitive = postData2.sensitive;

        const attachments = postData2.media_attachments ?? postData2.files;
        if(attachments) {
          for(const attachment of attachments) {
            const file = await describeProbedAttachment(attachment, attachment.description ?? attachment.comment);
            if(file) files.push(file);
          }
        }
        if(postData2.sensitive && (attachments?.length ?? 0) > 0) {
          spoiler = true;
        }

        if(postData2.poll) {
          poll = {
            end: new Date(postData2.poll.expires_at),
            total: postData2.poll.votes_count,
            options: postData2.poll.options.map((o) => ({
              name: o.title,
              count: o.votes_count,
            })),
          };
        }
      }
    }
  } else {
    if(postData.id) {
      const realUrlObj = new URL(postData.id);
      if(realUrlObj.origin !== urlObj.origin) {
        // The canonical post lives on another host; that host may have no nodeinfo either.
        platform = (await resolvePlatform(postData.id)) ?? "<no nodeinfo>";
        platformName = formatPlatformName(platform);
        url = postData.id;
      }
    }

    content = postData._misskey_content ?? postData.source?.content ?? postData.content;
    cw = postData.summary;
    timestamp = postData.published;
    sensitive = postData.sensitive;

    if(postData.tag) {
      // gts moment
      const tag = Array.isArray(postData.tag) ? postData.tag : [postData.tag];
      emotes = tag.filter((x) => !!x.icon).map((x) => ({name: x.name, url: x.icon.url}));
    }

    // NB: gts doesnt send singular attachments as array
    // Empty slots are dropped so a post without media is not counted as having attachments.
    const attachments = (Array.isArray(postData.attachment) ? postData.attachment : [postData.attachment]).filter(
      Boolean
    );
    for(const attachment of attachments) {
      const desc = attachment.name ?? attachment.description ?? attachment.comment;
      if(attachment.mediaType) {
        const mediaType = attachment.mediaType;
        if(mediaType.startsWith("video/") || mediaType.startsWith("image/") || mediaType.startsWith("audio/")) {
          files.push({
            url: attachment.url,
            desc,
            type: mediaType,
          });
        }
      } else if(attachment.url) {
        const file = await describeProbedAttachment(attachment, desc);
        if(file) files.push(file);
      } else {
        console.warn("[fedimbed]", `Unhandled attachment structure! ${JSON.stringify(attachment)}`);
      }
    }
    if(postData.sensitive && attachments.length > 0) {
      spoiler = true;
    }

    if(postData.image?.url) {
      const imageUrl = new URL(postData.image.url);
      const contentType = await fetchContentType(postData.image.url);
      files.push({
        url: postData.image.url,
        desc: "",
        // Without a Content-Type, guess the subtype from the file extension.
        type: contentType ?? "image/" + imageUrl.pathname.substring(imageUrl.pathname.lastIndexOf(".") + 1),
      });
    }

    if(postData.name) title = postData.name;

    // Author data is not sent with the post with AS2
    const authorUrl = postData.actor ?? postData.attributedTo;
    const authorData = await signedFetch(authorUrl, {
      headers: {
        "User-Agent": FRIENDLY_USERAGENT,
        Accept: ACTIVITY_ACCEPT,
      },
    })
      .then((res) => res.json())
      .catch((err) => {
        // only posts can be activity+json right now, reduce log spam
        if(platform !== "cohost") console.error("[fedimbed]", `Failed to get author for "${url}": ${err}`);
      });

    if(authorData) {
      const authorHome = authorData.url ?? authorData.id;
      const authorUrlObj = new URL(authorHome);
      author = {
        name: authorData.name,
        handle: `${authorData.preferredUsername}@${authorUrlObj.hostname}`,
        // Same fallback as the hostname lookup above, so the link is never undefined.
        url: authorHome,
        avatar: authorData.icon?.url,
      };
    } else {
      // bootleg author, mainly for cohost
      const authorUrlObj = new URL(authorUrl);
      const name = authorUrlObj.pathname.substring(authorUrlObj.pathname.lastIndexOf("/") + 1);
      author = {
        name,
        handle: `${name}@${authorUrlObj.hostname}`,
        url: authorUrl,
      };
    }

    // `votersCount` of 0 is a valid poll, so test for presence rather than truthiness.
    if(postData.endTime && postData.oneOf && postData.votersCount != null) {
      poll = {
        end: new Date(postData.endTime),
        total: postData.votersCount,
        options: postData.oneOf.map((o) => ({
          name: o.name,
          count: o.replies?.totalItems ?? 0,
        })),
      };
    }
  }

  // We could just continue without author but it'd look ugly and be confusing.
  if(!author) {
    console.warn("[fedimbed]", `Bailing trying to re-embed "${url}": Failed to get author.`);
    return {};
  }

  // Start constructing embed
  content = content ?? "";
  cw = cw ?? "";

  // With a content warning (or sensitive flag) and media present, the body is hidden behind a spoiler.
  const hideBody = (cw !== "" || sensitive) && files.length > 0;
  const desc = hideBody ? cw + "<span data-mx-spoiler>" + content + "</span>" : content;

  const user = author.name ? `${author.name} (${author.handle})` : author.handle;

  const baseEmbed = {
    url,
    timestamp,
    description: desc,
    title: title ?? user,
    author: title
      ? {
          name: user,
          url: author.url,
        }
      : null,
    footer: {
      text: platformName,
    },
    thumbnail: {
      url: author.avatar,
    },
    fields: [],
  };

  if(poll) {
    const BAR_WIDTH = 30;
    const optionLines = poll.options.map((o) => {
      // A poll without votes would divide by zero; treat every share as 0 and
      // keep the bar inside its width so `repeat` never gets a negative count.
      const rawShare = o.count / poll.total;
      const share = Number.isFinite(rawShare) ? Math.min(Math.max(rawShare, 0), 1) : 0;
      const bar = Math.round(share * BAR_WIDTH);
      return `**${o.name}** (${o.count}, ${Math.round(share * 100)}%)\n\`[${"=".repeat(bar)}${" ".repeat(
        BAR_WIDTH - bar
      )}]\``;
    });
    baseEmbed.fields.push({
      name: "Poll",
      value:
        optionLines.join("\n\n") +
        `\n\n${poll.total} votes \u2022 Ends <t:${Math.floor(poll.end.getTime() / 1000)}:R>`,
    });
  }

  const embeds = [baseEmbed];

  let messageContent = "";
  if(cw !== "" && files.length > 0) {
    messageContent = `:warning: ${cw} <span data-mx-spoiler>${url}</span>`;
  } else if(spoiler) {
    messageContent = `<span data-mx-spoiler>${url}</span>`;
  }

  return {
    content: messageContent,
    embeds,
    files,
    emotes,
  };
}
/**
 * Scan a message for fediverse post URLs and build embed data for the first
 * one that can be processed.
 *
 * Every URL found by `URLS_REGEX` is checked against the `PATH_REGEX`
 * patterns; the first URL whose pathname matches one is handed to
 * `processUrl`. If processing throws, the error is logged and the next URL in
 * the message is tried.
 *
 * @param {string} msg - Message text to scan.
 * @returns {Promise<object|undefined>} Whatever `processUrl` resolved with, or
 *   undefined when the message holds no processable URL.
 */
async function fedimbed(msg) {
  if(typeof msg !== "string") return undefined;

  // matchAll starts at the regex's current lastIndex; make sure that is the beginning.
  URLS_REGEX.lastIndex = 0;

  for(const match of msg.matchAll(URLS_REGEX)) {
    // Group 2 is the bare URL. The full match also contains the leading
    // whitespace / "](" / "||" and any trailing "||" or ")", which would make
    // spoilered and markdown-linked URLs fail to parse.
    const url = match[2];

    let urlObj;
    try {
      urlObj = new URL(url);
    } catch {
      console.error("[fedimbed]", `Invalid URL "${url}"`);
      continue;
    }

    const service = Object.keys(PATH_REGEX).find((name) => PATH_REGEX[name].test(urlObj.pathname));
    if(!service) continue;

    console.log("[fedimbed]", `Hit "${service}" for "${url}", processing now.`);
    try {
      return await processUrl(url);
    } catch (err) {
      // Best effort: log and move on to the next URL in the message.
      console.error("[fedimbed]", `Error processing "${url}":\n` + err.stack);
    }
  }

  return undefined;
}
export default fedimbed;