import httpSignature from "@peertube/http-signature";
import path from "node:path";
import fs from "node:fs";

const FRIENDLY_USERAGENT = "PossumBot/1.0 (+https://bot.possum.city/)";

// Accept header used whenever an ActivityStreams (AS2) document is requested.
const ACTIVITY_ACCEPT =
  'application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"';

// Capture group 2 is the bare URL; groups 1 and 3 capture optional `||` / `)` wrappers around it.
const URLS_REGEX = /(?:\s|^|\]\()(\|\|\s*)?(https?:\/\/[^\s<]+[^<.,:;"'\]|)\s])(\s*\)?\|\||\s*[\S]*?\))?/g;

// Pathname patterns of post URLs, keyed by service name.
const PATH_REGEX = {
  mastodon: /^\/@(.+?)\/(\d+)\/?/,
  mastodon2: /^\/(.+?)\/statuses\/\d+\/?/,
  pleroma: /^\/objects\/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\/?/,
  pleroma2: /^\/notice\/[A-Za-z0-9]+\/?/,
  misskey: /^\/notes\/[a-z0-9]+\/?/,
  gotosocial: /^\/@(.+?)\/statuses\/[0-9A-Z]+\/?/,
  lemmy: /^\/post\/\d+\/?/,
  honk: /^\/u\/(.+?)\/h\/(.+?)\/?/,
  cohost: /^\/[A-Za-z0-9]+\/post\/\d+-[A-Za-z0-9-]+\/?/,
};

// Media kinds that are turned into file entries.
const MEDIA_KINDS = ["image", "video", "audio"];

// hostname -> software name, or null when the lookup failed.
const domainCache = new Map();
domainCache.set("cohost.org", "cohost"); // no nodeinfo

/**
 * Tells whether a type string names an image, video or audio resource.
 *
 * @param {string} type - MIME type ("image/png") or bare kind ("image").
 * @param {boolean} [requireSlash=true] - When true the kind must be followed by "/".
 * @returns {boolean}
 */
function isMediaType(type, requireSlash = true) {
  return MEDIA_KINDS.some((kind) => type.startsWith(requireSlash ? `${kind}/` : kind));
}

/**
 * Issues a HEAD request and returns the Content-Type response header.
 * A failed request is logged and reported as null so callers can fall back
 * to the type declared on the attachment instead of aborting the embed.
 *
 * @param {string} url
 * @returns {Promise<string|null>}
 */
async function fetchContentType(url) {
  try {
    const res = await fetch(url, {method: "HEAD"});
    return res.headers.get("Content-Type");
  } catch (err) {
    console.error("[fedimbed]", `Failed to HEAD "${url}": ${err}`);
    return null;
  }
}

/**
 * Builds a file entry for an attachment that carries no usable media type of its own.
 * The HEAD Content-Type is preferred; the lowercased `attachment.type` is the fallback.
 *
 * @param {object} attachment - Attachment object with at least a `url`.
 * @param {string|undefined} desc - Description to attach to the file entry.
 * @returns {Promise<{url: string, desc: string|undefined, type: string}|null>}
 *   null when the attachment is not an image, video or audio resource.
 */
async function probeAttachment(attachment, desc) {
  const contentType = await fetchContentType(attachment.url);
  if (contentType) {
    return isMediaType(contentType) ? {url: attachment.url, desc, type: contentType} : null;
  }

  const type = attachment.type?.toLowerCase() ?? "";
  return isMediaType(type, false) ? {url: attachment.url, desc, type} : null;
}

/**
 * Turns a nodeinfo software name into the label shown in the embed footer.
 *
 * @param {string} platform - Software name; may be an empty string.
 * @returns {string}
 */
function formatPlatformName(platform) {
  return platform
    .replace("gotosocial", "GoToSocial")
    .replace("birdsitelive", '"Twitter" (BirdsiteLive)')
    .replace(/^(.)/, (_, c) => c.toUpperCase())
    .replace("Cohost", "cohost");
}

/**
 * Resolves the software name of the instance hosting `url` via nodeinfo.
 * Results (including failures, stored as null) are cached per hostname.
 *
 * @param {string} url - Any absolute URL on the instance.
 * @returns {Promise<string|null>} Software name, or null when it cannot be determined.
 * @throws When `url` is not a valid URL or a nodeinfo request itself fails.
 */
async function resolvePlatform(url) {
  const {hostname, origin} = new URL(url);
  if (domainCache.has(hostname)) return domainCache.get(hostname);

  const markUnknown = (message) => {
    console.error("[fedimbed]", message);
    domainCache.set(hostname, null);
    return null;
  };

  const discoveryRes = await fetch(origin + "/.well-known/nodeinfo", {
    headers: {"User-Agent": FRIENDLY_USERAGENT},
  });
  const discoveryText = await discoveryRes.text();

  let probe;
  if (discoveryText.startsWith("{")) {
    try {
      probe = JSON.parse(discoveryText);
    } catch {
      // Body only looked like JSON; handled as "no nodeinfo" below.
      probe = undefined;
    }
  }

  // The last advertised link is used; an empty or malformed list counts as "no nodeinfo".
  const links = probe?.links;
  const nodeinfoUrl = Array.isArray(links) ? links[links.length - 1]?.href : undefined;
  if (!nodeinfoUrl) return markUnknown(`No nodeinfo for "${hostname}"???`);

  const nodeinfoRes = await fetch(nodeinfoUrl, {
    headers: {"User-Agent": FRIENDLY_USERAGENT},
  });
  const nodeinfo = await nodeinfoRes.json();

  const softwareName = nodeinfo?.software?.name;
  if (!softwareName) {
    return markUnknown(`Got nodeinfo for "${hostname}", but missing software name.`);
  }

  domainCache.set(hostname, softwareName);
  return softwareName;
}

const keyId = "https://" + process.env.AP_FETCH_DOMAIN + "/actor#main-key";
const privKey = fs.readFileSync("data/private.pem");

/**
 * fetch() wrapper that adds an HTTP Signature over (request-target), host and date.
 * The caller's `options` object is left untouched; caller-supplied headers win
 * over the generated ones on key collisions.
 *
 * @param {string} url - Absolute URL to request.
 * @param {object} [options] - fetch() options (`method`, `headers`, `body`, ...).
 * @returns {Promise<Response>}
 */
async function signedFetch(url, options = {}) {
  const urlObj = new URL(url);
  const headers = {
    host: urlObj.host,
    date: new Date().toUTCString(),
  };
  const headerNames = ["(request-target)", "host", "date"];

  httpSignature.sign(
    {
      getHeader: (name) => headers[name.toLowerCase()],
      setHeader: (name, value) => (headers[name] = value),
      method: options.method ?? "GET",
      // The signed request-target has to match what is sent, query string included.
      path: urlObj.pathname + urlObj.search,
    },
    {
      keyId,
      key: privKey,
      headers: headerNames,
      authorizationHeaderName: "signature",
    }
  );

  return await fetch(url, {
    ...options,
    headers: Object.assign(headers, options.headers ?? {}),
  });
}

/**
 * Fetches a fediverse post and converts it into embed data.
 *
 * The post is first requested as an ActivityStreams document (signed, then unsigned).
 * If that yields no JSON, a Mastodon-compatible or Misskey API endpoint derived from
 * the URL is tried instead.
 *
 * @param {string} url - URL of the post.
 * @returns {Promise<object>} `{content, embeds, files, emotes}` on success, `{}` when the
 *   URL is invalid or no author could be determined, or the remote JSON body itself
 *   when the ActivityStreams response carries an `error` property.
 */
async function processUrl(url) {
  let spoiler = false;

  let urlObj;
  try {
    urlObj = new URL(url);
  } catch (err) {
    console.error("[fedimbed]", err);
    return {};
  }

  // some lemmy instances have old reddit frontend subdomains
  // but these frontends are just frontends and dont actually expose the API
  if (urlObj.hostname.startsWith("old.")) {
    urlObj.hostname = urlObj.hostname.replace("old.", "");
    url = urlObj.href;
  }

  let platform = (await resolvePlatform(url)) ?? "";
  let platformName = formatPlatformName(platform);

  const files = [];
  let content;
  let cw;
  let author;
  let timestamp;
  let title;
  let poll;
  let emotes = [];
  let sensitive = false;

  // Fetch post
  const activityHeaders = {
    "User-Agent": FRIENDLY_USERAGENT,
    Accept: ACTIVITY_ACCEPT,
  };

  let rawPostData;
  try {
    const res = await signedFetch(url, {headers: {...activityHeaders}});
    rawPostData = await res.text();
  } catch (err) {
    console.error("[fedimbed]", `Failed to signed fetch "${url}", retrying unsigned: ${err}`);
  }
  if (!rawPostData) {
    try {
      const res = await fetch(url, {headers: {...activityHeaders}});
      rawPostData = await res.text();
    } catch (err) {
      console.error("[fedimbed]", `Failed to fetch "${url}": ${err}`);
    }
  }

  let postData;
  if (rawPostData?.startsWith("{")) {
    try {
      postData = JSON.parse(rawPostData);
    } catch (err) {
      console.error("[fedimbed]", `Failed to decode JSON for "${url}": ${err}\n "${rawPostData}"`);
    }
  } else {
    console.warn("[fedimbed]", `Got non-JSON for "${url}": ${rawPostData}`);
  }

  if (postData?.error) {
    console.error("[fedimbed]", `Received error for "${url}": ${postData.error}`);
    console.error("[fedimbed]", postData);
    return postData;
  }

  if (!postData) {
    // We failed to get post.
    // Assume it was due to AFM or forced HTTP signatures and use MastoAPI

    // Follow redirect from /object since we need the ID from /notice
    if (PATH_REGEX.pleroma.test(urlObj.pathname)) {
      const res = await signedFetch(url, {
        method: "HEAD",
        headers: {
          "User-Agent": FRIENDLY_USERAGENT,
        },
        redirect: "manual",
      });
      const location = res.headers.get("location");
      // Without a Location header there is nothing to follow; keep the original URL.
      if (location) {
        url = location.startsWith("/") ? urlObj.origin + location : location;
        urlObj = new URL(url);
      }
    }

    let redirUrl;
    const options = {};
    const headers = {};
    if (PATH_REGEX.pleroma2.test(urlObj.pathname)) {
      // Rewrite only the path so a hostname containing "notice" is left alone.
      redirUrl = urlObj.origin + urlObj.pathname.replace("/notice/", "/api/v1/statuses/");
    } else if (PATH_REGEX.mastodon.test(urlObj.pathname)) {
      const postId = urlObj.pathname.match(PATH_REGEX.mastodon)?.[2];
      redirUrl = urlObj.origin + "/api/v1/statuses/" + postId;
    } else if (PATH_REGEX.mastodon2.test(urlObj.pathname)) {
      // The pattern is anchored at the start of the path, so it must run on the pathname.
      redirUrl = urlObj.origin + urlObj.pathname.replace(/^\/(.+?)\/statuses/, "/api/v1/statuses");
    } else if (PATH_REGEX.misskey.test(urlObj.pathname)) {
      const noteId = urlObj.pathname.match(/^\/notes\/([a-z0-9]+)/)?.[1];
      console.log("[fedimbed]", "Misskey post ID: " + noteId);
      redirUrl = urlObj.origin + "/api/notes/show/";
      options.method = "POST";
      options.body = JSON.stringify({noteId});
      headers["Content-Type"] = "application/json";
    } else {
      console.error("[fedimbed]", `Missing MastoAPI replacement for "${platform}"`);
    }

    if (redirUrl) {
      console.log(
        "[fedimbed]",
        `Redirecting "${url}" to "${redirUrl}": ${JSON.stringify(options)}, ${JSON.stringify(headers)}`
      );

      const request = {
        ...options,
        headers: {...headers, "User-Agent": FRIENDLY_USERAGENT},
      };

      let rawPostData2;
      try {
        const res = await signedFetch(redirUrl, request);
        rawPostData2 = await res.text();
      } catch (err) {
        console.error("[fedimbed]", `Failed to signed fetch "${url}" via MastoAPI, retrying unsigned: ${err}`);
      }
      if (!rawPostData2) {
        try {
          // Unsigned retry: plain fetch, not signedFetch.
          const res = await fetch(redirUrl, request);
          rawPostData2 = await res.text();
        } catch (err) {
          console.error("[fedimbed]", `Failed to fetch "${url}" via MastoAPI: ${err}`);
        }
      }

      let postData2;
      if (rawPostData2?.startsWith("{")) {
        try {
          postData2 = JSON.parse(rawPostData2);
        } catch (err) {
          console.error("[fedimbed]", `Failed to decode JSON for "${url}" via MastoAPI: ${err}`);
        }
      } else {
        console.warn("[fedimbed]", `Got non-JSON for "${url}" via MastoAPI: ${rawPostData2}`);
      }

      if (!postData2) {
        console.warn("[fedimbed]", `Bailing trying to re-embed "${url}": Failed to get post from normal and MastoAPI.`);
      } else if (postData2.error) {
        console.error(
          "[fedimbed]",
          `Bailing trying to re-embed "${url}", MastoAPI gave us error: ${JSON.stringify(postData2.error)}`
        );
      } else {
        cw = postData2.spoiler_warning ?? postData2.spoiler_text ?? postData2.cw;
        content =
          postData2.akkoma?.source?.content ??
          postData2.pleroma?.content?.["text/plain"] ??
          postData2.text ??
          postData2.content;

        const username = postData2.account?.username ?? postData2.user?.username;
        author = {
          name:
            postData2.account?.display_name ??
            postData2.account?.username ??
            postData2.user?.name ??
            postData2.user?.username,
          handle: postData2.account?.fqn ?? `${username}@${urlObj.hostname}`,
          url: postData2.account?.url ?? `${urlObj.origin}/@${username}`,
          avatar: postData2.account?.avatar ?? postData2.user?.avatarUrl,
        };
        timestamp = postData2.created_at ?? postData2.createdAt;

        // `emojis` is only consumed when it is an array; anything else yields no emotes.
        const emojiList = Array.isArray(postData2.emojis) ? postData2.emojis : [];
        emotes = emojiList
          .filter((x) => !x.name.endsWith("@."))
          .map((x) => ({name: `:${x.name}:`, url: x.url}));
        sensitive = postData2.sensitive;

        const attachments = postData2.media_attachments ?? postData2.files;
        if (attachments) {
          for (const attachment of attachments) {
            const file = await probeAttachment(attachment, attachment.description ?? attachment.comment);
            if (file) files.push(file);
          }
        }

        if (postData2.sensitive && attachments?.length > 0) {
          spoiler = true;
        }

        if (postData2.poll) {
          poll = {
            end: new Date(postData2.poll.expires_at),
            total: postData2.poll.votes_count,
            options: postData2.poll.options.map((o) => ({
              name: o.title,
              count: o.votes_count,
            })),
          };
        }
      }
    }
  } else {
    if (postData.id) {
      const realUrlObj = new URL(postData.id);
      if (realUrlObj.origin !== urlObj.origin) {
        // The post lives on another instance; report that instance's software instead.
        platform = (await resolvePlatform(postData.id)) ?? "";
        platformName = formatPlatformName(platform);
        url = postData.id;
      }
    }

    content = postData._misskey_content ?? postData.source?.content ?? postData.content;
    cw = postData.summary;
    timestamp = postData.published;
    sensitive = postData.sensitive;

    if (postData.tag) {
      // gts moment
      const tags = Array.isArray(postData.tag) ? postData.tag : [postData.tag];
      emotes = tags.filter((x) => !!x.icon).map((x) => ({name: x.name, url: x.icon.url}));
    }

    // NB: gts doesnt send singular attachments as array
    const attachments = (Array.isArray(postData.attachment) ? postData.attachment : [postData.attachment]).filter(
      Boolean
    );
    for (const attachment of attachments) {
      const desc = attachment.name ?? attachment.description ?? attachment.comment;
      if (attachment.mediaType) {
        if (isMediaType(attachment.mediaType)) {
          files.push({
            url: attachment.url,
            desc,
            type: attachment.mediaType,
          });
        }
      } else if (attachment.url) {
        const file = await probeAttachment(attachment, desc);
        if (file) files.push(file);
      } else {
        console.warn("[fedimbed]", `Unhandled attachment structure! ${JSON.stringify(attachment)}`);
      }
    }

    // Only real attachments count; a missing `attachment` must not mark the post as spoilered.
    if (postData.sensitive && attachments.length > 0) {
      spoiler = true;
    }

    if (postData.image?.url) {
      const imageUrl = new URL(postData.image.url);
      const contentType = await fetchContentType(postData.image.url);
      files.push({
        url: postData.image.url,
        desc: "",
        // Fall back to a type guessed from the file extension.
        type: contentType ?? "image/" + imageUrl.pathname.substring(imageUrl.pathname.lastIndexOf(".") + 1),
      });
    }

    if (postData.name) title = postData.name;

    // Author data is not sent with the post with AS2
    const authorUrl = postData.actor ?? postData.attributedTo;
    let authorData;
    try {
      const res = await signedFetch(authorUrl, {headers: {...activityHeaders}});
      authorData = await res.json();
    } catch (err) {
      // only posts can be activity+json right now, reduce log spam
      if (platform !== "cohost") console.error("[fedimbed]", `Failed to get author for "${url}": ${err}`);
    }

    if (authorData) {
      const authorUrlObj = new URL(authorData.url ?? authorData.id);
      author = {
        name: authorData.name,
        handle: `${authorData.preferredUsername}@${authorUrlObj.hostname}`,
        url: authorData.url,
        avatar: authorData.icon?.url,
      };
    } else {
      // bootleg author, mainly for cohost
      const authorUrlObj = new URL(authorUrl);
      const name = authorUrlObj.pathname.substring(authorUrlObj.pathname.lastIndexOf("/") + 1);
      author = {
        name,
        handle: `${name}@${authorUrlObj.hostname}`,
        url: authorUrl,
      };
    }

    if (postData.endTime && postData.oneOf && postData.votersCount) {
      poll = {
        end: new Date(postData.endTime),
        total: postData.votersCount,
        options: postData.oneOf.map((o) => ({
          name: o.name,
          count: o.replies.totalItems,
        })),
      };
    }
  }

  // We could just continue without author but it'd look ugly and be confusing.
  if (!author) {
    console.warn("[fedimbed]", `Bailing trying to re-embed "${url}": Failed to get author.`);
    return {};
  }

  // Start constructing embed
  content = content ?? "";
  cw = cw ?? "";

  //content = htmlToMarkdown(content);
  //for(const emote of emotes) {
  //  content = content.replaceAll(emote.name, `${emote.name}`);
  //}
  //cw = htmlToMarkdown(cw);

  let desc = cw;
  if ((cw !== "" || sensitive) && files.length) {
    // NOTE(review): the empty strings around `content` look like leftovers of lost markup - confirm.
    desc += "" + content + "";
  } else {
    desc = content;
  }

  const user = author.name ? `${author.name} (${author.handle})` : author.handle;

  const baseEmbed = {
    url,
    timestamp,
    description: desc,
    title: title ?? user,
    author: title
      ? {
          name: user,
          url: author.url,
        }
      : null,
    footer: {
      text: platformName,
    },
    thumbnail: {
      url: author.avatar,
    },
    fields: [],
  };

  if (poll) {
    const barWidth = 30;
    const lines = poll.options.map((o) => {
      // A zero or missing total would give NaN; treat it as 0%.
      const ratio = o.count / poll.total;
      const percent = Number.isFinite(ratio) ? ratio : 0;
      // Clamp so String.prototype.repeat never receives a negative count.
      const bar = Math.min(barWidth, Math.max(0, Math.round(percent * barWidth)));
      return `**${o.name}** (${o.count}, ${Math.round(percent * 100)}%)\n\`[${"=".repeat(bar)}${" ".repeat(
        barWidth - bar
      )}]\``;
    });

    baseEmbed.fields.push({
      name: "Poll",
      // NOTE(review): nothing follows "Ends " and `poll.end` is never rendered - confirm the intended text.
      value: lines.join("\n\n") + `\n\n${poll.total} votes \u2022 Ends `,
    });
  }

  const embeds = [baseEmbed];

  return {
    content: cw !== "" && files.length > 0 ? `:warning: ${cw} ${url}` : spoiler ? `${url}` : "",
    embeds,
    files,
    emotes,
  };
}

/**
 * Scans message text for URLs and builds embed data for the first one whose path
 * matches a known service pattern and can be processed.
 *
 * @param {string} msg - Message text.
 * @returns {Promise<object|undefined>} Result of processUrl for the first URL that was
 *   processed without throwing; undefined when no URL qualified.
 */
async function fedimbed(msg) {
  const text = String(msg ?? "");

  // matchAll works on a private copy of the regex, so the shared /g pattern keeps no state.
  for (const match of text.matchAll(URLS_REGEX)) {
    // Group 2 is the URL without surrounding `||`, `](` or `)` decorations.
    const url = match[2];

    let urlObj;
    try {
      urlObj = new URL(url);
    } catch {
      console.error("[fedimbed]", `Invalid URL "${url}"`);
      continue;
    }

    const service = Object.keys(PATH_REGEX).find((name) => PATH_REGEX[name].test(urlObj.pathname));
    if (!service) continue;

    console.log("[fedimbed]", `Hit "${service}" for "${url}", processing now.`);
    try {
      return await processUrl(url);
    } catch (err) {
      // Processing failed; move on to the next URL in the message.
      console.error("[fedimbed]", `Error processing "${url}":\n` + err.stack);
    }
  }

  return undefined;
}

export default fedimbed;