Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/events/spam-detection/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ import { rules } from "./rules-config.js";
// Largest `timeframe` found among the rules whose `type` is not "contentBased".
export const MAX_RULE_TIMEFRAME = Math.max(
  ...rules.flatMap((rule) => (rule.type === "contentBased" ? [] : [rule.timeframe]))
);

// Similarity cut-off for duplicate detection: `isDuplicate` in detectors.ts reports a duplicate
// when the `jaccardSimilarity` score of two messages is strictly greater than this value.
export const MESSAGE_SIMILARITY_THRESHOLD = 0.8;
14 changes: 11 additions & 3 deletions src/events/spam-detection/detectors.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { Message } from "discord.js";
import { replaceSpoilerHack, stripCode } from "../../utils/messages.js";
import { jaccardSimilarity, replaceSpoilerHack, stripCode } from "../../utils/messages.js";
import { MESSAGE_SIMILARITY_THRESHOLD } from "./constants.js";

export const containsLink = (message: Message): boolean => {
const withoutCode = stripCode(message.content);
Expand All @@ -24,11 +25,18 @@ export const containsSpoilerHack = (message: Message) => {
};

// Reports whether `message` repeats `oldMessage`, based on their lower-cased, trimmed contents.
// NOTE(review): this span comes from a rendered diff whose +/- markers were stripped. The
// one-line `return` directly below looks like the removed pre-change body, and the lines after
// it look like its replacement — confirm against the real file before relying on either.
export const isDuplicate = (message: Message, oldMessage: Message) => {
return message.content.toLowerCase().trim() === oldMessage.content.toLowerCase().trim();
// cheaper comparison first
const a = message.content.toLowerCase().trim();
const b = oldMessage.content.toLowerCase().trim();
if (a === b) {
return true;
}
// followed by jaccard for catching reordered/slightly altered messages with high similarity
// The comparison is strict: a score exactly equal to the threshold is not treated as a duplicate.
return jaccardSimilarity(a, b) > MESSAGE_SIMILARITY_THRESHOLD;
};

// Reports whether `message` is a duplicate of `oldMessage` that was sent in a different channel.
// NOTE(review): this span comes from a rendered diff whose +/- markers were stripped. The first
// `return` looks like the removed pre-change line and the second like its replacement — confirm
// against the real file.
export const isCrossPost = (message: Message, oldMessage: Message) => {
return isDuplicate(message, oldMessage) && message.channelId !== oldMessage.channelId;
// Channel comparison comes first here: `&&` short-circuits, so `isDuplicate` is only evaluated
// for message pairs that are in different channels.
return message.channelId !== oldMessage.channelId && isDuplicate(message, oldMessage);
};

/** Predicate that takes no arguments and always answers `true`. */
export const anyMessage = (): boolean => {
  return true;
};
35 changes: 34 additions & 1 deletion src/utils/messages.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import assert from "node:assert";
import { describe, it } from "node:test";
import { replaceSpoilerHack, stripCode, stripEmoji } from "./messages.js";
import { replaceSpoilerHack, stripCode, stripEmoji, jaccardSimilarity } from "./messages.js";

describe("utils/messages -> stripCode", () => {
it("should remove inline code blocks", () => {
Expand Down Expand Up @@ -71,3 +71,36 @@ describe("utils/messages -> replaceSpoilerHack", () => {
assert.strictEqual(actual, expected);
});
});

// Similarity scoring as used for crosspost detection. The suite title follows the
// "utils/messages -> <function>" convention of the other suites in this file.
describe("utils/messages -> jaccardSimilarity", () => {
  it("catches identical self-promotion spam", () => {
    const msg1 = "Check out my new portfolio website! Built with React and Tailwind";
    const msg2 = "Check out my new portfolio website! Built with React and Tailwind";
    const actual = jaccardSimilarity(msg1, msg2);

    assert.strictEqual(actual, 1);
  });

  it("catches copy-paste spam with minor punctuation differences", () => {
    const msg1 = "hey guys check out my new website!";
    const msg2 = "hey guys, check out my new website";
    const actual = jaccardSimilarity(msg1, msg2);

    assert.strictEqual(actual, 1);
  });

  it("catches reordered messages", () => {
    const msg1 = "I just launched my SaaS app! Check it out and let me know what you think";
    const msg2 = "Check it out and let me know what you think! I just launched my SaaS app";
    const actual = jaccardSimilarity(msg1, msg2);

    assert.strictEqual(actual, 1);
  });

  it("does not flag similar but different questions", () => {
    const msg1 = "How do I center a div in CSS?";
    const msg2 = "How do I align a div to the right in CSS?";
    // 7 shared words out of 12 distinct words => 0.5833333333333334
    const actual = jaccardSimilarity(msg1, msg2);

    assert.ok(actual > 0.5 && actual < 0.8);
  });

  it("scores unrelated messages as 0", () => {
    const actual = jaccardSimilarity("completely unrelated text", "nothing shared here");

    assert.strictEqual(actual, 0);
  });

  it("returns 0 when neither message contains any words", () => {
    // Covers the empty-union guard: empty strings and punctuation-only input.
    assert.strictEqual(jaccardSimilarity("", ""), 0);
    assert.strictEqual(jaccardSimilarity("!!!", "???"), 0);
  });
});
20 changes: 20 additions & 0 deletions src/utils/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,23 @@ export const stripEmoji = (content: string): string => content.replace(/:\w+:/g,
export function replaceSpoilerHack(messageContent: string | null, replacement = "[...]") {
return (messageContent ?? "").replace(/(\|\|\u200b\|\|)+/g, replacement);
}

/**
 * Jaccard index of the word sets of two texts: the number of distinct words the
 * texts share divided by the number of distinct words appearing in either one.
 * Word order, repetition, letter case and punctuation do not affect the score.
 *
 * https://en.wikipedia.org/wiki/Jaccard_index
 *
 * @param text1 - First text to compare.
 * @param text2 - Second text to compare.
 * @returns A value from 0 (no shared words) to 1 (identical word sets). Returns 0
 *   when neither text contains any words.
 */
export function jaccardSimilarity(text1: string, text2: string): number {
  const words1 = new Set(normalizeText(text1));
  const words2 = new Set(normalizeText(text2));

  // Count the overlap directly rather than calling Set.prototype.intersection/union,
  // so the function does not depend on a runtime that ships the newer Set methods.
  let shared = 0;
  for (const word of words1) {
    if (words2.has(word)) {
      shared += 1;
    }
  }

  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const unionSize = words1.size + words2.size - shared;

  return unionSize === 0 ? 0 : shared / unionSize;
}

/**
 * Splits a text into lower-cased word tokens.
 *
 * Every character that is not a letter, combining mark, digit, underscore or
 * whitespace is removed before splitting on whitespace. Unicode property escapes
 * are used instead of `\w`, which only matches ASCII: with `\w`, words written in
 * non-Latin scripts were deleted entirely and accented words were truncated.
 */
const normalizeText = (text: string): string[] => {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, "") // Remove punctuation & symbols
    .trim()
    .split(/\s+/)
    .filter(Boolean);
};
2 changes: 1 addition & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"compilerOptions": {
"target": "ES2022",
"lib": ["ES2022"],
"lib": ["ESNext"],
"module": "nodenext",
"moduleResolution": "nodenext",
"allowSyntheticDefaultImports": true,
Expand Down