diff --git a/packages/dota/src/twitch/index.ts b/packages/dota/src/twitch/index.ts index 338efc73..d498da52 100644 --- a/packages/dota/src/twitch/index.ts +++ b/packages/dota/src/twitch/index.ts @@ -70,7 +70,10 @@ twitchChat.on( messageId: string }, ) => { - if (!channelId) return + if (!channelId) { + logger.error('No channelId', { channel, user, text }) + return + } // Skip rank check for mods and broadcasters const isStaff = userInfo.isMod || userInfo.isBroadcaster @@ -88,7 +91,9 @@ twitchChat.on( logger.info('[TWITCH] Missing user', { channelId, channel, user, reason }) chatClient.say(channel, t('missingUser', { lng: 'en' })) lastMissingUserMessageTimestamps[channel] = now + return } + logger.error('No client', { channel, user, text, reason, channelId }) return } @@ -197,6 +202,7 @@ twitchChat.on( !toggleCommand?.aliases?.includes(text.replace('!', '').split(' ')[0]) && text.split(' ')[0] !== '!toggle' ) { + logger.info('Bot is disabled', { channel, user, text }) return } diff --git a/packages/profanity-filter/src/utils/profanity-wordlists.ts b/packages/profanity-filter/src/utils/profanity-wordlists.ts index 3c7c0dff..43459b47 100644 --- a/packages/profanity-filter/src/utils/profanity-wordlists.ts +++ b/packages/profanity-filter/src/utils/profanity-wordlists.ts @@ -14,6 +14,14 @@ import { normalizeRepeatedChars, } from './text-normalization.js' +// Load profanity wordlists from external JSON files +import russianWords from '../../wordlists/russian.json' with { type: 'json' } +import chineseWords from '../../wordlists/chinese.json' with { type: 'json' } +import spanishWords from '../../wordlists/spanish.json' with { type: 'json' } +import frenchWords from '../../wordlists/french.json' with { type: 'json' } +import germanWords from '../../wordlists/german.json' with { type: 'json' } +import spanishLeetWords from '../../wordlists/spanish-leetspeak.json' with { type: 'json' } + /** * Compresses repeated characters to single characters * e.g., "fuuuuck" becomes "fuck" @@ -23,201 +31,20 @@ export function compressRepeatedCharacters(text: string): string { } // Russian profanity terms (common ones) -export const russianProfanityList = [ - 'сука', - 'блять', - 'пидор', - 'пидар', - 'хуй', - 'ебать', - 'нахуй', - 'пизда', - 'залупа', - 'бля', - 'ебал', - 'ебаный', - 'ебанько', - 'ебанный', - 'ебанат', - 'пидр', - 'пизд', - 'хуе', - 'хуи', - 'хуя', - 'хер', - 'херня', - 'херь', - 'мудак', - 'мудила', - 'блядь', - 'чурка', - 'хач', - 'кацап', - 'жид', - 'хахол', - // Additional ethnic and racial slurs - 'москаль', // Derogatory term for Russians - 'хохол', // Corrected spelling of slur for Ukrainians - 'пшек', // Slur for Polish people - 'бульбаш', // Slur for Belarusians - 'укроп', // Derogatory for Ukrainians - 'рагуль', // Rural/uncultured person slur - 'малорос', // Derogatory for Ukrainians - 'ватник', // Political slur - 'колорад', // Political derogatory term - 'азер', // Derogatory for Azerbaijanis - 'армяшка', // Derogatory for Armenians - 'грызун', // Derogatory for Georgians - 'чурбан', // Variation of чурка - 'чуркестан', // Derogatory term - 'чурбанье', // Collective derogatory form - 'абрек', // Derogatory for Caucasians - 'зверь', // Derogatory for Caucasians when used in ethnic context - 'черномазый', // Equivalent to the n-word - 'хачик', // Variation of хач - 'урюк', // Derogatory for Central Asians - 'чернота', // Racial slur - 'чалма', // Derogatory for Muslims - 'сарацин', // Derogatory for Muslims - 'баклажан', // Racial slur based on skin color - 'жидовка', // Female form of жид - 'жидяра', // Intensified form of жид - 'юде', // Derogatory for Jews - 'пархатый', // Antisemitic slur - 'шнобель', // Antisemitic reference - 'пейсатый', // Antisemitic reference - 'циган', // Gypsy/Roma slur - 'цыганва', // Derogatory for Roma - 'гой', // Used in antisemitic context - 'узкоглазый', // Slant-eye slur - 'китаеза', // Derogatory for Chinese - 'япошка', // Derogatory for Japanese - 'монгол', // When used pejoratively - 'желтый', // Yellow, racial slur - 'самурай', // When used pejoratively - // Common transliterated slurs - 'nigger', - 'kike', - 'spic', - 'chink', - 'wop', - 'polack', - 'paki', - 'gook', - 'dago', - 'jewed', // Antisemitic slur based on Jewish stereotypes - 'jew', -] +const russianProfanityList: string[] = russianWords // Chinese profanity terms -export const chineseProfanityList = [ - '操你妈', - '肏你妈', - '草你妈', - '妈的', - '他妈的', - '傻逼', - '滚蛋', - '废物', - '婊子', - '贱人', - '靠', - '操', - '屁眼', - '混蛋', - '王八蛋', - '去死', - '白痴', - '吃屎', - '你妈逼', - '日你妈', -] +const chineseProfanityList: string[] = chineseWords // Additional European languages profanity -export const europeanProfanityList = { - // Spanish - spanish: [ - 'puta', - 'mierda', - 'cojones', - 'joder', - 'follar', - 'cabron', - 'cabrón', - 'coño', - 'gilipollas', - 'hijo de puta', - 'hijoputa', - 'imbécil', - 'capullo', - 'idiota', - ], - // French - french: [ - 'putain', - 'merde', - 'connard', - 'baise', - 'foutre', - 'con', - 'salope', - 'pute', - 'enculé', - 'fils de pute', - 'bordel', - 'cul', - 'branler', - 'crétin', - 'connasse', - ], - // German - german: [ - 'scheiße', - 'arschloch', - 'fotze', - 'fick', - 'wichser', - 'hurensohn', - 'mistkerl', - 'schwuchtel', - 'schwanz', - 'verfickt', - 'verdammt', - 'schlampe', - 'hure', - ], +const europeanProfanityList = { + spanish: spanishWords as string[], + french: frenchWords as string[], + german: germanWords as string[], } // Spanish profanity in leetspeak form (pre-generated common variants) -const spanishLeetSpeakList = [ - 'h1j0 d3 put4', - 'h1j0d3put4', - 'h1jod3put4', - 'hij0 d3 put4', - 'p0r0n', - 'p0rn0', - 'p0rnogr4f14', - 'p0rn0gr4f14', - 'p0rn0gr4fi4', - 'c0j0n3s', - 'c0jon3s', - 'j0d3r', - 'c4br0n', - 'c4bron', - 'c0ñ0', - 'c0n0', - 'pu74', - 'put4', - 'mierd4', - 'm13rd4', - 'h1j0pu74', - 'pv74', - 'pvt4', - 'c4pull0', - 'c4pull0', - '1mb3c1l', - '1d10t4', -] +const spanishLeetSpeakList: string[] = spanishLeetWords // Common character substitutions for leetspeak in European languages const europeanLeetSpeakMap: Record = { @@ -237,7 +64,7 @@ export const evasionPatterns = [ // Leetspeak patterns /[f]+[\s_]*[u]+[\s_]*[c]+[\s_]*[k]+/i, // F*u*c*k variations /[s]+[\s_]*[h]+[\s_]*[i]+[\s_]*[t]+/i, // S*h*i*t variations - // Explicit IPA character sequence patterns + // Explicit IPA character sequence patterns /[ʂ][\s\W_]*[ɧ][\s\W_]*[ıɪ][\s\W_]*[ɬ]/i, // Specific IPA "shit" pattern /[ʃ][\s\W_]*[ɨɪı][\s\W_]*[ʈʇʦ]/i, // Alternative IPA "shit" pattern /[a]+[\s_]*[s]+[\s_]*[s]+/i, // A*s*s variations @@ -255,20 +82,20 @@ export const evasionPatterns = [ // More generalized patterns for detecting common evasion techniques // This handles compound words where offensive terms are combined with prefixes/suffixes /(?:pre|post|trans|anti|pro)?[\W_]*[n][\W_]*[i1!][\W_]*([gq]|9)[\W_]*(?:[gq]|9)[\W_]*[ae3][\W_]*[r]/i, - + // Enhanced patterns for better detection /[fφƒ][\W_]*[uνüùúûũūŭůűųμ@4]+[\W_]*[cçćčĉċс¢<({]+[\W_]*[kķк{]+/i, // Extended f*u*c*k with unicode variations /[sśšşсsc5$]+[\W_]*[hĥħη#]+[\W_]*[iìíîïĩīĭįı!1|]+[\W_]*[tťţт7+]+/i, // Extended s*h*i*t with unicode variations /\W*[fφ]\W*[aeiouæøåäàáâãéèêëíìîïóòôõöúùûü@4]\W*[gkq]\W*/i, // f*g variations - + // Symbols and descriptions in text (to catch cases like "f(asterisk)u(asterisk)c(asterisk)k") /f\s*(?:\(.*?\)|<.*?>|\[.*?\]|\{.*?\})\s*u\s*(?:\(.*?\)|<.*?>|\[.*?\]|\{.*?\})\s*c\s*(?:\(.*?\)|<.*?>|\[.*?\]|\{.*?\})\s*k/i, /s\s*(?:\(.*?\)|<.*?>|\[.*?\]|\{.*?\})\s*h\s*(?:\(.*?\)|<.*?>|\[.*?\]|\{.*?\})\s*i\s*(?:\(.*?\)|<.*?>|\[.*?\]|\{.*?\})\s*t/i, - + // Special patterns for non-standard alphabets - /[ƒϝ𝐟𝒇𝕗][\s\W_]*[𝐮𝒖𝕦υ][\s\W_]*[𝐜𝒄𝕔ϲ][\s\W_]*[𝐤𝒌𝕜κϰ]/i, // Mathematical and other special Unicode font variants for "fuck" - /[𝐬𝒔𝕤ʂ][\s\W_]*[𝐡𝒉𝕙ɧ][\s\W_]*[𝐢𝒊𝕚ɪ][\s\W_]*[𝐭𝒕𝕥ƭ]/i, // Mathematical and IPA-like Unicode font variants for "shit" - + /[ƒϝ𝐟𝒇𝕗][\s\W_]*[𝐮𝒖𝕦υ][\s\W_]*[𝐜𝒄𝕔ϲ][\s\W_]*[𝐤𝒌𝕜κϰ]/iu, // Mathematical and other special Unicode font variants for "fuck" + /[𝐬𝒔𝕤ʂ][\s\W_]*[𝐡𝒉𝕙ɧ][\s\W_]*[𝐢𝒊𝕚ɪ][\s\W_]*[𝐭𝒕𝕥ƭ]/iu, // Mathematical and IPA-like Unicode font variants for "shit" + // Homoglyphs for common profanity (characters that look similar but have different Unicode code points) /[fḟƒғֆ][uüṳṵṷụűữųʉư][cċćĉčçсς][kḱǩķҝқҡκ]/i, // Homoglyphs for "fuck" /[sšśŝșсςʂ][hħȟҥհɧ][iíìîịĭīįἰἱὶίιɪı][tťțţτтɬ]/i, // Homoglyphs for "shit" including IPA characters ʂɧıɬ @@ -345,10 +172,10 @@ export function detectRussianProfanity(text: string): boolean { normalizeRepeatedChars(text.toLowerCase()), // Normalize repeated chars ...createRussianLatinVariations(text) // Add Latin-Cyrillic variations ]; - + // Remove duplicates const uniqueVariations = [...new Set(variations)]; - + // Generic substitutions for both Latin and Cyrillic characters const commonSubstitutions: Record = { 'о': ['o', '0', 'о', 'ο', 'օ'], // Cyrillic о, Latin o, zero, Greek omicron, Armenian o @@ -362,7 +189,7 @@ export function detectRussianProfanity(text: string): boolean { 'р': ['p', 'р', 'r'], // Cyrillic р, Latin p/r 'у': ['y', 'у'], // Cyrillic у, Latin y }; - + // Check each variation against the Russian profanity list for (const variation of uniqueVariations) { const lowerVariation = variation.toLowerCase(); @@ -372,14 +199,14 @@ export function detectRussianProfanity(text: string): boolean { if (lowerVariation.includes(word)) { return true; } - + // Generate variants of the word with common substitutions let substitutionVariants = [word]; - + // Apply character substitutions to the Russian word for (const [char, replacements] of Object.entries(commonSubstitutions)) { const newVariants: string[] = []; - + for (const variant of substitutionVariants) { if (variant.includes(char)) { for (const replacement of replacements) { @@ -389,10 +216,10 @@ export function detectRussianProfanity(text: string): boolean { newVariants.push(variant); } } - + substitutionVariants = [...new Set(newVariants)]; } - + // Check all word variants against the variation for (const wordVariant of substitutionVariants) { if (lowerVariation.includes(wordVariant)) { @@ -433,7 +260,7 @@ export function detectEvasionTactics(text: string): boolean { text.toLowerCase().replace(/\s+/g, ''), // Remove all spaces applyAggressiveLeetSpeak(text), // Convert to aggressive leetspeak ]; - + // Add variations where different sections are normalized differently // This helps catch mixed obfuscation techniques const words = text.toLowerCase().split(/\s+/); @@ -443,27 +270,27 @@ export function detectEvasionTactics(text: string): boolean { const wordsCopy = [...words]; wordsCopy[i] = removeSeparators(wordsCopy[i]); variations.push(wordsCopy.join(' ')); - + const wordsCopy2 = [...words]; wordsCopy2[i] = normalizeRepeatedChars(wordsCopy2[i]); variations.push(wordsCopy2.join(' ')); } } - + // Add a variation that combines different normalization techniques const combined = normalizeText(removeSeparators(normalizeRepeatedChars(text.toLowerCase()))); variations.push(combined); - + // Remove duplicates const uniqueVariations = [...new Set(variations)]; - + // Check all patterns against all text variations for (const variant of uniqueVariations) { if (evasionPatterns.some((pattern) => pattern.test(variant))) { return true; } } - + // Special check for advanced obfuscation: separated characters with arbitrary characters // This helps detect cases like "f*u#c%k" that might slip through other checks const strippedText = stripNonAlphanumeric(text.toLowerCase()); @@ -597,25 +424,25 @@ export function detectAgeRestrictions(text: string): boolean { normalizeRepeatedChars(text.toLowerCase()), // Handle repeated characters removeSeparators(text.toLowerCase()), // Remove separators like dots, spaces between letters ] - + // Number substitutions that might be used to evade detection const numberSubstitutions: Record = { 'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10', 'eleven': '11', 'twelve': '12', } - + // Add a variation with number words replaced by digits let numberWordsReplaced = text.toLowerCase(); for (const [word, digit] of Object.entries(numberSubstitutions)) { numberWordsReplaced = numberWordsReplaced.replace(new RegExp(`\\b${word}\\b`, 'gi'), digit); } variations.push(numberWordsReplaced); - + // Regex patterns to catch variations of "I'm X", "Im X", "I am X", etc. const patterns = [ /\bi'?m\s*(\d+)/, // Matches "i'm 12", "im12" - /\bi\s*am\s*(\d+)/, // Matches "i am 12", "iam12" + /\bi\s*am\s*(\d+)/, // Matches "i am 12", "iam12" /\biam\s*(\d+)/, // Matches "iam12" /\bme\s*(\d+)/, // Matches "me 12" /\bage\s*[:|=]?\s*(\d+)/, // Matches "age: 12", "age=12" @@ -629,28 +456,28 @@ export function detectAgeRestrictions(text: string): boolean { /\bi'?m\s*a\s*kid/, // Matches "i'm a kid" /\bmy\s*age\s*is\s*(\d+)/, // Matches "my age is 12" ] - + // Check all variations against all patterns for (const variant of variations) { for (const pattern of patterns) { const match = variant.match(pattern) - if (match && match[1]) { + if (match?.length && match[1]) { const age = Number.parseInt(match[1], 10) // Flag if age is under 13 (COPPA compliance age) if (age < 13 && age > 0) { return true } } - + // Special check for minor/underage without explicit age // Use an even more flexible pattern to catch heavily obfuscated cases - if (/\b(?:under\s*age|und[e3]r.?[a@]g[e3]|m[i1]n[o0]r|k[i1]d)\b/.test(variant) || + if (/\b(?:under\s*age|und[e3]r.?[a@]g[e3]|m[i1]n[o0]r|k[i1]d)\b/.test(variant) || /\s*u\s*n\s*d\s*e\s*r\s*a\s*g\s*e\s*/.test(variant)) { return true } } } - + return false } @@ -667,7 +494,7 @@ export function detectTransphobicContent(text: string): boolean { normalizeRepeatedChars(text.toLowerCase()), // Handle repeated characters removeSeparators(text.toLowerCase()), // Remove separators like dots, spaces ]; - + // Always detect transsexual in test mode - this is specifically for test compatibility if (process.env.NODE_ENV === 'test') { for (const variant of variations) { @@ -676,14 +503,14 @@ export function detectTransphobicContent(text: string): boolean { } } } - + // Flag specific transphobic terms - expanded with creative variation attempts const transphobicTerms = [ - 'tranny', 'tr4nny', 'tr@nny', 'tr4nn13', 'tr@nn13', 'tr@nn1', + 'tranny', 'tr4nny', 'tr@nny', 'tr4nn13', 'tr@nn13', 'tr@nn1', 'shemale', 'sh3m4l3', 'sh3male', 'sh3m@le', 'shem@le', 'she male', 't-slur', 'tslur', ]; - + // Check for standalone slur terms across all variations for (const variant of variations) { // Split by word boundaries for exact matches @@ -695,45 +522,45 @@ export function detectTransphobicContent(text: string): boolean { } } } - + // Enhanced hateful phrases with more variation handling const hatefulPhrases = [ // Mental illness related /(?:trans(?:gender|sexual|vestite)?|tr[a@]n[s5]g[e3]nd[e3]r(?:i[s5]m)?)\s*(?:i[s5]|are)\s*(?:a\s*)?(?:m[e3]nt[a@]l\s*(?:illn[e3][s5][s5]|d[i1][s5][o0]rd[e3]r|d[i1][s5][e3][a@][s5][e3])|d[e3]lu[s5][i1][o0]n|d[e3]lu[s5][i1][o0]n[a@]l)/i, /tr[a@]n[s5]\s*(?:p[e3][o0]pl[e3]\s*)?(?:[a@]r[e3]|i[s5])\s*(?:m[e3]nt[a@]lly\s*[i1]ll|[s5][i1]ck|[s5][i1]ckn[e3][s5][s5]|p[e3]rv[e3]rt[s5]?|gr[o0]{2}m[e3]r[s5]?)/i, - + // Existence denial with enhanced character variations /tr[a@]n[s5](?:g[e3]nd[e3]r|[s5][e3]xu[a@]l)?\s*(?:p[e3][o0]pl[e3]\s*)?(?:[a@]r[e3]n'?t|n[o0]t)\s*r[e3][a@]l/i, /(?:m[e3]n|m[a@]l[e3][s5])\s*c[a@]n'?t\s*(?:b[e3](?:c[o0]m[e3])?|[a@]r[e3]n'?t)\s*w[o0]m[e3]n/i, /(?:w[o0]m[e3]n|f[e3]m[a@]l[e3][s5])\s*c[a@]n'?t\s*(?:b[e3](?:c[o0]m[e3])?|[a@]r[e3]n'?t)\s*m[e3]n/i, /f[a@]k[e3]\s*(?:w[o0]m[e3]n|m[e3]n|g[i1]rl[s5]?|b[o0]y[s5]?)/i, - + // Gender essentialism with character substitutions /[o0]nly\s*(?:tw[o0]|2)\s*g[e3]nd[e3]r[s5]/i, /g[e3]nd[e3]r\s*(?:[i1][s5]\s*)?(?:d[e3]t[e3]rm[i1]n[e3]d\s*by|b[a@][s5][e3]d\s*[o0]n)\s*(?:b[i1][o0]l[o0]gy|chr[o0]m[o0][s5][o0]m[e3][s5]|b[i1]rth|[s5][e3]x)/i, - + // Violence or hatred promotion with character substitutions /(?:h[a@]t[e3]|k[i1]ll|[a@]tt[a@]ck)\s*(?:[a@]ll\s*)?tr[a@]n[s5]/i, /tr[a@]n[s5]\s*(?:p[e3][o0]pl[e3]\s*)?(?:[s5]h[o0]uld\s*(?:n[o0]t\s*[e3]x[i1][s5]t|d[i1][e3]|b[e3]\s*k[i1]ll[e3]d|b[e3]\s*b[a@]nn[e3]d))/i, - + // Conspiracy theories with character variations /tr[a@]n[s5](?:g[e3]nd[e3]r)?\s*(?:[a@]g[e3]nd[a@]|[i1]d[e3][o0]l[o0]gy|pr[o0]p[a@]g[a@]nd[a@]|cult)/i, /tr[a@]n[s5](?:g[e3]nd[e3]r)?\s*[i1][s5]\s*(?:[a@]g[a@][i1]n[s5]t\s*)?(?:n[a@]tur[e3]|n[a@]tur[a@]l|b[i1][o0]l[o0]gy|g[o0]d)/i, - + // "Harmful to children" narratives with character variations /tr[a@]n[s5](?:g[e3]nd[e3]r)?\s*(?:k[i1]d[s5]|ch[i1]ldr[e3]n|y[o0]uth|m[i1]n[o0]r[s5])/i, /(?:tr[a@]n[s5][i1]t[i1][o0]n[i1]ng|pub[e3]rty\s*bl[o0]ck[e3]r[s5])\s*(?:h[a@]rm[s5]?|d[a@]m[a@]g[e3][s5]?|ru[i1]n[s5]?|d[e3][s5]tr[o0]y[s5]?)\s*(?:k[i1]d[s5]|ch[i1]ldr[e3]n|y[o0]uth|m[i1]n[o0]r[s5])/i, - - // Derogatory or othering language with character variations + + // Derogatory or othering language with character variations /tr[a@]nn(?:y|[i1][e3][s5])/i, /tr4nn(?:y|[i1][e3][s5])/i, /tr@nn(?:y|[i1][e3][s5])/i, /tr[a@]nn[¡i1][e3][s5]/i, // Enhanced to catch more unicode variants /[s5]h[e3]m[a@]l[e3][s5]?/i, - + // Flag specific hateful phrases with transsexual /tr[a@]n[s5][s5][e3]xu[a@]l[s5]?\s*[a@]r[e3]\s*d[e3]lu[s5][i1][o0]n[a@]l/i, - + // Additional variants for common transphobic phrases /m[e3]n\s*[i1]n\s*dr[e3][s5][s5][e3][s5]/i, /b[i1][o0]l[o0]g[i1]c[a@]l\s*[s5][e3]x\s*c[a@]n'?t\s*ch[a@]ng[e3]/i, @@ -742,7 +569,7 @@ export function detectTransphobicContent(text: string): boolean { /[i1]'?m\s*tr[a@]n[s5]ph[o0]b[i1]c/i, /h[a@]t[e3]\s*tr[a@]n[s5]/i, ]; - + // Check all variations against all hateful phrases for (const variant of variations) { for (const phrase of hatefulPhrases) { @@ -751,14 +578,14 @@ export function detectTransphobicContent(text: string): boolean { } } } - + // Don't flag standalone terms like "trans" or "transgender" if they're not in hateful context const neutralTerms = [ 'trans', 'transgender', 'transvestite', ]; - + // Only flag neutral terms if in test mode if (process.env.NODE_ENV === 'test') { for (const variant of variations) { @@ -770,7 +597,7 @@ export function detectTransphobicContent(text: string): boolean { } } } - + return false; } diff --git a/packages/profanity-filter/wordlists/chinese.json b/packages/profanity-filter/wordlists/chinese.json new file mode 100644 index 00000000..2286c24f --- /dev/null +++ b/packages/profanity-filter/wordlists/chinese.json @@ -0,0 +1,22 @@ +[ + "操你妈", + "肏你妈", + "草你妈", + "妈的", + "他妈的", + "傻逼", + "滚蛋", + "废物", + "婊子", + "贱人", + "靠", + "操", + "屁眼", + "混蛋", + "王八蛋", + "去死", + "白痴", + "吃屎", + "你妈逼", + "日你妈" +] diff --git a/packages/profanity-filter/wordlists/french.json b/packages/profanity-filter/wordlists/french.json new file mode 100644 index 00000000..1c9da90b --- /dev/null +++ b/packages/profanity-filter/wordlists/french.json @@ -0,0 +1,17 @@ +[ + "putain", + "merde", + "connard", + "baise", + "foutre", + "con", + "salope", + "pute", + "enculé", + "fils de pute", + "bordel", + "cul", + "branler", + "crétin", + "connasse" +] diff --git a/packages/profanity-filter/wordlists/german.json b/packages/profanity-filter/wordlists/german.json new file mode 100644 index 00000000..3b9bb206 --- /dev/null +++ b/packages/profanity-filter/wordlists/german.json @@ -0,0 +1,15 @@ +[ + "scheiße", + "arschloch", + "fotze", + "fick", + "wichser", + "hurensohn", + "mistkerl", + "schwuchtel", + "schwanz", + "verfickt", + "verdammt", + "schlampe", + "hure" +] diff --git a/packages/profanity-filter/wordlists/russian.json b/packages/profanity-filter/wordlists/russian.json new file mode 100644 index 00000000..221b505a --- /dev/null +++ b/packages/profanity-filter/wordlists/russian.json @@ -0,0 +1,83 @@ +[ + "сука", + "блять", + "пидор", + "пидар", + "хуй", + "ебать", + "нахуй", + "пизда", + "залупа", + "бля", + "ебал", + "ебаный", + "ебанько", + "ебанный", + "ебанат", + "пидр", + "пизд", + "хуе", + "хуи", + "хуя", + "хер", + "херня", + "херь", + "мудак", + "мудила", + "блядь", + "чурка", + "хач", + "кацап", + "жид", + "хахол", + "москаль", + "хохол", + "пшек", + "бульбаш", + "укроп", + "рагуль", + "малорос", + "ватник", + "колорад", + "азер", + "армяшка", + "грызун", + "чурбан", + "чуркестан", + "чурбанье", + "абрек", + "зверь", + "черномазый", + "хачик", + "урюк", + "чернота", + "чалма", + "сарацин", + "баклажан", + "жидовка", + "жидяра", + "юде", + "пархатый", + "шнобель", + "пейсатый", + "циган", + "цыганва", + "гой", + "узкоглазый", + "китаеза", + "япошка", + "монгол", + "желтый", + "самурай", + "nigger", + "kike", + "spic", + "chink", + "wop", + "polack", + "paki", + "gook", + "dago", + "jewed", + "jew" +] diff --git a/packages/profanity-filter/wordlists/spanish-leetspeak.json b/packages/profanity-filter/wordlists/spanish-leetspeak.json new file mode 100644 index 00000000..dc7bf9b1 --- /dev/null +++ b/packages/profanity-filter/wordlists/spanish-leetspeak.json @@ -0,0 +1,29 @@ +[ + "h1j0 d3 put4", + "h1j0d3put4", + "h1jod3put4", + "hij0 d3 put4", + "p0r0n", + "p0rn0", + "p0rnogr4f14", + "p0rn0gr4f14", + "p0rn0gr4fi4", + "c0j0n3s", + "c0jon3s", + "j0d3r", + "c4br0n", + "c4bron", + "c0ñ0", + "c0n0", + "pu74", + "put4", + "mierd4", + "m13rd4", + "h1j0pu74", + "pv74", + "pvt4", + "c4pull0", + "c4pull0", + "1mb3c1l", + "1d10t4" +] diff --git a/packages/profanity-filter/wordlists/spanish.json b/packages/profanity-filter/wordlists/spanish.json new file mode 100644 index 00000000..7fa4bdc0 --- /dev/null +++ b/packages/profanity-filter/wordlists/spanish.json @@ -0,0 +1,16 @@ +[ + "puta", + "mierda", + "cojones", + "joder", + "follar", + "cabron", + "cabrón", + "coño", + "gilipollas", + "hijo de puta", + "hijoputa", + "imbécil", + "capullo", + "idiota" +]