The text to extract words from.
Array of meaningful words.
/**
 * Splits `text` into lowercase tokens and keeps only the meaningful ones.
 *
 * @remarks
 * Processing order: trim and lowercase the input, delete every match of
 * each entry in `IGNORABLE_PATTERNS`, replace any character that is neither
 * `\w` nor whitespace with a space, then tokenize on whitespace. Tokens of
 * length 0 or 1 and tokens present in `STOP_WORDS` are discarded.
 *
 * `\w` is used without the `u` flag, so it only covers `[A-Za-z0-9_]`;
 * non-ASCII letters are therefore treated like punctuation and act as word
 * separators.
 *
 * Results are looked up in and stored to `meaningfulWordsCache`, keyed by
 * the raw (un-normalized) input. A copy is always returned, never the array
 * instance that was read from or handed to the cache.
 *
 * @param text - The text to extract words from.
 * @returns Array of meaningful words, in order of appearance.
 */
export function extractMeaningfulWords(text: string): string[] {
    const hit = getCacheEntry(meaningfulWordsCache, text);
    if (hit !== undefined) {
        return hit.slice();
    }

    // Only trim and lowercase here: interior whitespace must survive because
    // it is what separates words further down.
    let working = text.trim().toLowerCase();

    // Strip ignorable patterns before punctuation handling so they are
    // matched against the still-punctuated text.
    for (const ignorable of IGNORABLE_PATTERNS) {
        working = working.replaceAll(ignorable, "");
    }

    // Punctuation becomes a space (not an empty string) so that adjacent
    // words are not glued together.
    const punctuationAsSpaces = working.replaceAll(/[^\w\s]/g, " ");
    const collapsed = punctuationAsSpaces.replaceAll(/\s+/g, " ").trim();

    const meaningful: string[] = [];
    for (const token of collapsed.split(/\s+/)) {
        // Skips the empty token produced by splitting "", single characters,
        // and stop words.
        if (token.length <= 1 || STOP_WORDS.has(token)) {
            continue;
        }
        meaningful.push(token);
    }

    setCacheEntry(
        meaningfulWordsCache,
        text,
        meaningful,
        MEANINGFUL_WORDS_CACHE_LIMIT,
    );

    return meaningful.slice();
}
Extracts meaningful words from text, filtering out stop words and single-character tokens.
Lowercases the text, removes ignorable patterns, and replaces punctuation with spaces so that word boundaries are preserved, then filters out common stop words. Results are cached for performance.