• Extracts meaningful words from text after filtering out stop words.

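    Trims and lowercases the text, strips ignorable patterns, and replaces punctuation with spaces so that word boundaries are preserved. The resulting tokens are then filtered: single-character tokens and common stop words are dropped. Results are cached per input string for performance, and every call returns a fresh copy of the array.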

    Parameters

    • text: string

      The text to extract words from.

    Returns string[]

    A new array containing the meaningful words, lowercased, in the order they appear in the text.

    /**
     * Extracts the meaningful words from `text`.
     *
     * The text is trimmed and lowercased, every match of each entry in
     * `IGNORABLE_PATTERNS` is removed, and each remaining character that is not
     * a letter, combining mark, digit, underscore, or whitespace is replaced by
     * a space so that word boundaries survive. Tokens are kept only when they
     * are longer than one character and are not present in `STOP_WORDS`.
     *
     * Results are looked up in and stored to `meaningfulWordsCache`, keyed by
     * the raw (un-normalized) input string. Both the cache-hit and cache-miss
     * paths return a copy, so callers never receive the array that was handed
     * to the cache.
     *
     * @param text - The text to extract words from.
     * @returns A new array of meaningful words, lowercased, in input order.
     */
    export function extractMeaningfulWords(text: string): string[] {
      const cached = getCacheEntry(meaningfulWordsCache, text);
      if (cached !== undefined) {
        return cached.slice();
      }

      // Use a lighter normalization for word extraction that preserves spaces
      let normalized = text.trim().toLowerCase();

      // Remove common ignorable patterns
      for (const pattern of IGNORABLE_PATTERNS) {
        normalized = normalized.replaceAll(pattern, "");
      }

      // Replace punctuation with spaces. Unicode property escapes are used
      // rather than `\w` because `\w` only matches ASCII [A-Za-z0-9_]; with it,
      // accented and non-Latin letters were treated as punctuation and words
      // such as "café" were truncated or split. For ASCII input the two classes
      // match exactly the same characters.
      //
      // No explicit whitespace collapsing or trimming is needed: splitting on
      // /\s+/ absorbs runs of whitespace, and the length filter below discards
      // the empty tokens produced by leading or trailing whitespace.
      const words = normalized
        .replaceAll(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
        .split(/\s+/)
        .filter((word) => word.length > 1 && !STOP_WORDS.has(word));

      setCacheEntry(
        meaningfulWordsCache,
        text,
        words,
        MEANINGFUL_WORDS_CACHE_LIMIT,
      );

      return words.slice();
    }