• Normalizes text for similarity comparison using enhanced Unicode and punctuation handling.

    Removes diacritics, normalizes punctuation and Japanese characters, expands abbreviations, and removes spaces for consistent matching. Results are cached for performance.

    Parameters

    • text: string

      The text to normalize.

    Returns string

    The normalized text string.

    /**
     * Normalizes text into a compact, lowercase key for similarity comparison.
     *
     * Pipeline, in order:
     * 1. Trim, then strip every entry of `IGNORABLE_PATTERNS`.
     * 2. NFD-decompose, fold full-width ASCII variants (U+FF01–U+FF5E) to their
     *    half-width forms, and drop combining marks U+0300–U+036F (accents).
     * 3. Map typographic punctuation (curly quotes, en/em dashes, ellipsis,
     *    multiplication sign, CJK period and corner brackets) to ASCII.
     * 4. Expand each entry of `ABBREVIATION_MAP` on word boundaries,
     *    case-insensitively.
     * 5. Replace everything except ASCII word characters, whitespace, `-` and
     *    `'` with a space, then remove dashes and all whitespace, and lowercase.
     *
     * Note that step 5 uses the ASCII-only `\w` class, so any non-ASCII letters
     * still present at that point (e.g. kana or kanji) are removed from the result.
     *
     * The result is looked up in and stored to `normalizeCache` under the
     * original `text`.
     *
     * @param text - The text to normalize.
     * @returns The normalized string; `""` when `text` is empty.
     */
    export function enhancedNormalize(text: string): string {
      if (!text) return "";

      const cached = getCacheEntry(normalizeCache, text);
      if (cached !== undefined) {
        return cached;
      }

      let normalized = text.trim();

      // Remove common ignorable patterns
      for (const pattern of IGNORABLE_PATTERNS) {
        normalized = normalized.replaceAll(pattern, "");
      }

      // Canonical decomposition: splits accented letters into base + combining mark
      normalized = normalized.normalize("NFD");

      // Convert full-width characters to half-width. U+FF01–U+FF5E mirror ASCII
      // U+0021–U+007E at a fixed offset of 0xFEE0, so this already covers the
      // full-width !, ?, :, ;, comma and parentheses.
      normalized = normalized.replaceAll(/[\uff01-\uff5e]/g, (char) =>
        String.fromCodePoint((char.codePointAt(0) ?? 0) - 0xfee0),
      );

      // Remove diacritics/accents exposed by the NFD step
      normalized = normalized.replaceAll(/[\u0300-\u036f]/g, "");

      // Normalize punctuation and special characters
      normalized = normalized
        // Curly apostrophes must become "'" here: the final filter keeps "'" but
        // drops every other non-word character, so without this step "Don\u2019t"
        // and "Don't" would normalize to different strings.
        .replaceAll(/[\u2018\u2019]/g, "'")
        .replaceAll(/[\u201c\u201d]/g, '"') // Curly double quotes
        .replaceAll(/[\u2013\u2014]/g, "-") // En/em dashes
        .replaceAll("\u2026", "...") // Ellipsis
        .replaceAll("\u00d7", "x") // Multiplication sign
        .replaceAll("\u3002", ".") // Japanese period
        .replaceAll(/[\u300c-\u300f]/g, '"'); // Japanese corner brackets (single and double)

      // Handle common abbreviations.
      // NOTE(review): keys are interpolated into the pattern unescaped, so they are
      // treated as regex source — confirm ABBREVIATION_MAP contains no stray
      // metacharacters (or that any it contains are intentional).
      for (const [abbrev, expansion] of ABBREVIATION_MAP) {
        const regex = new RegExp(String.raw`\b${abbrev}\b`, "gi");
        normalized = normalized.replaceAll(regex, expansion);
      }

      // Normalize whitespace and special characters
      normalized = normalized
        .replaceAll(/[^\w\s\-']/g, " ") // Replace most special chars with space
        .replaceAll("-", "") // Remove dashes to match manga-search-service normalization
        .replaceAll(/\s+/g, "") // Remove all spaces for more consistent matching
        .toLowerCase();

      setCacheEntry(normalizeCache, text, normalized, NORMALIZE_CACHE_LIMIT);

      return normalized;
    }