The text to normalize.
The normalized text string.
/**
 * Normalizes text for similarity comparison.
 *
 * Steps, in order: trim; strip every `IGNORABLE_PATTERNS` entry; NFD-decompose;
 * fold full-width forms (U+FF01–U+FF5E) to their ASCII equivalents; drop
 * combining diacritics (U+0300–U+036F); normalize punctuation; expand
 * `ABBREVIATION_MAP` entries; then keep only `\w` characters and apostrophes,
 * remove all whitespace and dashes, and lowercase.
 *
 * Because `\w` is ASCII-only here, non-ASCII letters (e.g. kana/kanji) are
 * removed by the final filter.
 *
 * Results are memoized in `normalizeCache`, keyed by the raw input string.
 *
 * @param text - The text to normalize.
 * @returns The normalized text string; `""` when `text` is empty.
 */
export function enhancedNormalize(text: string): string {
  if (!text) return "";

  // An empty string is a valid cached result, so compare against undefined.
  const cached = getCacheEntry(normalizeCache, text);
  if (cached !== undefined) {
    return cached;
  }

  let normalized = text.trim();

  // Remove common ignorable patterns
  for (const pattern of IGNORABLE_PATTERNS) {
    normalized = normalized.replaceAll(pattern, "");
  }

  // Decompose so accented letters become base letter + combining mark.
  normalized = normalized.normalize("NFD");

  // Convert full-width characters to half-width (fixed offset of 0xFEE0).
  normalized = normalized.replaceAll(/[\uff01-\uff5e]/g, (char) =>
    String.fromCodePoint((char.codePointAt(0) ?? 0) - 0xfee0),
  );

  // Remove the combining diacritics exposed by the NFD step.
  normalized = normalized.replaceAll(/[\u0300-\u036f]/g, "");

  // Normalize punctuation and special characters.
  // Full-width ! ? : ; , ( ) need no entries here: the full-width fold above
  // has already converted them.
  normalized = normalized
    // Typographic apostrophes must become "'" so they survive the final
    // filter exactly like an ASCII apostrophe ("don’t" === "don't").
    .replaceAll(/[\u2018\u2019]/g, "'")
    // Drops the space following a straight double quote (kept unchanged;
    // the quote itself is stripped by the final filter).
    .replaceAll('" ', '"')
    .replaceAll(/[\u2013\u2014]/g, "-") // En/em dashes
    .replaceAll("\u2026", "...") // Ellipsis
    .replaceAll("\u00d7", "x") // Multiplication sign
    .replaceAll("\u3002", ".") // Japanese period
    .replaceAll("\u300c", '"') // Japanese left quote
    .replaceAll("\u300d", '"') // Japanese right quote
    .replaceAll("\u300e", '"') // Japanese left double quote
    .replaceAll("\u300f", '"'); // Japanese right double quote

  // Handle common abbreviations (whole-word, case-insensitive).
  // NOTE(review): keys are interpolated into the pattern unescaped, so a key
  // containing regex metacharacters (e.g. ".") is treated as a pattern, and
  // "$" sequences in an expansion are interpreted by replaceAll — confirm
  // ABBREVIATION_MAP contains only plain text.
  for (const [abbrev, expansion] of ABBREVIATION_MAP) {
    const regex = new RegExp(String.raw`\b${abbrev}\b`, "gi");
    normalized = normalized.replaceAll(regex, expansion);
  }

  // Normalize whitespace and special characters.
  // No trailing trim() is needed: every whitespace character is removed.
  normalized = normalized
    .replaceAll(/[^\w\s\-']/g, " ") // Replace most special chars with space
    .replaceAll("-", "") // Remove dashes to match manga-search-service normalization
    .replaceAll(/\s+/g, "") // Remove all spaces for more consistent matching
    .toLowerCase();

  setCacheEntry(normalizeCache, text, normalized, NORMALIZE_CACHE_LIMIT);
  return normalized;
}
Normalizes text for similarity comparison using enhanced Unicode and punctuation handling.
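Removes diacritics, normalizes punctuation (including full-width and Japanese punctuation), expands abbreviations, strips remaining special characters, removes all whitespace and dashes, and lowercases the result for consistent matching. Results are cached for performance.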