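• Calculates an enhanced title similarity score as a weighted combination of several metrics: exact match, substring match, word order, character similarity, semantic similarity, Jaro-Winkler similarity, and n-gram similarity. Pairs whose normalized lengths differ too much are instead scored with a basic string comparison scaled by a length penalty. Results are cached per normalized string pair and effective configuration; the cache is bypassed when the `debug` option is enabled.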

    Parameters

    • str1: string

      First string to compare.

    • str2: string

      Second string to compare.

    • config: Partial<SimilarityConfig> = {}

      Optional custom similarity configuration (weights, thresholds, debug).

    Returns number

    Similarity score between 0 and 100.

    /**
     * Scores how similar two strings are on a 0–100 scale.
     *
     * The score is a weighted average of seven metrics (exact match, substring
     * match, word order, character similarity, semantic similarity,
     * Jaro-Winkler, n-gram). When the normalized lengths differ by more than
     * `lengthDifferenceThreshold` allows, a basic string comparison scaled by
     * the length ratio is used instead.
     *
     * Results are cached under a key built from the normalized pair and the
     * effective configuration. When `debug` is enabled the cache is neither
     * read nor written, and the intermediate scores are logged via
     * `console.debug`.
     *
     * @param str1 - First string to compare.
     * @param str2 - Second string to compare.
     * @param config - Overrides merged on top of `DEFAULT_SIMILARITY_CONFIG`.
     * @returns A rounded score clamped to [0, 100]. Returns 0 when either input
     *   is empty or normalizes to an empty string, 100 when the inputs are
     *   identical before or after normalization, and 0 when the configured
     *   weights sum to zero or to a non-finite value.
     */
    export function calculateEnhancedSimilarity(
      str1: string,
      str2: string,
      config: Partial<SimilarityConfig> = {},
    ): number {
      const finalConfig = { ...DEFAULT_SIMILARITY_CONFIG, ...config };

      if (!str1 || !str2) return 0;
      if (str1 === str2) return 100;

      const norm1 = enhancedNormalize(str1);
      const norm2 = enhancedNormalize(str2);

      // Equality is checked before emptiness, so two inputs that both
      // normalize to "" are still reported as a perfect match.
      if (norm1 === norm2) return 100;
      if (norm1.length === 0 || norm2.length === 0) return 0;

      // Debug runs leave cacheKey null so that every call recomputes and logs.
      let cacheKey: string | null = null;
      if (!finalConfig.debug) {
        const pairKey = makeOrderedPairKey(norm1, norm2);
        const configKey = createConfigKey(finalConfig);
        cacheKey = `${pairKey}::${configKey}`;

        const cached = getCacheEntry(enhancedSimilarityCache, cacheKey);
        if (cached !== undefined) {
          return cached;
        }
      }

      // Check for extreme length differences
      const lengthRatio =
        Math.min(norm1.length, norm2.length) / Math.max(norm1.length, norm2.length);
      if (lengthRatio < finalConfig.lengthDifferenceThreshold) {
        // Penalize very different lengths without eliminating the match
        // entirely: the basic similarity is scaled by the length ratio itself.
        const basicSimilarity = stringSimilarity.compareTwoStrings(norm1, norm2);
        const penalizedScore = basicSimilarity * lengthRatio;
        const roundedPenaltyScore = Math.round(penalizedScore * 100);

        if (finalConfig.debug) {
          console.debug(
            `[Similarity] Length penalty applied: ${str1} vs ${str2}, ratio: ${lengthRatio.toFixed(2)}, score: ${(penalizedScore * 100).toFixed(1)}`,
          );
        }

        const boundedPenaltyScore = Math.min(100, Math.max(0, roundedPenaltyScore));

        if (cacheKey) {
          setCacheEntry(
            enhancedSimilarityCache,
            cacheKey,
            boundedPenaltyScore,
            PAIR_SIMILARITY_CACHE_LIMIT,
          );
        }

        return boundedPenaltyScore;
      }

      // Sum of all metric weights; this is the divisor of the weighted average.
      const totalWeight =
        finalConfig.exactMatchWeight +
        finalConfig.substringMatchWeight +
        finalConfig.wordOrderWeight +
        finalConfig.characterSimilarityWeight +
        finalConfig.semanticWeight +
        finalConfig.jaroWinklerWeight +
        finalConfig.ngramWeight;

      // A zero or non-finite weight sum (e.g. every weight overridden to 0, or
      // a weight overridden with undefined) would make the average NaN, which
      // survives Math.round/Math.min/Math.max and would be returned and cached.
      // Report "no similarity" instead so the result stays within 0–100.
      if (totalWeight === 0 || !Number.isFinite(totalWeight)) {
        return 0;
      }

      // Calculate different types of similarity
      const exactMatch = calculateExactMatch(str1, str2);
      const substringMatch = calculateSubstringMatch(str1, str2);
      const wordOrderSim = calculateWordOrderSimilarity(str1, str2);
      const characterSim = calculateCharacterSimilarity(str1, str2);
      const semanticSim = calculateSemanticSimilarity(str1, str2);
      const jaroWinklerSim = calculateJaroWinklerSimilarity(norm1, norm2);
      const ngramSim = calculateNgramSimilarity(norm1, norm2);

      // Calculate weighted average with all similarity metrics
      const weightedScore =
        (exactMatch * finalConfig.exactMatchWeight +
          substringMatch * finalConfig.substringMatchWeight +
          wordOrderSim * finalConfig.wordOrderWeight +
          characterSim * finalConfig.characterSimilarityWeight +
          semanticSim * finalConfig.semanticWeight +
          jaroWinklerSim * finalConfig.jaroWinklerWeight +
          ngramSim * finalConfig.ngramWeight) /
        totalWeight;

      const roundedScore = Math.round(weightedScore * 100);
      const boundedScore = Math.min(100, Math.max(0, roundedScore));

      if (finalConfig.debug) {
        console.debug(
          `[Similarity] Similarity calculation for "${str1}" vs "${str2}":`,
        );
        console.debug(`[Similarity] Exact: ${(exactMatch * 100).toFixed(1)}%`);
        console.debug(
          `[Similarity] Substring: ${(substringMatch * 100).toFixed(1)}%`,
        );
        console.debug(
          `[Similarity] Word Order: ${(wordOrderSim * 100).toFixed(1)}%`,
        );
        console.debug(
          `[Similarity] Character: ${(characterSim * 100).toFixed(1)}%`,
        );
        console.debug(
          `[Similarity] Semantic: ${(semanticSim * 100).toFixed(1)}%`,
        );
        console.debug(
          `[Similarity] Jaro-Winkler: ${(jaroWinklerSim * 100).toFixed(1)}%`,
        );
        console.debug(`[Similarity] N-gram: ${(ngramSim * 100).toFixed(1)}%`);
        console.debug(`[Similarity] Final: ${boundedScore}%`);
      }

      if (cacheKey) {
        setCacheEntry(
          enhancedSimilarityCache,
          cacheKey,
          boundedScore,
          PAIR_SIMILARITY_CACHE_LIMIT,
        );
      }

      return boundedScore;
    }