/**
 * @param primaryWords - First array of words to compare
 * @param comparisonWords - Second array of words to compare
 * @returns Order similarity score between 0 and 1
 */
/**
 * Scores how similarly two word lists are ordered.
 *
 * The result is a weighted sum of three components, each normalised by the
 * length of the longer list:
 *  - 50%: length of the longest common subsequence of the two lists
 *    (delegated to `calculateLongestCommonSubsequenceLength`),
 *  - 30%: positional agreement over the indices both lists share — full
 *    credit for an identical word at the same index, partial credit
 *    (`1 - distance / maxLength`) when the word first occurs elsewhere in
 *    `comparisonWords`,
 *  - 20%: coverage, i.e. how many entries of `primaryWords` (duplicates
 *    counted individually) also occur in `comparisonWords`.
 *
 * @param primaryWords - First array of words to compare.
 * @param comparisonWords - Second array of words to compare.
 * @returns `0` when either list is empty or they share no word; otherwise
 * the weighted score. The score stays within 0..1 provided the LCS helper
 * returns a length no greater than the longer list (assumed; the helper is
 * not defined in this block).
 */
export function calculateWordOrderSimilarity(
  primaryWords: string[],
  comparisonWords: string[],
): number {
  // An empty side cannot share any ordering with the other.
  if (primaryWords.length === 0 || comparisonWords.length === 0) return 0;

  // Index comparisonWords once: word -> position of its FIRST occurrence.
  // This replaces the repeated `includes` / `indexOf` scans with O(1) lookups
  // while yielding the same membership answers and the same positions.
  const firstIndexInComparison = new Map<string, number>();
  comparisonWords.forEach((word, index) => {
    if (!firstIndexInComparison.has(word)) {
      firstIndexInComparison.set(word, index);
    }
  });

  // Number of primary entries that also appear in the comparison list.
  const commonWordCount = primaryWords.filter((word) =>
    firstIndexInComparison.has(word),
  ).length;

  // Without any shared word there is no order to compare.
  if (commonWordCount === 0) return 0;

  // Longest run of words appearing in the same relative order in both lists.
  const longestCommonSubsequenceLength =
    calculateLongestCommonSubsequenceLength(primaryWords, comparisonWords);

  const maxLength = Math.max(primaryWords.length, comparisonWords.length);
  const minLength = Math.min(primaryWords.length, comparisonWords.length);

  // Higher LCS relative to the longer list means better order preservation.
  const lcsScore = longestCommonSubsequenceLength / maxLength;

  // Positional agreement: only indices present in both lists are inspected.
  let positionScore = 0;
  primaryWords.slice(0, minLength).forEach((word, index) => {
    if (word === comparisonWords[index]) {
      // Same word at the same index: full credit.
      positionScore += 1;
      return;
    }
    const firstIndex = firstIndexInComparison.get(word);
    if (firstIndex !== undefined) {
      // Word exists elsewhere: credit shrinks with the distance to its
      // first occurrence in comparisonWords.
      const distance = Math.abs(index - firstIndex);
      positionScore += Math.max(0, 1 - distance / maxLength);
    }
  });
  positionScore /= maxLength;

  // Share of the longer list's length covered by common primary words.
  const coverage = commonWordCount / maxLength;

  // LCS matters most for order, then position, then coverage.
  return lcsScore * 0.5 + positionScore * 0.3 + coverage * 0.2;
}
// calculateWordOrderSimilarity: calculates word order similarity using the longest common subsequence. It combines order preservation (50%), position proximity (30%), and word coverage (20%).