|
| 1 | +// Copyright 2025 The Chromium Authors. All rights reserved. |
| 2 | +// Use of this source code is governed by a BSD-style license that can be |
| 3 | +// found in the LICENSE file. |
| 4 | + |
| 5 | +/** |
| 6 | + * Fuzzy model name matcher for finding the closest available model |
| 7 | + * when an exact match isn't found. |
| 8 | + */ |
| 9 | + |
/**
 * Compute the Levenshtein (edit) distance between two strings.
 *
 * The distance is the minimum number of single-character insertions,
 * deletions and substitutions needed to turn one string into the other.
 * Characters are compared with `charAt`, i.e. as UTF-16 code units.
 *
 * Only two rows of the dynamic-programming table are kept at any time, so
 * memory use grows with `a.length` instead of `a.length * b.length`.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The edit distance between `a` and `b` (0 when they are equal).
 */
function levenshteinDistance(a: string, b: string): number {
  // The distance to or from an empty string is the other string's length.
  if (a.length === 0) {
    return b.length;
  }
  if (b.length === 0) {
    return a.length;
  }

  const width = a.length + 1;

  // previousRow[j]: distance between the first (i - 1) chars of b and the
  // first j chars of a. It starts as row 0, i.e. 0, 1, 2, ..., a.length.
  let previousRow: number[] = Array.from({ length: width }, (_, j) => j);
  let currentRow: number[] = new Array<number>(width).fill(0);

  for (let i = 1; i <= b.length; i++) {
    // Turning i characters of b into an empty prefix of a costs i deletions.
    currentRow[0] = i;

    for (let j = 1; j <= a.length; j++) {
      if (b.charAt(i - 1) === a.charAt(j - 1)) {
        // Matching characters carry the diagonal cost over unchanged.
        currentRow[j] = previousRow[j - 1];
      } else {
        currentRow[j] =
          Math.min(
            previousRow[j - 1], // substitution
            currentRow[j - 1], // insertion
            previousRow[j] // deletion
          ) + 1;
      }
    }

    // Reuse the old row as scratch space for the next iteration.
    [previousRow, currentRow] = [currentRow, previousRow];
  }

  // After the final swap the last computed row lives in previousRow.
  return previousRow[a.length];
}
| 41 | + |
/**
 * Score how alike two strings are on a 0-1 scale.
 *
 * The score is one minus the Levenshtein distance divided by the length of
 * the longer string, so identical strings score 1. Two empty strings are
 * treated as identical.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns A similarity score between 0 and 1 inclusive.
 */
function similarity(a: string, b: string): number {
  const longest = a.length >= b.length ? a.length : b.length;
  if (longest === 0) {
    // Nothing to compare: avoid dividing by zero.
    return 1;
  }
  const edits = levenshteinDistance(a, b);
  return 1 - edits / longest;
}
| 50 | + |
/**
 * Reduce a model name to a canonical form for loose comparison.
 *
 * Steps, in order:
 *  1. lower-case the name;
 *  2. delete every `-` and `_`;
 *  3. strip a trailing eight-digit run (a date such as 2025-04-14 becomes
 *     20250414 once the dashes are gone), applied twice in a row;
 *  4. trim surrounding whitespace.
 *
 * Dots and other version digits are left in place, so "gpt-4.1" becomes
 * "gpt4.1".
 *
 * @param name - Raw model name.
 * @returns The normalized name.
 */
function normalizeModelName(name: string): string {
  const lowered = name.toLowerCase();

  // Separators go first, so the dashed-date pattern below only ever sees
  // an undashed run of digits.
  const withoutSeparators = lowered.replace(/[-_]/g, '');

  const withoutDate = withoutSeparators.replace(/\d{4}-?\d{2}-?\d{2}$/g, '');
  const withoutSecondDate = withoutDate.replace(/\d{8}$/g, '');

  return withoutSecondDate.trim();
}
| 62 | + |
/**
 * Report whether `target` is a prefix of `candidate`.
 *
 * Both names are lower-cased and have `.` and `_` rewritten to `-` before
 * the comparison, so "GPT_4.1" is a prefix of "gpt-4-1-mini".
 *
 * @param target - The name that should appear at the start.
 * @param candidate - The name being tested.
 * @returns True when the canonical candidate starts with the canonical target.
 */
function isPrefixMatch(target: string, candidate: string): boolean {
  const canonicalize = (value: string): string =>
    value.toLowerCase().replace(/[._]/g, '-');

  return canonicalize(candidate).startsWith(canonicalize(target));
}
| 71 | + |
/**
 * Find the closest matching model from available options
 *
 * Matching strategy (in priority order):
 * 1. Exact match - return immediately
 * 2. Prefix match - if target is prefix of an available model
 * 3. Normalized match - compare names after normalization (case, separators, date suffix)
 * 4. Levenshtein similarity - if similarity >= threshold, return best match
 *
 * This is a thin wrapper over findClosestModelWithInfo that keeps only the
 * matched name, so both entry points always agree.
 *
 * @param targetModel - The model name to find a match for
 * @param availableModels - Array of available model names
 * @param threshold - Minimum similarity score (0-1) for fuzzy matching (default: 0.5)
 * @returns The closest matching model name, or null if no good match found
 */
export function findClosestModel(
  targetModel: string,
  availableModels: readonly string[],
  threshold: number = 0.5
): string | null {
  return findClosestModelWithInfo(targetModel, availableModels, threshold).match;
}

/**
 * Outcome of a fuzzy model lookup, including how the match was found.
 */
export interface FuzzyMatchResult {
  /** The matched entry from the available models, or null when nothing qualified. */
  match: string | null;
  /** The matching stage that produced the result; 'none' when no stage matched. */
  matchType: 'exact' | 'prefix' | 'normalized' | 'similarity' | 'none';
  /**
   * Score for the match: 1 for exact and normalized matches, target length
   * divided by match length for prefix matches, the similarity score for
   * similarity matches, and 0 when there is no match.
   */
  score: number;
}

/**
 * Find closest model with detailed match info for logging
 *
 * Stages run in priority order and the first one that succeeds wins:
 * 1. Exact match - the target is present verbatim
 * 2. Prefix match - the target is a prefix of a model; the shortest such
 *    model is chosen (the earliest one on a length tie)
 * 3. Normalized match - first model whose normalized name equals the
 *    normalized target
 * 4. Levenshtein similarity - the model with the highest similarity between
 *    normalized names, provided the score is >= threshold
 *
 * @param targetModel - The model name to find a match for
 * @param availableModels - Array of available model names (not modified)
 * @param threshold - Minimum similarity score (0-1) for fuzzy matching (default: 0.5)
 * @returns The match, the stage that produced it, and its score
 */
export function findClosestModelWithInfo(
  targetModel: string,
  availableModels: readonly string[],
  threshold: number = 0.5
): FuzzyMatchResult {
  if (!targetModel || availableModels.length === 0) {
    return { match: null, matchType: 'none', score: 0 };
  }

  // 1. Exact match
  if (availableModels.includes(targetModel)) {
    return { match: targetModel, matchType: 'exact', score: 1 };
  }

  // 2. Prefix match - keep the shortest model that starts with the target.
  // The strict '<' keeps the earliest model when lengths tie.
  let shortestPrefixMatch: string | null = null;
  for (const model of availableModels) {
    if (
      isPrefixMatch(targetModel, model) &&
      (shortestPrefixMatch === null || model.length < shortestPrefixMatch.length)
    ) {
      shortestPrefixMatch = model;
    }
  }
  if (shortestPrefixMatch !== null) {
    return {
      match: shortestPrefixMatch,
      matchType: 'prefix',
      score: targetModel.length / shortestPrefixMatch.length,
    };
  }

  // Normalize each candidate once; stages 3 and 4 both need the result.
  const normalizedTarget = normalizeModelName(targetModel);
  const candidates = availableModels.map(model => ({
    model,
    normalized: normalizeModelName(model),
  }));

  // 3. Normalized match
  const normalizedMatch = candidates.find(
    candidate => candidate.normalized === normalizedTarget
  );
  if (normalizedMatch !== undefined) {
    return { match: normalizedMatch.model, matchType: 'normalized', score: 1 };
  }

  // 4. Levenshtein similarity on normalized names
  let bestMatch: string | null = null;
  let bestScore = 0;

  for (const { model, normalized } of candidates) {
    const score = similarity(normalizedTarget, normalized);
    // Strict '>' keeps the earliest model when scores tie.
    if (score > bestScore && score >= threshold) {
      bestScore = score;
      bestMatch = model;
    }
  }

  if (bestMatch !== null) {
    return { match: bestMatch, matchType: 'similarity', score: bestScore };
  }

  return { match: null, matchType: 'none', score: 0 };
}
0 commit comments