import { Song } from '../models/Song.js';
import { MusicFile } from '../models/MusicFile.js';
import { AudioMetadataService } from './audioMetadataService.js';

/** Outcome of comparing one music file against one song. */
export interface MatchResult {
  song: any;
  musicFile: any;
  /** Score in the range 0-1; higher means a more likely match. */
  confidence: number;
  matchType: 'exact' | 'fuzzy' | 'partial' | 'none';
  matchReason: string;
}

/** Tuning knobs accepted by the public matching methods. */
export interface MatchOptions {
  /** Results below this confidence are dropped (default 0.3; 0.7 when auto-linking). */
  minConfidence?: number;
  /**
   * Forwarded to the scoring routine, which does not currently consult it.
   * NOTE(review): intended semantics are not visible in this file — confirm before relying on it.
   */
  enableFuzzyMatching?: boolean;
  /**
   * Forwarded to the scoring routine, which does not currently consult it.
   * NOTE(review): intended semantics are not visible in this file — confirm before relying on it.
   */
  enablePartialMatching?: boolean;
  /** Maximum number of results returned per music file (default 5). */
  maxResults?: number;
}

/** Score produced by a single field comparison. */
interface FieldScore {
  score: number;
  reason: string;
}

/** A field score together with its weight in the final weighted average. */
interface WeightedScore extends FieldScore {
  weight: number;
}

/** Stop scanning the library once this many exact matches were collected. */
const MAX_EXACT_MATCHES = 3;

/** Number of pending link updates that triggers a bulk write. */
const LINK_BATCH_SIZE = 50;

/** Trailing ".ext" of a file name. */
const FILE_EXTENSION = /\.[^/.]+$/;

/** Forward or backward slash, so both POSIX and Windows style paths are split. */
const PATH_SEPARATOR = /[\/\\]/;

/** Version keywords removed when building name variations (whole words only). */
const VERSION_KEYWORDS = /\b(?:remix|mix|edit|vip|extended|radio|clean|dirty)\b/gi;

/** Creates the "nothing matched" field score. */
function noMatch(): FieldScore {
  return { score: 0, reason: '' };
}

/**
 * Matches uploaded music files against the songs stored in the library and
 * links/unlinks the two kinds of documents.
 */
export class SongMatchingService {
  private audioMetadataService: AudioMetadataService;

  constructor() {
    this.audioMetadataService = new AudioMetadataService();
  }

  /**
   * Match a single music file to songs in the library.
   *
   * @param musicFile Music file document to match.
   * @param options Matching options; see {@link MatchOptions} for defaults.
   * @returns Matches at or above `minConfidence`, best first, at most `maxResults` entries.
   */
  async matchMusicFileToSongs(
    musicFile: any,
    options: MatchOptions = {}
  ): Promise<MatchResult[]> {
    // Get all songs from the library
    const songs = await Song.find({});
    return this.rankSongsForFile(musicFile, songs, options);
  }

  /**
   * Match all music files that have no `songId` to songs in the library.
   *
   * @param options Matching options applied to every file.
   * @returns One entry per unmatched music file with its ranked matches (possibly empty).
   */
  async matchAllMusicFilesToSongs(
    options: MatchOptions = {}
  ): Promise<{ musicFile: any; matches: MatchResult[] }[]> {
    console.log('🔍 Starting song matching for all unmatched music files...');

    const musicFiles = await MusicFile.find({ songId: { $exists: false } });
    console.log(`📁 Found ${musicFiles.length} unmatched music files`);

    // Load the library once for the whole batch instead of once per file.
    const songs = musicFiles.length > 0 ? await Song.find({}) : [];

    const results: { musicFile: any; matches: MatchResult[] }[] = [];
    let processedCount = 0;

    for (const musicFile of musicFiles) {
      processedCount++;
      const progress = ((processedCount / musicFiles.length) * 100).toFixed(1);
      console.log(`🎵 [${progress}%] Matching: ${musicFile.originalName}`);

      const matches = this.rankSongsForFile(musicFile, songs, options);

      if (matches.length > 0) {
        const bestMatch = matches[0];
        console.log(`✅ Best match for ${musicFile.originalName}: ${bestMatch.song.title} (${(bestMatch.confidence * 100).toFixed(1)}% confidence)`);
      } else {
        console.log(`❌ No matches found for ${musicFile.originalName}`);
      }

      results.push({ musicFile, matches });
    }

    console.log(`🎉 Song matching completed for ${musicFiles.length} files`);
    return results;
  }

  /**
   * Match every music file without a `songId` and link it to its best match
   * when that match reaches `minConfidence` (default 0.7 here).
   *
   * Only the single best match per file is considered; `options.maxResults`
   * is ignored. Links are written in batches via bulk operations.
   *
   * @param options Matching options; partial matching defaults to `false` here.
   * @returns Number of files linked and number left unmatched.
   */
  async autoMatchAndLink(
    options: MatchOptions = {}
  ): Promise<{ linked: number; unmatched: number }> {
    console.log('🔗 Starting auto-match and link process...');

    const {
      minConfidence = 0.7, // Higher threshold for auto-linking
      enableFuzzyMatching = true,
      enablePartialMatching = false // Disable partial matching for auto-linking
    } = options;

    console.log(`⚙️ Auto-linking options: minConfidence=${minConfidence}, enableFuzzyMatching=${enableFuzzyMatching}, enablePartialMatching=${enablePartialMatching}`);

    const musicFiles = await MusicFile.find({ songId: { $exists: false } });
    console.log(`📁 Found ${musicFiles.length} unmatched music files to process`);

    // Load the library once for the whole run instead of once per file.
    const songs = musicFiles.length > 0 ? await Song.find({}) : [];

    let linked = 0;
    let unmatched = 0;
    let processedCount = 0;
    const updates: any[] = [];

    for (const musicFile of musicFiles) {
      processedCount++;
      const progress = ((processedCount / musicFiles.length) * 100).toFixed(1);
      console.log(`🔍 [${progress}%] Auto-matching: ${musicFile.originalName}`);

      const matches = this.rankSongsForFile(musicFile, songs, {
        minConfidence,
        enableFuzzyMatching,
        enablePartialMatching,
        maxResults: 1
      });

      if (matches.length > 0 && matches[0].confidence >= minConfidence) {
        // Link the music file to the best match
        console.log(`🔗 Linking ${musicFile.originalName} to ${matches[0].song.title} (${(matches[0].confidence * 100).toFixed(1)}% confidence)`);

        // Prepare batch updates
        updates.push({
          musicFileId: musicFile._id,
          songId: matches[0].song._id,
          s3Key: musicFile.s3Key,
          s3Url: musicFile.s3Url
        });
        linked++;
      } else {
        console.log(`❌ No suitable match found for ${musicFile.originalName} (best confidence: ${matches.length > 0 ? (matches[0].confidence * 100).toFixed(1) : 0}%)`);
        unmatched++;
      }

      // Flush a full batch
      if (updates.length >= LINK_BATCH_SIZE) {
        await this.processBatchUpdates(updates);
        updates.length = 0; // Clear the array
      }
    }

    // Flush whatever is left
    if (updates.length > 0) {
      await this.processBatchUpdates(updates);
    }

    console.log(`🎉 Auto-match and link completed:`);
    console.log(` Linked: ${linked} files`);
    console.log(` Unmatched: ${unmatched} files`);
    console.log(` Success rate: ${musicFiles.length > 0 ? ((linked / musicFiles.length) * 100).toFixed(1) : 0}%`);

    return { linked, unmatched };
  }

  /**
   * Score one music file against an already loaded list of songs.
   *
   * Scanning stops early once MAX_EXACT_MATCHES exact matches were collected,
   * so later songs are not examined in that case.
   */
  private rankSongsForFile(
    musicFile: any,
    songs: any[],
    options: MatchOptions
  ): MatchResult[] {
    const {
      minConfidence = 0.3,
      enableFuzzyMatching = true,
      enablePartialMatching = true,
      maxResults = 5
    } = options;

    const results: MatchResult[] = [];
    let exactMatches = 0;

    for (const song of songs) {
      const matchResult = this.calculateMatch(musicFile, song, {
        enableFuzzyMatching,
        enablePartialMatching
      });

      if (matchResult.confidence < minConfidence) {
        continue;
      }
      results.push(matchResult);

      // Early termination for exact matches
      if (matchResult.matchType === 'exact') {
        exactMatches++;
        if (exactMatches >= MAX_EXACT_MATCHES) {
          console.log(`🎯 Found ${exactMatches} exact matches, stopping early for performance`);
          break;
        }
      }
    }

    // Sort by confidence (highest first) and limit results
    return results
      .sort((a, b) => b.confidence - a.confidence)
      .slice(0, maxResults);
  }

  /** Builds the streaming URL from the S3_ENDPOINT and S3_BUCKET_NAME environment variables. */
  private buildStreamingUrl(s3Key: string): string {
    return `${process.env.S3_ENDPOINT}/${process.env.S3_BUCKET_NAME}/${s3Key}`;
  }

  /**
   * Write a batch of links with two bulk operations: one sets `songId` on the
   * music files, the other sets the `s3File.*` fields on the songs.
   *
   * @param updates Entries with `musicFileId`, `songId`, `s3Key` and `s3Url`.
   */
  private async processBatchUpdates(updates: any[]): Promise<void> {
    if (updates.length === 0) {
      return; // nothing to write; avoid issuing an empty bulk operation
    }

    console.log(`💾 Processing batch update for ${updates.length} files...`);

    const bulkOps = updates.map(update => ({
      updateOne: {
        filter: { _id: update.musicFileId },
        update: { $set: { songId: update.songId } }
      }
    }));

    const songBulkOps = updates.map(update => ({
      updateOne: {
        filter: { _id: update.songId },
        update: {
          $set: {
            's3File.musicFileId': update.musicFileId,
            's3File.s3Key': update.s3Key,
            's3File.s3Url': update.s3Url,
            's3File.streamingUrl': this.buildStreamingUrl(update.s3Key),
            's3File.hasS3File': true
          }
        }
      }
    }));

    // Execute bulk operations
    await Promise.all([
      MusicFile.bulkWrite(bulkOps),
      Song.bulkWrite(songBulkOps)
    ]);
  }

  /**
   * Link a music file to a song: stores the S3 details on the song and the
   * song id on the music file, saving both documents.
   */
  async linkMusicFileToSong(musicFile: any, song: any): Promise<void> {
    // Update the song with S3 file information
    song.s3File = {
      musicFileId: musicFile._id,
      s3Key: musicFile.s3Key,
      s3Url: musicFile.s3Url,
      streamingUrl: this.buildStreamingUrl(musicFile.s3Key),
      hasS3File: true
    };
    await song.save();

    // Also update the music file to reference the song
    musicFile.songId = song._id;
    await musicFile.save();
  }

  /**
   * Unlink a music file from a song: clears the song's S3 details and removes
   * the song reference from the music file that was linked to it.
   */
  async unlinkMusicFileFromSong(song: any): Promise<void> {
    // Remember the linked file BEFORE the S3 details are wiped; reading it
    // afterwards would always yield null and leave the music file linked.
    const linkedMusicFileId = song.s3File?.musicFileId;

    // Remove S3 file information from song
    song.s3File = {
      musicFileId: null,
      s3Key: null,
      s3Url: null,
      streamingUrl: null,
      hasS3File: false
    };
    await song.save();

    // Remove song reference from music file
    if (linkedMusicFileId) {
      const musicFile = await MusicFile.findById(linkedMusicFileId);
      if (musicFile) {
        musicFile.songId = undefined;
        await musicFile.save();
      }
    }
  }

  /**
   * Calculate match confidence between a music file and a song.
   *
   * A filename score >= 0.95 or a location score >= 0.9 is returned
   * immediately as an exact match. Otherwise the individual field scores are
   * combined into a weighted average (filename/location 3, title 2,
   * artist 1.5, album and duration 1) and classified as exact (>= 0.9),
   * fuzzy (>= 0.7), partial (>= 0.5) or none.
   *
   * @param options Accepted for interface stability; not consulted by the scoring below.
   */
  private calculateMatch(
    musicFile: any,
    song: any,
    options: { enableFuzzyMatching: boolean; enablePartialMatching: boolean }
  ): MatchResult {
    const scores: WeightedScore[] = [];
    // The weight is tied to the comparison that produced the score, not to
    // the wording of its reason text.
    const collect = (fieldScore: FieldScore, weight: number): void => {
      if (fieldScore.score > 0) {
        scores.push({ ...fieldScore, weight });
      }
    };

    // 1. Filename match (highest priority)
    const filenameScore = this.matchFilename(musicFile.originalName, song);
    if (filenameScore.score >= 0.95) {
      // Very high filename match: return immediately
      return {
        song,
        musicFile,
        confidence: filenameScore.score,
        matchType: 'exact',
        matchReason: filenameScore.reason
      };
    }
    collect(filenameScore, 3);

    // 2. Original location match
    if (song.location) {
      const locationScore = this.matchLocation(musicFile.originalName, song.location);
      if (locationScore.score >= 0.9) {
        // Very high location match: return immediately
        return {
          song,
          musicFile,
          confidence: locationScore.score,
          matchType: 'exact',
          matchReason: locationScore.reason
        };
      }
      collect(locationScore, 3);
    }

    // 3./4. Title and artist match (only if filename didn't match well)
    if (filenameScore.score < 0.8) {
      collect(this.matchTitle(musicFile.title, song.title), 2);
      collect(this.matchArtist(musicFile.artist, song.artist), 1.5);
    }

    // 5. Album match (lower priority)
    collect(this.matchAlbum(musicFile.album, song.album), 1);

    // 6. Duration match (if available)
    if (musicFile.duration && song.totalTime) {
      collect(this.matchDuration(musicFile.duration, song.totalTime), 1);
    }

    // Weighted average of everything that scored above zero
    let totalScore = 0;
    let totalWeight = 0;
    for (const entry of scores) {
      totalScore += entry.score * entry.weight;
      totalWeight += entry.weight;
    }
    const averageScore = totalWeight > 0 ? totalScore / totalWeight : 0;

    // Determine match type
    let matchType: 'exact' | 'fuzzy' | 'partial' | 'none' = 'none';
    let matchReason = 'No match found';

    if (averageScore >= 0.9) {
      matchType = 'exact';
      matchReason = 'Exact match found';
    } else if (averageScore >= 0.7) {
      matchType = 'fuzzy';
      matchReason = 'High confidence fuzzy match';
    } else if (averageScore >= 0.5) {
      matchType = 'partial';
      matchReason = 'Partial match';
    }

    return {
      song,
      musicFile,
      confidence: averageScore,
      matchType,
      matchReason
    };
  }

  /**
   * Build the distinct, non-empty cleaned variations of a raw name: as-is,
   * without "(...)" groups, without "[...]" groups, and without version
   * keywords. The raw text is transformed first and cleaned afterwards,
   * because cleaning strips the bracket characters these patterns rely on.
   */
  private buildVariations(raw: string): string[] {
    const candidates = [
      raw,
      raw.replace(/\([^)]*\)/g, ' '), // Remove parentheses content
      raw.replace(/\[[^\]]*\]/g, ' '), // Remove bracket content
      raw.replace(VERSION_KEYWORDS, ' ') // Remove common version keywords
    ];

    const variations = new Set<string>();
    for (const candidate of candidates) {
      const cleaned = this.cleanString(candidate);
      if (cleaned) {
        // An empty variation would "match" everything via includes('')
        variations.add(cleaned);
      }
    }
    return [...variations];
  }

  /**
   * Match a file name (extension ignored) against a song's title/artist.
   *
   * @returns Score 1.0 for exact title or artist/title pattern equality,
   *          0.95-0.85 for containment and variation matches, a scaled
   *          similarity for fuzzy matches, or score 0 when nothing matches.
   */
  private matchFilename(filename: string, song: any): { score: number; reason: string } {
    if (!filename || !song.title) return noMatch();

    const rawFilename = filename.replace(FILE_EXTENSION, ''); // Remove extension
    const cleanFilename = this.cleanString(rawFilename);
    const cleanTitle = this.cleanString(song.title);
    // Names consisting only of stripped characters clean to '' and would
    // otherwise pass every includes() check below.
    if (!cleanFilename || !cleanTitle) return noMatch();

    const cleanArtist = song.artist ? this.cleanString(song.artist) : '';

    // 1. Exact filename match (highest confidence)
    if (cleanFilename === cleanTitle) {
      return { score: 1.0, reason: 'Exact filename match' };
    }

    // 2. Artist - Title pattern matches
    if (cleanArtist) {
      const separators = [' - ', ' feat. ', ' ft. ', ' featuring ', ' & ', ' vs ', ' x '];
      const patterns = new Set<string>();
      for (const separator of separators) {
        // Patterns are cleaned like the filename, otherwise separators such
        // as "feat." or "&" could never compare equal to cleaned text.
        patterns.add(this.cleanString(`${cleanArtist}${separator}${cleanTitle}`));
        patterns.add(this.cleanString(`${cleanTitle}${separator}${cleanArtist}`));
      }

      for (const pattern of patterns) {
        if (cleanFilename === pattern) {
          return { score: 1.0, reason: 'Exact Artist-Title pattern match' };
        }
      }

      // Partial pattern matches
      for (const pattern of patterns) {
        if (cleanFilename.includes(pattern) || pattern.includes(cleanFilename)) {
          return { score: 0.95, reason: 'Partial Artist-Title pattern match' };
        }
      }
    }

    // 3. Filename contains title (common when filenames have extra info)
    if (cleanFilename.includes(cleanTitle) || cleanTitle.includes(cleanFilename)) {
      return { score: 0.9, reason: 'Filename contains title' };
    }

    // 4. Handle common filename variations
    for (const variation of this.buildVariations(rawFilename)) {
      if (variation === cleanTitle) {
        return { score: 0.95, reason: 'Filename variation matches title' };
      }
      if (variation.includes(cleanTitle) || cleanTitle.includes(variation)) {
        return { score: 0.85, reason: 'Filename variation contains title' };
      }
    }

    // 5. Handle title variations
    for (const titleVar of this.buildVariations(song.title)) {
      if (cleanFilename === titleVar) {
        return { score: 0.95, reason: 'Filename matches title variation' };
      }
      if (cleanFilename.includes(titleVar) || titleVar.includes(cleanFilename)) {
        return { score: 0.85, reason: 'Filename contains title variation' };
      }
    }

    // 6. Fuzzy match for similar filenames
    const similarity = this.calculateSimilarity(cleanFilename, cleanTitle);
    if (similarity > 0.8) {
      return { score: similarity * 0.8, reason: 'Fuzzy filename match' };
    }

    return noMatch();
  }

  /** Decodes URL-encoded sequences (e.g. %20); returns the input unchanged when it is malformed. */
  private safeDecode(value: string): string {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  }

  /**
   * Match a file name against the file name stored in a song's original
   * location path.
   *
   * Both inputs are URL-decoded, reduced to their last path segment and
   * stripped of the extension BEFORE cleaning, because cleaning removes the
   * separators and dots that those steps depend on.
   *
   * @returns Score 1.0 for equal base names, 0.95-0.8 for containment and
   *          variation matches, a scaled similarity for fuzzy matches, or
   *          score 0 when nothing matches.
   */
  private matchLocation(filename: string, location: string): { score: number; reason: string } {
    if (!filename || !location) return noMatch();

    const decodedFilename = this.safeDecode(filename);
    const decodedLocation = this.safeDecode(location);

    // Extract filename from location path (handle different path separators)
    const locationParts = decodedLocation.split(PATH_SEPARATOR);
    const rawLocationName =
      (locationParts[locationParts.length - 1] || decodedLocation).replace(FILE_EXTENSION, '');
    const rawFileName =
      (decodedFilename.split(PATH_SEPARATOR).pop() || decodedFilename).replace(FILE_EXTENSION, '');

    const locationFilenameNoExt = this.cleanString(rawLocationName);
    const filenameNoExt = this.cleanString(rawFileName);
    // Empty cleaned names would pass every includes() check below.
    if (!filenameNoExt || !locationFilenameNoExt) return noMatch();

    // 1. Exact filename match (highest confidence)
    if (filenameNoExt === locationFilenameNoExt) {
      return { score: 1.0, reason: 'Exact location filename match' };
    }

    // 2. Filename contains location filename or vice versa
    if (filenameNoExt.includes(locationFilenameNoExt) || locationFilenameNoExt.includes(filenameNoExt)) {
      return { score: 0.95, reason: 'Location filename contains match' };
    }

    // 3. Handle common filename variations in location
    for (const variation of this.buildVariations(rawLocationName)) {
      if (filenameNoExt === variation) {
        return { score: 0.95, reason: 'Filename matches location variation' };
      }
      if (filenameNoExt.includes(variation) || variation.includes(filenameNoExt)) {
        return { score: 0.9, reason: 'Filename contains location variation' };
      }
    }

    // 4. Check if any directory of the path contains the filename. Only this
    //    direction is tested: short folder names would otherwise be
    //    "contained" in many unrelated filenames.
    for (const pathPart of locationParts.slice(0, -1)) {
      const cleanPathPart = this.cleanString(pathPart.replace(FILE_EXTENSION, ''));
      if (cleanPathPart && cleanPathPart.includes(filenameNoExt)) {
        return { score: 0.8, reason: 'Path part contains filename' };
      }
    }

    // 5. Fuzzy match for similar filenames
    const similarity = this.calculateSimilarity(filenameNoExt, locationFilenameNoExt);
    if (similarity > 0.8) {
      return { score: similarity * 0.7, reason: 'Fuzzy location filename match' };
    }

    return noMatch();
  }

  /**
   * Match title: 1.0 when equal, 0.7 when one contains the other, otherwise
   * similarity * 0.8 when similarity exceeds 0.8.
   */
  private matchTitle(fileTitle: string, songTitle: string): { score: number; reason: string } {
    if (!fileTitle || !songTitle) return noMatch();

    const cleanFileTitle = this.cleanString(fileTitle);
    const cleanSongTitle = this.cleanString(songTitle);
    if (!cleanFileTitle || !cleanSongTitle) return noMatch();

    // Exact match
    if (cleanFileTitle === cleanSongTitle) {
      return { score: 1.0, reason: 'Exact title match' };
    }

    // Contains match
    if (cleanFileTitle.includes(cleanSongTitle) || cleanSongTitle.includes(cleanFileTitle)) {
      return { score: 0.7, reason: 'Title contains match' };
    }

    // Fuzzy match (simple similarity)
    const similarity = this.calculateSimilarity(cleanFileTitle, cleanSongTitle);
    if (similarity > 0.8) {
      return { score: similarity * 0.8, reason: 'Fuzzy title match' };
    }

    return noMatch();
  }

  /**
   * Match artist: 0.9 when equal, 0.6 when one contains the other, otherwise
   * similarity * 0.6 when similarity exceeds 0.8.
   */
  private matchArtist(fileArtist: string, songArtist: string): { score: number; reason: string } {
    if (!fileArtist || !songArtist) return noMatch();

    const cleanFileArtist = this.cleanString(fileArtist);
    const cleanSongArtist = this.cleanString(songArtist);
    if (!cleanFileArtist || !cleanSongArtist) return noMatch();

    // Exact match
    if (cleanFileArtist === cleanSongArtist) {
      return { score: 0.9, reason: 'Exact artist match' };
    }

    // Contains match
    if (cleanFileArtist.includes(cleanSongArtist) || cleanSongArtist.includes(cleanFileArtist)) {
      return { score: 0.6, reason: 'Artist contains match' };
    }

    // Fuzzy match
    const similarity = this.calculateSimilarity(cleanFileArtist, cleanSongArtist);
    if (similarity > 0.8) {
      return { score: similarity * 0.6, reason: 'Fuzzy artist match' };
    }

    return noMatch();
  }

  /** Match album: 0.8 when equal, 0.5 when one contains the other. */
  private matchAlbum(fileAlbum: string, songAlbum: string): { score: number; reason: string } {
    if (!fileAlbum || !songAlbum) return noMatch();

    const cleanFileAlbum = this.cleanString(fileAlbum);
    const cleanSongAlbum = this.cleanString(songAlbum);
    if (!cleanFileAlbum || !cleanSongAlbum) return noMatch();

    // Exact match
    if (cleanFileAlbum === cleanSongAlbum) {
      return { score: 0.8, reason: 'Exact album match' };
    }

    // Contains match
    if (cleanFileAlbum.includes(cleanSongAlbum) || cleanSongAlbum.includes(cleanFileAlbum)) {
      return { score: 0.5, reason: 'Album contains match' };
    }

    return noMatch();
  }

  /**
   * Match duration: scores up to 0.6, decreasing linearly with the
   * difference, and 0 once the difference exceeds the 2000 tolerance.
   *
   * NOTE(review): the song duration is multiplied by 1000 before comparing,
   * which assumes `fileDuration` is in milliseconds and `songDuration` in
   * whole seconds — confirm against the MusicFile and Song schemas.
   */
  private matchDuration(fileDuration: number, songDuration: string): { score: number; reason: string } {
    if (!fileDuration || !songDuration) return noMatch();

    const songDurationMs = parseInt(String(songDuration), 10) * 1000; // Convert to milliseconds
    const difference = Math.abs(fileDuration - songDurationMs);
    const tolerance = 2000; // 2 second tolerance

    // A non-numeric songDuration yields NaN, which fails this comparison.
    if (difference <= tolerance) {
      const score = 1 - (difference / tolerance);
      return { score: score * 0.6, reason: 'Duration match' };
    }

    return noMatch();
  }

  /**
   * Clean string for comparison: lower-case, keep only letters, digits,
   * underscores, whitespace and hyphens, and collapse whitespace.
   *
   * Letters and digits are matched with Unicode properties, so non-Latin
   * names are preserved; for ASCII input the result equals the `\w` based
   * filter.
   */
  private cleanString(str: string): string {
    return str
      .normalize('NFC') // composed and decomposed spellings compare equal
      .toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}_\s-]/gu, '') // Remove special characters except spaces and hyphens
      .replace(/\s+/g, ' ') // Normalize whitespace
      .trim();
  }

  /**
   * Calculate simple string similarity (0-1): one minus the edit distance
   * relative to the longer string. Two empty strings count as identical.
   */
  private calculateSimilarity(str1: string, str2: string): number {
    const longer = str1.length > str2.length ? str1 : str2;
    const shorter = str1.length > str2.length ? str2 : str1;

    if (longer.length === 0) return 1.0;

    const editDistance = this.levenshteinDistance(longer, shorter);
    return (longer.length - editDistance) / longer.length;
  }

  /**
   * Calculate Levenshtein distance (insertions, deletions and substitutions
   * all cost 1). Only two rows of the distance table are kept in memory.
   */
  private levenshteinDistance(str1: string, str2: string): number {
    // Row 0: turning the empty prefix of str2 into each prefix of str1.
    let previousRow: number[] = Array.from({ length: str1.length + 1 }, (_, j) => j);

    for (let i = 1; i <= str2.length; i++) {
      const currentRow: number[] = [i];
      for (let j = 1; j <= str1.length; j++) {
        if (str2.charAt(i - 1) === str1.charAt(j - 1)) {
          currentRow[j] = previousRow[j - 1];
        } else {
          currentRow[j] = Math.min(
            previousRow[j - 1] + 1, // substitution
            currentRow[j - 1] + 1, // insertion
            previousRow[j] + 1 // deletion
          );
        }
      }
      previousRow = currentRow;
    }

    return previousRow[str1.length];
  }

  /** Get music files that have no `songId`. */
  async getUnmatchedMusicFiles(): Promise<any[]> {
    return await MusicFile.find({ songId: { $exists: false } });
  }

  /** Get music files that have a `songId`, with the referenced song populated. */
  async getMatchedMusicFiles(): Promise<any[]> {
    return await MusicFile.find({ songId: { $exists: true } }).populate('songId');
  }

  /** Get songs whose `s3File.hasS3File` is not `true`. */
  async getSongsWithoutMusicFiles(): Promise<any[]> {
    return await Song.find({ 's3File.hasS3File': { $ne: true } });
  }

  /** Get songs whose `s3File.hasS3File` is `true`, with the linked music file populated. */
  async getSongsWithMusicFiles(): Promise<any[]> {
    return await Song.find({ 's3File.hasS3File': true }).populate('s3File.musicFileId');
  }
}