// File: rekordbox-viewer/packages/backend/src/services/songMatchingService.ts
// Listing metadata from the file viewer: 724 lines, 23 KiB, TypeScript

import { Song } from '../models/Song.js';
import { MusicFile } from '../models/MusicFile.js';
import { AudioMetadataService } from './audioMetadataService.js';
/**
 * Outcome of comparing one music file with one library song.
 */
export interface MatchResult {
  /** The library song that was compared. */
  song: any;
  /** The music file that was compared. */
  musicFile: any;
  /** Combined score; higher means a more trustworthy match. */
  confidence: number;
  /** Band the confidence falls into. */
  matchType: 'exact' | 'fuzzy' | 'partial' | 'none';
  /** Short human-readable explanation of why the score was awarded. */
  matchReason: string;
}
/**
 * Tuning knobs accepted by the matching entry points. Every field is optional;
 * each entry point applies its own defaults.
 */
export interface MatchOptions {
  /** Candidates scoring below this confidence are dropped from the results. */
  minConfidence?: number;
  /**
   * Forwarded to the scoring step.
   * NOTE(review): the scorer does not currently consult this flag.
   */
  enableFuzzyMatching?: boolean;
  /**
   * Forwarded to the scoring step.
   * NOTE(review): the scorer does not currently consult this flag.
   */
  enablePartialMatching?: boolean;
  /** Upper bound on the number of matches returned for one music file. */
  maxResults?: number;
}
/** A single comparison score (0–1) together with the reason it was awarded. */
interface ScoredReason {
  score: number;
  reason: string;
}

/**
 * Matches uploaded music files against library songs and links them together.
 *
 * Matching compares the file's original name and tags with the song's title,
 * artist, album, original location and duration, and combines the individual
 * scores into one weighted confidence value.
 */
export class SongMatchingService {
  /** Relative weight of each comparison when the scores are averaged. */
  private static readonly SCORE_WEIGHTS = {
    filename: 3,
    location: 3,
    title: 2,
    artist: 1.5,
    album: 1,
    duration: 1
  } as const;

  /** Version keywords that are stripped (as whole words) to build name variations. */
  private static readonly VERSION_KEYWORDS = /\b(?:remix|mix|edit|vip|extended|radio|clean|dirty)\b/gi;

  /** Words that commonly join artist and title inside a file name. */
  private static readonly NAME_CONNECTORS = ['-', 'feat.', 'ft.', 'featuring', '&', 'vs', 'x'];

  /** Scanning stops once this many exact matches have been collected for one file. */
  private static readonly MAX_EXACT_MATCHES = 3;

  /** Number of pending links written per bulk operation. */
  private static readonly LINK_BATCH_SIZE = 50;

  private audioMetadataService: AudioMetadataService;

  constructor() {
    this.audioMetadataService = new AudioMetadataService();
  }

  /**
   * Match a single music file against every song in the library.
   *
   * @param musicFile - Music file document to match.
   * @param options - Thresholds and limits; see {@link MatchOptions}.
   * @returns Matches at or above `minConfidence`, best first, capped at `maxResults`.
   */
  async matchMusicFileToSongs(
    musicFile: any,
    options: MatchOptions = {}
  ): Promise<MatchResult[]> {
    const songs = await Song.find({});
    return this.rankSongsForFile(musicFile, songs, options);
  }

  /**
   * Score one music file against an already-loaded list of songs.
   *
   * Kept separate from the database query so batch operations can load the
   * library once instead of once per music file.
   */
  private rankSongsForFile(
    musicFile: any,
    songs: any[],
    options: MatchOptions
  ): MatchResult[] {
    const {
      minConfidence = 0.3,
      enableFuzzyMatching = true,
      enablePartialMatching = true,
      maxResults = 5
    } = options;

    const results: MatchResult[] = [];
    let exactMatches = 0;

    for (const song of songs) {
      const candidate = this.calculateMatch(musicFile, song, {
        enableFuzzyMatching,
        enablePartialMatching
      });
      if (candidate.confidence < minConfidence) {
        continue;
      }
      results.push(candidate);

      // Early termination: a handful of exact matches is enough, so the rest
      // of the library is not scanned.
      if (candidate.matchType === 'exact') {
        exactMatches++;
        if (exactMatches >= SongMatchingService.MAX_EXACT_MATCHES) {
          console.log(`🎯 Found ${exactMatches} exact matches, stopping early for performance`);
          break;
        }
      }
    }

    // Highest confidence first, then trim to the requested size.
    return results
      .sort((a, b) => b.confidence - a.confidence)
      .slice(0, maxResults);
  }

  /**
   * Match every music file that has no `songId` against the library.
   *
   * @returns One entry per unmatched music file with its ranked matches.
   */
  async matchAllMusicFilesToSongs(
    options: MatchOptions = {}
  ): Promise<{ musicFile: any; matches: MatchResult[] }[]> {
    console.log('🔍 Starting song matching for all unmatched music files...');
    const musicFiles = await MusicFile.find({ songId: { $exists: false } });
    console.log(`📁 Found ${musicFiles.length} unmatched music files`);

    // Load the library once for the whole run rather than once per file.
    const songs = musicFiles.length > 0 ? await Song.find({}) : [];

    const results: { musicFile: any; matches: MatchResult[] }[] = [];
    let processedCount = 0;
    for (const musicFile of musicFiles) {
      processedCount++;
      const progress = ((processedCount / musicFiles.length) * 100).toFixed(1);
      console.log(`🎵 [${progress}%] Matching: ${musicFile.originalName}`);

      const matches = this.rankSongsForFile(musicFile, songs, options);
      if (matches.length > 0) {
        const bestMatch = matches[0];
        console.log(`✅ Best match for ${musicFile.originalName}: ${bestMatch.song.title} (${(bestMatch.confidence * 100).toFixed(1)}% confidence)`);
      } else {
        console.log(`❌ No matches found for ${musicFile.originalName}`);
      }
      results.push({ musicFile, matches });
    }

    console.log(`🎉 Song matching completed for ${musicFiles.length} files`);
    return results;
  }

  /**
   * Match every unlinked music file and link it to its best match when that
   * match reaches `minConfidence` (default 0.7 here).
   *
   * @returns How many files were linked and how many were left unmatched.
   */
  async autoMatchAndLink(
    options: MatchOptions = {}
  ): Promise<{ linked: number; unmatched: number }> {
    console.log('🔗 Starting auto-match and link process...');
    const {
      minConfidence = 0.7, // Higher threshold for auto-linking
      enableFuzzyMatching = true,
      enablePartialMatching = false // Disable partial matching for auto-linking
    } = options;
    console.log(`⚙️ Auto-linking options: minConfidence=${minConfidence}, enableFuzzyMatching=${enableFuzzyMatching}, enablePartialMatching=${enablePartialMatching}`);

    const musicFiles = await MusicFile.find({ songId: { $exists: false } });
    console.log(`📁 Found ${musicFiles.length} unmatched music files to process`);

    // Load the library once for the whole run rather than once per file.
    const songs = musicFiles.length > 0 ? await Song.find({}) : [];

    let linked = 0;
    let unmatched = 0;
    let processedCount = 0;
    let pending: any[] = [];

    for (const musicFile of musicFiles) {
      processedCount++;
      const progress = ((processedCount / musicFiles.length) * 100).toFixed(1);
      console.log(`🔍 [${progress}%] Auto-matching: ${musicFile.originalName}`);

      const [best] = this.rankSongsForFile(musicFile, songs, {
        minConfidence,
        enableFuzzyMatching,
        enablePartialMatching,
        maxResults: 1
      });

      if (best && best.confidence >= minConfidence) {
        console.log(`🔗 Linking ${musicFile.originalName} to ${best.song.title} (${(best.confidence * 100).toFixed(1)}% confidence)`);
        pending.push({
          musicFileId: musicFile._id,
          songId: best.song._id,
          s3Key: musicFile.s3Key,
          s3Url: musicFile.s3Url
        });
        linked++;
      } else {
        // Candidates below the threshold are filtered out before this point,
        // so no "best confidence" is available to report here.
        console.log(`❌ No suitable match found for ${musicFile.originalName} (no candidate reached ${(minConfidence * 100).toFixed(1)}% confidence)`);
        unmatched++;
      }

      // Flush a full batch.
      if (pending.length >= SongMatchingService.LINK_BATCH_SIZE) {
        await this.processBatchUpdates(pending);
        pending = [];
      }
    }

    // Flush whatever is left.
    if (pending.length > 0) {
      await this.processBatchUpdates(pending);
    }

    console.log(`🎉 Auto-match and link completed:`);
    console.log(` Linked: ${linked} files`);
    console.log(` Unmatched: ${unmatched} files`);
    console.log(` Success rate: ${musicFiles.length > 0 ? ((linked / musicFiles.length) * 100).toFixed(1) : 0}%`);
    return { linked, unmatched };
  }

  /**
   * Write a batch of links: set `songId` on each music file and the `s3File`
   * fields on each song, using one bulk write per collection.
   *
   * @param updates - Entries of `{ musicFileId, songId, s3Key, s3Url }`.
   */
  private async processBatchUpdates(updates: any[]): Promise<void> {
    if (updates.length === 0) {
      return; // Nothing to write; avoid issuing an empty bulk operation.
    }
    console.log(`💾 Processing batch update for ${updates.length} files...`);

    const bulkOps = updates.map(update => ({
      updateOne: {
        filter: { _id: update.musicFileId },
        update: { $set: { songId: update.songId } }
      }
    }));
    const songBulkOps = updates.map(update => ({
      updateOne: {
        filter: { _id: update.songId },
        update: {
          $set: {
            's3File.musicFileId': update.musicFileId,
            's3File.s3Key': update.s3Key,
            's3File.s3Url': update.s3Url,
            's3File.streamingUrl': this.buildStreamingUrl(update.s3Key),
            's3File.hasS3File': true
          }
        }
      }
    }));

    // The two collections are written concurrently.
    await Promise.all([
      MusicFile.bulkWrite(bulkOps),
      Song.bulkWrite(songBulkOps)
    ]);
  }

  /** Build the streaming URL for an S3 key from `S3_ENDPOINT` and `S3_BUCKET_NAME`. */
  private buildStreamingUrl(s3Key: unknown): string {
    return `${process.env.S3_ENDPOINT}/${process.env.S3_BUCKET_NAME}/${s3Key}`;
  }

  /**
   * Link a music file to a song: the song receives the file's S3 details and
   * the music file receives the song's id. Both documents are saved.
   */
  async linkMusicFileToSong(musicFile: any, song: any): Promise<void> {
    song.s3File = {
      musicFileId: musicFile._id,
      s3Key: musicFile.s3Key,
      s3Url: musicFile.s3Url,
      streamingUrl: this.buildStreamingUrl(musicFile.s3Key),
      hasS3File: true
    };
    await song.save();

    musicFile.songId = song._id;
    await musicFile.save();
  }

  /**
   * Unlink a song from its music file: the song's `s3File` fields are cleared
   * and the referenced music file (if it still exists) loses its `songId`.
   */
  async unlinkMusicFileFromSong(song: any): Promise<void> {
    // Remember the linked file BEFORE clearing the song; reading it afterwards
    // would always yield null and leave the music file pointing at the song.
    const linkedMusicFileId = song.s3File?.musicFileId;

    song.s3File = {
      musicFileId: null,
      s3Key: null,
      s3Url: null,
      streamingUrl: null,
      hasS3File: false
    };
    await song.save();

    if (linkedMusicFileId) {
      const musicFile = await MusicFile.findById(linkedMusicFileId);
      if (musicFile) {
        musicFile.songId = undefined;
        await musicFile.save();
      }
    }
  }

  /**
   * Calculate the match confidence between a music file and a song.
   *
   * A file-name score of at least 0.95 or a location score of at least 0.9 is
   * decisive and returned immediately as an exact match. Otherwise the
   * individual scores are combined into a weighted average
   * (see `SCORE_WEIGHTS`) and classified: >= 0.9 exact, >= 0.7 fuzzy,
   * >= 0.5 partial, else none.
   *
   * NOTE(review): `options` is accepted for interface compatibility but is not
   * consulted; the intended meaning of the two flags (confidence band vs.
   * comparison technique) should be confirmed before they are implemented.
   */
  private calculateMatch(
    musicFile: any,
    song: any,
    options: { enableFuzzyMatching: boolean; enablePartialMatching: boolean }
  ): MatchResult {
    const weights = SongMatchingService.SCORE_WEIGHTS;
    const contributions: { score: number; weight: number }[] = [];

    // The weight is attached where the score is produced, so it cannot be
    // mis-derived from the wording or capitalisation of the reason text.
    const contribute = (entry: ScoredReason, weight: number): void => {
      if (entry.score > 0) {
        contributions.push({ score: entry.score, weight });
      }
    };
    const decisive = (entry: ScoredReason): MatchResult => ({
      song,
      musicFile,
      confidence: entry.score,
      matchType: 'exact',
      matchReason: entry.reason
    });

    // 1. File name (highest priority): a very high score is treated as 1:1.
    const filenameScore = this.matchFilename(musicFile.originalName, song);
    if (filenameScore.score >= 0.95) {
      return decisive(filenameScore);
    }
    contribute(filenameScore, weights.filename);

    // 2. Original location recorded on the song.
    if (song.location) {
      const locationScore = this.matchLocation(musicFile.originalName, song.location);
      if (locationScore.score >= 0.9) {
        return decisive(locationScore);
      }
      contribute(locationScore, weights.location);
    }

    // 3 + 4. Title and artist tags, only when the file name did not match well.
    if (filenameScore.score < 0.8) {
      contribute(this.matchTitle(musicFile.title, song.title), weights.title);
      contribute(this.matchArtist(musicFile.artist, song.artist), weights.artist);
    }

    // 5. Album (lower priority).
    contribute(this.matchAlbum(musicFile.album, song.album), weights.album);

    // 6. Duration, when both sides have one (acts as a tiebreaker).
    if (musicFile.duration && song.totalTime) {
      contribute(this.matchDuration(musicFile.duration, song.totalTime), weights.duration);
    }

    let weightedSum = 0;
    let totalWeight = 0;
    for (const { score, weight } of contributions) {
      weightedSum += score * weight;
      totalWeight += weight;
    }
    const averageScore = totalWeight > 0 ? weightedSum / totalWeight : 0;

    let matchType: 'exact' | 'fuzzy' | 'partial' | 'none' = 'none';
    let matchReason = 'No match found';
    if (averageScore >= 0.9) {
      matchType = 'exact';
      matchReason = 'Exact match found';
    } else if (averageScore >= 0.7) {
      matchType = 'fuzzy';
      matchReason = 'High confidence fuzzy match';
    } else if (averageScore >= 0.5) {
      matchType = 'partial';
      matchReason = 'Partial match';
    }

    return {
      song,
      musicFile,
      confidence: averageScore,
      matchType,
      matchReason
    };
  }

  /**
   * Score how well a file name matches a song's title (and artist).
   *
   * Checks, in order: exact title, "artist <connector> title" patterns,
   * containment, variations with parenthesised/bracketed text or version
   * keywords removed, and finally edit-distance similarity.
   */
  private matchFilename(filename: string, song: any): ScoredReason {
    if (!filename || !song?.title) return { score: 0, reason: '' };

    const rawStem = this.stripExtension(String(filename));
    const rawTitle = String(song.title);
    const cleanFilename = this.cleanString(rawStem);
    const cleanTitle = this.cleanString(rawTitle);
    // An empty string is "contained" in every string, so names that clean to
    // nothing must not take part in containment checks.
    if (!cleanFilename || !cleanTitle) return { score: 0, reason: '' };
    const cleanArtist = song.artist ? this.cleanString(String(song.artist)) : '';

    // 1. Exact file name match (highest confidence).
    if (cleanFilename === cleanTitle) {
      return { score: 1.0, reason: 'Exact filename match' };
    }

    // 2. Artist/title patterns. Each pattern is cleaned like the file name so
    //    connectors containing punctuation ("feat.", "&") remain matchable.
    if (cleanArtist) {
      const patterns: string[] = [];
      for (const connector of SongMatchingService.NAME_CONNECTORS) {
        patterns.push(this.cleanString(`${cleanArtist} ${connector} ${cleanTitle}`));
        patterns.push(this.cleanString(`${cleanTitle} ${connector} ${cleanArtist}`));
      }
      if (patterns.includes(cleanFilename)) {
        return { score: 1.0, reason: 'Exact Artist-Title pattern match' };
      }
      if (patterns.some(pattern => this.eitherContains(cleanFilename, pattern))) {
        return { score: 0.95, reason: 'Partial Artist-Title pattern match' };
      }
    }

    // 3. File name contains the title or vice versa.
    if (this.eitherContains(cleanFilename, cleanTitle)) {
      return { score: 0.9, reason: 'Filename contains title' };
    }

    // 4. Variations of the file name.
    for (const variation of this.buildVariations(rawStem)) {
      if (variation === cleanTitle) {
        return { score: 0.95, reason: 'Filename variation matches title' };
      }
      if (this.eitherContains(variation, cleanTitle)) {
        return { score: 0.85, reason: 'Filename variation contains title' };
      }
    }

    // 5. Variations of the title.
    for (const titleVar of this.buildVariations(rawTitle)) {
      if (cleanFilename === titleVar) {
        return { score: 0.95, reason: 'Filename matches title variation' };
      }
      if (this.eitherContains(cleanFilename, titleVar)) {
        return { score: 0.85, reason: 'Filename contains title variation' };
      }
    }

    // 6. Fuzzy match for similar names.
    const similarity = this.calculateSimilarity(cleanFilename, cleanTitle);
    if (similarity > 0.8) {
      return { score: similarity * 0.8, reason: 'Fuzzy filename match' };
    }
    return { score: 0, reason: '' };
  }

  /**
   * Score how well a file name matches the file name inside a song's stored
   * location (a path or URL, possibly percent-encoded).
   *
   * The location is decoded and split into segments BEFORE cleaning, because
   * cleaning removes the separators and dots needed to find the base name and
   * its extension.
   */
  private matchLocation(filename: string, location: string): ScoredReason {
    if (!filename || !location) return { score: 0, reason: '' };

    const fileSegments = this.splitPath(this.safeDecode(String(filename)));
    const locationSegments = this.splitPath(this.safeDecode(String(location)));
    const rawFileStem = this.stripExtension(fileSegments[fileSegments.length - 1] ?? '');
    const rawLocationStem = this.stripExtension(locationSegments[locationSegments.length - 1] ?? '');
    const fileStem = this.cleanString(rawFileStem);
    const locationStem = this.cleanString(rawLocationStem);
    if (!fileStem || !locationStem) return { score: 0, reason: '' };

    // 1. Exact base-name match (highest confidence).
    if (fileStem === locationStem) {
      return { score: 1.0, reason: 'Exact location filename match' };
    }

    // 2. One base name contains the other.
    if (this.eitherContains(fileStem, locationStem)) {
      return { score: 0.95, reason: 'Location filename contains match' };
    }

    // 3. Variations of the location's base name.
    for (const variation of this.buildVariations(rawLocationStem)) {
      if (fileStem === variation) {
        return { score: 0.95, reason: 'Filename matches location variation' };
      }
      if (this.eitherContains(fileStem, variation)) {
        return { score: 0.9, reason: 'Filename contains location variation' };
      }
    }

    // 4. A directory in the path contains the file name as whole words.
    //    Word alignment keeps short names from matching inside unrelated
    //    folder names (e.g. "go" inside "diego").
    const paddedStem = ` ${fileStem} `;
    for (const directory of locationSegments.slice(0, -1)) {
      const cleanDirectory = this.cleanString(directory);
      if (cleanDirectory && ` ${cleanDirectory} `.includes(paddedStem)) {
        return { score: 0.8, reason: 'Path part contains filename' };
      }
    }

    // 5. Fuzzy match for similar base names.
    const similarity = this.calculateSimilarity(fileStem, locationStem);
    if (similarity > 0.8) {
      return { score: similarity * 0.7, reason: 'Fuzzy location filename match' };
    }
    return { score: 0, reason: '' };
  }

  /**
   * Score a title tag against a song title: 1.0 exact, 0.7 containment,
   * otherwise 0.8 x similarity when similarity exceeds 0.8.
   */
  private matchTitle(fileTitle: string, songTitle: string): ScoredReason {
    if (!fileTitle || !songTitle) return { score: 0, reason: '' };
    const cleanFileTitle = this.cleanString(fileTitle);
    const cleanSongTitle = this.cleanString(songTitle);
    if (!cleanFileTitle || !cleanSongTitle) return { score: 0, reason: '' };

    if (cleanFileTitle === cleanSongTitle) {
      return { score: 1.0, reason: 'Exact title match' };
    }
    if (this.eitherContains(cleanFileTitle, cleanSongTitle)) {
      return { score: 0.7, reason: 'Title contains match' };
    }
    const similarity = this.calculateSimilarity(cleanFileTitle, cleanSongTitle);
    if (similarity > 0.8) {
      return { score: similarity * 0.8, reason: 'Fuzzy title match' };
    }
    return { score: 0, reason: '' };
  }

  /**
   * Score an artist tag against a song artist: 0.9 exact, 0.6 containment,
   * otherwise 0.6 x similarity when similarity exceeds 0.8.
   */
  private matchArtist(fileArtist: string, songArtist: string): ScoredReason {
    if (!fileArtist || !songArtist) return { score: 0, reason: '' };
    const cleanFileArtist = this.cleanString(fileArtist);
    const cleanSongArtist = this.cleanString(songArtist);
    if (!cleanFileArtist || !cleanSongArtist) return { score: 0, reason: '' };

    if (cleanFileArtist === cleanSongArtist) {
      return { score: 0.9, reason: 'Exact artist match' };
    }
    if (this.eitherContains(cleanFileArtist, cleanSongArtist)) {
      return { score: 0.6, reason: 'Artist contains match' };
    }
    const similarity = this.calculateSimilarity(cleanFileArtist, cleanSongArtist);
    if (similarity > 0.8) {
      return { score: similarity * 0.6, reason: 'Fuzzy artist match' };
    }
    return { score: 0, reason: '' };
  }

  /**
   * Score an album tag against a song album: 0.8 exact, 0.5 containment.
   */
  private matchAlbum(fileAlbum: string, songAlbum: string): ScoredReason {
    if (!fileAlbum || !songAlbum) return { score: 0, reason: '' };
    const cleanFileAlbum = this.cleanString(fileAlbum);
    const cleanSongAlbum = this.cleanString(songAlbum);
    if (!cleanFileAlbum || !cleanSongAlbum) return { score: 0, reason: '' };

    if (cleanFileAlbum === cleanSongAlbum) {
      return { score: 0.8, reason: 'Exact album match' };
    }
    if (this.eitherContains(cleanFileAlbum, cleanSongAlbum)) {
      return { score: 0.5, reason: 'Album contains match' };
    }
    return { score: 0, reason: '' };
  }

  /**
   * Score how close two durations are. Differences of up to 2000 score
   * linearly from 0.6 (identical) down to 0.
   *
   * NOTE(review): `songDuration` is multiplied by 1000 before comparison, so
   * this assumes `fileDuration` is in milliseconds and `songDuration` in whole
   * seconds — confirm against where both values are populated.
   */
  private matchDuration(fileDuration: number, songDuration: string): ScoredReason {
    if (!fileDuration || !songDuration) return { score: 0, reason: '' };

    const songSeconds = Number.parseInt(String(songDuration), 10);
    if (Number.isNaN(songSeconds)) return { score: 0, reason: '' };

    const difference = Math.abs(fileDuration - songSeconds * 1000);
    const tolerance = 2000; // 2 second tolerance
    if (difference <= tolerance) {
      const closeness = 1 - difference / tolerance;
      return { score: closeness * 0.6, reason: 'Duration match' };
    }
    return { score: 0, reason: '' };
  }

  /**
   * Normalise a string for comparison: Unicode NFKC form, lower case, only
   * letters, combining marks, digits, underscores, spaces and hyphens kept,
   * whitespace collapsed and trimmed.
   *
   * Letters of every script are kept so that accented and non-Latin names
   * survive cleaning; for plain ASCII input the result is the same as with an
   * ASCII-only `\w` filter.
   */
  private cleanString(str: string): string {
    const text = typeof str === 'string' ? str : String(str ?? '');
    return text
      .normalize('NFKC') // composed and decomposed spellings compare equal
      .toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}_\s\-]/gu, '')
      .replace(/\s+/g, ' ')
      .trim();
  }

  /**
   * Remove a trailing file extension (a dot followed by 1–5 letters or digits).
   * The narrow pattern avoids cutting names at an ordinary dot, e.g. "Mr. X".
   */
  private stripExtension(name: string): string {
    return name.replace(/\.[A-Za-z0-9]{1,5}$/, '');
  }

  /**
   * Decode percent-encoded runs. A malformed run is left untouched instead of
   * preventing the rest of the string from being decoded.
   */
  private safeDecode(value: string): string {
    return value.replace(/(?:%[0-9A-Fa-f]{2})+/g, run => {
      try {
        return decodeURIComponent(run);
      } catch {
        return run;
      }
    });
  }

  /** Split a path or URL on `/` and `\`, dropping empty segments. */
  private splitPath(path: string): string[] {
    return path.split(/[\/\\]/).filter(segment => segment.length > 0);
  }

  /**
   * Build cleaned variations of a raw name: as-is, without parenthesised text,
   * without bracketed text, and without version keywords. Variations are taken
   * from the RAW text because cleaning removes the brackets they rely on.
   * Empty and duplicate variations are dropped.
   */
  private buildVariations(raw: string): string[] {
    const candidates = [
      raw,
      raw.replace(/\([^)]*\)/g, ' '),
      raw.replace(/\[[^\]]*\]/g, ' '),
      raw.replace(SongMatchingService.VERSION_KEYWORDS, ' ')
    ];
    const variations: string[] = [];
    for (const candidate of candidates) {
      const cleaned = this.cleanString(candidate);
      if (cleaned && !variations.includes(cleaned)) {
        variations.push(cleaned);
      }
    }
    return variations;
  }

  /** True when both strings are non-empty and one contains the other. */
  private eitherContains(a: string, b: string): boolean {
    return a.length > 0 && b.length > 0 && (a.includes(b) || b.includes(a));
  }

  /**
   * Similarity of two strings in the range 0–1, derived from the edit distance
   * relative to the longer string. Two empty strings are fully similar.
   */
  private calculateSimilarity(str1: string, str2: string): number {
    const longer = str1.length > str2.length ? str1 : str2;
    const shorter = str1.length > str2.length ? str2 : str1;
    if (longer.length === 0) return 1.0;
    const editDistance = this.levenshteinDistance(longer, shorter);
    return (longer.length - editDistance) / longer.length;
  }

  /**
   * Levenshtein (edit) distance between two strings, computed with two rolling
   * rows instead of a full matrix.
   */
  private levenshteinDistance(str1: string, str2: string): number {
    if (str1 === str2) return 0;
    if (str1.length === 0) return str2.length;
    if (str2.length === 0) return str1.length;

    let previous: number[] = Array.from({ length: str1.length + 1 }, (_, j) => j);
    let current: number[] = new Array<number>(str1.length + 1).fill(0);

    for (let i = 1; i <= str2.length; i++) {
      current[0] = i;
      for (let j = 1; j <= str1.length; j++) {
        const cost = str2.charAt(i - 1) === str1.charAt(j - 1) ? 0 : 1;
        current[j] = Math.min(
          previous[j - 1] + cost, // substitution (or match)
          current[j - 1] + 1,     // insertion
          previous[j] + 1         // deletion
        );
      }
      [previous, current] = [current, previous];
    }
    return previous[str1.length];
  }

  /** Music files that have no `songId` field. */
  async getUnmatchedMusicFiles(): Promise<any[]> {
    return await MusicFile.find({ songId: { $exists: false } });
  }

  /** Music files that have a `songId`, with the referenced song populated. */
  async getMatchedMusicFiles(): Promise<any[]> {
    return await MusicFile.find({ songId: { $exists: true } }).populate('songId');
  }

  /** Songs whose `s3File.hasS3File` is not `true`. */
  async getSongsWithoutMusicFiles(): Promise<any[]> {
    return await Song.find({ 's3File.hasS3File': { $ne: true } });
  }

  /** Songs whose `s3File.hasS3File` is `true`, with the music file populated. */
  async getSongsWithMusicFiles(): Promise<any[]> {
    return await Song.find({ 's3File.hasS3File': true }).populate('s3File.musicFileId');
  }
}