// 724 lines · 23 KiB · TypeScript
import { Song } from '../models/Song.js';
|
|
import { MusicFile } from '../models/MusicFile.js';
|
|
import { AudioMetadataService } from './audioMetadataService.js';
|
|
|
|
/**
 * One candidate pairing of a music file with a library song.
 */
export interface MatchResult {
  /** The candidate song that was compared against the music file. */
  song: any;
  /** The music file that was being matched. */
  musicFile: any;
  /** Match strength; higher means stronger evidence. Results are ranked by this value. */
  confidence: number;
  /**
   * Coarse classification of `confidence`: `'exact'` (>= 0.9), `'fuzzy'` (>= 0.7),
   * `'partial'` (>= 0.5) or `'none'` (anything lower).
   */
  matchType: 'exact' | 'fuzzy' | 'partial' | 'none';
  /** Human-readable explanation of why the pair was scored the way it was. */
  matchReason: string;
}
|
|
|
|
/**
 * Tuning knobs accepted by the matching entry points. Every field is optional.
 */
export interface MatchOptions {
  /**
   * Results scoring below this value are discarded.
   * Defaults to 0.3 for plain matching and 0.7 for auto-linking.
   */
  minConfidence?: number;
  /**
   * Defaults to `true`.
   * NOTE(review): the flag is forwarded to the scorer but the scoring code never reads it.
   */
  enableFuzzyMatching?: boolean;
  /**
   * Defaults to `true` for plain matching and `false` for auto-linking.
   * NOTE(review): the flag is forwarded to the scorer but the scoring code never reads it.
   */
  enablePartialMatching?: boolean;
  /** Maximum number of ranked results returned per music file. Defaults to 5. */
  maxResults?: number;
}
|
|
|
|
/** Outcome of one field comparison: a score (0 means "no evidence") and the reason for it. */
interface FieldScore {
  score: number;
  reason: string;
}

/** A field score together with the weight it carries in the overall average. */
interface WeightedScore extends FieldScore {
  weight: number;
}

/** A music-file -> song link that is waiting to be written in bulk. */
interface PendingLink {
  musicFileId: unknown;
  songId: unknown;
  s3Key: unknown;
  s3Url: unknown;
}

/**
 * Matches uploaded music files against songs in the library and links them.
 *
 * Matching compares the file name, the song's original location, and the
 * title / artist / album / duration metadata, then combines the individual
 * scores into a weighted confidence value.
 */
export class SongMatchingService {
  /** Weight of each evidence source in the averaged confidence. */
  private static readonly SCORE_WEIGHTS = {
    filename: 3,
    location: 3,
    title: 2,
    artist: 1.5,
    album: 1,
    duration: 1,
  } as const;

  /** Version markers that are stripped when building name variations. */
  private static readonly VERSION_KEYWORDS = /remix|mix|edit|vip|extended|radio|clean|dirty/gi;

  /** Separators tried between artist and title (both orders) when matching file names. */
  private static readonly ARTIST_TITLE_SEPARATORS = [
    ' - ',
    ' feat. ',
    ' ft. ',
    ' featuring ',
    ' & ',
    ' vs ',
    ' x ',
  ];

  /** Scanning stops once this many exact matches have been collected for one file. */
  private static readonly MAX_EXACT_MATCHES = 3;

  /** Number of pending links written per bulk operation during auto-linking. */
  private static readonly LINK_BATCH_SIZE = 50;

  private audioMetadataService: AudioMetadataService;

  constructor() {
    this.audioMetadataService = new AudioMetadataService();
  }

  /**
   * Match a single music file against every song in the library.
   *
   * @param musicFile - The music file document to match.
   * @param options - Thresholds and limits; see {@link MatchOptions}.
   * @returns Candidates with `confidence >= minConfidence`, best first, at most `maxResults`.
   */
  async matchMusicFileToSongs(
    musicFile: any,
    options: MatchOptions = {}
  ): Promise<MatchResult[]> {
    const songs = await Song.find({});
    return this.rankSongs(musicFile, songs, options);
  }

  /**
   * Match every music file that has no `songId` yet against the library.
   *
   * The song list is loaded once and reused for all files.
   *
   * @param options - Forwarded to the per-file matching.
   * @returns One entry per unmatched music file with its ranked candidates (possibly empty).
   */
  async matchAllMusicFilesToSongs(
    options: MatchOptions = {}
  ): Promise<{ musicFile: any; matches: MatchResult[] }[]> {
    console.log('🔍 Starting song matching for all unmatched music files...');

    const musicFiles = await MusicFile.find({ songId: { $exists: false } });
    console.log(`📁 Found ${musicFiles.length} unmatched music files`);

    // Load the library once instead of once per music file.
    const songs = musicFiles.length > 0 ? await Song.find({}) : [];

    const results: { musicFile: any; matches: MatchResult[] }[] = [];
    let processedCount = 0;

    for (const musicFile of musicFiles) {
      processedCount++;
      const progress = ((processedCount / musicFiles.length) * 100).toFixed(1);
      console.log(`🎵 [${progress}%] Matching: ${musicFile.originalName}`);

      const matches = this.rankSongs(musicFile, songs, options);
      const bestMatch = matches[0];

      if (bestMatch) {
        console.log(`✅ Best match for ${musicFile.originalName}: ${bestMatch.song.title} (${(bestMatch.confidence * 100).toFixed(1)}% confidence)`);
      } else {
        console.log(`❌ No matches found for ${musicFile.originalName}`);
      }

      results.push({ musicFile, matches });
    }

    console.log(`🎉 Song matching completed for ${musicFiles.length} files`);
    return results;
  }

  /**
   * Match every unmatched music file and link it to its best candidate.
   *
   * Uses a stricter default threshold (0.7) than plain matching. Links are
   * written in batches; the song list is loaded once for the whole run.
   *
   * @param options - Thresholds; `maxResults` is ignored because only the best match is used.
   * @returns How many files were linked and how many had no suitable match.
   */
  async autoMatchAndLink(
    options: MatchOptions = {}
  ): Promise<{ linked: number; unmatched: number }> {
    console.log('🔗 Starting auto-match and link process...');

    const {
      minConfidence = 0.7, // Higher threshold for auto-linking
      enableFuzzyMatching = true,
      enablePartialMatching = false // Disable partial matching for auto-linking
    } = options;

    console.log(`⚙️ Auto-linking options: minConfidence=${minConfidence}, enableFuzzyMatching=${enableFuzzyMatching}, enablePartialMatching=${enablePartialMatching}`);

    const musicFiles = await MusicFile.find({ songId: { $exists: false } });
    console.log(`📁 Found ${musicFiles.length} unmatched music files to process`);

    // Load the library once instead of once per music file.
    const songs = musicFiles.length > 0 ? await Song.find({}) : [];

    let linked = 0;
    let unmatched = 0;
    let processedCount = 0;
    const pending: PendingLink[] = [];

    for (const musicFile of musicFiles) {
      processedCount++;
      const progress = ((processedCount / musicFiles.length) * 100).toFixed(1);
      console.log(`🔍 [${progress}%] Auto-matching: ${musicFile.originalName}`);

      const matches = this.rankSongs(musicFile, songs, {
        minConfidence,
        enableFuzzyMatching,
        enablePartialMatching,
        maxResults: 1
      });
      const best = matches[0];

      if (best && best.confidence >= minConfidence) {
        console.log(`🔗 Linking ${musicFile.originalName} to ${best.song.title} (${(best.confidence * 100).toFixed(1)}% confidence)`);

        pending.push({
          musicFileId: musicFile._id,
          songId: best.song._id,
          s3Key: musicFile.s3Key,
          s3Url: musicFile.s3Url
        });
        linked++;
      } else {
        console.log(`❌ No suitable match found for ${musicFile.originalName} (best confidence: ${best ? (best.confidence * 100).toFixed(1) : 0}%)`);
        unmatched++;
      }

      if (pending.length >= SongMatchingService.LINK_BATCH_SIZE) {
        // splice(0) hands over the current batch and empties the queue in one step.
        await this.processBatchUpdates(pending.splice(0));
      }
    }

    if (pending.length > 0) {
      await this.processBatchUpdates(pending.splice(0));
    }

    console.log(`🎉 Auto-match and link completed:`);
    console.log(` Linked: ${linked} files`);
    console.log(` Unmatched: ${unmatched} files`);
    console.log(` Success rate: ${musicFiles.length > 0 ? ((linked / musicFiles.length) * 100).toFixed(1) : 0}%`);

    return { linked, unmatched };
  }

  /**
   * Score one music file against an already-loaded list of songs.
   *
   * Stops scanning early once `MAX_EXACT_MATCHES` exact matches were found,
   * then sorts by confidence (descending) and truncates to `maxResults`.
   */
  private rankSongs(
    musicFile: any,
    songs: readonly any[],
    options: MatchOptions
  ): MatchResult[] {
    const {
      minConfidence = 0.3,
      enableFuzzyMatching = true,
      enablePartialMatching = true,
      maxResults = 5
    } = options;

    const results: MatchResult[] = [];
    let exactMatches = 0;

    for (const song of songs) {
      const matchResult = this.calculateMatch(musicFile, song, {
        enableFuzzyMatching,
        enablePartialMatching
      });

      if (matchResult.confidence < minConfidence) {
        continue;
      }
      results.push(matchResult);

      if (matchResult.matchType === 'exact') {
        exactMatches++;
        if (exactMatches >= SongMatchingService.MAX_EXACT_MATCHES) {
          console.log(`🎯 Found ${exactMatches} exact matches, stopping early for performance`);
          break;
        }
      }
    }

    return results
      .sort((a, b) => b.confidence - a.confidence)
      .slice(0, maxResults);
  }

  /**
   * Write a batch of links: sets `songId` on each music file and the
   * `s3File.*` fields on each song, using one bulk write per collection.
   */
  private async processBatchUpdates(updates: PendingLink[]): Promise<void> {
    if (updates.length === 0) {
      return; // Nothing to write; avoid issuing an empty bulk operation.
    }

    console.log(`💾 Processing batch update for ${updates.length} files...`);

    const bulkOps = updates.map(update => ({
      updateOne: {
        filter: { _id: update.musicFileId },
        update: { $set: { songId: update.songId } }
      }
    }));

    const songBulkOps = updates.map(update => ({
      updateOne: {
        filter: { _id: update.songId },
        update: {
          $set: {
            's3File.musicFileId': update.musicFileId,
            's3File.s3Key': update.s3Key,
            's3File.s3Url': update.s3Url,
            's3File.streamingUrl': this.buildStreamingUrl(update.s3Key),
            's3File.hasS3File': true
          }
        }
      }
    }));

    await Promise.all([
      MusicFile.bulkWrite(bulkOps),
      Song.bulkWrite(songBulkOps)
    ]);
  }

  /** Build the streaming URL for an S3 key from `S3_ENDPOINT` and `S3_BUCKET_NAME`. */
  private buildStreamingUrl(s3Key: unknown): string {
    return `${process.env.S3_ENDPOINT}/${process.env.S3_BUCKET_NAME}/${s3Key}`;
  }

  /**
   * Link a music file to a song (preserves original location).
   *
   * Saves the song first (with the S3 details copied from the music file),
   * then stores the song's id on the music file.
   */
  async linkMusicFileToSong(musicFile: any, song: any): Promise<void> {
    song.s3File = {
      musicFileId: musicFile._id,
      s3Key: musicFile.s3Key,
      s3Url: musicFile.s3Url,
      streamingUrl: this.buildStreamingUrl(musicFile.s3Key),
      hasS3File: true
    };
    await song.save();

    musicFile.songId = song._id;
    await musicFile.save();
  }

  /**
   * Unlink a music file from a song.
   *
   * Clears the song's S3 details and removes the back-reference (`songId`)
   * from the music file that was linked to it, if that file still exists.
   */
  async unlinkMusicFileFromSong(song: any): Promise<void> {
    // Remember the linked file BEFORE wiping s3File; afterwards the id is gone.
    const linkedMusicFileId = song.s3File?.musicFileId;

    song.s3File = {
      musicFileId: null,
      s3Key: null,
      s3Url: null,
      streamingUrl: null,
      hasS3File: false
    };
    await song.save();

    if (!linkedMusicFileId) {
      return;
    }

    const musicFile = await MusicFile.findById(linkedMusicFileId);
    if (musicFile) {
      musicFile.songId = undefined;
      await musicFile.save();
    }
  }

  /**
   * Calculate match confidence between a music file and a song.
   *
   * A file-name score >= 0.95 or a location score >= 0.9 short-circuits to an
   * `'exact'` result. Otherwise every non-zero field score is combined into a
   * weighted average (see `SCORE_WEIGHTS`) and classified by threshold.
   *
   * NOTE(review): `_options` is accepted for interface stability but is not
   * consulted; the fuzzy/partial flags currently have no effect on scoring.
   */
  private calculateMatch(
    musicFile: any,
    song: any,
    _options: { enableFuzzyMatching: boolean; enablePartialMatching: boolean }
  ): MatchResult {
    const weights = SongMatchingService.SCORE_WEIGHTS;
    const scores: WeightedScore[] = [];

    const asExact = (hit: FieldScore): MatchResult => ({
      song,
      musicFile,
      confidence: hit.score,
      matchType: 'exact',
      matchReason: hit.reason
    });
    const collect = (hit: FieldScore, weight: number): void => {
      if (hit.score > 0) {
        scores.push({ ...hit, weight });
      }
    };

    // 1. File name (highest priority): a very strong hit is treated as a 1:1 match.
    const filenameScore = this.matchFilename(musicFile.originalName, song);
    if (filenameScore.score >= 0.95) {
      return asExact(filenameScore);
    }
    collect(filenameScore, weights.filename);

    // 2. Original location (high priority for Rekordbox files).
    if (song.location) {
      const locationScore = this.matchLocation(musicFile.originalName, song.location);
      if (locationScore.score >= 0.9) {
        return asExact(locationScore);
      }
      collect(locationScore, weights.location);
    }

    // 3./4. Title and artist tags only count when the file name did not match well.
    if (filenameScore.score < 0.8) {
      collect(this.matchTitle(musicFile.title, song.title), weights.title);
      collect(this.matchArtist(musicFile.artist, song.artist), weights.artist);
    }

    // 5. Album (lower priority).
    collect(this.matchAlbum(musicFile.album, song.album), weights.album);

    // 6. Duration, when both sides have one (tiebreaker).
    if (musicFile.duration && song.totalTime) {
      collect(this.matchDuration(musicFile.duration, song.totalTime), weights.duration);
    }

    let totalScore = 0;
    let totalWeight = 0;
    for (const entry of scores) {
      totalScore += entry.score * entry.weight;
      totalWeight += entry.weight;
    }
    const averageScore = totalWeight > 0 ? totalScore / totalWeight : 0;

    let matchType: MatchResult['matchType'] = 'none';
    let matchReason = 'No match found';
    if (averageScore >= 0.9) {
      matchType = 'exact';
      matchReason = 'Exact match found';
    } else if (averageScore >= 0.7) {
      matchType = 'fuzzy';
      matchReason = 'High confidence fuzzy match';
    } else if (averageScore >= 0.5) {
      matchType = 'partial';
      matchReason = 'Partial match';
    }

    return {
      song,
      musicFile,
      confidence: averageScore,
      matchType,
      matchReason
    };
  }

  /**
   * Match a file name (extension ignored) against a song's title and artist.
   *
   * Tries, in order: exact title, "artist <sep> title" patterns, containment,
   * variations with bracketed parts / version keywords removed, and finally a
   * Levenshtein-based fuzzy comparison. Names that are empty after cleaning
   * never match.
   */
  private matchFilename(filename: string, song: any): FieldScore {
    if (!filename || !song.title) return SongMatchingService.noMatch();

    const rawName = SongMatchingService.stripExtension(filename);
    const cleanFilename = this.cleanString(rawName);
    const cleanTitle = this.cleanString(song.title);
    if (!cleanFilename || !cleanTitle) return SongMatchingService.noMatch();

    const cleanArtist = song.artist ? this.cleanString(song.artist) : '';

    // 1. Exact file name match (highest confidence).
    if (cleanFilename === cleanTitle) {
      return { score: 1.0, reason: 'Exact filename match' };
    }

    // 2. Artist/title patterns (very common in music file names).
    if (cleanArtist) {
      const patterns = this.artistTitlePatterns(cleanArtist, cleanTitle);
      if (patterns.includes(cleanFilename)) {
        return { score: 1.0, reason: 'Exact Artist-Title pattern match' };
      }
      if (patterns.some(pattern => this.containsEitherWay(cleanFilename, pattern))) {
        return { score: 0.95, reason: 'Partial Artist-Title pattern match' };
      }
    }

    // 3. File name contains the title (or vice versa).
    if (this.containsEitherWay(cleanFilename, cleanTitle)) {
      return { score: 0.9, reason: 'Filename contains title' };
    }

    // 4. Variations of the file name.
    for (const variation of this.nameVariations(rawName)) {
      if (variation === cleanTitle) {
        return { score: 0.95, reason: 'Filename variation matches title' };
      }
      if (this.containsEitherWay(variation, cleanTitle)) {
        return { score: 0.85, reason: 'Filename variation contains title' };
      }
    }

    // 5. Variations of the title.
    for (const titleVariation of this.nameVariations(song.title)) {
      if (cleanFilename === titleVariation) {
        return { score: 0.95, reason: 'Filename matches title variation' };
      }
      if (this.containsEitherWay(cleanFilename, titleVariation)) {
        return { score: 0.85, reason: 'Filename contains title variation' };
      }
    }

    // 6. Fuzzy match for similar names.
    const similarity = this.calculateSimilarity(cleanFilename, cleanTitle);
    if (similarity > 0.8) {
      return { score: similarity * 0.8, reason: 'Fuzzy filename match' };
    }

    return SongMatchingService.noMatch();
  }

  /**
   * Match a file name against the file name stored in a song's original location.
   *
   * The location is URL-decoded and split into path segments BEFORE cleaning,
   * so separators and the extension are still recognisable. Extensions are
   * ignored on both sides.
   */
  private matchLocation(filename: string, location: string): FieldScore {
    if (!filename || !location) return SongMatchingService.noMatch();

    // Decode URL-encoded sequences so Rekordbox paths with %20 etc. compare correctly.
    const decodedFilename = SongMatchingService.safeDecode(filename);
    const decodedLocation = SongMatchingService.safeDecode(location);

    const segments = decodedLocation.split(/[\/\\]/).filter(segment => segment.length > 0);
    const rawLocationName = SongMatchingService.stripExtension(
      segments[segments.length - 1] ?? decodedLocation
    );

    const fileName = this.cleanString(SongMatchingService.stripExtension(decodedFilename));
    const locationName = this.cleanString(rawLocationName);
    if (!fileName || !locationName) return SongMatchingService.noMatch();

    // 1. Exact file name match (highest confidence).
    if (fileName === locationName) {
      return { score: 1.0, reason: 'Exact location filename match' };
    }

    // 2. One name contains the other.
    if (this.containsEitherWay(fileName, locationName)) {
      return { score: 0.95, reason: 'Location filename contains match' };
    }

    // 3. Variations of the location's file name.
    for (const variation of this.nameVariations(rawLocationName)) {
      if (fileName === variation) {
        return { score: 0.95, reason: 'Filename matches location variation' };
      }
      if (this.containsEitherWay(fileName, variation)) {
        return { score: 0.9, reason: 'Filename contains location variation' };
      }
    }

    // 4. A folder in the path contains the whole file name. Only this direction is
    //    checked; the reverse would let any short folder name match unrelated files.
    const folderHit = segments
      .slice(0, -1)
      .some(segment => this.cleanString(segment).includes(fileName));
    if (folderHit) {
      return { score: 0.8, reason: 'Path part contains filename' };
    }

    // 5. Fuzzy match for similar names.
    const similarity = this.calculateSimilarity(fileName, locationName);
    if (similarity > 0.8) {
      return { score: similarity * 0.7, reason: 'Fuzzy location filename match' };
    }

    return SongMatchingService.noMatch();
  }

  /** Match title: 1.0 exact, 0.7 containment, `similarity * 0.8` when similarity > 0.8. */
  private matchTitle(fileTitle: string, songTitle: string): FieldScore {
    return this.matchText(fileTitle, songTitle, 'title', {
      exact: 1.0,
      contains: 0.7,
      fuzzyFactor: 0.8
    });
  }

  /** Match artist: 0.9 exact, 0.6 containment, `similarity * 0.6` when similarity > 0.8. */
  private matchArtist(fileArtist: string, songArtist: string): FieldScore {
    return this.matchText(fileArtist, songArtist, 'artist', {
      exact: 0.9,
      contains: 0.6,
      fuzzyFactor: 0.6
    });
  }

  /** Match album: 0.8 exact, 0.5 containment, no fuzzy comparison. */
  private matchAlbum(fileAlbum: string, songAlbum: string): FieldScore {
    return this.matchText(fileAlbum, songAlbum, 'album', {
      exact: 0.8,
      contains: 0.5
    });
  }

  /**
   * Shared comparison for the title / artist / album tags.
   *
   * Values that are missing, or empty after cleaning, never match.
   */
  private matchText(
    fileValue: string,
    songValue: string,
    label: string,
    tiers: { exact: number; contains: number; fuzzyFactor?: number }
  ): FieldScore {
    if (!fileValue || !songValue) return SongMatchingService.noMatch();

    const left = this.cleanString(fileValue);
    const right = this.cleanString(songValue);
    if (!left || !right) return SongMatchingService.noMatch();

    if (left === right) {
      return { score: tiers.exact, reason: `Exact ${label} match` };
    }

    if (left.includes(right) || right.includes(left)) {
      const capitalised = label.charAt(0).toUpperCase() + label.slice(1);
      return { score: tiers.contains, reason: `${capitalised} contains match` };
    }

    if (tiers.fuzzyFactor !== undefined) {
      const similarity = this.calculateSimilarity(left, right);
      if (similarity > 0.8) {
        return { score: similarity * tiers.fuzzyFactor, reason: `Fuzzy ${label} match` };
      }
    }

    return SongMatchingService.noMatch();
  }

  /**
   * Match duration with a 2 second tolerance; the score falls linearly from
   * 0.6 (identical) to 0 (2 s apart).
   *
   * `songDuration` is parsed as whole seconds and converted to milliseconds.
   * NOTE(review): `fileDuration` is therefore presumed to be in milliseconds; confirm.
   */
  private matchDuration(fileDuration: number, songDuration: string): FieldScore {
    if (!fileDuration || !songDuration) return SongMatchingService.noMatch();

    const songSeconds = Number.parseInt(String(songDuration), 10);
    if (Number.isNaN(songSeconds)) return SongMatchingService.noMatch();

    const toleranceMs = 2000;
    const difference = Math.abs(fileDuration - songSeconds * 1000);

    if (difference <= toleranceMs) {
      const closeness = 1 - difference / toleranceMs;
      return { score: closeness * 0.6, reason: 'Duration match' };
    }

    return SongMatchingService.noMatch();
  }

  /**
   * Clean string for comparison.
   *
   * Normalises to NFKC (so composed and decomposed accents compare equal),
   * lower-cases, drops everything except letters, marks, digits, underscore,
   * whitespace and hyphens in any script, and collapses whitespace.
   */
  private cleanString(str: string): string {
    return str
      .normalize('NFKC')
      .toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}_\s-]/gu, '')
      .replace(/\s+/g, ' ')
      .trim();
  }

  /**
   * Cleaned "artist <sep> title" and "title <sep> artist" strings for every
   * known separator. Patterns go through `cleanString` so they stay comparable
   * with cleaned file names (e.g. "feat." becomes "feat", "&" disappears).
   */
  private artistTitlePatterns(cleanArtist: string, cleanTitle: string): string[] {
    const patterns = SongMatchingService.ARTIST_TITLE_SEPARATORS.flatMap(separator => [
      this.cleanString(`${cleanArtist}${separator}${cleanTitle}`),
      this.cleanString(`${cleanTitle}${separator}${cleanArtist}`)
    ]);
    return [...new Set(patterns)];
  }

  /**
   * Cleaned variations of a raw name: as-is, without "(...)" parts, without
   * "[...]" parts, and without version keywords. Brackets are removed from
   * the RAW name because `cleanString` would strip them first. Empty and
   * duplicate variations are dropped.
   */
  private nameVariations(rawName: string): string[] {
    const candidates = [
      rawName,
      rawName.replace(/\([^)]*\)/g, ' '),
      rawName.replace(/\[[^\]]*\]/g, ' '),
      rawName.replace(SongMatchingService.VERSION_KEYWORDS, '')
    ];
    const cleaned = candidates
      .map(candidate => this.cleanString(candidate))
      .filter(candidate => candidate.length > 0);
    return [...new Set(cleaned)];
  }

  /** True when both strings are non-empty and one contains the other. */
  private containsEitherWay(a: string, b: string): boolean {
    return a.length > 0 && b.length > 0 && (a.includes(b) || b.includes(a));
  }

  /** Fresh "no evidence" score. */
  private static noMatch(): FieldScore {
    return { score: 0, reason: '' };
  }

  /** Remove a trailing ".ext" from a file name. */
  private static stripExtension(name: string): string {
    return name.replace(/\.[^/.]+$/, '');
  }

  /** `decodeURIComponent` that returns the input unchanged when it is malformed. */
  private static safeDecode(value: string): string {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  }

  /**
   * Calculate simple string similarity (0-1): one minus the edit distance
   * divided by the longer length. Two empty strings yield 1.
   */
  private calculateSimilarity(str1: string, str2: string): number {
    const longest = Math.max(str1.length, str2.length);
    if (longest === 0) return 1.0;

    return (longest - this.levenshteinDistance(str1, str2)) / longest;
  }

  /**
   * Calculate Levenshtein distance (insertions, deletions, substitutions),
   * keeping only two rows of the distance table at a time.
   */
  private levenshteinDistance(str1: string, str2: string): number {
    let previous: number[] = Array.from({ length: str1.length + 1 }, (_, j) => j);

    for (let i = 1; i <= str2.length; i++) {
      const current: number[] = [i];
      for (let j = 1; j <= str1.length; j++) {
        const substitution = str2.charAt(i - 1) === str1.charAt(j - 1) ? 0 : 1;
        current[j] = Math.min(
          previous[j - 1] + substitution,
          current[j - 1] + 1,
          previous[j] + 1
        );
      }
      previous = current;
    }

    return previous[str1.length];
  }

  /** Get unmatched music files (documents without a `songId` field). */
  async getUnmatchedMusicFiles(): Promise<any[]> {
    return await MusicFile.find({ songId: { $exists: false } });
  }

  /** Get matched music files (documents with a `songId` field), with the song populated. */
  async getMatchedMusicFiles(): Promise<any[]> {
    return await MusicFile.find({ songId: { $exists: true } }).populate('songId');
  }

  /** Get songs without music files (`s3File.hasS3File` is not `true`). */
  async getSongsWithoutMusicFiles(): Promise<any[]> {
    return await Song.find({ 's3File.hasS3File': { $ne: true } });
  }

  /** Get songs with music files (`s3File.hasS3File` is `true`), with the music file populated. */
  async getSongsWithMusicFiles(): Promise<any[]> {
    return await Song.find({ 's3File.hasS3File': true }).populate('s3File.musicFileId');
  }
}
|