fix(matching): strip diacritics in matching and quick match so accented letters (e.g., é) match plain equivalents

This commit is contained in:
Geert Rademakes 2025-08-08 11:06:48 +02:00
parent 07044c7594
commit 2e21c3b5f5
2 changed files with 9 additions and 3 deletions

View File

@ -224,13 +224,14 @@ class BackgroundJobService {
// Quick filename matching logic
// Decode URL-encoded sequences so %20, %27 etc. are compared correctly
const safeDecode = (s: string): string => { try { return decodeURIComponent(s); } catch { return s; } };
const normalizedS3Filename = safeDecode(filename).replace(/\.[^/.]+$/, '').toLowerCase();
const stripDiacritics = (s: string) => s.normalize('NFKD').replace(/[\u0300-\u036f]/g, '');
const normalizedS3Filename = stripDiacritics(safeDecode(filename)).replace(/\.[^/.]+$/, '').toLowerCase();
let matchedSong = null;
for (const song of allSongs) {
if (song.location) {
const rekordboxFilename = song.location.split(/[/\\]/).pop() || song.location;
const normalizedRekordboxFilename = safeDecode(rekordboxFilename).replace(/\.[^/.]+$/, '').toLowerCase();
const normalizedRekordboxFilename = stripDiacritics(safeDecode(rekordboxFilename)).replace(/\.[^/.]+$/, '').toLowerCase();
if (normalizedS3Filename === normalizedRekordboxFilename) {
matchedSong = song;

View File

@ -643,7 +643,12 @@ export class SongMatchingService {
* Clean string for comparison
*/
private cleanString(str: string): string {
return str
// Normalize unicode and strip diacritics so "é" -> "e"
const normalized = str
.normalize('NFKD')
.replace(/[\u0300-\u036f]/g, '');
return normalized
.toLowerCase()
.replace(/[^\w\s-]/g, '') // Remove special characters except spaces and hyphens
.replace(/\s+/g, ' ') // Normalize whitespace