fix(matching): strip diacritics in matching and quick match so accented letters (e.g., é) match plain equivalents

2025-08-08 11:06:48 +02:00 · 2025-08-08 11:06:48 +02:00 · 2e21c3b5f5
commit 2e21c3b5f5
parent 07044c7594
2 changed files with 9 additions and 3 deletions
--- a/packages/backend/src/services/backgroundJobService.ts
+++ b/packages/backend/src/services/backgroundJobService.ts
@@ -224,13 +224,14 @@ class BackgroundJobService {
          // Quick filename matching logic
          // Decode URL-encoded sequences so %20, %27 etc. are compared correctly
          const safeDecode = (s: string): string => { try { return decodeURIComponent(s); } catch { return s; } };
-          const normalizedS3Filename = safeDecode(filename).replace(/\.[^/.]+$/, '').toLowerCase();
+          const stripDiacritics = (s: string) => s.normalize('NFKD').replace(/[\u0300-\u036f]/g, '');
+          const normalizedS3Filename = stripDiacritics(safeDecode(filename)).replace(/\.[^/.]+$/, '').toLowerCase();
          let matchedSong = null;
          
          for (const song of allSongs) {
            if (song.location) {
              const rekordboxFilename = song.location.split(/[/\\]/).pop() || song.location;
-              const normalizedRekordboxFilename = safeDecode(rekordboxFilename).replace(/\.[^/.]+$/, '').toLowerCase();
+              const normalizedRekordboxFilename = stripDiacritics(safeDecode(rekordboxFilename)).replace(/\.[^/.]+$/, '').toLowerCase();
              
              if (normalizedS3Filename === normalizedRekordboxFilename) {
                matchedSong = song;
--- a/packages/backend/src/services/songMatchingService.ts
+++ b/packages/backend/src/services/songMatchingService.ts
@@ -643,7 +643,12 @@ export class SongMatchingService {
   * Clean string for comparison
   */
  private cleanString(str: string): string {
-    return str
+    // Normalize unicode and strip diacritics so "é" -> "e"
+    const normalized = str
+      .normalize('NFKD')
+      .replace(/[\u0300-\u036f]/g, '');
+
+    return normalized
      .toLowerCase()
      .replace(/[^\w\s-]/g, '') // Remove special characters except spaces and hyphens
      .replace(/\s+/g, ' ') // Normalize whitespace