/** Song fields read by duplicate detection; mirrors the projection passed to `Song.find`. */
interface DuplicateSongFields {
  id: string;
  title?: string;
  artist?: string;
  location?: string;
  totalTime?: string;
}

/**
 * Shape of a node in a stored playlist tree as this route reads it.
 * NOTE(review): inferred from the property accesses below (`type`, `name`,
 * `tracks`, `children`) — confirm against the Playlist schema.
 */
interface PlaylistTreeNode {
  type?: string;
  name: string;
  tracks?: unknown;
  children?: unknown;
}

/** One group of songs that share the same normalized title + artist. */
interface DuplicateGroup {
  key: string;
  normalizedTitle: string;
  normalizedArtist: string;
  count: number;
  items: Array<{
    songId: string;
    title?: string;
    artist?: string;
    location?: string;
    totalTime?: string;
    playlists: string[];
  }>;
}

/** Smallest group size that can meaningfully be called a "duplicate". */
const DEFAULT_MIN_GROUP_SIZE = 2;

/**
 * Parses the `minGroupSize` query value.
 * Missing, non-numeric or too-small values fall back to {@link DEFAULT_MIN_GROUP_SIZE}
 * instead of producing `NaN` (which matched nothing) or `1` (which matched every song).
 */
const parseMinGroupSize = (raw: unknown): number => {
  const first: unknown = Array.isArray(raw) ? raw[0] : raw;
  const parsed = typeof first === 'string' ? parseInt(first, 10) : NaN;
  return Number.isFinite(parsed)
    ? Math.max(DEFAULT_MIN_GROUP_SIZE, parsed)
    : DEFAULT_MIN_GROUP_SIZE;
};

/**
 * Reduces a title/artist to a comparison key: accents folded, lower-cased,
 * bracketed qualifiers such as "(Remix)" removed, punctuation removed and
 * whitespace collapsed.
 */
const normalizeForMatch = (value?: string): string => {
  if (!value) return '';
  return String(value)
    .normalize('NFKD') // split accented letters into base letter + combining mark
    .replace(/\p{M}+/gu, '') // drop the combining marks ("é" -> "e")
    .toLowerCase()
    .replace(/\([^)]*\)|\[[^\]]*\]|\{[^}]*\}/g, '') // remove bracketed qualifiers
    .replace(/[^\p{L}\p{N}\s]/gu, '') // remove punctuation, keep letters/digits of any script
    .replace(/\s+/g, ' ') // collapse LAST so earlier removals cannot leave double spaces
    .trim();
};

/**
 * Walks every playlist tree depth-first and returns, per song id, the names of
 * the playlists that reference it (each name once, in first-seen order).
 */
const indexPlaylistNamesBySongId = (roots: readonly unknown[]): Map<string, Set<string>> => {
  const namesBySongId = new Map<string, Set<string>>();

  const visit = (node: unknown): void => {
    if (!node || typeof node !== 'object') return;
    const { type, name, tracks, children } = node as PlaylistTreeNode;

    if (type === 'playlist' && Array.isArray(tracks)) {
      for (const trackId of tracks) {
        // Keys are stringified so lookups behave like the previous plain-object dictionary.
        const songId = String(trackId);
        let names = namesBySongId.get(songId);
        if (!names) {
          names = new Set<string>();
          namesBySongId.set(songId, names);
        }
        names.add(name);
      }
    }

    if (Array.isArray(children)) {
      for (const child of children) visit(child);
    }
  };

  for (const root of roots) visit(root);
  return namesBySongId;
};

/**
 * Groups songs by normalized title + artist and returns the groups that have
 * at least `minGroupSize` members, largest first.
 */
const findDuplicateGroups = (
  songs: readonly DuplicateSongFields[],
  playlistNamesBySongId: ReadonlyMap<string, ReadonlySet<string>>,
  minGroupSize: number,
): DuplicateGroup[] => {
  const buckets = new Map<
    string,
    { normalizedTitle: string; normalizedArtist: string; songs: DuplicateSongFields[] }
  >();

  for (const song of songs) {
    const normalizedTitle = normalizeForMatch(song.title);
    // A song whose title normalizes to nothing gives no basis for comparison;
    // grouping such songs together would report unrelated tracks as duplicates.
    if (!normalizedTitle) continue;

    const normalizedArtist = normalizeForMatch(song.artist);
    const key = `${normalizedTitle}|${normalizedArtist}`;

    const bucket = buckets.get(key);
    if (bucket) {
      bucket.songs.push(song);
    } else {
      buckets.set(key, { normalizedTitle, normalizedArtist, songs: [song] });
    }
  }

  const groups: DuplicateGroup[] = [];
  for (const [key, bucket] of buckets) {
    if (bucket.songs.length < minGroupSize) continue;
    groups.push({
      key,
      normalizedTitle: bucket.normalizedTitle,
      normalizedArtist: bucket.normalizedArtist,
      count: bucket.songs.length,
      items: bucket.songs.map((song) => ({
        songId: song.id,
        title: song.title,
        artist: song.artist,
        location: song.location,
        totalTime: song.totalTime,
        playlists: [...(playlistNamesBySongId.get(String(song.id)) || [])],
      })),
    });
  }

  return groups.sort((a, b) => b.count - a.count);
};

/**
 * GET /duplicates (served as GET /api/songs/duplicates).
 *
 * Identifies possible duplicate songs by normalized title + artist and lists,
 * for every member of a group, its path and the playlists that contain it.
 *
 * Query: `minGroupSize` — minimum number of songs in a reported group
 * (default 2; values below 2 or unparsable values are treated as 2).
 *
 * Responds `200 { groups }` or `500 { message, error }`.
 *
 * NOTE(review): this route is registered after the routes defined earlier in
 * the file; if any of them is a parameterized GET such as `/:id`, it will
 * capture `/duplicates` first — confirm the registration order.
 */
router.get('/duplicates', async (req: Request, res: Response) => {
  try {
    const minGroupSize = parseMinGroupSize(req.query.minGroupSize);

    // The two reads are independent, so issue them together.
    const [songs, playlistDocs] = await Promise.all([
      Song.find({}, { id: 1, title: 1, artist: 1, location: 1, totalTime: 1 }).lean(),
      Playlist.find({}).lean(),
    ]);

    const playlistNamesBySongId = indexPlaylistNamesBySongId(playlistDocs);
    const duplicateGroups = findDuplicateGroups(
      // Lean documents are plain objects limited to the projected fields above.
      songs as DuplicateSongFields[],
      playlistNamesBySongId,
      minGroupSize,
    );

    res.json({ groups: duplicateGroups });
  } catch (error) {
    console.error('Error finding duplicate songs:', error);
    res.status(500).json({
      message: 'Error finding duplicate songs',
      // An Error instance serializes to `{}` in JSON; send its message instead.
      error: error instanceof Error ? error.message : String(error),
    });
  }
});

export const songsRouter = router;
-0,0 +1,107 @@ +import React, { useEffect, useState } from 'react'; +import { Box, VStack, Heading, Text, HStack, Badge, Select, Spinner, Table, Thead, Tr, Th, Tbody, Td } from '@chakra-ui/react'; +import { api } from '../services/api'; + +interface DuplicateItem { + songId: string; + title: string; + artist: string; + location?: string; + totalTime?: string; + playlists: string[]; +} + +interface DuplicateGroup { + key: string; + normalizedTitle: string; + normalizedArtist: string; + count: number; + items: DuplicateItem[]; +} + +export const DuplicatesViewer: React.FC = () => { + const [groups, setGroups] = useState([]); + const [loading, setLoading] = useState(false); + const [minGroupSize, setMinGroupSize] = useState(2); + + const loadDuplicates = async (minSize: number) => { + setLoading(true); + try { + const res = await api.getDuplicateSongs(minSize); + setGroups(res.groups || []); + } catch (e) { + setGroups([]); + } finally { + setLoading(false); + } + }; + + useEffect(() => { + loadDuplicates(minGroupSize); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [minGroupSize]); + + return ( + + + Possible Duplicates + + Minimum group size + + + + + {loading ? ( + Scanning duplicates… + ) : groups.length === 0 ? ( + No duplicate groups found. + ) : ( + + {groups.map((group) => ( + + + + {group.count} items + {group.normalizedArtist} — {group.normalizedTitle} + + + + + + + + + + + + + {group.items.map((it) => ( + + + + + + + ))} + +
TitleArtistPlaylistsRekordbox Path
{it.title}{it.artist}{(it.playlists || []).join(', ')}{it.location || '-'}
+
+ ))} +
+ )} +
+ ); +}; + diff --git a/packages/frontend/src/pages/Configuration.tsx b/packages/frontend/src/pages/Configuration.tsx index 4aaf24e..51dea05 100644 --- a/packages/frontend/src/pages/Configuration.tsx +++ b/packages/frontend/src/pages/Configuration.tsx @@ -35,6 +35,7 @@ import { S3Configuration } from "./S3Configuration"; import { MusicUpload } from "../components/MusicUpload"; import { SongMatching } from "../components/SongMatching"; import { api } from "../services/api"; +import { DuplicatesViewer } from "../components/DuplicatesViewer"; import { useState, useEffect, useMemo } from "react"; interface MusicFile { @@ -76,7 +77,8 @@ export function Configuration() { UPLOAD: 1, MUSIC_LIBRARY: 2, MATCHING: 3, - S3_CONFIG: 4, + DUPLICATES: 4, + S3_CONFIG: 5, } as const; // Fetch S3 config (small and safe to do on mount) @@ -322,6 +324,12 @@ export function Configuration() { Song Matching + + + + Duplicates + + @@ -509,6 +517,11 @@ export function Configuration() { + {/* Duplicates Tab */} + + + + {/* S3 Configuration Tab */} diff --git a/packages/frontend/src/services/api.ts b/packages/frontend/src/services/api.ts index 663da15..c6a5968 100644 --- a/packages/frontend/src/services/api.ts +++ b/packages/frontend/src/services/api.ts @@ -142,6 +142,29 @@ class Api { const data = await response.json(); return data.jobs; } + + // Duplicates API + async getDuplicateSongs(minGroupSize: number = 2): Promise<{ + groups: Array<{ + key: string; + normalizedTitle: string; + normalizedArtist: string; + count: number; + items: Array<{ + songId: string; + title: string; + artist: string; + location?: string; + totalTime?: string; + playlists: string[]; + }>; + }>; + }> { + const params = new URLSearchParams({ minGroupSize: String(minGroupSize) }); + const response = await fetch(`${API_BASE_URL}/songs/duplicates?${params}`); + if (!response.ok) throw new Error('Failed to fetch duplicates'); + return response.json(); + } } export const api = new Api(); \ No newline at end of file