feat(duplicates): add backend endpoint to detect possible duplicates and new Config tab to display them

- Backend: GET /api/songs/duplicates groups songs by normalized title+artist and lists playlists + paths
- Frontend: DuplicatesViewer component and new tab in Configuration; fetch on open; adjustable min group size
- API client: add getDuplicateSongs()

This commit is contained in:
Geert Rademakes 2025-08-08 08:51:23 +02:00
parent 83b4682b0e
commit 7dc70c3bdf
4 changed files with 227 additions and 2 deletions

View File

@ -246,3 +246,85 @@ router.post('/batch', async (req: Request, res: Response) => {
}); });
export const songsRouter = router; export const songsRouter = router;
// Identify possible duplicate songs by normalized title+artist

/**
 * Reduces a title or artist string to the form used for duplicate comparison.
 *
 * Steps, in order: Unicode-decompose and lowercase, drop combining marks so
 * accented and unaccented spellings compare equal, remove bracketed
 * qualifiers such as "(Remix)" or "[Live]", remove punctuation while keeping
 * letters and digits of any script, then collapse whitespace.
 *
 * Whitespace is collapsed last on purpose: removing a bracketed section or a
 * punctuation character from the middle of a string leaves two adjacent
 * spaces, which would otherwise stop "Song (Live) Title" from matching
 * "Song Title".
 *
 * @param value Raw title or artist; may be missing.
 * @returns The normalized string, or '' when nothing comparable remains.
 */
const normalizeForDuplicateKey = (value?: string): string => {
  if (!value) return '';
  return String(value)
    .normalize('NFKD') // split accented letters into base letter + combining mark
    .toLowerCase()
    .replace(/\p{M}/gu, '') // drop the combining marks produced above
    .replace(/\([^)]*\)|\[[^\]]*\]|\{[^}]*\}/g, '') // remove bracketed qualifiers
    .replace(/[^\p{L}\p{N}\s]/gu, '') // remove punctuation, keep letters/digits of any script
    .replace(/\s+/g, ' ') // collapse whitespace (must run after the removals)
    .trim();
};

/**
 * GET /duplicates
 *
 * Groups all songs by normalized title + artist and returns the groups that
 * contain at least `minGroupSize` songs (query parameter, default 2), largest
 * group first. Each item lists the names of the playlists that reference it.
 *
 * Responds with `{ groups: [...] }` on success and a 500 with a message on failure.
 *
 * NOTE(review): this route is registered after the other routes in this file;
 * if an earlier parameterized GET route (e.g. '/:id') exists it would match
 * '/duplicates' first — confirm against the full router.
 */
router.get('/duplicates', async (req: Request, res: Response) => {
  try {
    // A non-numeric value parses to NaN, and `length >= NaN` is always false,
    // which would silently return no groups. Fall back to the default instead.
    const parsedSize = parseInt(String(req.query.minGroupSize ?? '') || '2', 10);
    const minGroupSize = Number.isNaN(parsedSize) ? 2 : parsedSize;

    // Load needed song fields
    const songs = await Song.find({}, { id: 1, title: 1, artist: 1, location: 1, totalTime: 1 }).lean();

    // Group songs by normalized key (title+artist)
    const groups = new Map<string, { normalizedTitle: string; normalizedArtist: string; items: any[] }>();
    for (const s of songs as any[]) {
      const normalizedTitle = normalizeForDuplicateKey(s.title as string);
      // Without a comparable title the key would be the artist alone, which is
      // not evidence of duplication; skip instead of reporting false groups.
      if (!normalizedTitle) continue;
      const normalizedArtist = normalizeForDuplicateKey(s.artist as string);
      // '|' cannot occur in a normalized string (it is stripped as punctuation),
      // so the key is unambiguous.
      const key = `${normalizedTitle}|${normalizedArtist}`;

      let group = groups.get(key);
      if (!group) {
        group = { normalizedTitle, normalizedArtist, items: [] };
        groups.set(key, group);
      }
      group.items.push({
        id: s.id,
        title: s.title,
        artist: s.artist,
        location: s.location,
        totalTime: s.totalTime,
      });
    }

    // Build songId -> playlist names mapping. Map/Set avoid collisions with
    // Object.prototype keys and make the "already listed" check O(1).
    const playlistsBySongId = new Map<string, Set<string>>();
    const playlistDocs = await Playlist.find({}).lean();
    const collect = (node: any): void => {
      if (!node) return;
      if (node.type === 'playlist' && Array.isArray(node.tracks)) {
        for (const songId of node.tracks) {
          const id = String(songId);
          let names = playlistsBySongId.get(id);
          if (!names) {
            names = new Set<string>();
            playlistsBySongId.set(id, names);
          }
          names.add(node.name);
        }
      }
      // Descend into nested nodes so playlists below the top level are included.
      if (Array.isArray(node.children)) {
        for (const child of node.children) collect(child);
      }
    };
    for (const doc of playlistDocs) collect(doc);

    // Build duplicate groups response
    const duplicateGroups = Array.from(groups.entries())
      .filter(([, group]) => group.items.length >= minGroupSize)
      .map(([key, group]) => ({
        key,
        normalizedTitle: group.normalizedTitle,
        normalizedArtist: group.normalizedArtist,
        count: group.items.length,
        items: group.items.map((it: any) => ({
          songId: it.id,
          title: it.title,
          artist: it.artist,
          location: it.location,
          totalTime: it.totalTime,
          playlists: Array.from(playlistsBySongId.get(String(it.id)) ?? []),
        })),
      }))
      .sort((a, b) => b.count - a.count);

    res.json({ groups: duplicateGroups });
  } catch (error) {
    console.error('Error finding duplicate songs:', error);
    res.status(500).json({ message: 'Error finding duplicate songs', error });
  }
});

View File

@ -0,0 +1,107 @@
import React, { useEffect, useState } from 'react';
import { Box, VStack, Heading, Text, HStack, Badge, Select, Spinner, Table, Thead, Tr, Th, Tbody, Td } from '@chakra-ui/react';
import { api } from '../services/api';
/** One song inside a duplicate group, as rendered in a table row. */
interface DuplicateItem {
  /** Song identifier; used as the React key of the row. */
  songId: string;
  /** Original (un-normalized) artist shown in the "Artist" column. */
  artist: string;
  /** Original (un-normalized) title shown in the "Title" column. */
  title: string;
  /** Names of the playlists that contain this song. */
  playlists: Array<string>;
  /** Value shown in the "Rekordbox Path" column; '-' is rendered when absent. */
  location?: string;
  /** Track duration as delivered by the API; not rendered by this component. */
  totalTime?: string;
}
/** A set of songs that share the same normalized title and artist. */
interface DuplicateGroup {
  /** Unique group key; used as the React key of the group card. */
  key: string;
  /** Number of songs in the group, shown in the badge. */
  count: number;
  /** Normalized artist shown in the group header. */
  normalizedArtist: string;
  /** Normalized title shown in the group header. */
  normalizedTitle: string;
  /** The songs belonging to this group. */
  items: Array<DuplicateItem>;
}
/**
 * Lists groups of songs that look like duplicates of each other.
 *
 * Fetches the groups on mount and again whenever the "Minimum group size"
 * selector changes. While a request is running a spinner is shown; a failed
 * request is reported as an error instead of being presented as
 * "no duplicates found".
 */
export const DuplicatesViewer: React.FC = () => {
  const [groups, setGroups] = useState<DuplicateGroup[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [minGroupSize, setMinGroupSize] = useState(2);

  useEffect(() => {
    // Set by the cleanup below so that a response belonging to an older
    // selection (or arriving after unmount) cannot overwrite newer state.
    let cancelled = false;

    const loadDuplicates = async (minSize: number): Promise<void> => {
      setLoading(true);
      setError(null);
      try {
        const res = await api.getDuplicateSongs(minSize);
        if (!cancelled) setGroups(res.groups || []);
      } catch (e: unknown) {
        if (!cancelled) {
          console.error('Failed to load duplicate songs:', e);
          setGroups([]);
          setError(e instanceof Error ? e.message : 'Failed to fetch duplicates');
        }
      } finally {
        if (!cancelled) setLoading(false);
      }
    };

    void loadDuplicates(minGroupSize);
    return () => {
      cancelled = true;
    };
  }, [minGroupSize]);

  return (
    <VStack spacing={4} align="stretch">
      <HStack justify="space-between">
        <Heading size="md" color="white">Possible Duplicates</Heading>
        <HStack>
          <Text color="gray.400">Minimum group size</Text>
          <Select
            size="sm"
            value={minGroupSize}
            onChange={(e) => setMinGroupSize(parseInt(e.target.value, 10))}
            bg="gray.700"
            borderColor="gray.600"
            color="white"
            w="120px"
          >
            <option value={2}>2+</option>
            <option value={3}>3+</option>
            <option value={4}>4+</option>
          </Select>
        </HStack>
      </HStack>
      {loading ? (
        <HStack><Spinner size="sm" /><Text color="gray.400">Scanning duplicates</Text></HStack>
      ) : error ? (
        <Text color="red.300">{error}</Text>
      ) : groups.length === 0 ? (
        <Text color="gray.500">No duplicate groups found.</Text>
      ) : (
        <VStack spacing={4} align="stretch">
          {groups.map((group) => (
            <Box key={group.key} p={4} bg="gray.800" borderRadius="md" borderWidth="1px" borderColor="gray.700">
              <HStack justify="space-between" mb={2}>
                <HStack>
                  <Badge colorScheme="blue" variant="subtle">{group.count} items</Badge>
                  <Text color="gray.300">{group.normalizedArtist} {group.normalizedTitle}</Text>
                </HStack>
              </HStack>
              <Table size="sm" variant="simple">
                <Thead>
                  <Tr>
                    <Th color="gray.300">Title</Th>
                    <Th color="gray.300">Artist</Th>
                    <Th color="gray.300">Playlists</Th>
                    <Th color="gray.300">Rekordbox Path</Th>
                  </Tr>
                </Thead>
                <Tbody>
                  {group.items.map((it) => (
                    <Tr key={it.songId}>
                      <Td color="gray.200">{it.title}</Td>
                      <Td color="gray.200">{it.artist}</Td>
                      <Td color="gray.300">{(it.playlists || []).join(', ')}</Td>
                      <Td color="gray.400" maxW="500px" whiteSpace="nowrap" overflow="hidden" textOverflow="ellipsis">{it.location || '-'}</Td>
                    </Tr>
                  ))}
                </Tbody>
              </Table>
            </Box>
          ))}
        </VStack>
      )}
    </VStack>
  );
};

View File

@ -35,6 +35,7 @@ import { S3Configuration } from "./S3Configuration";
import { MusicUpload } from "../components/MusicUpload"; import { MusicUpload } from "../components/MusicUpload";
import { SongMatching } from "../components/SongMatching"; import { SongMatching } from "../components/SongMatching";
import { api } from "../services/api"; import { api } from "../services/api";
import { DuplicatesViewer } from "../components/DuplicatesViewer";
import { useState, useEffect, useMemo } from "react"; import { useState, useEffect, useMemo } from "react";
interface MusicFile { interface MusicFile {
@ -76,7 +77,8 @@ export function Configuration() {
UPLOAD: 1, UPLOAD: 1,
MUSIC_LIBRARY: 2, MUSIC_LIBRARY: 2,
MATCHING: 3, MATCHING: 3,
S3_CONFIG: 4, DUPLICATES: 4,
S3_CONFIG: 5,
} as const; } as const;
// Fetch S3 config (small and safe to do on mount) // Fetch S3 config (small and safe to do on mount)
@ -322,6 +324,12 @@ export function Configuration() {
<Text>Song Matching</Text> <Text>Song Matching</Text>
</HStack> </HStack>
</Tab> </Tab>
<Tab color="gray.300" _selected={{ bg: "gray.700", color: "white", borderColor: "gray.600" }}>
<HStack spacing={2}>
<Icon as={FiDatabase} />
<Text>Duplicates</Text>
</HStack>
</Tab>
<Tab color="gray.300" _selected={{ bg: "gray.700", color: "white", borderColor: "gray.600" }}> <Tab color="gray.300" _selected={{ bg: "gray.700", color: "white", borderColor: "gray.600" }}>
<HStack spacing={2}> <HStack spacing={2}>
<Icon as={FiSettings} /> <Icon as={FiSettings} />
@ -509,6 +517,11 @@ export function Configuration() {
<SongMatching /> <SongMatching />
</TabPanel> </TabPanel>
{/* Duplicates Tab */}
<TabPanel bg="gray.800" p={6} borderRadius="lg" borderWidth="1px" borderColor="gray.700">
<DuplicatesViewer />
</TabPanel>
{/* S3 Configuration Tab */} {/* S3 Configuration Tab */}
<TabPanel bg="gray.800" p={0}> <TabPanel bg="gray.800" p={0}>
<Box p={6}> <Box p={6}>

View File

@ -142,6 +142,29 @@ class Api {
const data = await response.json(); const data = await response.json();
return data.jobs; return data.jobs;
} }
// Duplicates API
/**
 * Requests the groups of possibly duplicated songs from
 * `${API_BASE_URL}/songs/duplicates`.
 *
 * @param minGroupSize Sent as the `minGroupSize` query parameter; defaults to 2.
 * @returns The parsed JSON response body.
 * @throws Error('Failed to fetch duplicates') when the response status is not OK.
 */
async getDuplicateSongs(minGroupSize: number = 2): Promise<{
  groups: Array<{
    key: string;
    normalizedTitle: string;
    normalizedArtist: string;
    count: number;
    items: Array<{
      songId: string;
      title: string;
      artist: string;
      location?: string;
      totalTime?: string;
      playlists: string[];
    }>;
  }>;
}> {
  const query = new URLSearchParams();
  query.set('minGroupSize', String(minGroupSize));
  const url = `${API_BASE_URL}/songs/duplicates?${query.toString()}`;

  const response = await fetch(url);
  if (response.ok) {
    return response.json();
  }
  throw new Error('Failed to fetch duplicates');
}
} }
export const api = new Api(); export const api = new Api();