AdvancedCTEs & Recursive
CTE to detect and remove duplicate rows
The query
SQL
-- View duplicates first
WITH dupes AS (
SELECT
email,
COUNT(*) AS cnt,
ARRAY_AGG(cust_id ORDER BY joined_at DESC) AS ids
FROM customers
GROUP BY email
HAVING COUNT(*) > 1
)
SELECT d.email, d.cnt, d.ids[1] AS keep_id, d.ids[2:] AS delete_ids
FROM dupes d;
-- Delete duplicates, keep newest
WITH ranked AS (
SELECT cust_id, ROW_NUMBER() OVER (PARTITION BY email ORDER BY joined_at DESC) AS rn
FROM customers
)
DELETE FROM customers WHERE cust_id IN (
SELECT cust_id FROM ranked WHERE rn > 1
);Note
“Two-step: first inspect duplicates, then delete. Always inspect before deleting! ARRAY_AGG shows all duplicate IDs.