Finding duplicate values in a SQL table

SQL

The following example uses ipython-sql. To install:pip install ipython-sql

%sql is a Jupyter magic command and can be ignored when not using Jupyter notebooks.

Configure SQL for Jupyter notebook

#Load sql
%load_ext sql
#connect to sqlite
%sql sqlite://

Create a table

%%sql CREATE TABLE players2 (first_name, last_name, age);  INSERT INTO players2 (first_name, last_name, age) VALUES  ('Lebron', 'James', 33),  ('Steph', 'Curry', 30),('Mike', 'Jordan', 55), ('Mike', 'Bibby', 40);

%%sql SELECT * FROM players2

first_name	last_name	age
Lebron	James	33
Steph	Curry	30
Mike	Jordan	55
Mike	Bibby	40

Find duplicate values in SQL table based on single column

%%sql  --Select all duplicate first names, and return the count
 SELECT first_name, COUNT(first_name) 
 FROM players2
 GROUP BY first_name
 HAVING COUNT(first_name) &gt; 1

first_name	COUNT(first_name)
Mike	2

Create another table

%%sql
 CREATE TABLE players3 (first_name, last_name, age);
 INSERT INTO players3 (first_name, last_name, age) VALUES 
    ('Lebron', 'James', 33),
    ('Steph', 'Curry', 30),
    ('Mike', 'Jordan', 55),
    ('Mike', 'Bibby', 40),
    ('Mike', 'Smith', 40);

Find duplicate values in SQL table based on multiple columns

%%sql  --Where both first_name and age are duplicates,  --here should return Mike with a count of two, since there are two 40 year old Mikes
 SELECT
     first_name, age, COUNT(*)
 FROM
     players3
 GROUP BY
     first_name, age
 HAVING 
     COUNT(*) &gt; 1

 * sqlite://  Done.

first_name	age	COUNT(*)
Mike	40	2