Snowflake snippets
How to remove duplicates from a table in Snowflake
Let's suppose you have this Students
table and you want to remove duplicate rows.
CREATE TABLE Students(
Name varchar(50),
Surname varchar(50),
Period int,
Sport int,
History int,
English int,
Geography int);
INSERT INTO
Students(Name,Surname,Period,Sport,History,English,Geography)
VALUES
('Luke','Green',1,30,20,23,NULL),
('Luke','Green',1,30,20,23,NULL),
('Mary','Brown',1,17,15,NULL,30),
('John','Red',1,18,NULL,21,30),
('Walter','White',1,22,20,5,30),
('Luke','Green',2,30,20,23,NULL),
('Mary','Brown',2,NULL,15,17,30),
('John','Red',2,18,NULL,11,30),
('Walter','White',2,2,32,1,30),
('Luke','Green',3,20,15,15,12),
('Mary','Brown',3,0,3,NULL,4),
('Walter','White',2,2,32,1,30),
('Luke','Green',3,20,15,15,12),
('Mary','Brown',3,0,3,NULL,4),
('John','Red',3,18,40,21,30),
('Walter','White',3,17,19,15,30),
('Mary','Brown',1,17,15,NULL,30),
('John','Red',3,NULL,23,23,30);
You can create a transient DupliTable
with this snippet:
CREATE OR REPLACE TRANSIENT TABLE DupliTable AS (
SELECT Name,Surname,Period,Sport,History,English,Geography
FROM Students
GROUP BY Name,Surname,Period,Sport,History,English,Geography
HAVING COUNT(*)>1
);
and then use the following transaction to remove the duplicates values from Students
.
BEGIN TRANSACTION;
DELETE from Students a
USING DupliTable b
WHERE ((a.Name is null and b.Name is null) or a.Name = b.Name)
AND ((a.Surname is null and b.Surname is null) or a.Surname = b.Surname)
AND ((a.Period is null and b.Period is null) or a.Period = b.Period)
AND ((a.Sport is null and b.Sport is null) or a.Sport = b.Sport)
AND ((a.History is null and b.History is null) or a.History = b.History)
AND ((a.English is null and b.English is null) or a.English = b.English)
AND ((a.Geography is null and b.Geography is null) or a.Geography = b.Geography);
INSERT INTO Students
SELECT Name,Surname,Period,Sport,History,English,Geography
FROM DupliTable;
COMMIT;
You can use the following query to DELETE the values of Students
using the values of Rejected
, without any filter conditions.
DELETE
FROM Students
using Rejected
where Students.Name=Rejected.Name
AND Students.Surname=Rejected.Surname