SQLize Online / PHPize Online  /  SQLtest Online

A A A
Share      Blog   Popular
Copy Format Clear
CREATE TABLE student_activities ( id INTEGER, student_id INTEGER, account_id INTEGER, resource_type VARCHAR(50), score INTEGER, quiz_id INTEGER, etl_last_updated_ts TIMESTAMP ); INSERT INTO student_activities (id, student_id, account_id, resource_type, score, quiz_id, etl_last_updated_ts) VALUES (10290795, 1096217, 0, 'Movie', NULL, 0, '2023-08-21 00:47:16.000'), (10290795, 1096217, 8393098, 'Movie', NULL, 0, '2023-08-10 01:22:46.000'), (10291952, 1096217, 0, 'Quiz', 3, 57, '2023-08-21 00:47:16.000'), (10291952, 1096217, 8393098, 'Quiz', 3, 57, '2023-08-10 01:22:46.000'), (10292230, 1096217, 8393098, 'Quiz', 7, 57, '2023-08-10 01:22:46.000'), (10292230, 1096217, 0, 'Quiz', 7, 57, '2023-08-21 00:47:16.000'), (11299627, 1096217, 0, 'Movie', NULL, 0, '2023-08-21 00:47:16.000'), (11299627, 1096217, 8393098, 'Movie', NULL, 0, '2023-08-10 01:22:46.000'), (111400388, 1096217, 8393098, 'Quiz', 6, 753, '2023-08-10 01:22:46.000'), (111400388, 1096217, 0, 'Quiz', 6, 753, '2023-08-21 00:47:16.000'); -- SELECT COUNT(id) FROM student_activities; SELECT COUNT(id) FROM student_activities; -- Delete duplicates based on criteria (keeping the most recent record) DELETE sa1 FROM student_activities sa1 JOIN ( SELECT id, etl_last_updated_ts, account_id FROM student_activities GROUP BY id, etl_last_updated_ts, account_id HAVING COUNT(*) > 1 ) sa2 ON sa1.id = sa2.id AND sa1.etl_last_updated_ts = sa2.etl_last_updated_ts AND sa1.account_id = sa2.account_id LEFT JOIN ( SELECT MIN(id) AS id_to_keep FROM student_activities GROUP BY id, etl_last_updated_ts, account_id ) tdi ON sa1.id = tdi.id_to_keep WHERE tdi.id_to_keep IS NULL AND (sa1.etl_last_updated_ts, sa1.account_id) < (sa2.etl_last_updated_ts, sa2.account_id); -- Count of rows after deduplication SELECT COUNT(id) FROM student_activities; -- WITH temp_dedup_ids AS ( -- SELECT -- id, -- etl_last_updated_ts, -- account_id, -- ROW_NUMBER() OVER ( -- PARTITION BY id, etl_last_updated_ts, account_id -- ORDER BY etl_last_updated_ts DESC -- Adjust order by as per your deduplication criteria -- ) AS row_num -- FROM -- student_activities -- ) -- DELETE FROM student_activities -- WHERE (id, etl_last_updated_ts, account_id) IN ( -- SELECT id, etl_last_updated_ts, account_id -- FROM temp_dedup_ids -- WHERE row_num > 1 -- ); -- SELECT COUNT(id) FROM student_activities;

Stuck with a problem? Got Error? Ask ChatGPT!

Copy Clear