SQLize Online / PHPize Online  /  SQLtest Online

A A A
Share      Blog   Popular
Copy Format Clear
-- Step 1: Define the table schema and insert data (same as before) CREATE TABLE student_activities ( id INTEGER, student_id INTEGER, account_id INTEGER, resource_type VARCHAR(50), score INTEGER, quiz_id INTEGER, etl_last_updated_ts TIMESTAMP ); INSERT INTO student_activities (id, student_id, account_id, resource_type, score, quiz_id, etl_last_updated_ts) VALUES (10290795, 1096217, 0, 'Movie', NULL, 0, '2023-08-21 00:47:16.000'), (10290795, 1096217, 8393098, 'Movie', NULL, 0, '2023-08-10 01:22:46.000'), (10291952, 1096217, 0, 'Quiz', 3, 57, '2023-08-21 00:47:16.000'), (10291952, 1096217, 8393098, 'Quiz', 3, 57, '2023-08-10 01:22:46.000'), (10292230, 1096217, 8393098, 'Quiz', 7, 57, '2023-08-10 01:22:46.000'), (10292230, 1096217, 0, 'Quiz', 7, 57, '2023-08-21 00:47:16.000'), (11299627, 1096217, 0, 'Movie', NULL, 0, '2023-08-21 00:47:16.000'), (11299627, 1096217, 8393098, 'Movie', NULL, 0, '2023-08-10 01:22:46.000'), (111400388, 1096217, 8393098, 'Quiz', 6, 753, '2023-08-10 01:22:46.000'), (111400388, 1096217, 0, 'Quiz', 6, 753, '2023-08-21 00:47:16.000'); -- Step 2: Perform deduplication using a CTE with ROW_NUMBER() and DELETE statement WITH dedup_cte AS ( SELECT id, student_id, account_id, resource_type, score, quiz_id, etl_last_updated_ts, ROW_NUMBER() OVER ( PARTITION BY id -- Partition by id to find duplicates ORDER BY etl_last_updated_ts ASC, -- Lowest etl_last_updated_ts first account_id ASC -- Lowest account_id second (if etl_last_updated_ts is tied) ) AS row_num FROM student_activities ) DELETE FROM student_activities WHERE (id, etl_last_updated_ts, account_id) IN ( SELECT id, etl_last_updated_ts, account_id FROM dedup_cte WHERE row_num > 1 -- Keep only the first record in each partition ); -- Step 3: Display the deduplicated data SELECT * FROM student_activities;

Stuck with a problem? Got Error? Ask ChatGPT!

Copy Clear