SQLize Online / PHPize Online  /  SQLtest Online

A A A
Share      Blog   Popular
Copy Format Clear
--load the synthetic dataset create table synthetic_dataset ( employee_first_name varchar2(20), employee_last_name varchar2(30), ssn varchar2(9), date_of_birth date ); insert into synthetic_dataset (employee_first_name, employee_last_name, ssn, date_of_birth) values ('asodké-qf31ñ','örsenFeld jr','123456789','23-Oct-1912'); insert into synthetic_dataset (employee_first_name, employee_last_name, ssn, date_of_birth) values ('Mr.Rùteyndô','Plånçk-Vladímírä','612344564','22-Jul-1978'); insert into synthetic_dataset (employee_first_name, employee_last_name, ssn, date_of_birth) values ('kãmiila','ĆlasS Sr.','367863592','22-May-1981'); insert into synthetic_dataset (employee_first_name, employee_last_name, ssn, date_of_birth) values ('DR. PABLÜ','VINCCI','150007103','16-Mar-1999'); insert into synthetic_dataset (employee_first_name, employee_last_name, ssn, date_of_birth) values ('mrs. Końšttantin','Amatores','121212121','12-Dec-2018'); insert into synthetic_dataset (employee_first_name, employee_last_name, ssn, date_of_birth) values ('Káillynoa','Christensen','666997221','5-Jun-1903'); insert into synthetic_dataset (employee_last_name, ssn, date_of_birth) values ('Van Nifterik','524755718','23-Apr-1908'); insert into synthetic_dataset (employee_first_name, employee_last_name, ssn, date_of_birth) values ('Gödöfredö','Stjepanić','078051120','01-Feb-2022'); insert into synthetic_dataset (employee_first_name, employee_last_name, ssn, date_of_birth) values ('Akjhil','al-Prinsen Kiruch','818681414','18-Jun-1948'); insert into synthetic_dataset (employee_first_name, employee_last_name, date_of_birth) values ('DINN3SH - Q','Lévêque Soüçe','21-Sep-1987'); /*translate the hashing code --4. CLEAN RAW DATA --Strip spaces and symbols from names --Ensure that DOBS are valid dates --Ensure that SSNs are strings*/ update synthetic_dataset set --Clean names --Convert to lowercase and strip leading and trailing whitespace employee_first_name=trim(lower(employee_first_name)), employee_last_name=trim(lower(employee_last_name)); create table clean_data as select regexp_replace(employee_first_name, 's/^(mr\ |mrs\ |dr\ |ms\ |mr\. |mrs\. |dr\. |ms\.)+//','',1) as first_name from synthetic_dataset; select * from synthetic_dataset; select * from clean_data;
Stuck with a problem? Got Error? Ask ChatGPT!
Copy Clear