SELECT * 
FROM employee.personal_details 
LIMIT 10

#ensuring that all the rows of the 'personal details' dataset were successfully loaded into MySQL
SELECT COUNT(EmployeeName) AS Number
FROM employee.personal_details

SELECT EmployeeName, COUNT(EmployeeName)
FROM employee.personal_details
GROUP BY EmployeeName
HAVING COUNT(EmployeeName) > 1

ALTER TABLE employee.personal_details
DROP Zip,
DROP DOB,
DROP Citizenship

#double checking that the columns were properly dropped
SELECT *
FROM employee.personal_details 
LIMIT 3

SELECT *
FROM employee.personal_details
WHERE EmployeeNumber = ''
OR Age = ''
OR State = ''
OR Sex = ''
OR MaritalStatus = ''
OR Race = ''

SELECT * 
FROM employee.department_details 
LIMIT 10

#ensuring that all the rows of the 'department details' dataset were successfully loaded into MySQL
SELECT COUNT(EmployeeName) AS Number
FROM employee.department_details

SELECT EmployeeName, COUNT(EmployeeName)
FROM employee.department_details
GROUP BY EmployeeName
HAVING COUNT(EmployeeName) > 1

ALTER TABLE employee.department_details
DROP ManagerName,
DROP ManagerID,
DROP SpecialProjectsCount

#double checking that the columns were properly dropped
SELECT * 
FROM employee.department_details 
LIMIT 3

SELECT *
FROM employee.department_details
WHERE EmployeeID = ''
OR Department = ''

SELECT * 
FROM employee.recruitment_and_job_details 
LIMIT 10

#ensuring that all the entries of the 'recruitment and job details' dataset were sucessfully loaded into MySQL
SELECT COUNT(EmployeeName) AS Number
FROM employee.recruitment_and_job_details

SELECT EmployeeName, COUNT(EmployeeName)
FROM employee.recruitment_and_job_details
GROUP BY EmployeeName
HAVING COUNT(EmployeeName) > 1

ALTER TABLE employee.recruitment_and_job_details
DROP RecruitmentSource,
DROP EngagementSurvey,
DROP EmployeeSatisfaction

#double checking that the columns were properly dropped
SELECT * 
FROM employee.recruitment_and_job_details 
LIMIT 3

SELECT *
FROM employee.recruitment_and_job_details
WHERE EmployeeID = ''
OR DateOfHire = ''
OR PayRate = ''
OR DateOfTermination = ''
OR TerminationReason = '' 
LIMIT 10

#inputing "NULL" for entries with no termination reason
UPDATE employee.recruitment_and_job_details
SET TerminationReason = "NULL"
WHERE TerminationReason = ''

#inputing "NULL" for entries where there was no termination date (as those employees are still with the company)
UPDATE employee.recruitment_and_job_details
SET DateOfTermination = "NULL"
WHERE DateOfTermination = ''

#double-checking that the missing values above were successfully replaced (ie. checking that misisng values no longer exist)
SELECT *
FROM employee.recruitment_and_job_details
WHERE EmployeeID = ''
OR DateOfHire = ''
OR PayRate = ''
OR DateOfTermination = ''
OR TerminationReason = ''

#joining the 3 datasets
SELECT * FROM employee.personal_details pd
LEFT JOIN employee.department_details dd ON pd.EmployeeName = dd.EmployeeName
                                        AND pd.EmployeeNumber = dd.EmployeeID 
LEFT JOIN employee.recruitment_and_job_details rj ON pd.EmployeeName = rj.EmployeeName  
                                                 AND pd.EmployeeNumber = rj.EmployeeID
LIMIT 6

#changing EmployeeName column for department_details dataset
ALTER TABLE employee.department_details
rename column EmployeeName to EmployeeName2

#double-checking that the EmployeeName column had been changed
SELECT * FROM employee.department_details 
LIMIT 3

#changing EmployeeName column for recruitment_and_job_details dataset
ALTER TABLE employee.recruitment_and_job_details
rename column EmployeeName to EmployeeName3

#changing EmployeeID column for recruitment_and_job_details dataset
ALTER TABLE recruitment_and_job_details
rename column EmployeeID to EmployeeID2

#double checking that the recruitment_and_job_details columns had been changed
SELECT * FROM employee.recruitment_and_job_details 
LIMIT 3

#storing my join result in a dataset called "joined_dataset"
CREATE TABLE joined_dataset
SELECT * FROM employee.personal_details pd
LEFT JOIN employee.department_details dd ON pd.EmployeeName = dd.EmployeeName2
                                        AND pd.EmployeeNumber = dd.EmployeeID 
LEFT JOIN employee.recruitment_and_job_details rj ON pd.EmployeeName = rj.EmployeeName3  
                                                 AND pd.EmployeeNumber = rj.EmployeeID2

#taking a look at joined_dataset
SELECT *
FROM employee.joined_dataset 
LIMIT 3

#dropping the extra EmployeeName and EmployeeID columns
ALTER TABLE employee.joined_dataset
DROP EmployeeName2,
DROP EmployeeName3,
DROP EmployeeNumber,
DROP EmployeeID2

#double-checking that the columns mentioned above had been dropped
SELECT* 
FROM employee.joined_dataset 
LIMIT 3

#to bring the 'EmployeeID' column to be right after 'EmployeeName'
ALTER TABLE employee.joined_dataset
MODIFY EmployeeID int
AFTER EmployeeName

#double-checking that the  columns had been properly rearranged
SELECT * 
FROM employee.joined_dataset 
LIMIT 3

	EmployeeName	EmployeeNumber	Age	State	Zip	DOB	Sex	MaritalStatus	Citizenship	Race
0	Brown, Mia	1103024456	30	MA	1450	11/24/1987	Female	Married	US Citizen	Black or African American
1	LaRotonda, William	1106026572	34	MA	1460	4/26/1984	Male	Divorced	US Citizen	Black or African American
2	Steans, Tyrone	1302053333	31	MA	2703	9/1/1986	Male	Single	US Citizen	White
3	Howard, Estelle	1211050782	32	MA	2170	9/16/1985	Female	Married	US Citizen	White
4	Singh, Nan	1307059817	30	MA	2330	5/19/1988	Female	Single	US Citizen	White
5	Smith, Leigh Ann	711007713	30	MA	1844	6/14/1987	Female	Married	US Citizen	Asian
6	LeBlanc, Brandon R	1102024115	33	MA	1460	6/10/1984	Male	Married	US Citizen	White
7	Quinn, Sean	1206043417	33	MA	2045	11/6/1984	Male	Married	Eligible NonCitizen	Black or African American
8	Boutwell, Bonalyn	1307060188	31	MA	2468	4/4/1987	Female	Married	US Citizen	Asian
9	Foster-Baker, Amy	1201031308	39	MA	2050	4/16/1979	Female	Married	US Citizen	White

	EmployeeName	EmployeeNumber	Age	State	Sex	MaritalStatus	Race
0	Brown, Mia	1103024456	30	MA	Female	Married	Black or African American
1	LaRotonda, William	1106026572	34	MA	Male	Divorced	Black or African American
2	Steans, Tyrone	1302053333	31	MA	Male	Single	White

	EmployeeName	EmployeeID	Department	Position	ManagerName	ManagerID	SpecialProjectsCount
0	Brown, Mia	1103024456	Admin Offices	Accountant I	Brandon R. LeBlanc	1	6
1	LaRotonda, William	1106026572	Admin Offices	Accountant I	Brandon R. LeBlanc	1	4
2	Steans, Tyrone	1302053333	Admin Offices	Accountant I	Brandon R. LeBlanc	1	5
3	Howard, Estelle	1211050782	Admin Offices	Administrative Assistant	Brandon R. LeBlanc	1	4
4	Singh, Nan	1307059817	Admin Offices	Administrative Assistant	Brandon R. LeBlanc	1	5
5	Smith, Leigh Ann	711007713	Admin Offices	Administrative Assistant	Brandon R. LeBlanc	1	4
6	Bunbury, Jessica	1504073368	Sales	Area Sales Manager	John Smith	17	0
7	Carter, Michelle	1403065721	Sales	Area Sales Manager	John Smith	17	0
8	Dietrich, Jenna	1408069481	Sales	Area Sales Manager	John Smith	17	0
9	Digitale, Alfred	1306059197	Sales	Area Sales Manager	John Smith	17	0

	EmployeeName	EmployeeID	Department	Position
0	Brown, Mia	1103024456	Admin Offices	Accountant I
1	LaRotonda, William	1106026572	Admin Offices	Accountant I
2	Steans, Tyrone	1302053333	Admin Offices	Accountant I

	EmployeeName	EmployeeID	RecruitmentSource	DateOfHire	PayRate	EngagementSurvey	EmployeeSatisfaction	DateOfTermination	TerminationReason
0	Brown, Mia	1103024456	Diversity Job Fair	10/27/2008	28.50	2.04	2		N/A - still employed
1	LaRotonda, William	1106026572	Website Banner Ads	1/6/2014	23.00	5.00	4		N/A - still employed
2	Steans, Tyrone	1302053333	Internet Search	9/29/2014	29.00	3.90	5		N/A - still employed
3	Howard, Estelle	1211050782	Pay Per Click - Google	2/16/2015	21.50	3.24	3	4/15/2015
4	Singh, Nan	1307059817	Website Banner Ads	5/1/2015	16.56	5.00	3		N/A - still employed
5	Smith, Leigh Ann	711007713	Diversity Job Fair	9/26/2011	20.50	3.80	4	9/25/2015	career change
6	Bunbury, Jessica	1504073368	Social Networks - Facebook Twitter etc	8/15/2011	55.00	3.14	5	8/2/2015	Another position
7	Carter, Michelle	1403065721	Billboard	8/18/2014	55.00	5.00	5		N/A - still employed
8	Dietrich, Jenna	1408069481	Website Banner Ads	2/20/2012	55.00	2.30	1		N/A - still employed
9	Digitale, Alfred	1306059197	Pay Per Click - Google	8/18/2014	56.00	3.60	5		N/A - still employed

Data Wrangling in SQL¶

Step 1: Cleaning 'Personal Details' dataset¶

~ Checking for duplicate entries¶

~ Removing irrelevant columns¶

~ Handling missing values¶

Step 2: Cleaning 'Department Details' dataset¶

~ Checking for duplicate entries¶

~ Removing irrelevant columns¶

~ Handling missing values¶

Step 3: Cleaning 'Recruitment and Job Details' dataset¶

~ Checking for duplicate entries¶

~Removing irrelevant columns¶

~ Handling missing values¶

Step 4: Joining the Datasets¶