-
Notifications
You must be signed in to change notification settings - Fork 993
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6b8380c
commit f412041
Showing
1 changed file
with
301 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,301 @@ | ||
/* | ||
Cleaning Data in SQL Queries | ||
*/ | ||
|
||
|
||
Select * | ||
From PortfolioProject.dbo.NashvilleHousing | ||
|
||
-------------------------------------------------------------------------------------------------------------------------- | ||
|
||
-- Standardize Date Format | ||
|
||
|
||
Select saleDateConverted, CONVERT(Date,SaleDate) | ||
From PortfolioProject.dbo.NashvilleHousing | ||
|
||
|
||
Update NashvilleHousing | ||
SET SaleDate = CONVERT(Date,SaleDate) | ||
|
||
-- If it doesn't Update properly | ||
|
||
ALTER TABLE NashvilleHousing | ||
Add SaleDateConverted Date; | ||
|
||
Update NashvilleHousing | ||
SET SaleDateConverted = CONVERT(Date,SaleDate) | ||
|
||
|
||
-------------------------------------------------------------------------------------------------------------------------- | ||
|
||
-- Populate Property Address data | ||
|
||
Select * | ||
From PortfolioProject.dbo.NashvilleHousing | ||
--Where PropertyAddress is null | ||
order by ParcelID | ||
|
||
|
||
|
||
Select a.ParcelID, a.PropertyAddress, b.ParcelID, b.PropertyAddress, ISNULL(a.PropertyAddress,b.PropertyAddress) | ||
From PortfolioProject.dbo.NashvilleHousing a | ||
JOIN PortfolioProject.dbo.NashvilleHousing b | ||
on a.ParcelID = b.ParcelID | ||
AND a.[UniqueID ] <> b.[UniqueID ] | ||
Where a.PropertyAddress is null | ||
|
||
|
||
Update a | ||
SET PropertyAddress = ISNULL(a.PropertyAddress,b.PropertyAddress) | ||
From PortfolioProject.dbo.NashvilleHousing a | ||
JOIN PortfolioProject.dbo.NashvilleHousing b | ||
on a.ParcelID = b.ParcelID | ||
AND a.[UniqueID ] <> b.[UniqueID ] | ||
Where a.PropertyAddress is null | ||
|
||
|
||
|
||
|
||
-------------------------------------------------------------------------------------------------------------------------- | ||
|
||
-- Breaking out Address into Individual Columns (Address, City, State) | ||
|
||
|
||
Select PropertyAddress | ||
From PortfolioProject.dbo.NashvilleHousing | ||
--Where PropertyAddress is null | ||
--order by ParcelID | ||
|
||
SELECT | ||
SUBSTRING(PropertyAddress, 1, CHARINDEX(',', PropertyAddress) -1 ) as Address | ||
, SUBSTRING(PropertyAddress, CHARINDEX(',', PropertyAddress) + 1 , LEN(PropertyAddress)) as Address | ||
|
||
From PortfolioProject.dbo.NashvilleHousing | ||
|
||
|
||
ALTER TABLE NashvilleHousing | ||
Add PropertySplitAddress Nvarchar(255); | ||
|
||
Update NashvilleHousing | ||
SET PropertySplitAddress = SUBSTRING(PropertyAddress, 1, CHARINDEX(',', PropertyAddress) -1 ) | ||
|
||
|
||
ALTER TABLE NashvilleHousing | ||
Add PropertySplitCity Nvarchar(255); | ||
|
||
Update NashvilleHousing | ||
SET PropertySplitCity = SUBSTRING(PropertyAddress, CHARINDEX(',', PropertyAddress) + 1 , LEN(PropertyAddress)) | ||
|
||
|
||
|
||
|
||
Select * | ||
From PortfolioProject.dbo.NashvilleHousing | ||
|
||
|
||
|
||
|
||
|
||
Select OwnerAddress | ||
From PortfolioProject.dbo.NashvilleHousing | ||
|
||
|
||
Select | ||
PARSENAME(REPLACE(OwnerAddress, ',', '.') , 3) | ||
,PARSENAME(REPLACE(OwnerAddress, ',', '.') , 2) | ||
,PARSENAME(REPLACE(OwnerAddress, ',', '.') , 1) | ||
From PortfolioProject.dbo.NashvilleHousing | ||
|
||
|
||
|
||
ALTER TABLE NashvilleHousing | ||
Add OwnerSplitAddress Nvarchar(255); | ||
|
||
Update NashvilleHousing | ||
SET OwnerSplitAddress = PARSENAME(REPLACE(OwnerAddress, ',', '.') , 3) | ||
|
||
|
||
ALTER TABLE NashvilleHousing | ||
Add OwnerSplitCity Nvarchar(255); | ||
|
||
Update NashvilleHousing | ||
SET OwnerSplitCity = PARSENAME(REPLACE(OwnerAddress, ',', '.') , 2) | ||
|
||
|
||
|
||
ALTER TABLE NashvilleHousing | ||
Add OwnerSplitState Nvarchar(255); | ||
|
||
Update NashvilleHousing | ||
SET OwnerSplitState = PARSENAME(REPLACE(OwnerAddress, ',', '.') , 1) | ||
|
||
|
||
|
||
Select * | ||
From PortfolioProject.dbo.NashvilleHousing | ||
|
||
|
||
|
||
|
||
-------------------------------------------------------------------------------------------------------------------------- | ||
|
||
|
||
-- Change Y and N to Yes and No in "Sold as Vacant" field | ||
|
||
|
||
Select Distinct(SoldAsVacant), Count(SoldAsVacant) | ||
From PortfolioProject.dbo.NashvilleHousing | ||
Group by SoldAsVacant | ||
order by 2 | ||
|
||
|
||
|
||
|
||
Select SoldAsVacant | ||
, CASE When SoldAsVacant = 'Y' THEN 'Yes' | ||
When SoldAsVacant = 'N' THEN 'No' | ||
ELSE SoldAsVacant | ||
END | ||
From PortfolioProject.dbo.NashvilleHousing | ||
|
||
|
||
Update NashvilleHousing | ||
SET SoldAsVacant = CASE When SoldAsVacant = 'Y' THEN 'Yes' | ||
When SoldAsVacant = 'N' THEN 'No' | ||
ELSE SoldAsVacant | ||
END | ||
|
||
|
||
|
||
|
||
|
||
|
||
----------------------------------------------------------------------------------------------------------------------------------------------------------- | ||
|
||
-- Remove Duplicates | ||
|
||
WITH RowNumCTE AS( | ||
Select *, | ||
ROW_NUMBER() OVER ( | ||
PARTITION BY ParcelID, | ||
PropertyAddress, | ||
SalePrice, | ||
SaleDate, | ||
LegalReference | ||
ORDER BY | ||
UniqueID | ||
) row_num | ||
|
||
From PortfolioProject.dbo.NashvilleHousing | ||
--order by ParcelID | ||
) | ||
Select * | ||
From RowNumCTE | ||
Where row_num > 1 | ||
Order by PropertyAddress | ||
|
||
|
||
|
||
Select * | ||
From PortfolioProject.dbo.NashvilleHousing | ||
|
||
|
||
|
||
|
||
--------------------------------------------------------------------------------------------------------- | ||
|
||
-- Delete Unused Columns | ||
|
||
|
||
|
||
Select * | ||
From PortfolioProject.dbo.NashvilleHousing | ||
|
||
|
||
ALTER TABLE PortfolioProject.dbo.NashvilleHousing | ||
DROP COLUMN OwnerAddress, TaxDistrict, PropertyAddress, SaleDate | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
----------------------------------------------------------------------------------------------- | ||
----------------------------------------------------------------------------------------------- | ||
|
||
--- Importing Data using OPENROWSET and BULK INSERT | ||
|
||
-- More advanced and looks cooler, but have to configure server appropriately to do correctly | ||
-- Wanted to provide this in case you wanted to try it | ||
|
||
|
||
--sp_configure 'show advanced options', 1; | ||
--RECONFIGURE; | ||
--GO | ||
--sp_configure 'Ad Hoc Distributed Queries', 1; | ||
--RECONFIGURE; | ||
--GO | ||
|
||
|
||
--USE PortfolioProject | ||
|
||
--GO | ||
|
||
--EXEC master.dbo.sp_MSset_oledb_prop N'Microsoft.ACE.OLEDB.12.0', N'AllowInProcess', 1 | ||
|
||
--GO | ||
|
||
--EXEC master.dbo.sp_MSset_oledb_prop N'Microsoft.ACE.OLEDB.12.0', N'DynamicParameters', 1 | ||
|
||
--GO | ||
|
||
|
||
---- Using BULK INSERT | ||
|
||
--USE PortfolioProject; | ||
--GO | ||
--BULK INSERT nashvilleHousing FROM 'C:\Temp\SQL Server Management Studio\Nashville Housing Data for Data Cleaning Project.csv' | ||
-- WITH ( | ||
-- FIELDTERMINATOR = ',', | ||
-- ROWTERMINATOR = '\n' | ||
--); | ||
--GO | ||
|
||
|
||
---- Using OPENROWSET | ||
--USE PortfolioProject; | ||
--GO | ||
--SELECT * INTO nashvilleHousing | ||
--FROM OPENROWSET('Microsoft.ACE.OLEDB.12.0', | ||
-- 'Excel 12.0; Database=C:\Users\alexf\OneDrive\Documents\SQL Server Management Studio\Nashville Housing Data for Data Cleaning Project.csv', [Sheet1$]); | ||
--GO | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|