Skip to content

Commit 5c95681

Browse files
Add files via upload
Initial Commit
1 parent 59aca55 commit 5c95681

File tree

1 file changed

+206
-0
lines changed

1 file changed

+206
-0
lines changed

SQL-Data-Cleaning.sql

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
/*
    Cleaning Data in SQL Queries
*/

-- Look at the raw table before any cleaning step touches it.
SELECT nh.*
FROM PortfolioProject..NashvilleHousing AS nh;
7+
8+
9+
--------------------------------------------------
10+
11+
-- Standardize Date Format

-- Preview: the stored SaleDate next to its date-only form.
SELECT
    SaleDate,
    CONVERT(date, SaleDate) AS SaleDateAsDate
FROM PortfolioProject..NashvilleHousing;

-- In-place attempt; intentionally touches every row (no WHERE).
-- NOTE(review): if SaleDate is declared as datetime, the value is converted
-- straight back to datetime on assignment and nothing visibly changes --
-- presumably why the dedicated column below exists. Confirm the column type.
UPDATE PortfolioProject..NashvilleHousing
SET SaleDate = CONVERT(date, SaleDate);

-- If it doesn't update properly: keep the date-only value in its own column.
-- The COL_LENGTH guard makes the ADD safe to re-run.
IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'SaleDateConverted') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD SaleDateConverted date;
GO
-- GO (SSMS / sqlcmd batch separator) is required here: a column added by
-- ALTER TABLE cannot be referenced by a later statement in the same batch.

UPDATE PortfolioProject..NashvilleHousing
SET SaleDateConverted = CONVERT(date, SaleDate);

-- The column was initially added under the misspelled name SaleDataConverted,
-- so remove that column. Guarded so the script also runs on a table where the
-- typo never happened.
IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'SaleDataConverted') IS NOT NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    DROP COLUMN SaleDataConverted;
32+
33+
34+
--------------------------------------------------
35+
36+
-- Standardize Acreage

-- Preview: the stored Acreage next to its two-decimal form.
SELECT
    Acreage,
    CAST(Acreage AS decimal(5, 2)) AS AcreageAsDecimal
FROM PortfolioProject..NashvilleHousing;

-- In-place attempt; intentionally touches every row (no WHERE).
-- NOTE(review): decimal(5, 2) holds at most 999.99; a larger Acreage value
-- would raise an arithmetic overflow error. Confirm against the data.
UPDATE PortfolioProject..NashvilleHousing
SET Acreage = CAST(Acreage AS decimal(5, 2));

-- If it doesn't update properly: keep the converted value in its own column.
-- The COL_LENGTH guard makes the ADD safe to re-run.
IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'AcreageConverted') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD AcreageConverted decimal(5, 2);
GO
-- GO (SSMS / sqlcmd batch separator) is required here: a column added by
-- ALTER TABLE cannot be referenced by a later statement in the same batch.

UPDATE PortfolioProject..NashvilleHousing
SET AcreageConverted = CAST(Acreage AS decimal(5, 2));
52+
53+
54+
--------------------------------------------------
55+
56+
-- Populate Property Address data

-- Browse the table by parcel; enable the WHERE line to see only the gaps.
SELECT *
FROM PortfolioProject..NashvilleHousing
--WHERE PropertyAddress IS NULL
ORDER BY ParcelID;

-- Preview: rows missing an address (a), paired with another row of the same
-- parcel (b) that has one. Rows are matched on ParcelID and must be different
-- rows ([UniqueID ] differs).
SELECT
    a.ParcelID,
    a.PropertyAddress,
    b.ParcelID,
    b.PropertyAddress,
    COALESCE(a.PropertyAddress, b.PropertyAddress) AS FilledPropertyAddress
FROM PortfolioProject..NashvilleHousing AS a
INNER JOIN PortfolioProject..NashvilleHousing AS b
    ON a.ParcelID = b.ParcelID
    AND a.[UniqueID ] <> b.[UniqueID ]
WHERE a.PropertyAddress IS NULL
  AND b.PropertyAddress IS NOT NULL;

-- Fill the gaps. Since a.PropertyAddress is NULL for every row that passes the
-- WHERE, the value written is simply b.PropertyAddress; donors that are NULL
-- themselves are skipped so no row is rewritten with NULL.
-- NOTE(review): if a parcel has several donor rows with different addresses,
-- which one is used is not defined.
UPDATE a
SET PropertyAddress = b.PropertyAddress
FROM PortfolioProject..NashvilleHousing AS a
INNER JOIN PortfolioProject..NashvilleHousing AS b
    ON a.ParcelID = b.ParcelID
    AND a.[UniqueID ] <> b.[UniqueID ]
WHERE a.PropertyAddress IS NULL
  AND b.PropertyAddress IS NOT NULL;
81+
82+
83+
--------------------------------------------------
84+
85+
-- Breaking out Address into Individual Columns (Address, City, State)
-- First up the Property Address, using Substrings

SELECT PropertyAddress
FROM PortfolioProject..NashvilleHousing;

-- Preview of the split.
--   * Searching in PropertyAddress + ',' guarantees CHARINDEX finds a comma,
--     so an address without one no longer produces a negative length (which
--     makes SUBSTRING raise an error); such a row yields the whole value as
--     Address and NULL as City.
--   * LTRIM removes the space that follows the comma, so City does not start
--     with a blank.
SELECT
    RTRIM(SUBSTRING(PropertyAddress, 1,
                    CHARINDEX(',', PropertyAddress + ',') - 1)) AS Address,
    NULLIF(LTRIM(SUBSTRING(PropertyAddress,
                           CHARINDEX(',', PropertyAddress + ',') + 1,
                           LEN(PropertyAddress))), '') AS City
FROM PortfolioProject..NashvilleHousing;

-- Add the target columns; the COL_LENGTH guards make this safe to re-run.
IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'PropertySplitAddress') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD PropertySplitAddress nvarchar(255);

IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'PropertySplitCity') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD PropertySplitCity nvarchar(255);
GO
-- GO (SSMS / sqlcmd batch separator) is required here: columns added by
-- ALTER TABLE cannot be referenced by a later statement in the same batch.

-- Populate both columns in a single pass; intentionally touches every row.
UPDATE PortfolioProject..NashvilleHousing
SET PropertySplitAddress =
        RTRIM(SUBSTRING(PropertyAddress, 1,
                        CHARINDEX(',', PropertyAddress + ',') - 1)),
    PropertySplitCity =
        NULLIF(LTRIM(SUBSTRING(PropertyAddress,
                               CHARINDEX(',', PropertyAddress + ',') + 1,
                               LEN(PropertyAddress))), '');

SELECT *
FROM PortfolioProject..NashvilleHousing;
112+
113+
-- Next Up the Owner Address, using Parse Name instead of Substrings

SELECT OwnerAddress
FROM PortfolioProject..NashvilleHousing;

-- Preview of the split. PARSENAME splits on '.', counting parts from the
-- right, so the commas are swapped for periods first: part 3 = street address,
-- part 2 = city, part 1 = state. LTRIM/RTRIM strip the blank that follows each
-- comma.
-- NOTE(review): an OwnerAddress that already contains a period, or does not
-- have exactly three comma-separated parts, will not split as intended --
-- verify against the data.
SELECT
    LTRIM(RTRIM(PARSENAME(REPLACE(OwnerAddress, ',', '.'), 3))) AS OwnerSplitAddress,
    LTRIM(RTRIM(PARSENAME(REPLACE(OwnerAddress, ',', '.'), 2))) AS OwnerSplitCity,
    LTRIM(RTRIM(PARSENAME(REPLACE(OwnerAddress, ',', '.'), 1))) AS OwnerSplitState
FROM PortfolioProject..NashvilleHousing;

-- Add the target columns; the COL_LENGTH guards make this safe to re-run.
IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'OwnerSplitAddress') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD OwnerSplitAddress nvarchar(255);

IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'OwnerSplitCity') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD OwnerSplitCity nvarchar(255);

IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'OwnerSplitState') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD OwnerSplitState nvarchar(255);
GO
-- GO (SSMS / sqlcmd batch separator) is required here: columns added by
-- ALTER TABLE cannot be referenced by a later statement in the same batch.

-- Populate all three columns in a single pass; intentionally touches every row.
UPDATE PortfolioProject..NashvilleHousing
SET OwnerSplitAddress = LTRIM(RTRIM(PARSENAME(REPLACE(OwnerAddress, ',', '.'), 3))),
    OwnerSplitCity    = LTRIM(RTRIM(PARSENAME(REPLACE(OwnerAddress, ',', '.'), 2))),
    OwnerSplitState   = LTRIM(RTRIM(PARSENAME(REPLACE(OwnerAddress, ',', '.'), 1)));

SELECT *
FROM PortfolioProject..NashvilleHousing;
144+
145+
146+
--------------------------------------------------
147+
148+
-- Change Y and N to Yes and No in "Sold as Vacant" field

-- How many rows carry each spelling. GROUP BY already yields one row per
-- value, so no DISTINCT is needed.
SELECT
    SoldAsVacant,
    COUNT(SoldAsVacant) AS SoldAsVacantCount
FROM PortfolioProject..NashvilleHousing
GROUP BY SoldAsVacant
ORDER BY SoldAsVacantCount;

-- Preview of the mapping: 'Y' -> 'Yes', 'N' -> 'No', anything else unchanged.
SELECT
    SoldAsVacant,
    CASE
        WHEN SoldAsVacant = 'Y' THEN 'Yes'
        WHEN SoldAsVacant = 'N' THEN 'No'
        ELSE SoldAsVacant
    END AS SoldAsVacantNormalized
FROM PortfolioProject..NashvilleHousing;

-- Apply the mapping. The WHERE limits the write to rows that actually change;
-- the ELSE branch is kept as a safety net.
UPDATE PortfolioProject..NashvilleHousing
SET SoldAsVacant =
        CASE
            WHEN SoldAsVacant = 'Y' THEN 'Yes'
            WHEN SoldAsVacant = 'N' THEN 'No'
            ELSE SoldAsVacant
        END
WHERE SoldAsVacant IN ('Y', 'N');
169+
170+
171+
--------------------------------------------------
172+
173+
-- Remove Duplicates - Not standard to remove data from a table, but practicing the action of.
-- Using a CTE. Can't use ORDER BY within one.

-- Rows that share ParcelID, PropertyAddress, SalePrice, SaleDate and
-- LegalReference are treated as duplicates of each other. Within each such
-- group the rows are numbered by [UniqueID ] and every row after the first is
-- deleted, so the row with the lowest [UniqueID ] is the one that is kept.
-- (Ordering by ParcelID, a partition column, would tie on every row and leave
-- the survivor arbitrary.)
-- NOTE(review): presumes [UniqueID ] is unique per row -- confirm.
-- The leading semicolon terminates any preceding statement, which T-SQL
-- requires before WITH when it is not the first statement in the batch.
;WITH RowNumCTE AS (
    SELECT
        [UniqueID ],
        ROW_NUMBER() OVER (
            PARTITION BY
                ParcelID,
                PropertyAddress,
                SalePrice,
                SaleDate,
                LegalReference
            ORDER BY [UniqueID ]
        ) AS row_num
    FROM PortfolioProject..NashvilleHousing
)
DELETE FROM RowNumCTE
WHERE row_num > 1;
192+
193+
194+
--------------------------------------------------
195+
196+
-- Delete Unused Columns. This is not something to do to the raw data loaded
-- into a database; it is here purely to practice the statement.

-- Last look at the full table before the columns go away.
SELECT nh.*
FROM PortfolioProject..NashvilleHousing AS nh;

-- Remove the original address, tax district and sale date columns.
ALTER TABLE PortfolioProject..NashvilleHousing
    DROP COLUMN
        PropertyAddress,
        TaxDistrict,
        OwnerAddress,
        SaleDate;

-- Remove the original acreage column.
ALTER TABLE PortfolioProject..NashvilleHousing
    DROP COLUMN Acreage;

0 commit comments

Comments
 (0)