1+ /*
2+ Cleaning Data in SQL Queries
3+ */
4+
-- Look at every column of the table before any cleaning step is applied.
SELECT nh.*
FROM PortfolioProject..NashvilleHousing AS nh;
7+
8+
9+ -- ------------------------------------------------
10+
11+ -- Standardize Date Format
12+
-- Standardize SaleDate: preview the conversion, try it in place, then fall
-- back to a dedicated DATE column.
-- Batches are separated with GO because a batch that references a column
-- added earlier in the same batch fails to compile ("Invalid column name").

-- Preview the stored value next to its DATE-only form.
SELECT
    SaleDate,
    CONVERT(date, SaleDate) AS SaleDateAsDate
FROM PortfolioProject..NashvilleHousing;

-- In-place attempt. An UPDATE rewrites values but cannot change the column's
-- declared type, so this may appear to do nothing (hence the fallback below).
UPDATE PortfolioProject..NashvilleHousing
SET SaleDate = CONVERT(date, SaleDate);
GO

-- Fallback: keep the converted value in its own DATE column.
-- The COL_LENGTH guard makes the ADD safe to re-run.
IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'SaleDateConverted') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD SaleDateConverted date;
GO

UPDATE PortfolioProject..NashvilleHousing
SET SaleDateConverted = CONVERT(date, SaleDate);
GO

-- The column was first added under the misspelled name SaleDataConverted.
-- Drop it only when it is actually present so a clean database does not error.
IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'SaleDataConverted') IS NOT NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    DROP COLUMN SaleDataConverted;
GO
32+
33+
34+ -- ------------------------------------------------
35+
36+ -- Standardize Acreage
37+
-- Standardize Acreage to DECIMAL(5, 2): preview the cast, try it in place,
-- then fall back to a dedicated column.
-- NOTE(review): DECIMAL(5, 2) tops out at 999.99; a larger Acreage value would
-- raise an arithmetic overflow error. Confirm the data stays below that.

-- Preview the stored value next to its two-decimal form.
SELECT
    Acreage,
    CAST(Acreage AS decimal(5, 2)) AS AcreageAsDecimal
FROM PortfolioProject..NashvilleHousing;

-- In-place attempt. The column keeps its declared type, so the stored result
-- may not look any different (hence the fallback below).
UPDATE PortfolioProject..NashvilleHousing
SET Acreage = CAST(Acreage AS decimal(5, 2));
GO

-- Fallback: store the converted value in its own DECIMAL(5, 2) column.
-- The guard makes the ADD re-runnable; GO is needed because the UPDATE below
-- would not compile in the same batch that adds the column.
IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'AcreageConverted') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD AcreageConverted decimal(5, 2);
GO

UPDATE PortfolioProject..NashvilleHousing
SET AcreageConverted = CAST(Acreage AS decimal(5, 2));
GO
52+
53+
54+ -- ------------------------------------------------
55+
56+ -- Populate Property Address data
57+
-- Fill missing PropertyAddress values by borrowing the address from another
-- row (different UniqueID) that carries the same ParcelID.

-- Browse the table in ParcelID order so rows for the same parcel sit together.
-- (Filter on "PropertyAddress IS NULL" to see only the rows that need a fill.)
SELECT nh.*
FROM PortfolioProject..NashvilleHousing AS nh
ORDER BY nh.ParcelID;

-- Preview: each row lacking an address (tgt) paired with a same-parcel row
-- (src), plus the value the update below would write.
SELECT
    tgt.ParcelID,
    tgt.PropertyAddress,
    src.ParcelID,
    src.PropertyAddress,
    ISNULL(tgt.PropertyAddress, src.PropertyAddress)
FROM PortfolioProject..NashvilleHousing AS tgt
INNER JOIN PortfolioProject..NashvilleHousing AS src
    ON  tgt.ParcelID = src.ParcelID
    AND tgt.[UniqueID ] <> src.[UniqueID ]   -- never pair a row with itself
WHERE tgt.PropertyAddress IS NULL;

-- Apply the fill through the tgt alias; only rows with a NULL address change.
UPDATE tgt
SET PropertyAddress = ISNULL(tgt.PropertyAddress, src.PropertyAddress)
FROM PortfolioProject..NashvilleHousing AS tgt
INNER JOIN PortfolioProject..NashvilleHousing AS src
    ON  tgt.ParcelID = src.ParcelID
    AND tgt.[UniqueID ] <> src.[UniqueID ]
WHERE tgt.PropertyAddress IS NULL;
81+
82+
83+ -- ------------------------------------------------
84+
85+ -- Breaking out Address into Individual Columns (Address, City, State)
86+ -- First up the Property Address, using Substrings
87+
-- Split PropertyAddress at its first comma: the text before it becomes the
-- street address, the text after it becomes the city.
--
-- Details that matter:
--   * The delimiter is ',' (no surrounding spaces).
--   * A comma is appended before searching, so CHARINDEX always finds one.
--     A value without a comma then yields the whole string as Address and an
--     empty City, instead of passing a negative length to SUBSTRING.
--   * LTRIM removes the space that follows the comma from the City.
--   * GO separates the ALTER TABLE batch from the UPDATE that uses the new
--     columns; in a single batch the UPDATE would not compile.

-- Raw values to be split.
SELECT PropertyAddress
FROM PortfolioProject..NashvilleHousing;

-- Preview the two derived pieces.
SELECT
    SUBSTRING(PropertyAddress, 1, CHARINDEX(',', PropertyAddress + ',') - 1) AS Address,
    LTRIM(SUBSTRING(PropertyAddress,
                    CHARINDEX(',', PropertyAddress + ',') + 1,
                    LEN(PropertyAddress))) AS City
FROM PortfolioProject..NashvilleHousing;
GO

-- Add the target columns; the guards make the script safe to re-run.
IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'PropertySplitAddress') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD PropertySplitAddress nvarchar(255);

IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'PropertySplitCity') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD PropertySplitCity nvarchar(255);
GO

-- Populate both columns in one pass over the table.
UPDATE PortfolioProject..NashvilleHousing
SET PropertySplitAddress = SUBSTRING(PropertyAddress, 1, CHARINDEX(',', PropertyAddress + ',') - 1),
    PropertySplitCity    = LTRIM(SUBSTRING(PropertyAddress,
                                           CHARINDEX(',', PropertyAddress + ',') + 1,
                                           LEN(PropertyAddress)));
GO

-- Inspect the result.
SELECT *
FROM PortfolioProject..NashvilleHousing;
112+
-- Next up the Owner Address, using PARSENAME instead of SUBSTRING.
--
-- PARSENAME splits on periods and numbers the parts from the right, so the
-- commas are replaced with periods first: part 3 = address, part 2 = city,
-- part 1 = state. LTRIM removes the space that follows each comma.
-- NOTE(review): PARSENAME returns NULL for more than four parts, and a period
-- already present in an address would shift the parts. Verify against the data.

-- Raw values to be split.
SELECT OwnerAddress
FROM PortfolioProject..NashvilleHousing;

-- Preview the three derived pieces.
SELECT
    LTRIM(PARSENAME(REPLACE(OwnerAddress, ',', '.'), 3)) AS OwnerSplitAddress,
    LTRIM(PARSENAME(REPLACE(OwnerAddress, ',', '.'), 2)) AS OwnerSplitCity,
    LTRIM(PARSENAME(REPLACE(OwnerAddress, ',', '.'), 1)) AS OwnerSplitState
FROM PortfolioProject..NashvilleHousing;
GO

-- Add the target columns; the guards make the script safe to re-run.
IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'OwnerSplitAddress') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD OwnerSplitAddress nvarchar(255);

IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'OwnerSplitCity') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD OwnerSplitCity nvarchar(255);

IF COL_LENGTH('PortfolioProject..NashvilleHousing', 'OwnerSplitState') IS NULL
    ALTER TABLE PortfolioProject..NashvilleHousing
    ADD OwnerSplitState nvarchar(255);
GO

-- Populate all three columns in one pass. This runs in its own batch because
-- the columns must exist before the statement is compiled.
UPDATE PortfolioProject..NashvilleHousing
SET OwnerSplitAddress = LTRIM(PARSENAME(REPLACE(OwnerAddress, ',', '.'), 3)),
    OwnerSplitCity    = LTRIM(PARSENAME(REPLACE(OwnerAddress, ',', '.'), 2)),
    OwnerSplitState   = LTRIM(PARSENAME(REPLACE(OwnerAddress, ',', '.'), 1));
GO

-- Inspect the result.
SELECT *
FROM PortfolioProject..NashvilleHousing;
144+
145+
146+ -- ------------------------------------------------
147+
148+ -- Change Y and N to Yes and No in "Sold as Vacant" field
149+
-- Normalize SoldAsVacant so the abbreviations 'Y' / 'N' become 'Yes' / 'No'.
-- The literals carry no leading space: a leading space is significant in a
-- comparison, so ' Y' would never equal 'Y' and nothing would be updated.

-- How many rows carry each distinct value (least common first).
SELECT
    SoldAsVacant,
    COUNT(SoldAsVacant) AS ValueCount
FROM PortfolioProject..NashvilleHousing
GROUP BY SoldAsVacant
ORDER BY ValueCount;

-- Preview the mapping next to the stored value.
SELECT
    SoldAsVacant,
    CASE
        WHEN SoldAsVacant = 'Y' THEN 'Yes'
        WHEN SoldAsVacant = 'N' THEN 'No'
        ELSE SoldAsVacant
    END AS SoldAsVacantNormalized
FROM PortfolioProject..NashvilleHousing;

-- Apply the mapping. The WHERE clause limits the write to rows that actually
-- hold an abbreviation instead of rewriting every row with its own value.
UPDATE PortfolioProject..NashvilleHousing
SET SoldAsVacant =
    CASE
        WHEN SoldAsVacant = 'Y' THEN 'Yes'
        WHEN SoldAsVacant = 'N' THEN 'No'
        ELSE SoldAsVacant
    END
WHERE SoldAsVacant IN ('Y', 'N');
169+
170+
171+ -- ------------------------------------------------
172+
-- Remove Duplicates - It is not standard practice to delete data from a table, but this is practice at doing so.
-- Uses a CTE. ORDER BY cannot be used inside one.
175+
-- Delete duplicate rows: rows that agree on ParcelID, PropertyAddress,
-- SalePrice, SaleDate and LegalReference are treated as copies of one sale.
-- Within each such group the rows are numbered by [UniqueID ], so the row with
-- the lowest [UniqueID ] is the one kept and the outcome is repeatable.
-- The leading semicolon terminates whatever statement precedes this one;
-- WITH is only accepted when the previous statement has been terminated.
;WITH RowNumCTE AS (
    SELECT
        [UniqueID ],
        ROW_NUMBER() OVER (
            PARTITION BY
                ParcelID,
                PropertyAddress,
                SalePrice,
                SaleDate,
                LegalReference
            ORDER BY [UniqueID ]
        ) AS row_num
    FROM PortfolioProject..NashvilleHousing
)
-- Deleting through the CTE removes the underlying table rows.
DELETE
FROM RowNumCTE
WHERE row_num > 1;
192+
193+
194+ -- ------------------------------------------------
195+
196+ -- Delete Unused Columns. You don't do this to raw data that you put into your database.
197+ -- Again, just the practice of doing so.
198+
-- Take a last look at the table, then drop the listed columns.

SELECT nh.*
FROM PortfolioProject..NashvilleHousing AS nh;

-- First removal: the two original address columns, TaxDistrict and SaleDate.
ALTER TABLE PortfolioProject..NashvilleHousing
DROP COLUMN
    PropertyAddress,
    TaxDistrict,
    OwnerAddress,
    SaleDate;

-- Second removal: the original Acreage column.
ALTER TABLE PortfolioProject..NashvilleHousing
DROP COLUMN Acreage;
0 commit comments