@@ -85,6 +85,18 @@ from sklearn.model_selection import train_test_split
8585from sklearn.tree import DecisionTreeClassifier
8686from sklearn.metrics import accuracy_score
8787
88+
89+ def main(path: str) -> float:
90+     data = load_data(path)
91+     data = fill_missing_values(data)
92+     data = normalize_features(data)
93+     data = engineer_features(data)
94+     X_train, X_test, y_train, y_test = split_data(data)
95+     model = train_model(X_train, y_train)
96+     accuracy = evaluate_model(model, X_test, y_test)
97+     return accuracy
98+
99+
88100def load_data(file_path: str) -> pd.DataFrame:
89101    if file_path.endswith('.csv'):
90102        data = pd.read_csv(file_path)
@@ -121,8 +133,8 @@ def engineer_features(data: pd.DataFrame) -> pd.DataFrame:
121133
122134
123135def split_data(data: pd.DataFrame) -> tuple[pd.DataFrame, ...]:
124-    X = data.drop('target', axis=1)
125136    y = data['target']
137+    X = data.drop('target', axis=1)
126138    return train_test_split(X, y, test_size=0.2, random_state=42)
127139
128140
@@ -138,16 +150,6 @@ def evaluate_model(model: DecisionTreeClassifier, X_test: pd.DataFrame, y_test:
138150    print(f"Model Accuracy: {accuracy:.2f}")
139151    return accuracy
140152
141-
142-def main(path: str) -> float:
143-    data = load_data(path)
144-    data = fill_missing_values(data)
145-    data = normalize_features(data)
146-    data = engineer_features(data)
147-    X_train, X_test, y_train, y_test = split_data(data)
148-    model = train_model(X_train, y_train)
149-    accuracy = evaluate_model(model, X_test, y_test)
150-    return accuracy
151153```
152154By simply extracting low-level functions so that each one has a single, isolated task, and calling
153155them from the high-level function *main*, we already gained:
0 commit comments