"""
Model Selection and Hyperparameter Tuning
==========================================

This script demonstrates how to select the best model for your dataset
and optimize hyperparameters using GridSearchCV and RandomizedSearchCV.
"""

import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.svm import SVC

# Load the example dataset and hold out 20% of the samples for final testing.
# random_state pins the split so results are reproducible across runs.
data = load_iris()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(f"Training Samples: {X_train.shape[0]}, Test Samples: {X_test.shape[0]}")

# 1. Model Selection: compare the baseline (default-hyperparameter) accuracy
# of two candidate models on the held-out test set.
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(random_state=42),
}

for name, model in models.items():
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    acc = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {acc:.4f}")
# 2. Hyperparameter Tuning with GridSearchCV: exhaustively evaluate every
# combination in the grid (3 * 4 * 3 = 36 candidates) with 3-fold
# cross-validation on the training set only.
param_grid_rf = {
    'n_estimators': [50, 100, 150],
    'max_depth': [3, 5, 10, None],
    'min_samples_split': [2, 5, 10],
}

print("\nPerforming GridSearchCV for Random Forest...")
grid_search_rf = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid_rf,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,  # use all available CPU cores
)
grid_search_rf.fit(X_train, y_train)

print("Best Parameters for Random Forest:", grid_search_rf.best_params_)
print("Best Cross-Validated Accuracy (RF):", grid_search_rf.best_score_)
# 3. Hyperparameter Tuning with RandomizedSearchCV: rather than trying every
# combination, sample n_iter=20 random configurations from wide, log-spaced
# ranges — much cheaper when the search space is large.
param_dist_svc = {
    'C': np.logspace(-3, 3, 7),       # 1e-3 .. 1e3
    'gamma': np.logspace(-3, 3, 7),   # 1e-3 .. 1e3
    'kernel': ['rbf', 'poly', 'sigmoid'],
}

print("\nPerforming RandomizedSearchCV for SVM...")
random_search_svc = RandomizedSearchCV(
    SVC(random_state=42),
    param_distributions=param_dist_svc,
    n_iter=20,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42,  # makes the random sampling of candidates reproducible
)
random_search_svc.fit(X_train, y_train)

print("Best Parameters for SVM:", random_search_svc.best_params_)
print("Best Cross-Validated Accuracy (SVM):", random_search_svc.best_score_)
# 4. Final Evaluation: score the tuned estimators (refit on the full training
# set by the search objects) on the held-out test data they have never seen.
print("\nEvaluating Best Models on Test Data...")

best_rf = grid_search_rf.best_estimator_
best_svc = random_search_svc.best_estimator_

rf_test_acc = accuracy_score(y_test, best_rf.predict(X_test))
svc_test_acc = accuracy_score(y_test, best_svc.predict(X_test))

print(f"Final Test Accuracy (Random Forest): {rf_test_acc:.4f}")
print(f"Final Test Accuracy (SVM): {svc_test_acc:.4f}")

# 5. Choose the model with the higher test accuracy (ties go to SVM,
# matching the original strict > comparison).
if rf_test_acc > svc_test_acc:
    print("\nThe Best Model for This Dataset is Random Forest.")
else:
    print("\nThe Best Model for This Dataset is SVM.")
'''
Model Selection: Compares two popular models, Random Forest and SVM, to find the one with the best baseline accuracy.
GridSearchCV: Systematic search over a predefined grid of hyperparameters for Random Forest.
RandomizedSearchCV: Randomized search over a wide range of hyperparameters for SVM.
Final Evaluation: Uses the best hyperparameters to evaluate performance on unseen test data.
Best Model Selection: Selects the model with the highest test accuracy.
'''