Skip to content

Commit 589c71b

Browse files
committed
Add files via upload
1 parent 34fe391 commit 589c71b

File tree

1 file changed

+100
-0
lines changed

1 file changed

+100
-0
lines changed

model_selection_and_tuning.py

+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
"""
Model Selection and Hyperparameter Tuning
==========================================
This script demonstrates how to select the best model for your dataset
and optimize hyperparameters using GridSearchCV and RandomizedSearchCV.

Overview
--------
- Model Selection: Compares two popular models, Random Forest and SVM,
  to find the one with the best baseline accuracy.
- GridSearchCV: Systematic search over a predefined grid of
  hyperparameters for Random Forest.
- RandomizedSearchCV: Randomized search over a wide range of
  hyperparameters for SVM.
- Final Evaluation: Uses the best hyperparameters to evaluate
  performance on unseen test data.
- Best Model Selection: Selects the model with the highest test accuracy.
"""

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import numpy as np


def _load_data(test_size=0.2, random_state=42):
    """Load the iris dataset and return (X_train, X_test, y_train, y_test)."""
    data = load_iris()
    X, y = data.data, data.target
    return train_test_split(X, y, test_size=test_size, random_state=random_state)


def _compare_baselines(X_train, X_test, y_train, y_test):
    """Fit each candidate model with default hyperparameters and print its test accuracy."""
    models = {
        "Random Forest": RandomForestClassifier(random_state=42),
        "SVM": SVC(random_state=42),
    }
    for name, model in models.items():
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        acc = accuracy_score(y_test, predictions)
        print(f"{name} Accuracy: {acc:.4f}")


def _tune_random_forest(X_train, y_train):
    """Exhaustive grid search over Random Forest hyperparameters.

    Returns the fitted GridSearchCV object (exposes best_params_,
    best_score_, best_estimator_).
    """
    param_grid_rf = {
        'n_estimators': [50, 100, 150],
        'max_depth': [3, 5, 10, None],
        'min_samples_split': [2, 5, 10],
    }

    print("\nPerforming GridSearchCV for Random Forest...")
    grid_search_rf = GridSearchCV(
        RandomForestClassifier(random_state=42),
        param_grid_rf,
        cv=3,
        scoring='accuracy',
        n_jobs=-1
    )
    grid_search_rf.fit(X_train, y_train)

    print("Best Parameters for Random Forest:", grid_search_rf.best_params_)
    print("Best Cross-Validated Accuracy (RF):", grid_search_rf.best_score_)
    return grid_search_rf


def _tune_svm(X_train, y_train):
    """Randomized search over SVM hyperparameters (20 sampled candidates).

    Returns the fitted RandomizedSearchCV object.
    """
    # log-spaced grids cover 1e-3 .. 1e3 for both C and gamma
    param_dist_svc = {
        'C': np.logspace(-3, 3, 7),
        'gamma': np.logspace(-3, 3, 7),
        'kernel': ['rbf', 'poly', 'sigmoid'],
    }

    print("\nPerforming RandomizedSearchCV for SVM...")
    random_search_svc = RandomizedSearchCV(
        SVC(random_state=42),
        param_distributions=param_dist_svc,
        n_iter=20,
        cv=3,
        scoring='accuracy',
        n_jobs=-1,
        random_state=42
    )
    random_search_svc.fit(X_train, y_train)

    print("Best Parameters for SVM:", random_search_svc.best_params_)
    print("Best Cross-Validated Accuracy (SVM):", random_search_svc.best_score_)
    return random_search_svc


def main():
    """Run the full model-selection and tuning workflow."""
    X_train, X_test, y_train, y_test = _load_data()
    print(f"Training Samples: {X_train.shape[0]}, Test Samples: {X_test.shape[0]}")

    # 1. Model Selection: Comparing Different Models
    _compare_baselines(X_train, X_test, y_train, y_test)

    # 2. Hyperparameter Tuning with GridSearchCV
    grid_search_rf = _tune_random_forest(X_train, y_train)

    # 3. Hyperparameter Tuning with RandomizedSearchCV
    random_search_svc = _tune_svm(X_train, y_train)

    # 4. Final Evaluation with Best Models
    print("\nEvaluating Best Models on Test Data...")

    best_rf = grid_search_rf.best_estimator_
    best_svc = random_search_svc.best_estimator_

    rf_test_acc = accuracy_score(y_test, best_rf.predict(X_test))
    svc_test_acc = accuracy_score(y_test, best_svc.predict(X_test))

    print(f"Final Test Accuracy (Random Forest): {rf_test_acc:.4f}")
    print(f"Final Test Accuracy (SVM): {svc_test_acc:.4f}")

    # 5. Choosing the Best Model (ties favor SVM, matching the original logic)
    if rf_test_acc > svc_test_acc:
        print("\nThe Best Model for This Dataset is Random Forest.")
    else:
        print("\nThe Best Model for This Dataset is SVM.")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)