Skip to content

Commit 01ef9e8

Browse files
committed
Add files via upload
1 parent 589c71b commit 01ef9e8

File tree

1 file changed

+127
-0
lines changed

1 file changed

+127
-0
lines changed

evaluation_metrics_and_validation.py

+127
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
"""
2+
Evaluation Metrics and Model Validation Techniques
3+
===================================================
4+
This script demonstrates the use of various evaluation metrics
5+
and model validation techniques for assessing AI/ML models.
6+
"""
7+
8+
from sklearn.datasets import load_digits
9+
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
10+
from sklearn.metrics import (
11+
accuracy_score, precision_score, recall_score, f1_score,
12+
confusion_matrix, roc_auc_score, roc_curve, classification_report
13+
)
14+
from sklearn.linear_model import LogisticRegression
15+
from sklearn.ensemble import RandomForestClassifier
16+
import matplotlib.pyplot as plt
17+
import seaborn as sns
18+
import numpy as np
19+
20+
# Load Example Dataset (Handwritten Digits)
21+
data = load_digits()
22+
X, y = data.data, data.target
23+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
24+
25+
print(f"Training Samples: {X_train.shape[0]}, Test Samples: {X_test.shape[0]}")
26+
27+
# Train Logistic Regression Model
28+
model = LogisticRegression(max_iter=10000, random_state=42)
29+
model.fit(X_train, y_train)
30+
31+
# Predictions
32+
y_pred = model.predict(X_test)
33+
y_proba = model.predict_proba(X_test)
34+
35+
# 1. Accuracy
36+
accuracy = accuracy_score(y_test, y_pred)
37+
print(f"Accuracy: {accuracy:.4f}")
38+
39+
# 2. Precision, Recall, and F1-Score
40+
precision = precision_score(y_test, y_pred, average='weighted')
41+
recall = recall_score(y_test, y_pred, average='weighted')
42+
f1 = f1_score(y_test, y_pred, average='weighted')
43+
44+
print(f"Precision: {precision:.4f}")
45+
print(f"Recall: {recall:.4f}")
46+
print(f"F1-Score: {f1:.4f}")
47+
48+
# 3. Confusion Matrix
49+
conf_matrix = confusion_matrix(y_test, y_pred)
50+
plt.figure(figsize=(8, 6))
51+
sns.heatmap(conf_matrix, annot=True, cmap="Blues", fmt='d', xticklabels=data.target_names, yticklabels=data.target_names)
52+
plt.title("Confusion Matrix")
53+
plt.xlabel("Predicted Labels")
54+
plt.ylabel("True Labels")
55+
plt.show()
56+
57+
# 4. ROC-AUC Score (for Binary Classification)
58+
# For simplicity, we convert this to a binary classification task: digit 1 (positive class) vs. every other digit
59+
# Binary target: 1 where the digit is "1", 0 for every other digit.
binary_y = (y == 1).astype(int)
# Derive the train/test labels from the labels of the existing split instead of
# splitting again: a second train_test_split without `stratify=y` selects
# different rows than the split that produced X_train / X_test, so the features
# and labels would no longer correspond row-for-row.
binary_y_train = (y_train == 1).astype(int)
binary_y_test = (y_test == 1).astype(int)
61+
62+
binary_model = LogisticRegression(max_iter=10000, random_state=42)
63+
binary_model.fit(X_train, binary_y_train)
64+
binary_y_proba = binary_model.predict_proba(X_test)[:, 1]
65+
66+
roc_auc = roc_auc_score(binary_y_test, binary_y_proba)
67+
fpr, tpr, _ = roc_curve(binary_y_test, binary_y_proba)
68+
69+
print(f"ROC-AUC Score: {roc_auc:.4f}")
70+
71+
# Plot ROC Curve
72+
plt.figure(figsize=(8, 6))
73+
plt.plot(fpr, tpr, label=f"Logistic Regression (AUC = {roc_auc:.2f})")
74+
plt.plot([0, 1], [0, 1], 'k--', label="Random Guess")
75+
plt.xlabel("False Positive Rate")
76+
plt.ylabel("True Positive Rate")
77+
plt.title("ROC Curve")
78+
plt.legend(loc="lower right")
79+
plt.show()
80+
81+
# 5. Classification Report
82+
print("\nClassification Report:")
83+
print(classification_report(y_test, y_pred, target_names=data.target_names.astype(str)))
84+
85+
# 6. Cross-Validation
86+
cv_scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
87+
print(f"Cross-Validation Scores: {cv_scores}")
88+
print(f"Mean CV Accuracy: {np.mean(cv_scores):.4f}")
89+
90+
# 7. Stratified K-Fold Validation
91+
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
92+
stratified_scores = []
93+
94+
# Evaluate one freshly initialised estimator per fold and collect its accuracy.
for train_idx, val_idx in skf.split(X, y):
    X_train_fold, X_val_fold = X[train_idx], X[val_idx]
    y_train_fold, y_val_fold = y[train_idx], y[val_idx]
    # Use a new estimator for each fold so the `model` fitted on X_train
    # earlier in the script is not silently refit on fold data.
    fold_model = LogisticRegression(max_iter=10000, random_state=42)
    fold_model.fit(X_train_fold, y_train_fold)
    # `score` reports mean accuracy on the held-out fold.
    fold_acc = fold_model.score(X_val_fold, y_val_fold)
    stratified_scores.append(fold_acc)
100+
101+
print(f"Stratified K-Fold Scores: {stratified_scores}")
102+
print(f"Mean Stratified K-Fold Accuracy: {np.mean(stratified_scores):.4f}")
103+
104+
# 8. Comparing Models with Validation
105+
# Candidate estimators, keyed by the display name used in the printed summary.
models = {
    "Logistic Regression": LogisticRegression(max_iter=10000, random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
}

# A dedicated loop variable keeps the module-level `model` from being rebound
# to an unfitted estimator; cross_val_score clones and fits per fold itself.
for name, candidate in models.items():
    cv_score = cross_val_score(candidate, X, y, cv=5, scoring='accuracy')
    print(f"{name} Mean CV Accuracy: {np.mean(cv_score):.4f}")
113+
114+
115+
'''
116+
Accuracy: Measures the percentage of correct predictions.
117+
Precision, Recall, F1-Score: Key metrics for imbalanced datasets:
118+
- Precision: True positives / (True positives + False positives).
119+
- Recall: True positives / (True positives + False negatives).
120+
- F1-Score: Harmonic mean of Precision and Recall.
121+
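Confusion Matrix: Tabulates true labels (rows) against predicted labels (columns); cell (i, j) counts samples of class i predicted as class j, so correct predictions lie on the diagonal.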
122+
ROC-AUC Score: Useful for evaluating binary classification models, independent of threshold selection.
123+
Classification Report: Summarizes key metrics for each class.
124+
Cross-Validation: Evaluates model performance on multiple dataset splits.
125+
Stratified K-Fold: Ensures class proportions are maintained in each fold.
126+
Model Comparison: Uses CV scores to compare multiple models.
127+
'''

0 commit comments

Comments
 (0)