"""
Evaluation Metrics and Model Validation Techniques
===================================================
This script demonstrates the use of various evaluation metrics
and model validation techniques for assessing AI/ML models.
"""

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, roc_auc_score, roc_curve, classification_report
)
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Load Example Dataset (Handwritten Digits)
data = load_digits()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print(f"Training Samples: {X_train.shape[0]}, Test Samples: {X_test.shape[0]}")

# Train Logistic Regression Model
model = LogisticRegression(max_iter=10000, random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)
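
# Optional sanity check: for scikit-learn classifiers, predict() should agree
# with the argmax of predict_proba() mapped through model.classes_ (ties aside).
assert (model.classes_[y_proba.argmax(axis=1)] == y_pred).all()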

# 1. Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# 2. Precision, Recall, and F1-Score
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# 3. Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, cmap="Blues", fmt='d', xticklabels=data.target_names, yticklabels=data.target_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()
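
# In scikit-learn's convention, rows of the confusion matrix are true labels
# and columns are predictions, so per-class counts can be read off directly.
# A minimal sketch for one class (class 0 chosen purely for illustration):
cls = 0
tp = conf_matrix[cls, cls]            # correctly predicted as cls
fp = conf_matrix[:, cls].sum() - tp   # other classes predicted as cls
fn = conf_matrix[cls, :].sum() - tp   # cls predicted as another class
print(f"Class {cls}: TP={tp}, FP={fp}, FN={fn}")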

# 4. ROC-AUC Score (for Binary Classification)
# For simplicity, we convert this to a binary classification task (class "1"
# vs. the rest). The binary labels are derived from the existing split so
# they stay aligned with X_train and X_test.
binary_y_train = (y_train == 1).astype(int)
binary_y_test = (y_test == 1).astype(int)

binary_model = LogisticRegression(max_iter=10000, random_state=42)
binary_model.fit(X_train, binary_y_train)
binary_y_proba = binary_model.predict_proba(X_test)[:, 1]

roc_auc = roc_auc_score(binary_y_test, binary_y_proba)
fpr, tpr, _ = roc_curve(binary_y_test, binary_y_proba)

print(f"ROC-AUC Score: {roc_auc:.4f}")

# Plot ROC Curve
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f"Logistic Regression (AUC = {roc_auc:.2f})")
plt.plot([0, 1], [0, 1], 'k--', label="Random Guess")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend(loc="lower right")
plt.show()
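
# The full multiclass problem can also be scored without binarizing by hand:
# roc_auc_score accepts the whole probability matrix with one-vs-rest
# averaging. A minimal sketch:
multiclass_auc = roc_auc_score(y_test, y_proba, multi_class='ovr', average='macro')
print(f"Multiclass ROC-AUC (OvR, macro): {multiclass_auc:.4f}")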

# 5. Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=data.target_names.astype(str)))

# 6. Cross-Validation
cv_scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
print(f"Cross-Validation Scores: {cv_scores}")
print(f"Mean CV Accuracy: {np.mean(cv_scores):.4f}")

# 7. Stratified K-Fold Validation
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
stratified_scores = []

for train_idx, val_idx in skf.split(X, y):
    X_train_fold, X_val_fold = X[train_idx], X[val_idx]
    y_train_fold, y_val_fold = y[train_idx], y[val_idx]
    model.fit(X_train_fold, y_train_fold)
    fold_acc = model.score(X_val_fold, y_val_fold)
    stratified_scores.append(fold_acc)

print(f"Stratified K-Fold Scores: {stratified_scores}")
print(f"Mean Stratified K-Fold Accuracy: {np.mean(stratified_scores):.4f}")

# 8. Comparing Models with Validation
models = {
    "Logistic Regression": LogisticRegression(max_iter=10000, random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42)
}

for name, model in models.items():
    cv_score = cross_val_score(model, X, y, cv=5, scoring='accuracy')
    print(f"{name} Mean CV Accuracy: {np.mean(cv_score):.4f}")


'''
Accuracy: Measures the percentage of correct predictions.
Precision, Recall, F1-Score: Key metrics for imbalanced datasets:
    - Precision: True positives / (True positives + False positives).
    - Recall: True positives / (True positives + False negatives).
    - F1-Score: Harmonic mean of Precision and Recall.
Confusion Matrix: Provides a matrix representation of True/False Positives/Negatives.
ROC-AUC Score: Useful for evaluating binary classification models, independent of threshold selection.
Classification Report: Summarizes key metrics for each class.
Cross-Validation: Evaluates model performance on multiple dataset splits.
Stratified K-Fold: Ensures class proportions are maintained in each fold.
Model Comparison: Uses CV scores to compare multiple models.
'''
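
# A quick numeric check of the formulas summarized above: with per-class
# precision and recall (average=None), F1 should equal their harmonic mean,
# 2 * P * R / (P + R), class by class (assuming no class has P + R == 0).
per_class_p = precision_score(y_test, y_pred, average=None)
per_class_r = recall_score(y_test, y_pred, average=None)
per_class_f1 = f1_score(y_test, y_pred, average=None)
assert np.allclose(per_class_f1, 2 * per_class_p * per_class_r / (per_class_p + per_class_r))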