import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

# Importing the dataset
Dataset = pd.read_csv('vgsales.csv')

# Label encoding: replace each categorical column with integer codes.
from sklearn.preprocessing import LabelEncoder
number = LabelEncoder()

# The same encoder object is refitted for every column, so once this loop
# finishes `number` only holds the mapping learned from 'Publisher'.
# Values are cast to str before encoding (presumably so that missing entries
# are encoded like any other category -- confirm against the data).
for _column in ('Platform', 'Genre', 'Publisher'):
    Dataset[_column] = number.fit_transform(Dataset[_column].astype('str'))

# Extracting the feature matrix and the dependent variable vector
columns = ["Platform", "Genre", "Publisher", "NA_Sales", "EU_Sales"]

y = Dataset["Global_Sales"].values
X = Dataset[columns].values

# Linear regression baseline on an 80/20 train/test split of X and y.
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

regr = linear_model.LinearRegression()

# Splitting data into 80% for training and 20% for testing; random_state is
# fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Scaling the data: the scaler is fitted on the training split only and the
# same transformation is then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit the train data to the linear model
regr.fit(X_train, y_train)

# NOTE: for a regressor, score() is the coefficient of determination (R^2),
# which the messages below report (x100) under the label "Accuracy".
Accuracy = regr.score(X_train, y_train)
print("Accuracy in the training data with Linear Regression Model: ", Accuracy * 100, "%")

accuracy = regr.score(X_test, y_test)
print("Accuracy in the test data with Linear Regression model", accuracy * 100, "%")

# Comparing the model predicted results vs the test set.
# The bare names below only display a value in an interactive session.
y_pred_Model1 = regr.predict(X_test)
y_pred_Model1

# Two columns: predicted value, actual value.
compare_Model1 = np.column_stack((y_pred_Model1, y_test))
compare_Model1
# Decision tree regression: one tree with default settings plus four variants
# that differ only in min_samples_leaf.
from sklearn.tree import DecisionTreeRegressor

DTR = DecisionTreeRegressor()
# A float min_samples_leaf is a fraction of the training samples, whereas the
# integer values below are absolute sample counts.
DTR2 = DecisionTreeRegressor(min_samples_leaf=0.2)
DTR3 = DecisionTreeRegressor(min_samples_leaf=15)
DTR4 = DecisionTreeRegressor(min_samples_leaf=30)
DTR5 = DecisionTreeRegressor(min_samples_leaf=35)

for _tree in (DTR, DTR2, DTR3, DTR4, DTR5):
    _tree.fit(X_train, y_train)


def _report_tuned(model):
    """Print and return the train/test scores of a tuned decision tree.

    The min_samples_leaf value shown in the messages is read from ``model``
    itself, so the label always matches the parameter the tree was built
    with. Scores are computed on the module-level X_train/y_train and
    X_test/y_test arrays.

    Returns a ``(train_score, test_score)`` tuple.
    """
    leaf = model.min_samples_leaf

    train_score = model.score(X_train, y_train)
    print("\n Accuracy in the training data with Decision Tree Regression model After tuning with min_samples_leaf={} : ".format(leaf), train_score * 100, "%")

    test_score = model.score(X_test, y_test)
    print("Accuracy in the test data with Decision Tree Regression model After tuning with min_samples_leaf={} : ".format(leaf), test_score * 100, "%")

    return train_score, test_score


# Printing "Accuracy" (the value returned by score(), x100) for the untuned tree
Accuracy = DTR.score(X_train, y_train)
print("Accuracy in the training data with Decision Tree Regression model before tuning : ", Accuracy * 100, "%")

accuracy = DTR.score(X_test, y_test)
print("Accuracy in the test data with Decision Tree Regression model before tuining : ", accuracy * 100, "%")

# Same report for each tuned tree.
Accuracy2, accuracy2 = _report_tuned(DTR2)
Accuracy3, accuracy3 = _report_tuned(DTR3)
Accuracy4, accuracy4 = _report_tuned(DTR4)
Accuracy5, accuracy5 = _report_tuned(DTR5)

# Comparing the model predicted results vs the test set.
# The bare names below only display a value in an interactive session.
y_pred_Model2 = DTR.predict(X_test)
y_pred_Model2

y_pred_Model2_Tuned = DTR5.predict(X_test)
y_pred_Model2_Tuned

# Two columns each: predicted value, actual value.
compare_Model2 = np.column_stack((y_pred_Model2, y_test))
compare_Model2

compare_Model2_Tuned = np.column_stack((y_pred_Model2_Tuned, y_test))
compare_Model2_Tuned
# 0 commit comments