Skip to content

Commit 43caaf6

Browse files
authored
Add files via upload
1 parent e675e72 commit 43caaf6

File tree

2 files changed

+16712
-0
lines changed

2 files changed

+16712
-0
lines changed

Vgregression.py

Lines changed: 113 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,113 @@
1+
import numpy as np # linear algebra
2+
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
3+
import matplotlib.pyplot as plt
4+
5+
# Load the video game sales data. The path is relative, so the script must be
# run from the directory that contains 'vgsales.csv'.
Dataset = pd.read_csv('vgsales.csv')

# Label encoding: replace each categorical text column with integer codes.
from sklearn.preprocessing import LabelEncoder

# Use one encoder per column so that every column's code -> label mapping
# stays available afterwards (e.g. encoders['Genre'].inverse_transform(...)).
# A single shared encoder only remembers the last column it was fitted on.
encoders = {}
for column_name in ('Platform', 'Genre', 'Publisher'):
    encoders[column_name] = LabelEncoder()
    # Cast to str first so the encoder always receives a uniform string column.
    Dataset[column_name] = encoders[column_name].fit_transform(
        Dataset[column_name].astype('str')
    )

# Kept for backward compatibility: as before, `number` ends up being the
# encoder that was fitted on the 'Publisher' column.
number = encoders['Publisher']
14+
15+
# Extract the feature matrix (X) and the dependent-variable vector (y).

# Predictor columns: the three label-encoded categorical fields plus the
# North America and Europe sales figures.
columns = ["Platform", "Genre", "Publisher", "NA_Sales", "EU_Sales"]

# Plain NumPy arrays are what the scikit-learn estimators below consume.
X = Dataset.loc[:, columns].to_numpy()
y = Dataset["Global_Sales"].to_numpy()
22+
23+
# scikit-learn pieces used in this section: the linear model, the hold-out
# split helper and the feature standardiser.
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Keep 80% of the rows for training and 20% for testing; the fixed seed makes
# the split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit the linear regression on the standardised training data.
regr = linear_model.LinearRegression()
regr.fit(X_train, y_train)
39+
40+
# Report how well the linear model fits each split.
# NOTE: for a regressor, score() returns the coefficient of determination
# (R^2); the messages below label that value as "Accuracy".
Accuracy = regr.score(X_train, y_train)
accuracy = regr.score(X_test, y_test)

print("Accuracy in the training data with Linear Regression Model: ", Accuracy*100, "%")
print("Accuracy in the test data with Linear Regression model", accuracy*100, "%")
46+
# Compare the linear model's predictions with the held-out targets.
y_pred_Model1 = regr.predict(X_test)

# Two-column table: column 0 = predicted value, column 1 = actual value.
# np.column_stack turns each 1-D array into a column, replacing the manual
# reshape + concatenate. The bare `y_pred_Model1` / `compare_Model1`
# expression statements were dropped: outside an interactive session they do
# nothing. Use print(compare_Model1) to inspect the table from a script.
compare_Model1 = np.column_stack((y_pred_Model1, y_test))
53+
####*************************
# Decision tree regression: one default tree plus four variants that differ
# only in the minimum leaf size.
from sklearn.tree import DecisionTreeRegressor

# The trees are seeded so that repeated runs report the same scores; the
# train/test split is already seeded with random_state=0, but an unseeded
# tree may still break ties between equally good splits differently per run.
DTR = DecisionTreeRegressor(random_state=0)
# A float min_samples_leaf is read as a fraction of the training samples:
# 0.2 means every leaf must hold at least 20% of them.
DTR2 = DecisionTreeRegressor(min_samples_leaf=0.2, random_state=0)
# Integer values are absolute sample counts per leaf.
DTR3 = DecisionTreeRegressor(min_samples_leaf=15, random_state=0)
DTR4 = DecisionTreeRegressor(min_samples_leaf=30, random_state=0)
DTR5 = DecisionTreeRegressor(min_samples_leaf=35, random_state=0)

# Fit every variant on the same standardised training data.
for tree in (DTR, DTR2, DTR3, DTR4, DTR5):
    tree.fit(X_train, y_train)
68+
69+
# Report the train/test score of every decision tree variant.
# NOTE: for a regressor, score() returns the coefficient of determination
# (R^2); the messages label that value as "Accuracy".
# The DTR2 messages now state min_samples_leaf=0.2, which is the value the
# model is actually built with (they previously claimed 3), and the
# "tuining" typo in the emitted text is corrected.

Accuracy = DTR.score(X_train, y_train)
print ("Accuracy in the training data with Decision Tree Regression model before tuning : ", Accuracy*100, "%")

accuracy = DTR.score(X_test, y_test)
print ("Accuracy in the test data with Decision Tree Regression model before tuning : ", accuracy*100, "%")
##########################
Accuracy2 = DTR2.score(X_train, y_train)
print ("\nAccuracy in the training data with Decision Tree Regression model After tuning with min_samples_leaf=0.2 : ", Accuracy2*100, "%")

accuracy2 = DTR2.score(X_test, y_test)
print ("Accuracy in the test data with Decision Tree Regression model After tuning with min_samples_leaf=0.2 : ", accuracy2*100, "%")
##########################
Accuracy3 = DTR3.score(X_train, y_train)
print ("\nAccuracy in the training data with Decision Tree Regression model After tuning with min_samples_leaf=15 : ", Accuracy3*100, "%")

accuracy3 = DTR3.score(X_test, y_test)
print ("Accuracy in the test data with Decision Tree Regression model After tuning with min_samples_leaf=15 : ", accuracy3*100, "%")
##########################
Accuracy4 = DTR4.score(X_train, y_train)
print ("\nAccuracy in the training data with Decision Tree Regression model After tuning with min_samples_leaf=30 : ", Accuracy4*100, "%")

accuracy4 = DTR4.score(X_test, y_test)
print ("Accuracy in the test data with Decision Tree Regression model After tuning with min_samples_leaf=30 : ", accuracy4*100, "%")
##########################
Accuracy5 = DTR5.score(X_train, y_train)
print ("\nAccuracy in the training data with Decision Tree Regression model After tuning with min_samples_leaf=35 : ", Accuracy5*100, "%")

accuracy5 = DTR5.score(X_test, y_test)
print ("Accuracy in the test data with Decision Tree Regression model After tuning with min_samples_leaf=35 : ", accuracy5*100, "%")
100+
##########################
101+
102+
# Compare the decision tree predictions with the held-out targets.
# y_pred_Model2 comes from the default tree (DTR), y_pred_Model2_Tuned from
# the min_samples_leaf=35 tree (DTR5).
y_pred_Model2 = DTR.predict(X_test)
y_pred_Model2_Tuned = DTR5.predict(X_test)

# Two-column tables: column 0 = predicted value, column 1 = actual value.
# np.column_stack turns each 1-D array into a column, replacing the manual
# reshape + concatenate. The bare-name expression statements were dropped:
# outside an interactive session they do nothing. Use print(...) to inspect
# the tables from a script.
compare_Model2 = np.column_stack((y_pred_Model2, y_test))
compare_Model2_Tuned = np.column_stack((y_pred_Model2_Tuned, y_test))

0 commit comments

Comments
 (0)