Skip to content

Commit f34296e

Browse files
committed
first implementation
0 parents  commit f34296e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+3185
-0
lines changed

Gaussian Discriminant Analyses.ipynb

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 7,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import numpy as np\n",
10+
"\n",
class GDABinaryClassifier:
    """Gaussian discriminant analysis for two classes labelled 0 and 1.

    Both classes share one covariance matrix. After ``fit`` the instance
    holds:

    * ``fi``   -- fraction of training samples with label 1 (prior of class 1)
    * ``u``    -- array of shape (2, n_features) with the per-class means
    * ``E``    -- pooled covariance matrix
    * ``invE`` -- pseudo-inverse of ``E``
    """

    def fit(self, X, y):
        """Estimate the class prior, class means and pooled covariance.

        X is a 2-D array (n_samples, n_features); y holds the labels 0/1.
        Returns ``self`` so calls can be chained.
        """
        # Cast to float so that integer feature matrices can be centred;
        # an in-place float subtraction on an int array raises in numpy.
        X = np.asarray(X, dtype='float64')
        y = np.asarray(y)
        self.fi = y.mean()
        self.u = np.array([X[y == k].mean(axis=0) for k in [0, 1]])
        X_u = X.copy()  # never modify the caller's array
        for k in [0, 1]:
            X_u[y == k] -= self.u[k]
        self.E = X_u.T.dot(X_u) / len(y)
        # pinv instead of inv so a singular covariance does not raise.
        self.invE = np.linalg.pinv(self.E)
        return self

    def _log_score(self, X, i):
        """Log of the unnormalised posterior of class ``i`` for each row of X."""
        centered = np.asarray(X, dtype='float64') - self.u[i]
        mahalanobis_sq = np.sum(centered.dot(self.invE) * centered, axis=1)
        phi = (self.fi) ** i * (1 - self.fi) ** (1 - i)
        # log(0) -> -inf is the intended score for a class with zero prior.
        with np.errstate(divide='ignore'):
            log_phi = np.log(phi)
        # The Gaussian exponent is -1/2 * squared Mahalanobis distance.
        return -0.5 * mahalanobis_sq + log_phi

    def predict(self, X):
        """Return the most probable label (0 or 1) for each row of X.

        Classes are compared in log space, so samples far from both means
        are still ranked correctly instead of underflowing to 0 for both.
        """
        return np.argmax([self._log_score(X, i) for i in range(len(self.u))], axis=0)

    def compute_prob(self, X, i):
        """Return the unnormalised posterior of class ``i`` for each row of X.

        The shared Gaussian normalisation constant is omitted because it is
        identical for both classes.
        """
        return np.exp(self._log_score(X, i))

    def score(self, X, y):
        """Return the fraction of rows of X whose prediction equals y."""
        return (self.predict(X) == np.asarray(y)).mean()
31+
" \n",
32+
"\n",
class GDAClassifier:
    """Multi-class Gaussian discriminant analysis with a shared covariance.

    After ``fit`` the instance holds:

    * ``y_classes`` -- sorted unique labels seen during training
    * ``phi_y``     -- prior of each class (relative frequency)
    * ``u``         -- array of shape (n_classes, n_features) with class means
    * ``E``         -- pooled covariance matrix (after the epsilon offset)
    * ``invE``      -- pseudo-inverse of ``E``
    """

    def fit(self, X, y, epsilon = 1e-10):
        """Estimate priors, class means and the pooled covariance.

        X is a 2-D array (n_samples, n_features); y holds one label per row.
        ``epsilon`` is a small constant added to every entry of the
        covariance before it is pseudo-inverted. Returns ``self``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        self.y_classes, y_counts = np.unique(y, return_counts=True)
        self.phi_y = y_counts / float(len(y))
        self.u = np.array([X[y == k].mean(axis=0) for k in self.y_classes])
        self.E = self.compute_sigma(X, y)
        # Offset is applied to all entries (not only the diagonal); pinv
        # below copes with any rank deficiency that remains.
        self.E += np.ones_like(self.E) * epsilon
        self.invE = np.linalg.pinv(self.E)
        return self

    def compute_sigma(self, X, y):
        """Return the pooled covariance of X around the class means.

        Requires ``self.u`` and ``self.y_classes`` to be set already.
        """
        y = np.asarray(y)
        # np.array(...) copies, so the caller's data is left untouched.
        X_u = np.array(X, dtype='float64')
        for label, mean in zip(self.y_classes, self.u):
            X_u[y == label] -= mean
        return X_u.T.dot(X_u) / len(y)

    def _log_scores(self, X):
        """Return an (n_samples, n_classes) array of log unnormalised posteriors."""
        X = np.asarray(X, dtype='float64')
        mahalanobis_sq = np.array([
            np.sum((X - mean).dot(self.invE) * (X - mean), axis=1)
            for mean in self.u
        ]).T
        with np.errstate(divide='ignore'):
            log_prior = np.log(self.phi_y)
        # Gaussian exponent is -1/2 * squared Mahalanobis distance; working in
        # log space keeps distant samples from underflowing to 0 everywhere.
        return -0.5 * mahalanobis_sq + log_prior

    def predict(self, X):
        """Return the most probable class label for each row of X.

        The result contains the original labels from ``y_classes``, not
        positional indices, so non 0..K-1 labels are handled correctly.
        """
        best = np.argmax(self._log_scores(X), axis=1)
        return self.y_classes[best]

    def score(self, X, y):
        """Return the fraction of rows of X whose prediction equals y."""
        return (self.predict(X) == np.asarray(y)).mean()

    def get_prob(self, x):
        """Return the index (into ``y_classes``) of the best class for one sample.

        Despite its name this returns an argmax index, not a probability;
        the behaviour is kept for backward compatibility.
        """
        row = np.asarray(x, dtype='float64')[np.newaxis, :]
        return np.argmax(self._log_scores(row)[0])
59+
]
60+
},
61+
{
62+
"cell_type": "code",
63+
"execution_count": 15,
64+
"metadata": {},
65+
"outputs": [
66+
{
67+
"data": {
68+
"text/plain": [
69+
"0.9666080843585237"
70+
]
71+
},
72+
"execution_count": 15,
73+
"metadata": {},
74+
"output_type": "execute_result"
75+
}
76+
],
77+
"source": [
78+
"from sklearn.datasets import load_breast_cancer\n",
79+
"X,y = load_breast_cancer(return_X_y=True)\n",
80+
"model = GDABinaryClassifier().fit(X,y)\n",
81+
"pre = model.predict(X)\n",
82+
"model.score(X,y)"
83+
]
84+
},
85+
{
86+
"cell_type": "code",
87+
"execution_count": 9,
88+
"metadata": {},
89+
"outputs": [],
90+
"source": [
91+
"from utils import train_test_split\n",
92+
"from sklearn.datasets import load_iris\n",
93+
"X,y = load_iris(return_X_y=True)\n",
94+
"X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.8)\n",
95+
"model = GDAClassifier().fit(X_train,y_train)"
96+
]
97+
},
98+
{
99+
"cell_type": "code",
100+
"execution_count": 11,
101+
"metadata": {},
102+
"outputs": [
103+
{
104+
"data": {
105+
"text/plain": [
106+
"0.9583333333333334"
107+
]
108+
},
109+
"execution_count": 11,
110+
"metadata": {},
111+
"output_type": "execute_result"
112+
}
113+
],
114+
"source": [
115+
"model.score(X_test,y_test)"
116+
]
117+
},
118+
{
119+
"cell_type": "code",
120+
"execution_count": 13,
121+
"metadata": {},
122+
"outputs": [
123+
{
124+
"data": {
125+
"text/plain": [
126+
"0.9494505494505494"
127+
]
128+
},
129+
"execution_count": 13,
130+
"metadata": {},
131+
"output_type": "execute_result"
132+
}
133+
],
134+
"source": [
135+
"X,y = load_breast_cancer(return_X_y=True)\n",
136+
"X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.8)\n",
137+
"model = GDAClassifier().fit(X_train,y_train)\n",
138+
"model.score(X_test,y_test)"
139+
]
140+
},
141+
{
142+
"cell_type": "code",
143+
"execution_count": 14,
144+
"metadata": {},
145+
"outputs": [
146+
{
147+
"data": {
148+
"text/plain": [
149+
"0.9543429844097996"
150+
]
151+
},
152+
"execution_count": 14,
153+
"metadata": {},
154+
"output_type": "execute_result"
155+
}
156+
],
157+
"source": [
158+
"from sklearn.datasets import load_digits\n",
159+
"digits = load_digits()\n",
160+
"X = digits.data\n",
161+
"y = digits.target\n",
162+
"X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.5)\n",
163+
"model = GDAClassifier().fit(X_train,y_train)\n",
164+
"model.score(X_test,y_test)"
165+
]
166+
},
167+
{
168+
"cell_type": "code",
169+
"execution_count": null,
170+
"metadata": {},
171+
"outputs": [],
172+
"source": []
173+
}
174+
],
175+
"metadata": {
176+
"kernelspec": {
177+
"display_name": "python3",
178+
"language": "python",
179+
"name": "python3"
180+
},
181+
"language_info": {
182+
"codemirror_mode": {
183+
"name": "ipython",
184+
"version": 2
185+
},
186+
"file_extension": ".py",
187+
"mimetype": "text/x-python",
188+
"name": "python",
189+
"nbconvert_exporter": "python",
190+
"pygments_lexer": "ipython2",
191+
"version": "2.7.16"
192+
}
193+
},
194+
"nbformat": 4,
195+
"nbformat_minor": 2
196+
}

KMeans.ipynb

Lines changed: 325 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)