bamtak
diff --git a/‎Gaussian Discriminant Analyses.ipynb
Lines changed: 196 additions & 0 deletions b/‎Gaussian Discriminant Analyses.ipynb
Lines changed: 196 additions & 0 deletions
diff --git a/‎KMeans.ipynb
Lines changed: 325 additions & 0 deletions b/‎KMeans.ipynb
Lines changed: 325 additions & 0 deletions
@@ -0,0 +1,196 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "class GDABinaryClassifier:\n",
+    "    \"\"\"Gaussian discriminant analysis for labels in {0, 1} with one shared covariance.\n",
+    "\n",
+    "    Attributes set by fit():\n",
+    "        fi   -- fraction of samples labelled 1, used as P(y = 1).\n",
+    "        u    -- (2, n_features) array; row k is the mean of class k.\n",
+    "        E    -- covariance of the class-centred samples, pooled over both classes.\n",
+    "        invE -- Moore-Penrose pseudo-inverse of E.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def fit(self, X, y):\n",
+    "        \"\"\"Estimate the prior, the two class means and the pooled covariance.\n",
+    "\n",
+    "        X is an (n_samples, n_features) array-like, y holds 0/1 labels.\n",
+    "        Raises ValueError if either class is absent from y, because its mean\n",
+    "        would be undefined.  Returns self.\n",
+    "        \"\"\"\n",
+    "        # Work in float64: subtracting float means in place from an integer\n",
+    "        # array raises a casting error.\n",
+    "        X = np.asarray(X, dtype='float64')\n",
+    "        y = np.asarray(y)\n",
+    "        for k in [0, 1]:\n",
+    "            if not np.any(y == k):\n",
+    "                raise ValueError('y must contain samples of both classes 0 and 1')\n",
+    "        self.fi = y.mean()\n",
+    "        self.u = np.array([X[y == k].mean(axis=0) for k in [0, 1]])\n",
+    "        X_u = X.copy()\n",
+    "        for k in [0, 1]:\n",
+    "            X_u[y == k] -= self.u[k]\n",
+    "        self.E = X_u.T.dot(X_u) / float(len(y))\n",
+    "        # pinv rather than inv so that a singular covariance still yields a model.\n",
+    "        self.invE = np.linalg.pinv(self.E)\n",
+    "        return self\n",
+    "\n",
+    "    def predict(self, X):\n",
+    "        \"\"\"Return the more probable class (0 or 1) for every row of X.\"\"\"\n",
+    "        # Compare log scores: exp() of a large negative distance underflows to 0\n",
+    "        # for both classes, and argmax would then always answer 0.\n",
+    "        scores = [self._log_score(X, i) for i in range(len(self.u))]\n",
+    "        return np.argmax(scores, axis=0)\n",
+    "\n",
+    "    def _log_score(self, X, i):\n",
+    "        \"\"\"Return log(compute_prob(X, i)) without ever exponentiating.\"\"\"\n",
+    "        d = np.asarray(X, dtype='float64') - self.u[i]\n",
+    "        prior = self.fi if i == 1 else 1.0 - self.fi\n",
+    "        # The Gaussian exponent is -1/2 of the squared Mahalanobis distance.\n",
+    "        return -0.5 * np.sum(d.dot(self.invE) * d, axis=1) + np.log(prior)\n",
+    "\n",
+    "    def compute_prob(self, X, i):\n",
+    "        \"\"\"Return p(x | y = i) * P(y = i) for each row of X.\n",
+    "\n",
+    "        The Gaussian normalising constant is omitted; it is the same for both\n",
+    "        classes because they share one covariance.\n",
+    "        \"\"\"\n",
+    "        return np.exp(self._log_score(X, i))\n",
+    "\n",
+    "    def score(self, X, y):\n",
+    "        \"\"\"Return the fraction of rows of X whose prediction equals y.\"\"\"\n",
+    "        return (self.predict(X) == y).mean()\n",
+    "        \n",
+    "\n",
+    "class GDAClassifier:\n",
+    "    \"\"\"Multi-class Gaussian discriminant analysis with one shared covariance.\n",
+    "\n",
+    "    Attributes set by fit():\n",
+    "        y_classes -- sorted distinct labels found in y.\n",
+    "        phi_y     -- prior of each class (its share of the training rows).\n",
+    "        u         -- (n_classes, n_features) array of class means.\n",
+    "        E         -- pooled covariance plus epsilon on the diagonal.\n",
+    "        invE      -- Moore-Penrose pseudo-inverse of E.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def fit(self, X, y, epsilon = 1e-10):\n",
+    "        \"\"\"Estimate priors, class means and the regularised pooled covariance.\n",
+    "\n",
+    "        X is an (n_samples, n_features) array-like and y the matching labels.\n",
+    "        epsilon is added to the diagonal of the covariance.  Raises ValueError\n",
+    "        when y is empty.  Returns self.\n",
+    "        \"\"\"\n",
+    "        X = np.asarray(X, dtype='float64')\n",
+    "        y = np.asarray(y)\n",
+    "        if len(y) == 0:\n",
+    "            raise ValueError('cannot fit on an empty training set')\n",
+    "        self.y_classes, y_counts = np.unique(y, return_counts=True)\n",
+    "        self.phi_y = 1.0 * y_counts / len(y)\n",
+    "        self.u = np.array([X[y == k].mean(axis=0) for k in self.y_classes])\n",
+    "        self.E = self.compute_sigma(X, y)\n",
+    "        # Load the diagonal only: adding epsilon to every entry is a rank-one\n",
+    "        # shift that leaves a singular covariance singular.\n",
+    "        self.E += np.eye(self.E.shape[0]) * epsilon\n",
+    "        self.invE = np.linalg.pinv(self.E)\n",
+    "        return self\n",
+    "\n",
+    "    def compute_sigma(self, X, y):\n",
+    "        \"\"\"Return the covariance of X after removing each row's class mean.\n",
+    "\n",
+    "        Reads self.u and self.y_classes, so it must run after they are set.\n",
+    "        \"\"\"\n",
+    "        X_u = np.array(X, dtype='float64')  # np.array copies, X stays untouched\n",
+    "        y = np.asarray(y)\n",
+    "        for i in range(len(self.u)):\n",
+    "            X_u[y == self.y_classes[i]] -= self.u[i]\n",
+    "        return X_u.T.dot(X_u) / float(len(y))\n",
+    "\n",
+    "    def predict(self, X):\n",
+    "        \"\"\"Return the most probable label (an entry of y_classes) for each row of X.\"\"\"\n",
+    "        X = np.asarray(X, dtype='float64')\n",
+    "        # Map the winning column back to its label; the bare argmax index is\n",
+    "        # only correct when the labels happen to be 0..K-1.\n",
+    "        return self.y_classes[np.argmax(self._log_scores(X), axis=1)]\n",
+    "\n",
+    "    def _log_scores(self, X):\n",
+    "        \"\"\"Return an (n_samples, n_classes) array of log posteriors.\n",
+    "\n",
+    "        Each row is offset by a constant that is the same for all classes.\n",
+    "        Expanding -0.5 * (x - u_k)' invE (x - u_k) + log(phi_k) and dropping the\n",
+    "        x' invE x term shared by all classes leaves a linear score, so nothing\n",
+    "        is exponentiated and nothing can underflow.  Relies on invE being\n",
+    "        symmetric, which holds because E is.\n",
+    "        \"\"\"\n",
+    "        A = self.invE.dot(self.u.T)\n",
+    "        bias = -0.5 * np.sum(self.u.T * A, axis=0) + np.log(self.phi_y)\n",
+    "        return X.dot(A) + bias\n",
+    "\n",
+    "    def score(self, X, y):\n",
+    "        \"\"\"Return the fraction of rows of X whose prediction equals y.\"\"\"\n",
+    "        return (self.predict(X) == y).mean()\n",
+    "\n",
+    "    def get_prob(self, x):\n",
+    "        \"\"\"Return the predicted label for one sample x of shape (n_features,).\n",
+    "\n",
+    "        Despite the name this returns a class label, not a probability.\n",
+    "        \"\"\"\n",
+    "        return self.predict(np.asarray(x, dtype='float64').reshape(1, -1))[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9666080843585237"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Fit the binary model on the whole breast-cancer set and score it on that\n",
+    "# same data, so the number shown is training accuracy, not a held-out estimate.\n",
+    "from sklearn.datasets import load_breast_cancer\n",
+    "X,y = load_breast_cancer(return_X_y=True)\n",
+    "model = GDABinaryClassifier().fit(X,y)\n",
+    "pre = model.predict(X)  # not read again in any cell visible here\n",
+    "model.score(X,y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fit the multi-class model on a split of the iris data.\n",
+    "# train_test_split comes from a local utils module that is not shown here.\n",
+    "# NOTE(review): if it follows the scikit-learn convention, test_size=0.8 holds\n",
+    "# out 80% of the rows and trains on 20% -- confirm that is intended.\n",
+    "from utils import train_test_split\n",
+    "from sklearn.datasets import load_iris\n",
+    "X,y = load_iris(return_X_y=True)\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.8)\n",
+    "model = GDAClassifier().fit(X_train,y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9583333333333334"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Accuracy on the held-out split; model, X_test and y_test come from the\n",
+    "# previous cell, so this cell only works after that one has run.\n",
+    "model.score(X_test,y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9494505494505494"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Repeat the experiment on the breast-cancer data with the multi-class model.\n",
+    "# load_breast_cancer and train_test_split are imported in earlier cells.\n",
+    "X,y = load_breast_cancer(return_X_y=True)\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.8)\n",
+    "model = GDAClassifier().fit(X_train,y_train)\n",
+    "model.score(X_test,y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9543429844097996"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Multi-class model on the scikit-learn digits data, with test_size=0.5.\n",
+    "# train_test_split is the helper imported from the local utils module earlier.\n",
+    "from sklearn.datasets import load_digits\n",
+    "digits = load_digits()\n",
+    "X = digits.data\n",
+    "y = digits.target\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.5)\n",
+    "model = GDAClassifier().fit(X_train,y_train)\n",
+    "model.score(X_test,y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}