Uwha_Fortune_Oluchi(ML-Logistic-regression-algorithm-challenge) #37

Open · wants to merge 6 commits into base: master
131 changes: 131 additions & 0 deletions DSN Logistic Regression from scratch using numpy.ipynb
@@ -0,0 +1,131 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Class Implementation for Logistic Regression: Binary logistic Regression"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"class logisticRegression:\n",
" \n",
" def __init__(self,lr=0.001,n_iters = 1000):\n",
" self.lr = lr\n",
" self.n_iters = n_iters\n",
" self.weights = None\n",
" self.bias = None\n",
" \n",
" def _sigmoid(self,x):\n",
" return 1/(1+np.exp(-x)) \n",
" \n",
" def fit(self,X,y):\n",
" #Initialize parameters\n",
" n_samples,n_features = X.shape\n",
" self.weights = np.zeros(n_features)\n",
" self.bias=0 # you can also use random numbers but zero is fine\n",
" #gradient descent\n",
" for _ in range(self.n_iters):\n",
" linear_model = np.dot(X,self.weights)+self.bias\n",
" # Applying the sigmoid function\n",
" y_predicted = self._sigmoid(linear_model)# So this is our approximation of y\n",
" # Next, we need to update our weight\n",
" dw = (1/n_samples)*np.dot(X.T,(y_predicted-y))\n",
" db = (1/n_samples)*np.sum(y_predicted-y)\n",
" #update our parameters\n",
" self.weights -= self.lr*dw\n",
" self.bias -=self.lr*db\n",
" #Implement the predict method, we first approximate our data with a linear model,and apply the sigmoid function to get a probability\n",
" def predict(self,X):\n",
" linear_model = np.dot(X,self.weights)+self.bias\n",
" y_predicted = self._sigmoid(linear_model)\n",
" y_predicted_cls = [1 if i>0.5 else 0 for i in y_predicted]\n",
" return y_predicted_cls\n",
" def accuracy (y_true,y_pred):\n",
" accuracy = np.sum(y_true == y_pred)/len(y_true)\n",
" return accuracy "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Class Implementation for Logistic Regression: Multiclass logistic Regression"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"class MultiClassLogisticRegression:\n",
" \n",
" def fit(self, X, y, lr=0.00001, n_iter=1000):\n",
" X = np.insert(X, 0, 1, axis=1)\n",
" self.unique_y = np.unique(y)\n",
" self.w = np.zeros[(len(self.unique_y), X.shape[1])]\n",
" #so we are converting our y into the same shape as predictions using one-hot encoding\n",
" y = self.one_hot(y)\n",
" for i in range(n_iter):\n",
" predictions = self.probabilities(X)\n",
" #update weight\n",
" error = predictions - y\n",
" gradient = np.dot(error.T, X)\n",
" self.w -= (lr * gradient) \n",
" return self\n",
" \n",
" def probabilities(self, X):\n",
" scores = np.dot(X, self.w.T)\n",
" return self.softmax(scores)\n",
" \n",
" def softmax(self, z):\n",
" return np.exp(z)/ np.sum(np.exp(z), axis=1).reshape[-1,1]\n",
" \n",
" def predict(self, X):\n",
" X = np.insert(X, 0, 1, axis=1)\n",
" #we use the np.vectorize to convert our predicted classes to actual classes that we have in y\n",
" return np.vectorize(lambda i: self.unique_y[i])(np.argmax(self.probabilities(X), axis = 1))# returning the index of the highest probability for each row in X\n",
" \n",
" def score(self, X, y):\n",
" return np.mean(self.predict(X) == y)\n",
" \n",
" def one_hot(self, y):\n",
" u_y = list(np.unique(y))\n",
" encoded = np.zeros((len[y], len(u_y)))\n",
" for i, c in enumerate[y]:\n",
" encoded[i][u_y.index(c)] = 1\n",
" return encoded\n",
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
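
Below is a minimal, hypothetical usage sketch of the two classes defined in the notebook above. The synthetic blobs, the hyperparameter values, and the variable names are illustrative assumptions and are not part of this pull request.

```python
import numpy as np

# Hypothetical smoke test for the two classes above (illustrative data only).
rng = np.random.RandomState(0)

# Binary case: two Gaussian blobs labelled 0 and 1.
X_bin = np.vstack([rng.randn(50, 2) + 2, rng.randn(50, 2) - 2])
y_bin = np.array([1] * 50 + [0] * 50)

clf = logisticRegression(lr=0.01, n_iters=1000)
clf.fit(X_bin, y_bin)
preds = np.array(clf.predict(X_bin))
print("binary accuracy:", np.mean(preds == y_bin))

# Multiclass case: three blobs labelled 0, 1 and 2 (default lr and n_iter).
X_multi = np.vstack([rng.randn(50, 2) + c * 2 for c in range(3)])
y_multi = np.repeat([0, 1, 2], 50)

mclf = MultiClassLogisticRegression().fit(X_multi, y_multi)
print("multiclass accuracy:", mclf.score(X_multi, y_multi))
```
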
Binary file removed DSN_logo.png
Binary file not shown.
40 changes: 4 additions & 36 deletions README.md
@@ -1,41 +1,9 @@
# ML-Logistic-regression-algorithm-challenge

In this notebook I will implement Logistic Regression without relying on Python's easy-to-use scikit-learn library. Building Logistic Regression without the help of in-built Logistic Regression libraries helps us fully understand how it works in the background.

![DSN logo](DSN_logo.png)|DSN Algorithm Challenge|
|---|---|
Before we start coding, let us first understand, or at least try to understand, what happens at the back end of Logistic Regression. Logistic regression is a classification algorithm; it can be confusing that the word regression appears in the name, but that is just a name kept for historical reasons. So let's not get confused: logistic regression is a classification algorithm that we apply to settings where the label y is a discrete value, either zero or one.

A lot of data scientists and machine learning enthusiasts use various machine learning algorithms as black boxes without knowing how they work or the mathematics behind them. The purpose of this challenge is to encourage the mathematical understanding of machine learning algorithms, their breaking points and their yield points.
Logistic Regression takes an input and returns a probability, a value between 0 and 1. How does it do that? With the help of a function called the logistic function, more commonly known as the sigmoid. The terms sigmoid function and logistic function are synonyms, so they are interchangeable and either can be used to refer to this function.
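
As a minimal sketch of that squashing behaviour (the helper name `sigmoid` and the sample inputs are chosen for illustration; the notebook's own method is `_sigmoid`):

```python
import numpy as np

def sigmoid(x):
    # Squash any real-valued input into the open interval (0, 1).
    return 1 / (1 + np.exp(-x))

print(sigmoid(0))    # 0.5
print(sigmoid(5))    # ~0.993
print(sigmoid(-5))   # ~0.007
```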

In summary, participants are encouraged to understand the fundamental concepts behind machine learning algorithms/models.


The rules and guidelines for this challenge are as follows:

1. Ensure you register at https://bit.ly/dsnmlhack

2. The algorithm challenge is open to all.

3. Participants are expected to design and develop the Logistic Regression algorithm from scratch using Python or R programming.

4. For Python developers, numpy is advisable.

5. To push your solution to us, make a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) to DSN's GitHub page at https://www.github.com/datasciencenigeria/ML-Logistic-regression-algorithm-challenge. Ensure to add your readme file to understand your code.

6. The top 3 most optimized solutions will be rewarded as follows:

- **1st position**: 20GB data plan.
- **2nd position**: 15GB data plan.
- **3rd position**: 10GB data plan.

7. Add your scripts and readme.MD file in a folder named after your full name (surname_first_middle name) by making a pull request to the repository.

---
For issues on this challenge, kindly reach out to the AI+campus/city managers.

**Twitter**: [@DataScienceNIG](https://twitter.com/DataScienceNIG), [@elishatofunmi](https://twitter.com/Elishatofunmi), [@o_funminiyi](https://twitter.com/o_funminiyi), [@gbganalyst](https://twitter.com/gbganalyst)

or

**Call**: +2349062000119,+2349080564419.

Good luck!
The sigmoid function, also called the logistic function, gives an ‘S’-shaped curve that can take any real-valued number and map it into a value between 0 and 1. As the input goes to positive infinity, the predicted y approaches 1, and as the input goes to negative infinity, the predicted y approaches 0. If the output of the sigmoid function is more than 0.5, we classify the outcome as 1 or YES, and if it is less than 0.5, we classify it as 0 or NO.
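
As a small worked example (the probability values below are made up for illustration), this 0.5 threshold turns sigmoid outputs into hard class labels in the same way the notebook's `predict` method does:

```python
# Example sigmoid outputs and the hard labels the 0.5 threshold assigns to them.
probabilities = [0.88, 0.27, 0.51, 0.12]
classes = [1 if p > 0.5 else 0 for p in probabilities]
print(classes)  # [1, 0, 1, 0]
```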