Add files via upload

jaydipkumar · web-flow · commit 7532fdd493e5 · 2020-05-08T16:19:17.000+05:30
diff --git a/XGBoost.ipynb b/XGBoost.ipynb
@@ -0,0 +1,185 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import libraries\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import xgboost as xgb\n",
+    "from sklearn.datasets import load_boston\n",
+    "from sklearn.metrics import mean_squared_error\n",
+    "from sklearn.model_selection import train_test_split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load the Boston dataset. NOTE(review): load_boston is deprecated/removed in newer scikit-learn releases -- confirm the installed version.\n",
+    "boston = load_boston()\n",
+    "print(boston.keys())  # list the fields exposed by the loaded dataset object"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(506, 13)\n",
+      "['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'\n",
+      " 'B' 'LSTAT']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Report the feature matrix dimensions, then the feature names, each on its own line.\n",
+    "print(boston.data.shape, boston.feature_names, sep=\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Convert the feature matrix into a DataFrame,\n",
+    "# labelling the columns with the dataset's feature names.\n",
+    "data = pd.DataFrame(boston.data, columns=boston.feature_names)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Attach the target as PRICE, then separate predictors (X) from the dependent variable (y) by column name.\n",
+    "data['PRICE'] = boston.target\n",
+    "X = data.drop(columns='PRICE')\n",
+    "y = data['PRICE']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n",
+      "  if getattr(data, 'base', None) is not None and \\\n",
+      "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/xgboost/core.py:588: FutureWarning: Series.base is deprecated and will be removed in a future version\n",
+      "  data.base is not None and isinstance(data, np.ndarray) \\\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Build an xgb.DMatrix from the predictors with the target as label (not referenced by the later cells).\n",
+    "data_dmatrix = xgb.DMatrix(X, label=y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split the data: hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, test_size=0.2, random_state=123\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# XGBRegressor model; 'reg:squarederror' is the current name for the deprecated 'reg:linear' objective.\n",
+    "xg_reg = xgb.XGBRegressor(objective='reg:squarederror', colsample_bytree=0.3, learning_rate=0.1,\n",
+    "                          max_depth=5, alpha=10, n_estimators=10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[16:18:05] WARNING: src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Fit the model on the training split, then predict the held-out rows.\n",
+    "# fit() returns the estimator itself, so the two calls can be chained.\n",
+    "preds = (xg_reg.fit(X_train, y_train)\n",
+    "               .predict(X_test))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RMSE: 10.397587\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check the error: RMSE is the square root of the mean squared error on the test split.\n",
+    "rmse = np.sqrt(mean_squared_error(y_test, preds))\n",
+    "print(\"RMSE: %f\" % rmse)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}