|
1221 | 1221 | }
|
1222 | 1222 | ],
|
1223 | 1223 | "source": [
|
1224 |
| - "#1.4 Combining dataframes using Merge function (Inner Join)\n", |
| 1224 | + "#1.3 Combining dataframes using Merge function (Inner Join)\n", |
1225 | 1225 | "# This is basically similar to an inner join: the two customers who exist in Info didn't appear because they don't have transactions\n",
|
1226 | 1226 | "# As we have a name variable in both datasets, pandas automatically creates name_x and name_y in the final dataset when combining\n",
|
1227 | 1227 | "# If we don't specify how, it's an inner join by default\n",
|
|
1372 | 1372 | }
|
1373 | 1373 | ],
|
1374 | 1374 | "source": [
|
1375 |
| - "#1.5 Combining dataframes using Merge function (left Join)\n", |
| 1375 | + "#1.4 Combining dataframes using Merge function (left Join)\n", |
1376 | 1376 | "# Now you can see customers 106 and 107, who don't have transactions\n",
|
1377 | 1377 | "merge_left=pd.merge(left=Info, right=Trans, how='left',on='Id')\n",
|
1378 | 1378 | "\n",
|
|
1521 | 1521 | }
|
1522 | 1522 | ],
|
1523 | 1523 | "source": [
|
1524 |
| - "#1.6 Combining dataframes using Merge function (right Join)\n", |
| 1524 | + "#1.5 Combining dataframes using Merge function (right Join)\n", |
1525 | 1525 | "\n",
|
1526 | 1526 | "merge_right=pd.merge(left=Info, right=Trans, how='right',on='Id')\n",
|
1527 | 1527 | "merge_right.head()"
|
|
1890 | 1890 | }
|
1891 | 1891 | ],
|
1892 | 1892 | "source": [
|
1893 |
| - "#1.7 Combining dataframes using Merge function (outer Join)\n", |
| 1893 | + "#1.6 Combining dataframes using Merge function (outer Join)\n", |
1894 | 1894 | "\n",
|
1895 | 1895 | "merge_outer=pd.merge(left=Info, right=Trans, how='outer',on='Id')\n",
|
1896 | 1896 | "\n",
|
|
2039 | 2039 | }
|
2040 | 2040 | ],
|
2041 | 2041 | "source": [
|
2042 |
| - "#1.8 Use of suffixes\n", |
| 2042 | + "#1.7 Use of suffixes\n", |
2043 | 2043 | "# If the variable exists in both datasets during merging, python creates them as var_x and var_y by default\n",
|
2044 | 2044 | "# However, if we want to specify the suffixes explicitly, we can use the suffixes option like below\n",
|
2045 | 2045 | "\n",
|
|
3946 | 3946 | }
|
3947 | 3947 | ],
|
3948 | 3948 | "source": [
|
3949 |
| - "#4.3 Imputation of missing values with mean or any fixed value- Using fillno()\n", |
| 3949 | + "#4.4 Imputation of missing values with mean or any fixed value- Using fillna()\n", |
3950 | 3950 | "\n",
|
3951 | 3951 | "mean_income= Info['Income'].mean()\n",
|
3952 | 3952 | "print(mean_income)\n",
|
|
3962 | 3962 | "metadata": {},
|
3963 | 3963 | "outputs": [],
|
3964 | 3964 | "source": [
|
3965 |
| - "#4.4 Dropping rows/columns which contains missing values\n", |
| 3965 | + "#4.5 Dropping rows/columns which contain missing values\n", |
3966 | 3966 | "\n",
|
3967 | 3967 | "# df.dropna()- Drop rows with missing values\n",
|
3968 | 3968 | "# df.drop(columns_to_drop, axis=1)\n",
|
|
0 commit comments