The Algorithms logo
The Algorithms
About Donate

Support Vector Machine

t
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Read Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the HR employee attrition CSV, named once so it is easy to change.\n",
    "# NOTE(review): the path is anchored at the filesystem root - confirm it matches your environment.\n",
    "DATA_PATH = '/WA_Fn-UseC_-HR-Employee-Attrition.csv'\n",
    "\n",
    "# Load the raw dataset into a DataFrame.\n",
    "ibm = pd.read_csv(DATA_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.set_option('display.max_columns', None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataset Information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1470, 35)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>DailyRate</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Education</th>\n",
       "      <th>EmployeeCount</th>\n",
       "      <th>EmployeeNumber</th>\n",
       "      <th>EnvironmentSatisfaction</th>\n",
       "      <th>HourlyRate</th>\n",
       "      <th>JobInvolvement</th>\n",
       "      <th>JobLevel</th>\n",
       "      <th>JobSatisfaction</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>MonthlyRate</th>\n",
       "      <th>NumCompaniesWorked</th>\n",
       "      <th>PercentSalaryHike</th>\n",
       "      <th>PerformanceRating</th>\n",
       "      <th>RelationshipSatisfaction</th>\n",
       "      <th>StandardHours</th>\n",
       "      <th>StockOptionLevel</th>\n",
       "      <th>TotalWorkingYears</th>\n",
       "      <th>TrainingTimesLastYear</th>\n",
       "      <th>WorkLifeBalance</th>\n",
       "      <th>YearsAtCompany</th>\n",
       "      <th>YearsInCurrentRole</th>\n",
       "      <th>YearsSinceLastPromotion</th>\n",
       "      <th>YearsWithCurrManager</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.0</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.0</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>36.923810</td>\n",
       "      <td>802.485714</td>\n",
       "      <td>9.192517</td>\n",
       "      <td>2.912925</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1024.865306</td>\n",
       "      <td>2.721769</td>\n",
       "      <td>65.891156</td>\n",
       "      <td>2.729932</td>\n",
       "      <td>2.063946</td>\n",
       "      <td>2.728571</td>\n",
       "      <td>6502.931293</td>\n",
       "      <td>14313.103401</td>\n",
       "      <td>2.693197</td>\n",
       "      <td>15.209524</td>\n",
       "      <td>3.153741</td>\n",
       "      <td>2.712245</td>\n",
       "      <td>80.0</td>\n",
       "      <td>0.793878</td>\n",
       "      <td>11.279592</td>\n",
       "      <td>2.799320</td>\n",
       "      <td>2.761224</td>\n",
       "      <td>7.008163</td>\n",
       "      <td>4.229252</td>\n",
       "      <td>2.187755</td>\n",
       "      <td>4.123129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>9.135373</td>\n",
       "      <td>403.509100</td>\n",
       "      <td>8.106864</td>\n",
       "      <td>1.024165</td>\n",
       "      <td>0.0</td>\n",
       "      <td>602.024335</td>\n",
       "      <td>1.093082</td>\n",
       "      <td>20.329428</td>\n",
       "      <td>0.711561</td>\n",
       "      <td>1.106940</td>\n",
       "      <td>1.102846</td>\n",
       "      <td>4707.956783</td>\n",
       "      <td>7117.786044</td>\n",
       "      <td>2.498009</td>\n",
       "      <td>3.659938</td>\n",
       "      <td>0.360824</td>\n",
       "      <td>1.081209</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.852077</td>\n",
       "      <td>7.780782</td>\n",
       "      <td>1.289271</td>\n",
       "      <td>0.706476</td>\n",
       "      <td>6.126525</td>\n",
       "      <td>3.623137</td>\n",
       "      <td>3.222430</td>\n",
       "      <td>3.568136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>18.000000</td>\n",
       "      <td>102.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>30.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1009.000000</td>\n",
       "      <td>2094.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>11.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>30.000000</td>\n",
       "      <td>465.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>491.250000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>48.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2911.000000</td>\n",
       "      <td>8047.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>12.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>36.000000</td>\n",
       "      <td>802.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1020.500000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>66.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4919.000000</td>\n",
       "      <td>14235.500000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>43.000000</td>\n",
       "      <td>1157.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1555.750000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>83.750000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>8379.000000</td>\n",
       "      <td>20461.500000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>7.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>60.000000</td>\n",
       "      <td>1499.000000</td>\n",
       "      <td>29.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2068.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>19999.000000</td>\n",
       "      <td>26999.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>25.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>40.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>40.000000</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>17.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Age    DailyRate  DistanceFromHome    Education  EmployeeCount  \\\n",
       "count  1470.000000  1470.000000       1470.000000  1470.000000         1470.0   \n",
       "mean     36.923810   802.485714          9.192517     2.912925            1.0   \n",
       "std       9.135373   403.509100          8.106864     1.024165            0.0   \n",
       "min      18.000000   102.000000          1.000000     1.000000            1.0   \n",
       "25%      30.000000   465.000000          2.000000     2.000000            1.0   \n",
       "50%      36.000000   802.000000          7.000000     3.000000            1.0   \n",
       "75%      43.000000  1157.000000         14.000000     4.000000            1.0   \n",
       "max      60.000000  1499.000000         29.000000     5.000000            1.0   \n",
       "\n",
       "       EmployeeNumber  EnvironmentSatisfaction   HourlyRate  JobInvolvement  \\\n",
       "count     1470.000000              1470.000000  1470.000000     1470.000000   \n",
       "mean      1024.865306                 2.721769    65.891156        2.729932   \n",
       "std        602.024335                 1.093082    20.329428        0.711561   \n",
       "min          1.000000                 1.000000    30.000000        1.000000   \n",
       "25%        491.250000                 2.000000    48.000000        2.000000   \n",
       "50%       1020.500000                 3.000000    66.000000        3.000000   \n",
       "75%       1555.750000                 4.000000    83.750000        3.000000   \n",
       "max       2068.000000                 4.000000   100.000000        4.000000   \n",
       "\n",
       "          JobLevel  JobSatisfaction  MonthlyIncome   MonthlyRate  \\\n",
       "count  1470.000000      1470.000000    1470.000000   1470.000000   \n",
       "mean      2.063946         2.728571    6502.931293  14313.103401   \n",
       "std       1.106940         1.102846    4707.956783   7117.786044   \n",
       "min       1.000000         1.000000    1009.000000   2094.000000   \n",
       "25%       1.000000         2.000000    2911.000000   8047.000000   \n",
       "50%       2.000000         3.000000    4919.000000  14235.500000   \n",
       "75%       3.000000         4.000000    8379.000000  20461.500000   \n",
       "max       5.000000         4.000000   19999.000000  26999.000000   \n",
       "\n",
       "       NumCompaniesWorked  PercentSalaryHike  PerformanceRating  \\\n",
       "count         1470.000000        1470.000000        1470.000000   \n",
       "mean             2.693197          15.209524           3.153741   \n",
       "std              2.498009           3.659938           0.360824   \n",
       "min              0.000000          11.000000           3.000000   \n",
       "25%              1.000000          12.000000           3.000000   \n",
       "50%              2.000000          14.000000           3.000000   \n",
       "75%              4.000000          18.000000           3.000000   \n",
       "max              9.000000          25.000000           4.000000   \n",
       "\n",
       "       RelationshipSatisfaction  StandardHours  StockOptionLevel  \\\n",
       "count               1470.000000         1470.0       1470.000000   \n",
       "mean                   2.712245           80.0          0.793878   \n",
       "std                    1.081209            0.0          0.852077   \n",
       "min                    1.000000           80.0          0.000000   \n",
       "25%                    2.000000           80.0          0.000000   \n",
       "50%                    3.000000           80.0          1.000000   \n",
       "75%                    4.000000           80.0          1.000000   \n",
       "max                    4.000000           80.0          3.000000   \n",
       "\n",
       "       TotalWorkingYears  TrainingTimesLastYear  WorkLifeBalance  \\\n",
       "count        1470.000000            1470.000000      1470.000000   \n",
       "mean           11.279592               2.799320         2.761224   \n",
       "std             7.780782               1.289271         0.706476   \n",
       "min             0.000000               0.000000         1.000000   \n",
       "25%             6.000000               2.000000         2.000000   \n",
       "50%            10.000000               3.000000         3.000000   \n",
       "75%            15.000000               3.000000         3.000000   \n",
       "max            40.000000               6.000000         4.000000   \n",
       "\n",
       "       YearsAtCompany  YearsInCurrentRole  YearsSinceLastPromotion  \\\n",
       "count     1470.000000         1470.000000              1470.000000   \n",
       "mean         7.008163            4.229252                 2.187755   \n",
       "std          6.126525            3.623137                 3.222430   \n",
       "min          0.000000            0.000000                 0.000000   \n",
       "25%          3.000000            2.000000                 0.000000   \n",
       "50%          5.000000            3.000000                 1.000000   \n",
       "75%          9.000000            7.000000                 3.000000   \n",
       "max         40.000000           18.000000                15.000000   \n",
       "\n",
       "       YearsWithCurrManager  \n",
       "count           1470.000000  \n",
       "mean               4.123129  \n",
       "std                3.568136  \n",
       "min                0.000000  \n",
       "25%                2.000000  \n",
       "50%                3.000000  \n",
       "75%                7.000000  \n",
       "max               17.000000  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Age  mode:  35\n",
      "Attrition  mode:  No\n",
      "BusinessTravel  mode:  Travel_Rarely\n",
      "DailyRate  mode:  691\n",
      "Department  mode:  Research & Development\n",
      "DistanceFromHome  mode:  2\n",
      "Education  mode:  3\n",
      "EducationField  mode:  Life Sciences\n",
      "EmployeeCount  mode:  1\n",
      "EmployeeNumber  mode:  1\n",
      "EnvironmentSatisfaction  mode:  3\n",
      "Gender  mode:  Male\n",
      "HourlyRate  mode:  66\n",
      "JobInvolvement  mode:  3\n",
      "JobLevel  mode:  1\n",
      "JobRole  mode:  Sales Executive\n",
      "JobSatisfaction  mode:  4\n",
      "MaritalStatus  mode:  Married\n",
      "MonthlyIncome  mode:  2342\n",
      "MonthlyRate  mode:  9150\n",
      "NumCompaniesWorked  mode:  1\n",
      "Over18  mode:  Y\n",
      "OverTime  mode:  No\n",
      "PercentSalaryHike  mode:  11\n",
      "PerformanceRating  mode:  3\n",
      "RelationshipSatisfaction  mode:  3\n",
      "StandardHours  mode:  80\n",
      "StockOptionLevel  mode:  0\n",
      "TotalWorkingYears  mode:  10\n",
      "TrainingTimesLastYear  mode:  2\n",
      "WorkLifeBalance  mode:  3\n",
      "YearsAtCompany  mode:  5\n",
      "YearsInCurrentRole  mode:  2\n",
      "YearsSinceLastPromotion  mode:  0\n",
      "YearsWithCurrManager  mode:  2\n"
     ]
    }
   ],
   "source": [
    "import statistics\n",
    "\n",
    "# Report the most frequent value of every column, one line per column.\n",
    "for column_name, column_values in ibm.items():\n",
    "    print(column_name, \" mode: \", statistics.mode(column_values))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1470 entries, 0 to 1469\n",
      "Data columns (total 35 columns):\n",
      " #   Column                    Non-Null Count  Dtype \n",
      "---  ------                    --------------  ----- \n",
      " 0   Age                       1470 non-null   int64 \n",
      " 1   Attrition                 1470 non-null   object\n",
      " 2   BusinessTravel            1470 non-null   object\n",
      " 3   DailyRate                 1470 non-null   int64 \n",
      " 4   Department                1470 non-null   object\n",
      " 5   DistanceFromHome          1470 non-null   int64 \n",
      " 6   Education                 1470 non-null   int64 \n",
      " 7   EducationField            1470 non-null   object\n",
      " 8   EmployeeCount             1470 non-null   int64 \n",
      " 9   EmployeeNumber            1470 non-null   int64 \n",
      " 10  EnvironmentSatisfaction   1470 non-null   int64 \n",
      " 11  Gender                    1470 non-null   object\n",
      " 12  HourlyRate                1470 non-null   int64 \n",
      " 13  JobInvolvement            1470 non-null   int64 \n",
      " 14  JobLevel                  1470 non-null   int64 \n",
      " 15  JobRole                   1470 non-null   object\n",
      " 16  JobSatisfaction           1470 non-null   int64 \n",
      " 17  MaritalStatus             1470 non-null   object\n",
      " 18  MonthlyIncome             1470 non-null   int64 \n",
      " 19  MonthlyRate               1470 non-null   int64 \n",
      " 20  NumCompaniesWorked        1470 non-null   int64 \n",
      " 21  Over18                    1470 non-null   object\n",
      " 22  OverTime                  1470 non-null   object\n",
      " 23  PercentSalaryHike         1470 non-null   int64 \n",
      " 24  PerformanceRating         1470 non-null   int64 \n",
      " 25  RelationshipSatisfaction  1470 non-null   int64 \n",
      " 26  StandardHours             1470 non-null   int64 \n",
      " 27  StockOptionLevel          1470 non-null   int64 \n",
      " 28  TotalWorkingYears         1470 non-null   int64 \n",
      " 29  TrainingTimesLastYear     1470 non-null   int64 \n",
      " 30  WorkLifeBalance           1470 non-null   int64 \n",
      " 31  YearsAtCompany            1470 non-null   int64 \n",
      " 32  YearsInCurrentRole        1470 non-null   int64 \n",
      " 33  YearsSinceLastPromotion   1470 non-null   int64 \n",
      " 34  YearsWithCurrManager      1470 non-null   int64 \n",
      "dtypes: int64(26), object(9)\n",
      "memory usage: 402.1+ KB\n"
     ]
    }
   ],
   "source": [
    "ibm.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns removed during preprocessing:\n",
    "#   EmployeeCount, StandardHours - constant (std == 0 in the describe() output above)\n",
    "#   Over18, EmployeeNumber       - presumably constant / a row identifier; verify\n",
    "UNINFORMATIVE_COLUMNS = ['EmployeeCount', 'EmployeeNumber', 'Over18', 'StandardHours']\n",
    "\n",
    "# One non-in-place drop; errors='ignore' keeps the cell re-runnable after the\n",
    "# columns are already gone (the separate in-place drops raised KeyError on re-run).\n",
    "ibm = ibm.drop(columns=UNINFORMATIVE_COLUMNS, errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Attrition</th>\n",
       "      <th>BusinessTravel</th>\n",
       "      <th>DailyRate</th>\n",
       "      <th>Department</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Education</th>\n",
       "      <th>EducationField</th>\n",
       "      <th>EnvironmentSatisfaction</th>\n",
       "      <th>Gender</th>\n",
       "      <th>HourlyRate</th>\n",
       "      <th>JobInvolvement</th>\n",
       "      <th>JobLevel</th>\n",
       "      <th>JobRole</th>\n",
       "      <th>JobSatisfaction</th>\n",
       "      <th>MaritalStatus</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>MonthlyRate</th>\n",
       "      <th>NumCompaniesWorked</th>\n",
       "      <th>OverTime</th>\n",
       "      <th>PercentSalaryHike</th>\n",
       "      <th>PerformanceRating</th>\n",
       "      <th>RelationshipSatisfaction</th>\n",
       "      <th>StockOptionLevel</th>\n",
       "      <th>TotalWorkingYears</th>\n",
       "      <th>TrainingTimesLastYear</th>\n",
       "      <th>WorkLifeBalance</th>\n",
       "      <th>YearsAtCompany</th>\n",
       "      <th>YearsInCurrentRole</th>\n",
       "      <th>YearsSinceLastPromotion</th>\n",
       "      <th>YearsWithCurrManager</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>41</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>1102</td>\n",
       "      <td>Sales</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>2</td>\n",
       "      <td>Female</td>\n",
       "      <td>94</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>Sales Executive</td>\n",
       "      <td>4</td>\n",
       "      <td>Single</td>\n",
       "      <td>5993</td>\n",
       "      <td>19479</td>\n",
       "      <td>8</td>\n",
       "      <td>Yes</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>49</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>279</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>3</td>\n",
       "      <td>Male</td>\n",
       "      <td>61</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Research Scientist</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>5130</td>\n",
       "      <td>24907</td>\n",
       "      <td>1</td>\n",
       "      <td>No</td>\n",
       "      <td>23</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>1373</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Other</td>\n",
       "      <td>4</td>\n",
       "      <td>Male</td>\n",
       "      <td>92</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>Laboratory Technician</td>\n",
       "      <td>3</td>\n",
       "      <td>Single</td>\n",
       "      <td>2090</td>\n",
       "      <td>2396</td>\n",
       "      <td>6</td>\n",
       "      <td>Yes</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>1392</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>4</td>\n",
       "      <td>Female</td>\n",
       "      <td>56</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Research Scientist</td>\n",
       "      <td>3</td>\n",
       "      <td>Married</td>\n",
       "      <td>2909</td>\n",
       "      <td>23159</td>\n",
       "      <td>1</td>\n",
       "      <td>Yes</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>27</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>591</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>Medical</td>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "      <td>40</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Laboratory Technician</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>3468</td>\n",
       "      <td>16632</td>\n",
       "      <td>9</td>\n",
       "      <td>No</td>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1465</th>\n",
       "      <td>36</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>884</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>23</td>\n",
       "      <td>2</td>\n",
       "      <td>Medical</td>\n",
       "      <td>3</td>\n",
       "      <td>Male</td>\n",
       "      <td>41</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>Laboratory Technician</td>\n",
       "      <td>4</td>\n",
       "      <td>Married</td>\n",
       "      <td>2571</td>\n",
       "      <td>12290</td>\n",
       "      <td>4</td>\n",
       "      <td>No</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1466</th>\n",
       "      <td>39</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>613</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>Medical</td>\n",
       "      <td>4</td>\n",
       "      <td>Male</td>\n",
       "      <td>42</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>Healthcare Representative</td>\n",
       "      <td>1</td>\n",
       "      <td>Married</td>\n",
       "      <td>9991</td>\n",
       "      <td>21457</td>\n",
       "      <td>4</td>\n",
       "      <td>No</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1467</th>\n",
       "      <td>27</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>155</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>2</td>\n",
       "      <td>Male</td>\n",
       "      <td>87</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>Manufacturing Director</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>6142</td>\n",
       "      <td>5174</td>\n",
       "      <td>1</td>\n",
       "      <td>Yes</td>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1468</th>\n",
       "      <td>49</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>1023</td>\n",
       "      <td>Sales</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>Medical</td>\n",
       "      <td>4</td>\n",
       "      <td>Male</td>\n",
       "      <td>63</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Sales Executive</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>5390</td>\n",
       "      <td>13243</td>\n",
       "      <td>2</td>\n",
       "      <td>No</td>\n",
       "      <td>14</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1469</th>\n",
       "      <td>34</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>628</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>Medical</td>\n",
       "      <td>2</td>\n",
       "      <td>Male</td>\n",
       "      <td>82</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>Laboratory Technician</td>\n",
       "      <td>3</td>\n",
       "      <td>Married</td>\n",
       "      <td>4404</td>\n",
       "      <td>10228</td>\n",
       "      <td>2</td>\n",
       "      <td>No</td>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1470 rows × 31 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      Age Attrition     BusinessTravel  DailyRate              Department  \\\n",
       "0      41       Yes      Travel_Rarely       1102                   Sales   \n",
       "1      49        No  Travel_Frequently        279  Research & Development   \n",
       "2      37       Yes      Travel_Rarely       1373  Research & Development   \n",
       "3      33        No  Travel_Frequently       1392  Research & Development   \n",
       "4      27        No      Travel_Rarely        591  Research & Development   \n",
       "...   ...       ...                ...        ...                     ...   \n",
       "1465   36        No  Travel_Frequently        884  Research & Development   \n",
       "1466   39        No      Travel_Rarely        613  Research & Development   \n",
       "1467   27        No      Travel_Rarely        155  Research & Development   \n",
       "1468   49        No  Travel_Frequently       1023                   Sales   \n",
       "1469   34        No      Travel_Rarely        628  Research & Development   \n",
       "\n",
       "      DistanceFromHome  Education EducationField  EnvironmentSatisfaction  \\\n",
       "0                    1          2  Life Sciences                        2   \n",
       "1                    8          1  Life Sciences                        3   \n",
       "2                    2          2          Other                        4   \n",
       "3                    3          4  Life Sciences                        4   \n",
       "4                    2          1        Medical                        1   \n",
       "...                ...        ...            ...                      ...   \n",
       "1465                23          2        Medical                        3   \n",
       "1466                 6          1        Medical                        4   \n",
       "1467                 4          3  Life Sciences                        2   \n",
       "1468                 2          3        Medical                        4   \n",
       "1469                 8          3        Medical                        2   \n",
       "\n",
       "      Gender  HourlyRate  JobInvolvement  JobLevel                    JobRole  \\\n",
       "0     Female          94               3         2            Sales Executive   \n",
       "1       Male          61               2         2         Research Scientist   \n",
       "2       Male          92               2         1      Laboratory Technician   \n",
       "3     Female          56               3         1         Research Scientist   \n",
       "4       Male          40               3         1      Laboratory Technician   \n",
       "...      ...         ...             ...       ...                        ...   \n",
       "1465    Male          41               4         2      Laboratory Technician   \n",
       "1466    Male          42               2         3  Healthcare Representative   \n",
       "1467    Male          87               4         2     Manufacturing Director   \n",
       "1468    Male          63               2         2            Sales Executive   \n",
       "1469    Male          82               4         2      Laboratory Technician   \n",
       "\n",
       "      JobSatisfaction MaritalStatus  MonthlyIncome  MonthlyRate  \\\n",
       "0                   4        Single           5993        19479   \n",
       "1                   2       Married           5130        24907   \n",
       "2                   3        Single           2090         2396   \n",
       "3                   3       Married           2909        23159   \n",
       "4                   2       Married           3468        16632   \n",
       "...               ...           ...            ...          ...   \n",
       "1465                4       Married           2571        12290   \n",
       "1466                1       Married           9991        21457   \n",
       "1467                2       Married           6142         5174   \n",
       "1468                2       Married           5390        13243   \n",
       "1469                3       Married           4404        10228   \n",
       "\n",
       "      NumCompaniesWorked OverTime  PercentSalaryHike  PerformanceRating  \\\n",
       "0                      8      Yes                 11                  3   \n",
       "1                      1       No                 23                  4   \n",
       "2                      6      Yes                 15                  3   \n",
       "3                      1      Yes                 11                  3   \n",
       "4                      9       No                 12                  3   \n",
       "...                  ...      ...                ...                ...   \n",
       "1465                   4       No                 17                  3   \n",
       "1466                   4       No                 15                  3   \n",
       "1467                   1      Yes                 20                  4   \n",
       "1468                   2       No                 14                  3   \n",
       "1469                   2       No                 12                  3   \n",
       "\n",
       "      RelationshipSatisfaction  StockOptionLevel  TotalWorkingYears  \\\n",
       "0                            1                 0                  8   \n",
       "1                            4                 1                 10   \n",
       "2                            2                 0                  7   \n",
       "3                            3                 0                  8   \n",
       "4                            4                 1                  6   \n",
       "...                        ...               ...                ...   \n",
       "1465                         3                 1                 17   \n",
       "1466                         1                 1                  9   \n",
       "1467                         2                 1                  6   \n",
       "1468                         4                 0                 17   \n",
       "1469                         1                 0                  6   \n",
       "\n",
       "      TrainingTimesLastYear  WorkLifeBalance  YearsAtCompany  \\\n",
       "0                         0                1               6   \n",
       "1                         3                3              10   \n",
       "2                         3                3               0   \n",
       "3                         3                3               8   \n",
       "4                         3                3               2   \n",
       "...                     ...              ...             ...   \n",
       "1465                      3                3               5   \n",
       "1466                      5                3               7   \n",
       "1467                      0                3               6   \n",
       "1468                      3                2               9   \n",
       "1469                      3                4               4   \n",
       "\n",
       "      YearsInCurrentRole  YearsSinceLastPromotion  YearsWithCurrManager  \n",
       "0                      4                        0                     5  \n",
       "1                      7                        1                     7  \n",
       "2                      0                        0                     0  \n",
       "3                      7                        3                     0  \n",
       "4                      2                        2                     2  \n",
       "...                  ...                      ...                   ...  \n",
       "1465                   2                        0                     3  \n",
       "1466                   7                        1                     7  \n",
       "1467                   2                        0                     3  \n",
       "1468                   6                        0                     8  \n",
       "1469                   3                        1                     2  \n",
       "\n",
       "[1470 rows x 31 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm.drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Age                         0\n",
       "Attrition                   0\n",
       "BusinessTravel              0\n",
       "DailyRate                   0\n",
       "Department                  0\n",
       "DistanceFromHome            0\n",
       "Education                   0\n",
       "EducationField              0\n",
       "EnvironmentSatisfaction     0\n",
       "Gender                      0\n",
       "HourlyRate                  0\n",
       "JobInvolvement              0\n",
       "JobLevel                    0\n",
       "JobRole                     0\n",
       "JobSatisfaction             0\n",
       "MaritalStatus               0\n",
       "MonthlyIncome               0\n",
       "MonthlyRate                 0\n",
       "NumCompaniesWorked          0\n",
       "OverTime                    0\n",
       "PercentSalaryHike           0\n",
       "PerformanceRating           0\n",
       "RelationshipSatisfaction    0\n",
       "StockOptionLevel            0\n",
       "TotalWorkingYears           0\n",
       "TrainingTimesLastYear       0\n",
       "WorkLifeBalance             0\n",
       "YearsAtCompany              0\n",
       "YearsInCurrentRole          0\n",
       "YearsSinceLastPromotion     0\n",
       "YearsWithCurrManager        0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# replace Attrition (0 - No, 1 - Yes)\n",
    "ibm.replace({'Attrition' : {'Yes': 1, 'No': 0}}, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# replace BusinessTravel (0 - Non-Travel, 1 - Travel_Rarely, 2 - Travel_Frequently)\n",
    "ibm.replace({'BusinessTravel' : {'Non-Travel': 0, 'Travel_Rarely': 1, 'Travel_Frequently': 2}}, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Department\n",
    "dummy = pd.get_dummies(ibm['Department'])\n",
    "ibm.insert(5,'Dp_Sales&Development', dummy['Research & Development'])\n",
    "ibm.insert(6,'Dp_Sales', dummy['Sales'])\n",
    "ibm.insert(7,'Dp_HumanResources', dummy['Human Resources'])\n",
    "\n",
    "ibm.drop(columns = 'Department', inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "#EducationField\n",
    "dummy = pd.get_dummies(ibm['EducationField'])\n",
    "ibm.insert(11,'EF_Life Sciences',dummy['Life Sciences'])\n",
    "ibm.insert(12,'EF_Medical',dummy['Medical'])\n",
    "ibm.insert(13,'EF_Marketing',dummy['Marketing'])\n",
    "ibm.insert(14,'EF_TechnicalDegree',dummy['Technical Degree'])\n",
    "ibm.insert(15,'EF_HumanResources',dummy['Human Resources'])\n",
    "ibm.insert(16,'EF_Other',dummy['Other'])\n",
    "\n",
    "ibm.drop(columns = 'EducationField', inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# replace Gender (0 - Male; 1 - Female)\n",
    "ibm.replace({'Gender': {'Male': 0, 'Female': 1}}, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Job role dummy variables\n",
    "dummy=pd.get_dummies(ibm['JobRole'])\n",
    "ibm.insert(23, 'JR_HealthcareRepresentive', dummy['Healthcare Representative'])\n",
    "ibm.insert(24, 'JR_HumanResource', dummy['Human Resources'])\n",
    "ibm.insert(25, 'JR_LaboratoryTechnician', dummy['Laboratory Technician'])\n",
    "ibm.insert(26, 'JR_Manager', dummy['Manager'])\n",
    "ibm.insert(27, 'JR_ManufacturingDirector', dummy['Manufacturing Director'])\n",
    "ibm.insert(28, 'JR_ResearchDirector', dummy['Research Director'])\n",
    "ibm.insert(29, 'JR_ResearchScientist', dummy['Research Scientist'])\n",
    "ibm.insert(30, 'JR_SalesExecutive', dummy['Sales Executive'])\n",
    "ibm.insert(31, 'JR_SalesRepresentative', dummy['Sales Representative'])\n",
    "\n",
    "ibm.drop(columns = 'JobRole', inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# MaritalStatus role dummy variables\n",
    "dummy=pd.get_dummies(ibm['MaritalStatus'])\n",
    "ibm.insert(34, 'MS_Married', dummy['Married'])\n",
    "ibm.insert(35, 'MS_Single', dummy['Single'])\n",
    "ibm.insert(36, 'MS_Divorced', dummy['Divorced'])\n",
    "\n",
    "ibm.drop(columns = 'MaritalStatus', inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# replace Overtime (0 - No; 1 - Yes)\n",
    "ibm.replace({'OverTime': {'No': 0, 'Yes': 1}}, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# replace Over18 (0 - N; 1 - Y)\n",
    "ibm.replace({'Over18': {'N': 0, 'Y': 1}}, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "def iqr_outliers(data):\n",
    "    out=[]\n",
    "    \n",
    "    firstQuartile = data.quantile(0.25)\n",
    "    thirdQuartile = data.quantile(0.75)\n",
    "    \n",
    "    iqr = thirdQuartile-firstQuartile\n",
    "    \n",
    "    Lower_bound = firstQuartile - 1.5 * iqr\n",
    "    Upper_bound = thirdQuartile + 1.5 * iqr\n",
    "    \n",
    "    for i in data:\n",
    "        if i > Upper_bound or i < Lower_bound:\n",
    "            out.append(i)\n",
    "            \n",
    "    print(\"Outliers:\",out , \"\\nCount: \", len(out), \"\\n\")\n",
    "    return out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Age\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Attrition\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  237 \n",
      "\n",
      "BusinessTravel\n",
      "Outliers: [2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 0, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 0, 2, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 2, 0, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 2, 2] \n",
      "Count:  427 \n",
      "\n",
      "DailyRate\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Dp_Sales&Development\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Dp_Sales\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Dp_HumanResources\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  63 \n",
      "\n",
      "DistanceFromHome\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Education\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "EnvironmentSatisfaction\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "EF_Life Sciences\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "EF_Medical\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "EF_Marketing\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  159 \n",
      "\n",
      "EF_TechnicalDegree\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  132 \n",
      "\n",
      "EF_HumanResources\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  27 \n",
      "\n",
      "EF_Other\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  82 \n",
      "\n",
      "Gender\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "HourlyRate\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "JobInvolvement\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "JobLevel\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "JobSatisfaction\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "JR_HealthcareRepresentive\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  131 \n",
      "\n",
      "JR_HumanResource\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  52 \n",
      "\n",
      "JR_LaboratoryTechnician\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  259 \n",
      "\n",
      "JR_Manager\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  102 \n",
      "\n",
      "JR_ManufacturingDirector\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  145 \n",
      "\n",
      "JR_ResearchDirector\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  80 \n",
      "\n",
      "JR_ResearchScientist\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  292 \n",
      "\n",
      "JR_SalesExecutive\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  326 \n",
      "\n",
      "JR_SalesRepresentative\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  83 \n",
      "\n",
      "MonthlyIncome\n",
      "Outliers: [19094, 18947, 19545, 18740, 18844, 18172, 17328, 16959, 19537, 17181, 19926, 19033, 18722, 19999, 16792, 19232, 19517, 19068, 19202, 19436, 16872, 19045, 19144, 17584, 18665, 17068, 19272, 18300, 16659, 19406, 19197, 19566, 18041, 17046, 17861, 16835, 16595, 19502, 18200, 16627, 19513, 19141, 19189, 16856, 19859, 18430, 17639, 16752, 19246, 17159, 17924, 17099, 17444, 17399, 19419, 18303, 19973, 19845, 17650, 19237, 19627, 16756, 17665, 16885, 17465, 19626, 19943, 18606, 17048, 17856, 19081, 17779, 19740, 18711, 18265, 18213, 18824, 18789, 19847, 19190, 18061, 17123, 16880, 17861, 19187, 19717, 16799, 17328, 19701, 17169, 16598, 17007, 16606, 19586, 19331, 19613, 17567, 19049, 19658, 17426, 17603, 16704, 19833, 19038, 19328, 19392, 19665, 16823, 17174, 17875, 19161, 19636, 19431, 18880] \n",
      "Count:  114 \n",
      "\n",
      "MonthlyRate\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "NumCompaniesWorked\n",
      "Outliers: [9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9] \n",
      "Count:  52 \n",
      "\n",
      "MS_Married\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "MS_Single\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "MS_Divorced\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  327 \n",
      "\n",
      "OverTime\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "PercentSalaryHike\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "PerformanceRating\n",
      "Outliers: [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] \n",
      "Count:  226 \n",
      "\n",
      "RelationshipSatisfaction\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "StockOptionLevel\n",
      "Outliers: [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] \n",
      "Count:  85 \n",
      "\n",
      "TotalWorkingYears\n",
      "Outliers: [31, 29, 37, 38, 30, 40, 36, 34, 32, 33, 37, 30, 36, 31, 33, 32, 37, 31, 32, 32, 30, 34, 30, 40, 29, 35, 31, 33, 31, 29, 32, 30, 33, 30, 29, 31, 32, 33, 36, 34, 31, 36, 33, 31, 29, 33, 29, 32, 31, 35, 29, 32, 34, 36, 32, 30, 36, 29, 34, 37, 29, 29, 35] \n",
      "Count:  63 \n",
      "\n",
      "TrainingTimesLastYear\n",
      "Outliers: [0, 5, 5, 5, 6, 5, 5, 5, 6, 6, 0, 0, 0, 5, 0, 5, 5, 5, 6, 6, 5, 0, 6, 5, 5, 0, 5, 5, 6, 5, 5, 5, 0, 5, 5, 5, 5, 6, 6, 5, 5, 5, 5, 0, 0, 5, 5, 5, 6, 6, 5, 0, 5, 0, 5, 5, 0, 6, 0, 5, 5, 6, 6, 5, 6, 5, 0, 5, 5, 5, 5, 0, 6, 5, 5, 5, 5, 6, 5, 5, 6, 5, 5, 5, 0, 5, 0, 5, 5, 6, 5, 6, 5, 0, 5, 5, 0, 6, 6, 5, 6, 0, 5, 0, 6, 6, 6, 6, 5, 5, 0, 5, 0, 0, 6, 0, 6, 5, 6, 5, 5, 0, 5, 6, 6, 5, 5, 0, 0, 6, 0, 0, 5, 0, 5, 6, 5, 5, 6, 6, 5, 5, 5, 5, 5, 6, 5, 6, 6, 0, 6, 6, 5, 5, 0, 0, 6, 6, 0, 5, 0, 0, 0, 0, 0, 5, 5, 6, 5, 5, 0, 5, 5, 0, 5, 5, 6, 5, 5, 5, 6, 5, 5, 5, 0, 0, 5, 5, 5, 5, 6, 0, 0, 6, 6, 6, 6, 5, 5, 5, 6, 5, 0, 5, 5, 6, 5, 6, 6, 5, 6, 6, 5, 0, 5, 5, 5, 5, 5, 0, 0, 0, 6, 5, 6, 6, 5, 6, 0, 6, 6, 5, 6, 6, 5, 5, 5, 0] \n",
      "Count:  238 \n",
      "\n",
      "WorkLifeBalance\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "YearsAtCompany\n",
      "Outliers: [25, 22, 22, 27, 21, 22, 37, 25, 20, 40, 20, 24, 20, 24, 33, 20, 19, 22, 33, 24, 19, 21, 20, 36, 20, 20, 22, 24, 21, 21, 25, 21, 29, 20, 27, 20, 31, 32, 20, 20, 21, 22, 22, 34, 24, 26, 31, 20, 31, 26, 19, 21, 21, 32, 21, 19, 20, 22, 20, 21, 26, 20, 22, 24, 33, 29, 25, 21, 19, 19, 20, 19, 33, 19, 19, 20, 20, 20, 20, 20, 32, 20, 21, 33, 36, 26, 30, 22, 23, 23, 21, 21, 22, 22, 19, 22, 19, 22, 20, 20, 20, 22, 20, 20] \n",
      "Count:  104 \n",
      "\n",
      "YearsInCurrentRole\n",
      "Outliers: [15, 16, 18, 15, 18, 17, 16, 15, 16, 15, 16, 16, 15, 16, 17, 15, 15, 15, 17, 17, 16] \n",
      "Count:  21 \n",
      "\n",
      "YearsSinceLastPromotion\n",
      "Outliers: [8, 15, 8, 8, 9, 13, 12, 10, 11, 9, 12, 15, 15, 15, 9, 11, 11, 9, 12, 11, 15, 11, 10, 9, 11, 9, 8, 11, 11, 8, 13, 9, 9, 12, 10, 11, 15, 13, 9, 11, 10, 8, 8, 11, 9, 11, 12, 11, 14, 13, 14, 8, 11, 15, 10, 11, 11, 15, 11, 13, 11, 13, 15, 8, 13, 15, 11, 14, 15, 15, 9, 11, 9, 8, 9, 15, 11, 12, 9, 8, 10, 14, 8, 13, 13, 12, 14, 8, 8, 8, 14, 14, 8, 12, 13, 14, 14, 12, 11, 8, 11, 9, 12, 8, 9, 11, 9] \n",
      "Count:  107 \n",
      "\n",
      "YearsWithCurrManager\n",
      "Outliers: [17, 15, 15, 15, 15, 17, 16, 17, 15, 17, 17, 17, 17, 16] \n",
      "Count:  14 \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Report every column's IQR outliers; iqr_outliers prints the values and their count.\n",
    "for c_name, column_values in ibm.items():\n",
    "    print(c_name)\n",
    "    iqr_outliers(column_values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def remove_outliers(c_name):\n",
    "    \"\"\"Drop the IQR outliers of one column from the global ``ibm`` frame, in place.\n",
    "\n",
    "    Rows whose ``c_name`` value is reported by ``iqr_outliers`` are removed and\n",
    "    the column is checked again; this repeats until a check reports nothing,\n",
    "    because removing rows can change what the next check reports.\n",
    "\n",
    "    Args:\n",
    "        c_name: Label of the ``ibm`` column to clean.\n",
    "\n",
    "    Returns:\n",
    "        None. ``ibm`` is modified in place.\n",
    "\n",
    "    Note:\n",
    "        ``iqr_outliers`` runs once per pass, so anything it prints is shown\n",
    "        once per pass.\n",
    "    \"\"\"\n",
    "    outliers = iqr_outliers(ibm[c_name])\n",
    "\n",
    "    while len(outliers) != 0:\n",
    "        # One vectorised membership test replaces a full scan-and-drop per outlier value.\n",
    "        outlier_rows = ibm.index[ibm[c_name].isin(outliers)]\n",
    "        if len(outlier_rows) == 0:\n",
    "            # No row matches the reported values, so another pass cannot make\n",
    "            # progress; stop instead of looping forever.\n",
    "            break\n",
    "        ibm.drop(outlier_rows, inplace=True)\n",
    "        outliers = iqr_outliers(ibm[c_name])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Outliers: [19094, 18947, 19545, 18740, 18844, 18172, 17328, 16959, 19537, 17181, 19926, 19033, 18722, 19999, 16792, 19232, 19517, 19068, 19202, 19436, 16872, 19045, 19144, 17584, 18665, 17068, 19272, 18300, 16659, 19406, 19197, 19566, 18041, 17046, 17861, 16835, 16595, 19502, 18200, 16627, 19513, 19141, 19189, 16856, 19859, 18430, 17639, 16752, 19246, 17159, 17924, 17099, 17444, 17399, 19419, 18303, 19973, 19845, 17650, 19237, 19627, 16756, 17665, 16885, 17465, 19626, 19943, 18606, 17048, 17856, 19081, 17779, 19740, 18711, 18265, 18213, 18824, 18789, 19847, 19190, 18061, 17123, 16880, 17861, 19187, 19717, 16799, 17328, 19701, 17169, 16598, 17007, 16606, 19586, 19331, 19613, 17567, 19049, 19658, 17426, 17603, 16704, 19833, 19038, 19328, 19392, 19665, 16823, 17174, 17875, 19161, 19636, 19431, 18880] \n",
      "Count:  114 \n",
      "\n",
      "Outliers: [15427, 13458, 14756, 13245, 13664, 13503, 13549, 13872, 13734, 13591, 16064, 13675, 13496, 13603, 13525, 16015, 13964, 15992, 14336, 13212, 16555, 14118, 13610, 13237, 16184, 15402, 14814, 13770, 16307, 13826, 14275, 13582, 14852, 13194, 13973, 13726, 13320, 13120, 13499, 13758, 13191, 16124, 13577, 14026, 13142, 13695, 13402, 13247, 14732, 16422, 13757, 16032, 16328, 14411, 16437, 15202, 16413, 13269, 13966, 15972, 15379, 12936, 12965, 13116, 13464, 16291, 15787, 13225, 13348, 13341, 13206, 13744, 13570] \n",
      "Count:  73 \n",
      "\n",
      "Outliers: [11994, 12490, 12185, 11849, 11996, 12061, 11878, 12504, 11935, 12808, 11836, 12742, 11904, 12169, 11916, 11957, 12031] \n",
      "Count:  17 \n",
      "\n",
      "Outliers: [11713, 11691] \n",
      "Count:  2 \n",
      "\n",
      "Outliers: [11631] \n",
      "Count:  1 \n",
      "\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Drops MonthlyIncome outlier rows from ibm in place; one outlier report is printed per pass.\n",
    "remove_outliers('MonthlyIncome')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Attrition</th>\n",
       "      <th>BusinessTravel</th>\n",
       "      <th>DailyRate</th>\n",
       "      <th>Dp_Sales&amp;Development</th>\n",
       "      <th>Dp_Sales</th>\n",
       "      <th>Dp_HumanResources</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Education</th>\n",
       "      <th>EnvironmentSatisfaction</th>\n",
       "      <th>EF_Life Sciences</th>\n",
       "      <th>EF_Medical</th>\n",
       "      <th>EF_Marketing</th>\n",
       "      <th>EF_TechnicalDegree</th>\n",
       "      <th>EF_HumanResources</th>\n",
       "      <th>EF_Other</th>\n",
       "      <th>Gender</th>\n",
       "      <th>HourlyRate</th>\n",
       "      <th>JobInvolvement</th>\n",
       "      <th>JobLevel</th>\n",
       "      <th>JobSatisfaction</th>\n",
       "      <th>JR_HealthcareRepresentive</th>\n",
       "      <th>JR_HumanResource</th>\n",
       "      <th>JR_LaboratoryTechnician</th>\n",
       "      <th>JR_Manager</th>\n",
       "      <th>JR_ManufacturingDirector</th>\n",
       "      <th>JR_ResearchDirector</th>\n",
       "      <th>JR_ResearchScientist</th>\n",
       "      <th>JR_SalesExecutive</th>\n",
       "      <th>JR_SalesRepresentative</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>MonthlyRate</th>\n",
       "      <th>NumCompaniesWorked</th>\n",
       "      <th>MS_Married</th>\n",
       "      <th>MS_Single</th>\n",
       "      <th>MS_Divorced</th>\n",
       "      <th>OverTime</th>\n",
       "      <th>PercentSalaryHike</th>\n",
       "      <th>PerformanceRating</th>\n",
       "      <th>RelationshipSatisfaction</th>\n",
       "      <th>StockOptionLevel</th>\n",
       "      <th>TotalWorkingYears</th>\n",
       "      <th>TrainingTimesLastYear</th>\n",
       "      <th>WorkLifeBalance</th>\n",
       "      <th>YearsAtCompany</th>\n",
       "      <th>YearsInCurrentRole</th>\n",
       "      <th>YearsSinceLastPromotion</th>\n",
       "      <th>YearsWithCurrManager</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>41</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1102</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>94</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5993</td>\n",
       "      <td>19479</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>49</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>279</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>61</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5130</td>\n",
       "      <td>24907</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1373</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>92</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2090</td>\n",
       "      <td>2396</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1392</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>56</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2909</td>\n",
       "      <td>23159</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>591</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3468</td>\n",
       "      <td>16632</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1465</th>\n",
       "      <td>36</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>884</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>41</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2571</td>\n",
       "      <td>12290</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1466</th>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>613</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>42</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9991</td>\n",
       "      <td>21457</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1467</th>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>155</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>87</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6142</td>\n",
       "      <td>5174</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1468</th>\n",
       "      <td>49</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1023</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>63</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5390</td>\n",
       "      <td>13243</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1469</th>\n",
       "      <td>34</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>628</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>82</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4404</td>\n",
       "      <td>10228</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1263 rows × 48 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      Age  Attrition  BusinessTravel  DailyRate  Dp_Sales&Development  \\\n",
       "0      41          1               1       1102                     0   \n",
       "1      49          0               2        279                     1   \n",
       "2      37          1               1       1373                     1   \n",
       "3      33          0               2       1392                     1   \n",
       "4      27          0               1        591                     1   \n",
       "...   ...        ...             ...        ...                   ...   \n",
       "1465   36          0               2        884                     1   \n",
       "1466   39          0               1        613                     1   \n",
       "1467   27          0               1        155                     1   \n",
       "1468   49          0               2       1023                     0   \n",
       "1469   34          0               1        628                     1   \n",
       "\n",
       "      Dp_Sales  Dp_HumanResources  DistanceFromHome  Education  \\\n",
       "0            1                  0                 1          2   \n",
       "1            0                  0                 8          1   \n",
       "2            0                  0                 2          2   \n",
       "3            0                  0                 3          4   \n",
       "4            0                  0                 2          1   \n",
       "...        ...                ...               ...        ...   \n",
       "1465         0                  0                23          2   \n",
       "1466         0                  0                 6          1   \n",
       "1467         0                  0                 4          3   \n",
       "1468         1                  0                 2          3   \n",
       "1469         0                  0                 8          3   \n",
       "\n",
       "      EnvironmentSatisfaction  EF_Life Sciences  EF_Medical  EF_Marketing  \\\n",
       "0                           2                 1           0             0   \n",
       "1                           3                 1           0             0   \n",
       "2                           4                 0           0             0   \n",
       "3                           4                 1           0             0   \n",
       "4                           1                 0           1             0   \n",
       "...                       ...               ...         ...           ...   \n",
       "1465                        3                 0           1             0   \n",
       "1466                        4                 0           1             0   \n",
       "1467                        2                 1           0             0   \n",
       "1468                        4                 0           1             0   \n",
       "1469                        2                 0           1             0   \n",
       "\n",
       "      EF_TechnicalDegree  EF_HumanResources  EF_Other  Gender  HourlyRate  \\\n",
       "0                      0                  0         0       1          94   \n",
       "1                      0                  0         0       0          61   \n",
       "2                      0                  0         1       0          92   \n",
       "3                      0                  0         0       1          56   \n",
       "4                      0                  0         0       0          40   \n",
       "...                  ...                ...       ...     ...         ...   \n",
       "1465                   0                  0         0       0          41   \n",
       "1466                   0                  0         0       0          42   \n",
       "1467                   0                  0         0       0          87   \n",
       "1468                   0                  0         0       0          63   \n",
       "1469                   0                  0         0       0          82   \n",
       "\n",
       "      JobInvolvement  JobLevel  JobSatisfaction  JR_HealthcareRepresentive  \\\n",
       "0                  3         2                4                          0   \n",
       "1                  2         2                2                          0   \n",
       "2                  2         1                3                          0   \n",
       "3                  3         1                3                          0   \n",
       "4                  3         1                2                          0   \n",
       "...              ...       ...              ...                        ...   \n",
       "1465               4         2                4                          0   \n",
       "1466               2         3                1                          1   \n",
       "1467               4         2                2                          0   \n",
       "1468               2         2                2                          0   \n",
       "1469               4         2                3                          0   \n",
       "\n",
       "      JR_HumanResource  JR_LaboratoryTechnician  JR_Manager  \\\n",
       "0                    0                        0           0   \n",
       "1                    0                        0           0   \n",
       "2                    0                        1           0   \n",
       "3                    0                        0           0   \n",
       "4                    0                        1           0   \n",
       "...                ...                      ...         ...   \n",
       "1465                 0                        1           0   \n",
       "1466                 0                        0           0   \n",
       "1467                 0                        0           0   \n",
       "1468                 0                        0           0   \n",
       "1469                 0                        1           0   \n",
       "\n",
       "      JR_ManufacturingDirector  JR_ResearchDirector  JR_ResearchScientist  \\\n",
       "0                            0                    0                     0   \n",
       "1                            0                    0                     1   \n",
       "2                            0                    0                     0   \n",
       "3                            0                    0                     1   \n",
       "4                            0                    0                     0   \n",
       "...                        ...                  ...                   ...   \n",
       "1465                         0                    0                     0   \n",
       "1466                         0                    0                     0   \n",
       "1467                         1                    0                     0   \n",
       "1468                         0                    0                     0   \n",
       "1469                         0                    0                     0   \n",
       "\n",
       "      JR_SalesExecutive  JR_SalesRepresentative  MonthlyIncome  MonthlyRate  \\\n",
       "0                     1                       0           5993        19479   \n",
       "1                     0                       0           5130        24907   \n",
       "2                     0                       0           2090         2396   \n",
       "3                     0                       0           2909        23159   \n",
       "4                     0                       0           3468        16632   \n",
       "...                 ...                     ...            ...          ...   \n",
       "1465                  0                       0           2571        12290   \n",
       "1466                  0                       0           9991        21457   \n",
       "1467                  0                       0           6142         5174   \n",
       "1468                  1                       0           5390        13243   \n",
       "1469                  0                       0           4404        10228   \n",
       "\n",
       "      NumCompaniesWorked  MS_Married  MS_Single  MS_Divorced  OverTime  \\\n",
       "0                      8           0          1            0         1   \n",
       "1                      1           1          0            0         0   \n",
       "2                      6           0          1            0         1   \n",
       "3                      1           1          0            0         1   \n",
       "4                      9           1          0            0         0   \n",
       "...                  ...         ...        ...          ...       ...   \n",
       "1465                   4           1          0            0         0   \n",
       "1466                   4           1          0            0         0   \n",
       "1467                   1           1          0            0         1   \n",
       "1468                   2           1          0            0         0   \n",
       "1469                   2           1          0            0         0   \n",
       "\n",
       "      PercentSalaryHike  PerformanceRating  RelationshipSatisfaction  \\\n",
       "0                    11                  3                         1   \n",
       "1                    23                  4                         4   \n",
       "2                    15                  3                         2   \n",
       "3                    11                  3                         3   \n",
       "4                    12                  3                         4   \n",
       "...                 ...                ...                       ...   \n",
       "1465                 17                  3                         3   \n",
       "1466                 15                  3                         1   \n",
       "1467                 20                  4                         2   \n",
       "1468                 14                  3                         4   \n",
       "1469                 12                  3                         1   \n",
       "\n",
       "      StockOptionLevel  TotalWorkingYears  TrainingTimesLastYear  \\\n",
       "0                    0                  8                      0   \n",
       "1                    1                 10                      3   \n",
       "2                    0                  7                      3   \n",
       "3                    0                  8                      3   \n",
       "4                    1                  6                      3   \n",
       "...                ...                ...                    ...   \n",
       "1465                 1                 17                      3   \n",
       "1466                 1                  9                      5   \n",
       "1467                 1                  6                      0   \n",
       "1468                 0                 17                      3   \n",
       "1469                 0                  6                      3   \n",
       "\n",
       "      WorkLifeBalance  YearsAtCompany  YearsInCurrentRole  \\\n",
       "0                   1               6                   4   \n",
       "1                   3              10                   7   \n",
       "2                   3               0                   0   \n",
       "3                   3               8                   7   \n",
       "4                   3               2                   2   \n",
       "...               ...             ...                 ...   \n",
       "1465                3               5                   2   \n",
       "1466                3               7                   7   \n",
       "1467                3               6                   2   \n",
       "1468                2               9                   6   \n",
       "1469                4               4                   3   \n",
       "\n",
       "      YearsSinceLastPromotion  YearsWithCurrManager  \n",
       "0                           0                     5  \n",
       "1                           1                     7  \n",
       "2                           0                     0  \n",
       "3                           3                     0  \n",
       "4                           2                     2  \n",
       "...                       ...                   ...  \n",
       "1465                        0                     3  \n",
       "1466                        1                     7  \n",
       "1467                        0                     3  \n",
       "1468                        0                     8  \n",
       "1469                        1                     2  \n",
       "\n",
       "[1263 rows x 48 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Classification"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Support Vector Machine (prepared by Teh Liang Sean) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import important library to do SVM\n",
    "from sklearn import svm\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Target for the SVM: the Attrition column (whether an employee stays with or leaves IBM)\n",
    "y_svm = ibm['Attrition']\n",
    "# Candidate predictors: every other column of the prepared frame\n",
    "x_svm_find = ibm.drop('Attrition', axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                     Features         Score\n",
      "29              MonthlyIncome  26471.159476\n",
      "30                MonthlyRate   1308.443569\n",
      "2                   DailyRate   1111.594737\n",
      "44         YearsInCurrentRole    109.263859\n",
      "43             YearsAtCompany    103.805057\n",
      "46       YearsWithCurrManager    100.636711\n",
      "40          TotalWorkingYears     95.843571\n",
      "35                   OverTime     60.367656\n",
      "6            DistanceFromHome     57.197704\n",
      "0                         Age     46.705340\n",
      "28     JR_SalesRepresentative     27.299127\n",
      "33                  MS_Single     26.251695\n",
      "39           StockOptionLevel     24.376114\n",
      "20  JR_HealthcareRepresentive     10.935616\n",
      "24   JR_ManufacturingDirector      9.987076\n"
     ]
    }
   ],
   "source": [
    "# Score every candidate feature against the target with the chi-squared (chi²) test,\n",
    "# which expects non-negative feature values, and list the 15 highest scores.\n",
    "from sklearn.feature_selection import SelectKBest, chi2\n",
    "\n",
    "best_15_features = SelectKBest(score_func=chi2, k=15)\n",
    "fit = best_15_features.fit(x_svm_find, y_svm)\n",
    "\n",
    "# Feature names and their scores as two one-column frames, joined side by side\n",
    "dfcolumns = pd.DataFrame(x_svm_find.columns)\n",
    "dfscores = pd.DataFrame(fit.scores_)\n",
    "top_15_feature_scores = pd.concat([dfcolumns, dfscores], axis=1)\n",
    "top_15_feature_scores.columns = ['Features', 'Score']\n",
    "\n",
    "# The frame holds a score for every feature; print only the 15 largest\n",
    "print(top_15_feature_scores.nlargest(15, 'Score'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Feature matrix for the SVM: the 15 columns with the highest chi-squared scores,\n",
    "# listed in descending score order.\n",
    "# Each label is exactly the source column name; the previous version labelled one\n",
    "# column 'JR_HealthcareRepresentive ' with a stray trailing space.\n",
    "svm_feature_names = [\n",
    "    'MonthlyIncome',\n",
    "    'MonthlyRate',\n",
    "    'DailyRate',\n",
    "    'YearsInCurrentRole',\n",
    "    'YearsAtCompany',\n",
    "    'YearsWithCurrManager',\n",
    "    'TotalWorkingYears',\n",
    "    'OverTime',\n",
    "    'DistanceFromHome',\n",
    "    'Age',\n",
    "    'JR_SalesRepresentative',\n",
    "    'MS_Single',\n",
    "    'StockOptionLevel',\n",
    "    'JR_HealthcareRepresentive',\n",
    "    'JR_ManufacturingDirector',\n",
    "]\n",
    "# .copy() yields an independent frame, so later edits cannot write back into ibm\n",
    "ibm_svm_features_df = ibm[svm_feature_names].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>MonthlyRate</th>\n",
       "      <th>DailyRate</th>\n",
       "      <th>YearsInCurrentRole</th>\n",
       "      <th>YearsAtCompany</th>\n",
       "      <th>YearsWithCurrManager</th>\n",
       "      <th>TotalWorkingYears</th>\n",
       "      <th>OverTime</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Age</th>\n",
       "      <th>JR_SalesRepresentative</th>\n",
       "      <th>MS_Single</th>\n",
       "      <th>StockOptionLevel</th>\n",
       "      <th>JR_HealthcareRepresentive</th>\n",
       "      <th>JR_ManufacturingDirector</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5993</td>\n",
       "      <td>19479</td>\n",
       "      <td>1102</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>41</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5130</td>\n",
       "      <td>24907</td>\n",
       "      <td>279</td>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>49</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2090</td>\n",
       "      <td>2396</td>\n",
       "      <td>1373</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>37</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2909</td>\n",
       "      <td>23159</td>\n",
       "      <td>1392</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>33</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3468</td>\n",
       "      <td>16632</td>\n",
       "      <td>591</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1465</th>\n",
       "      <td>2571</td>\n",
       "      <td>12290</td>\n",
       "      <td>884</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>36</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1466</th>\n",
       "      <td>9991</td>\n",
       "      <td>21457</td>\n",
       "      <td>613</td>\n",
       "      <td>7</td>\n",
       "      <td>7</td>\n",
       "      <td>7</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1467</th>\n",
       "      <td>6142</td>\n",
       "      <td>5174</td>\n",
       "      <td>155</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1468</th>\n",
       "      <td>5390</td>\n",
       "      <td>13243</td>\n",
       "      <td>1023</td>\n",
       "      <td>6</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>49</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1469</th>\n",
       "      <td>4404</td>\n",
       "      <td>10228</td>\n",
       "      <td>628</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>34</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1263 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      MonthlyIncome  MonthlyRate  DailyRate  YearsInCurrentRole  \\\n",
       "0              5993        19479       1102                   4   \n",
       "1              5130        24907        279                   7   \n",
       "2              2090         2396       1373                   0   \n",
       "3              2909        23159       1392                   7   \n",
       "4              3468        16632        591                   2   \n",
       "...             ...          ...        ...                 ...   \n",
       "1465           2571        12290        884                   2   \n",
       "1466           9991        21457        613                   7   \n",
       "1467           6142         5174        155                   2   \n",
       "1468           5390        13243       1023                   6   \n",
       "1469           4404        10228        628                   3   \n",
       "\n",
       "      YearsAtCompany  YearsWithCurrManager  TotalWorkingYears  OverTime  \\\n",
       "0                  6                     5                  8         1   \n",
       "1                 10                     7                 10         0   \n",
       "2                  0                     0                  7         1   \n",
       "3                  8                     0                  8         1   \n",
       "4                  2                     2                  6         0   \n",
       "...              ...                   ...                ...       ...   \n",
       "1465               5                     3                 17         0   \n",
       "1466               7                     7                  9         0   \n",
       "1467               6                     3                  6         1   \n",
       "1468               9                     8                 17         0   \n",
       "1469               4                     2                  6         0   \n",
       "\n",
       "      DistanceFromHome  Age  JR_SalesRepresentative  MS_Single  \\\n",
       "0                    1   41                       0          1   \n",
       "1                    8   49                       0          0   \n",
       "2                    2   37                       0          1   \n",
       "3                    3   33                       0          0   \n",
       "4                    2   27                       0          0   \n",
       "...                ...  ...                     ...        ...   \n",
       "1465                23   36                       0          0   \n",
       "1466                 6   39                       0          0   \n",
       "1467                 4   27                       0          0   \n",
       "1468                 2   49                       0          0   \n",
       "1469                 8   34                       0          0   \n",
       "\n",
       "      StockOptionLevel  JR_HealthcareRepresentive   JR_ManufacturingDirector  \n",
       "0                    0                           0                         0  \n",
       "1                    1                           0                         0  \n",
       "2                    0                           0                         0  \n",
       "3                    0                           0                         0  \n",
       "4                    1                           0                         0  \n",
       "...                ...                         ...                       ...  \n",
       "1465                 1                           0                         0  \n",
       "1466                 1                           1                         0  \n",
       "1467                 1                           0                         1  \n",
       "1468                 0                           0                         0  \n",
       "1469                 0                           0                         0  \n",
       "\n",
       "[1263 rows x 15 columns]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm_svm_features_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "#assignment ibm_svm_features to x\n",
    "x_svm = ibm_svm_features_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standardize every feature (zero mean, unit variance) so that large-valued columns\n",
    "# do not dominate the SVM\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Learn the per-column mean and standard deviation, then apply them to the same data\n",
    "s_scaler = StandardScaler().fit(x_svm)\n",
    "x_scaled_svm = s_scaler.transform(x_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Undersample with Tomek links to ease the class imbalance (Attrition has few positive cases)\n",
    "from imblearn.under_sampling import TomekLinks\n",
    "\n",
    "# Resample the standardized features rather than the raw x_svm: x_scaled_svm was computed\n",
    "# for the SVM but never passed on, so the models were trained on unscaled columns.\n",
    "# Wrapping the array restores the column labels and the row index shared with y_svm.\n",
    "x_scaled_svm_df = pd.DataFrame(x_scaled_svm, columns=x_svm.columns, index=x_svm.index)\n",
    "\n",
    "tl_svm = TomekLinks(sampling_strategy='not minority')\n",
    "x_tl_svm, y_tl_svm = tl_svm.fit_resample(x_scaled_svm_df, y_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the models on 80% of the resampled data and hold out 20% for testing,\n",
    "# stratified on the target so both parts keep the same class ratio\n",
    "x_train_svm, x_test_svm, y_train_svm, y_test_svm = train_test_split(\n",
    "    x_tl_svm,\n",
    "    y_tl_svm,\n",
    "    test_size=0.2,\n",
    "    random_state=40,\n",
    "    stratify=y_tl_svm,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model 1: SVM with manually chosen hyperparameters (sigmoid kernel)\n",
    "model_1_svm = svm.SVC(\n",
    "    kernel='sigmoid',\n",
    "    C=2,\n",
    "    gamma='scale',\n",
    "    coef0=0.6,\n",
    "    probability=True,\n",
    "    random_state=40,\n",
    ")\n",
    "# fit() returns the fitted estimator, so training and prediction can be chained\n",
    "y_predict_1_svm = model_1_svm.fit(x_train_svm, y_train_svm).predict(x_test_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 4 folds for each of 5400 candidates, totalling 21600 fits\n"
     ]
    }
   ],
   "source": [
    "# Model 2: find the SVM hyperparameters with a cross-validated grid search\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "# Values tried for each hyperparameter:\n",
    "#   kernel: linear / rbf / sigmoid\n",
    "#   C     (regularization parameter): 1.0 to 2.9 in steps of 0.1\n",
    "#   coef0 (independent kernel term):  0.0 to 1.4 in steps of 0.1\n",
    "#   gamma (kernel coefficient):       'auto' or 'scale'\n",
    "c_values = list(np.arange(1.0, 3.0, 0.1))\n",
    "coef0_values = list(np.arange(0.0, 1.5, 0.1))\n",
    "gamma_values = ['auto', 'scale']\n",
    "\n",
    "# One sub-grid per kernel, holding only the parameters that kernel reads: SVC uses\n",
    "# gamma for 'rbf'/'poly'/'sigmoid', coef0 for 'poly'/'sigmoid' and degree for 'poly' only.\n",
    "# 'poly' is not searched, so degree is left out. A single shared grid would refit\n",
    "# identical models once for every value of a parameter the kernel ignores.\n",
    "param_grid = [\n",
    "    {'kernel': ['linear'], 'C': c_values},\n",
    "    {'kernel': ['rbf'], 'C': c_values, 'gamma': gamma_values},\n",
    "    {'kernel': ['sigmoid'], 'C': c_values, 'gamma': gamma_values, 'coef0': coef0_values},\n",
    "]\n",
    "\n",
    "# Fixed random_state for repeatable fits; 4-fold cross-validation on all CPU cores\n",
    "find_best_para_model = svm.SVC(random_state=40)\n",
    "Grid_search_svm = GridSearchCV(find_best_para_model, param_grid, n_jobs=-1, verbose=2, cv=4)\n",
    "# This may take some time to run\n",
    "Grid_search_svm.fit(x_train_svm, y_train_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'C': 2.8000000000000016,\n",
       " 'coef0': 0.0,\n",
       " 'degree': 3,\n",
       " 'gamma': 'scale',\n",
       " 'kernel': 'rbf'}"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the best hyperparameter found by grid search\n",
    "Grid_search_svm.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model 2: build the SVM from the best hyperparameters found by the grid search.\n",
    "# They are read from best_params_ instead of being copied by hand from the printed\n",
    "# output, so this model stays in step with the search if the data or the grid changes.\n",
    "# probability=True is added here because the search estimator was created without it.\n",
    "model_2_svm = svm.SVC(**Grid_search_svm.best_params_, probability=True, random_state=40)\n",
    "model_2_svm.fit(x_train_svm, y_train_svm)\n",
    "y_predict_2_svm = model_2_svm.predict(x_test_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy of prediction classification result for 2 model\n",
      "Hyperparameters that try to tune manually (model 1):  0.7416666666666667\n",
      "Best hyperparameters found using GridSearchCV (model 2):  0.8166666666666667\n"
     ]
    }
   ],
   "source": [
    "# Compare the test-set accuracy of the two SVM models\n",
    "accuracy_model_1 = metrics.accuracy_score(y_test_svm, y_predict_1_svm)\n",
    "accuracy_model_2 = metrics.accuracy_score(y_test_svm, y_predict_2_svm)\n",
    "\n",
    "print('Accuracy of prediction classification result for 2 model')\n",
    "print('Hyperparameters that try to tune manually (model 1): ', accuracy_model_1)\n",
    "print('Best hyperparameters found using GridSearchCV (model 2): ', accuracy_model_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[183  12]\n",
      " [ 32  13]]\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.85      0.94      0.89       195\n",
      "           1       0.52      0.29      0.37        45\n",
      "\n",
      "    accuracy                           0.82       240\n",
      "   macro avg       0.69      0.61      0.63       240\n",
      "weighted avg       0.79      0.82      0.79       240\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:70: FutureWarning: Pass labels=[0, 1] as keyword args. From version 1.0 (renaming of 0.25) passing these as positional arguments will result in an error\n",
      "  warnings.warn(f\"Pass {args_msg} as keyword args. From version \"\n"
     ]
    }
   ],
   "source": [
    "# Evaluate model 2 with a confusion matrix (rows: true class, columns: predicted class)\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "\n",
    "# labels must be passed by keyword: scikit-learn 1.0+ rejects it as a positional argument\n",
    "print(confusion_matrix(y_test_svm, y_predict_2_svm, labels=[0, 1]))\n",
    "\n",
    "# Evaluate model 2 by per-class precision, recall and F1-measure\n",
    "print(classification_report(y_test_svm, y_predict_2_svm))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAz+UlEQVR4nO3de5xN9frA8c/jfhvkmtyT2wwhg3QKxYlSEerERIkzlK5O/VIicolURIgiOXGcSFIJUS5RSpLLSMepk5RObrlPGM/vj7XG2e3msjFrr71nP+/Xa71mr71uz9ozs579/X7X+n5FVTHGGBO78vgdgDHGGH9ZIjDGmBhnicAYY2KcJQJjjIlxlgiMMSbGWSIwxpgYZ4nAGGNinCUCc4aIrBCRAyJSMIP3ewe910pEdgXMi4jcLyJbROSoiOwSkbkiUj/EYxcUkekickhEfhaR/lms+7iIHAmYjovIaREp4y5/RkR+cPf1vYgMDNr+RjfOIyKyVkTig85juIj8KCIH3XNPCIhxmrvPwyLypYhcF7Tv3iKyw933YhG5KGjfo0Vknzs9IyISsPw/7rmkn9fSoG0HishO97zmiEjxgOWlROSfIrLXnWalLxeRWiLytojsEZH9IrJERGoHffZjReQn9/c/SUTyB+37Lff3+r2IdAtYVk1ENOj3MShgeUkReU1EfnGnIUGfV0MRWe1+1rtEZHBmv3fjIVW1ySaAakAasB+4JWjZCqB30HutgF0B8+OBfwPXAAWBIkASMCDE4z8NrAYuAOoCPwPtQtx2CPBhwHxtoKj7uiKwFejkztcEDgFXAvmAx4AdQD53+a3AT8DFQF43rg3usqLusarhfIm6ATgMVHOXtwR+ARKAAsBkYGVAXH2A7UAlN64UoG/A8v8AbTI5xzuAr4HKQDHgbeC1gOWTgKVAcaAEsAx43l3WFOgFlALyA8OArwO2fdL97EsBZYFPgaEBy/8B/NM97pXAQSAh4O9G0z+/DOJ+FZjr/j1Uc/9GegYsTwFGuJ91DWA3cJPf/w+xNvkegE2RMQGDgTXA88C7QctWkEUicC+uaUDT8zj+j8C1AfPDgDkhbCfuxeWOTJZXBDYD/+fO3wu8F7A8D3AcaO3OPwq8EbA8AUjN4vibgM7u62eBiQHLLnIvkjXc+bVAcsDyXsCnAfNZJYJ5wCMB81cAqUARd/594J6A5f2AJZnsq5QbV2l3fj0ByR/oBvzgvi4KnABqBSz/OzDKfZ1dItgLNAmYfxxYHTB/DIgPmJ8LPOb3/0OsTVY1ZNL1AGa5U1sRKX8W27bGSQqfZbaCiHQTkU2ZLLsA56L5VcDbX+FchLNzFVAeeDNonwNE5AiwC+diNjt9kTsRNF/PnZ8DXOJWp+TH+Sa+OJO4ywO1cEocme2bgH0nkP05znKrcJaKSIMM4gycL4iThAEmAjeIyAXu59kZJzlkpAXws6ruy2LflUSkhHt+aar6TTZxf+9W7byaXkUXtL/A1/UC5scBPUQkv1td1RynNGPCyBKBQUSuBKrifBP+Aucbdrest/qd0jhF+kyp6mxVvTSTxcXcnwcD3jsIxIVw7DuAeap6JOh4o9ztL8P5Bpu+7w+AluK0cRTA+YZaAKfqAvc8VuNU4RwHbgEeCj6omyRm4VTPfO2+vQi4VUQuFZHCOKUsDdh3sQzOsVhAO0ESzjfsqsBHwBIRKekuex/o7dbJl8ApuRCw7w3ueexzpzSc6qLguCvhJI3ANpj3gQdEpKyIXAjcH7Dv4JjT407/3ewFmrgxN3bfnxWw7mJggIjEicglwF0BMQO8C3TB+ay/Bqap6ufBcRtvWSIw4FxMl6rqXnd+tvteulM4dcuB8gMn3df7gArncfz0i3jxgPeK49S/Z8q92N4CvJbRcnV8iXORGeq+9zXOub2Ic9Evg1NPnd7w/STOha0yUMjd7kMROXPxEpE8OMnlBE5VU/rxlrvbvwl8j1PVczhg
30cyOMcj6taJqOoaVT2uqsdU9WngV5wSD8B0nLr6FTglkI/c99P3PRf4BudCXBwnmb8e9HmVxWlHmKSq/whYNAL4EtiIU321AOd3+0sGMafHfdiN+YiqrlfVU6r6X/fzuDagIft+nM//XzjtGv9Ij1lESuEkiqdwPuvKOKXRezBhZYkgxrkX01txviX/LCI/43wDbhBQNbET55tqoOo4FzuA5ThVCYnnEoOqHsC5KAdWhTTgf1UumemE07i9Ipv18uE0RKYfb56q1lPV0jgX7qpA+rfQBsA/VXWXe3GbgdOAHQ/O3TvANJzqqM6qepIAqjpRVWuqajmchJAP2OIu3nqW56i41SqqelpVn1TVaqpayd3uR3dK39cUVT3qlo5eAq5P35FbXbQUWKiqI4JiPq6q96pqRVW9GCexf6GqaTjJJZ+I1AzYJKu407szTo97v6omqeqFqpqAc81Jr0K8GKfaaab7We/CqZq7/g97Nd7yu5HCJn8noCvOxbQKcGHAtAp4zl2nLc63w6Y4/+C1gG38/o6XCTjf+lrhVFEUAm4j9LuGRgErcS66dXASQ5Z3DeFc2J4Kei8Pzt05F7ixNnX3dX/AOo1x7lIpi3M3zOyAZU8CH+Nc6PMA3YGjQEl3+Us4d9UUyyCeQjj13+J+niuAkQHL+7qfW0WcNpGt6Z+hu/6fAj67R4A9/K9BtxROMhOcpLSF3zc8f+T+Dgq70yRgjbusOM7F98VMPsf0eAS4HPiB3zfcz8H5Jl/UjTHwrqFmOHdp5cGpIvwn8FHAtjXc9/MC1+FUJSUExPUrTjVkHpy/u0+AEX7/X8Ta5HsANvn8B+AUzZ/L4P1bcW7hTL+t8i73wnUI53bLAUCegPUFeMBd5xjON9V/BvzTJwFbs4ijIE71xyHgv0D/oOVHgKsC5iviVFldErReHvec9rvbfIPTDiAB63yMU7WxH5iCe6upu6wQTh36bjeWDbgJCafkoDh36xwJmJLc5SVx7iI66n52TwN5gz6jZ9zj7ndfi7ssIWDbfTilrMSAbWvhtFscwymJBX8+1YF33G33u59BTXfZHW7cR4PiruIub4FTjXXMPUZS0L5L4VQXHcUpHXYLWNYV+M5dthuYCVwY9Hf0k7vvjUDboH1fg1MaO+h+Zi/j3gllU/im9D9CY4wxMcraCIwxJsZZIjDGmBhnicAYY2KcJQJjjIlx+fwO4GyVKVNGq1Wr5ncYxhgTVb744ou9qlo2o2VRlwiqVavG+vXr/Q7DGGOiioh8n9kyqxoyxpgYZ4nAGGNinCUCY4yJcZYIjDEmxlkiMMaYGOdZIhBnIPJfRGRLJstFRMaLM9D3JhG5zKtYjDHGZM7LEsEMoF0Wy6/DGWavJpCMM9C3McaYMPPsOQJVXSUi1bJYpQMwU53uTz8VkZIiUkFVsxzy0Bhjwm32up28vfHH7Ff0iKqSmppK4xrlefLGUIbyPjt+thFUxBkAI90u970/EJFkEVkvIuv37NkTluCMMSbd2xt/JGX3IV+OfeTIETZs2MDGjRs5efJk9hucAz+fLJYM3stwcARVnQpMBUhMTLQBFIwxYRdfoTj/7NM8bMdLTU1l6NChjBkzhjJlyjBp0iQ6dWroybH8TAS7cAarTlcJZyQjY4yJeR07dmTJkiX07NmT5557jgsuuMCzY/lZNbQQ6OHePXQ5cNDaB4wxsezw4cOkpqYCMGDAAJYuXcr06dM9TQLgYYlARP6BM5B5GRHZhTMoeH4AVX0JWARcjzP+7TGgp1exGGMik9+NsKFK2X2I+ArFPT3GkiVLSE5O5vbbb2fEiBG0atXK0+MF8vKuoa7ZLFegn1fHN8ZEvvRGWK8vsucrvkJxOjTM8F6W87Z//3769+/Pa6+9Rp06dWjfvr0nx8lK1HVDbYzJXcLdCBtJli9fTlJSEvv27WPgwIE88cQTFCpUKOxxWCIwxhiflCtXjurVq7N48WIaNmzoWxyWCIwxnsusLSAaqoVy
kqry2muvsWHDBsaPH0/9+vVZu3YtIhndTR8+1umcMcZzmT2Q5WXde6T57rvvaNu2LT179mTjxo0cP34cwPckAFYiMMaESay2BaSlpTFx4kQee+wx8uTJw6RJk+jTpw958kTO93BLBMYY46G9e/cyePBgWrZsyUsvvUSVKlX8DukPLBEYY87Kudz7H2ttASdPnmTWrFn06NGD8uXLs2HDBqpXrx4R1UAZiZyyiTEmKpxLB2yx1BbwxRdfkJiYSM+ePfnggw8AuPjiiyM2CYCVCIwx5yBW6/uzcvz4cYYOHcqzzz5LuXLleOutt2jbtq3fYYXEEoExxuSAjh07snTpUnr37s2YMWMoWbKk3yGFzKqGjDHmHB06dOhMJ3GPP/44y5Yt4+WXX46qJABWIjDGcHYNwLHW8JuZRYsW0bdvX26//XZGjhxJy5Yt/Q7pnFmJwBhzVg3AsdTwm5G9e/fSvXt32rdvT1xcHDfddJPfIZ03KxEYYwBrAA7FBx98QFJSEgcOHGDw4ME8/vjjFCxY0O+wzpslAmOMCVGFChWoVasWkydPpn79+n6Hk2MsERgTA7JrA7B6/4ypKtOmTePLL79k4sSJ1KtXj9WrV0f0MwHnwtoIjIkB2bUBxHq9f0a+/fZb2rRpw1//+ldSUlIiqpO4nGYlAmNihLUBhCYtLY3x48czcOBA8uXLx5QpU+jdu3dEdRKX0ywRGGNMgL179zJ06FBat27N5MmTqVSpkt8heS73pjhjjAnRiRMnmD59OqdPn6Z8+fJs3LiRhQsXxkQSACsRGBP1QnkYzBqDM/f5559z1113sWXLFipVqsS1115LtWrV/A4rrKxEYEyUC+VhMGsM/qNjx47x8MMPc/nll3PgwAEWLlzItdde63dYvrASgTG5gDUEn70OHTqwbNkykpOTeeaZZyhRooTfIfnGSgTGmJhx8ODBM53EDRo0iA8//JApU6bEdBIAKxEYEzHOZeQvsPr/UL377rv07duX7t278/TTT9OiRQu/Q4oYViIwJkKcy8hfYPX/2dmzZw/dunXjxhtvpFSpUnTq1MnvkCKOlQiMiSBW15+zli5dSlJSEgcPHmTo0KEMGDCAAgUK+B1WxLFEYIzJtSpWrEjdunWZPHkyCQkJfocTsSwRGOOjwHYBq+s/f6dPn+aVV17hyy+/PHPxX7Vqld9hRTxrIzDGR4HtAlbXf3527NhB69at6dOnD9u3bz/TSZzJnpUIjPGZtQucn7S0NMaNG8egQYPInz8/L7/8Mr169cqVvYR6xdMSgYi0E5HtIrJDRAZksLyEiLwjIl+JyFYR6ellPMaY3Gfv3r0MHz6cP//5z6SkpNC7d29LAmfJs0QgInmBicB1QDzQVUTig1brB6SoagOgFfCciFiTvjEmS7/99hsvv/zy7zqJW7BgARUrWtXaufCyRNAU2KGq36rqCWAO0CFoHQXixEnfxYD9wCkPYzImIsxet5O/TPnknJ4biHXr1q2jcePGJCcns2zZMgCqVq1qpYDz4GUiqAj8EDC/y30v0ItAXeAnYDPwgKqeDt6RiCSLyHoRWb9nzx6v4jUmbNIbia2BOHRHjx6lf//+NG/enIMHD/Lee+/FbCdxOc3LxuKM0rMGzbcFNgLXADWAD0Rktar+7muSqk4FpgIkJiYG78OYqGSNxGenY8eOLFu2jLvvvptRo0ZRvLjdaptTvCwR7AIqB8xXwvnmH6gnMF8dO4DvgDoexmSMiSK//vrrmdtABw8ezMqVK5k0aZIlgRzmZYngc6CmiFQHfgRuA7oFrbMTaA2sFpHyQG3gWw9jMsYXwR3K2cNj2Vu4cCF333033bt3Z9SoUVx11VV+h5RreVYiUNVTwL3AEmAb8IaqbhWRviLS111tGHCFiGwGlgOPquper2Iyxi/BHcpZ20DmfvnlF2677TY6dOhAmTJl6NKli98h5XqePlCmqouARUHvvRTw+ifAWntMTLA2gewtXryYpKQkjhw5
wrBhw3j00UfJnz+/32HlevZksTEmYlSuXJn69eszadIk4uODHzsyXrG+howxvjl9+jSTJ0+mT58+ACQkJLBixQpLAmFmJQJjOPfRwUJljcN/9M0339C7d29Wr17Nn//8Z1JTUylUqJDfYcUkKxEYw7mPDhYqaxz+n1OnTjF69GguvfRSNm/ezKuvvsqSJUssCfjISgTGuKwxNzz27dvH6NGjuf7665k4cSIVKlTwO6SYZyUCY4znfvvtN6ZMmXKmk7ivvvqK+fPnWxKIEFYiMLleKPX/VofvnU8++YRevXqxbds2atSoQZs2bahcuXL2G5qwsRKByfVCqf+3Ovycd+TIER588EH+9Kc/cfToURYvXkybNm38DstkwEoEJiZY/X/4dezYkeXLl3PvvfcycuRI4uLi/A7JZMJKBMaYHHPgwIEzncQNGTKE1atXM2HCBEsCES7kEoGIFFXVo14GY0xGzvcef6v/D4/58+fTr18/evTowejRo7nyyiv9DsmEKNsSgYhcISIpOB3HISINRGSS55EZ4zrfe/yt/t9bP//8M126dKFz585ceOGF3HbbbX6HZM5SKCWCsTgDyCwEUNWvRKSFp1EZE8Tq+CPT+++/T1JSEseOHWPkyJE8/PDD1klcFAqpakhVfwgaDzTNm3CMMdGkatWqNGrUiIkTJ1Knjo0pFa1CaSz+QUSuAFRECojIw7jVRMaY2HL69GlefPFF/vrXvwIQHx/P8uXLLQlEuVBKBH2BF3AGnt8FLAXu8TIok3vkRGdu1tgbGbZv306vXr1Ys2YNbdu2tU7icpFQSgS1VTVJVcurajlVvR2o63VgJnfIic7crLHXXydPnuTpp5+mQYMGpKSkMGPGDN5//31LArlIKCWCCcBlIbxnTIasoTe6HThwgDFjxnDjjTcyYcIELrzwQr9DMjks00QgIs2BK4CyItI/YFFxIK/XgRlj/JOamsr06dPp27cv5cqVY9OmTVSqVMnvsIxHsioRFACKuesEPhZ4CLDRpGPY2dT7W/1+9Pn444/p1asX33zzDbVq1aJNmzaWBHK5TBOBqq4EVorIDFX9PowxmQiXXu8fygXe6vejx+HDh3nssceYOHEi1apVY+nSpdZJXIwIpY3gmIiMARKAM61DqnqNZ1GZiGf1/rlPx44d+eijj3jggQcYPnw4xYoV8zskEyahJIJZwD+BG3BuJb0D2ONlUMaY8Ni/fz+FChWiSJEiDBs2DBGheXNL8LEmlNtHS6vqNOCkqq5U1buAyz2OyxjjsXnz5lG3bl2GDBkCwBVXXGFJIEaFkghOuj93i0h7EWkEWMtRjJq9bifrvtvvdxjmPOzevZtOnTpxyy23ULlyZZKSkvwOyfgslKqh4SJSAvgbzvMDxYEHvQzKRK70u4WsATg6vffee9x+++2kpqYyevRo+vfvT758Nj5VrMv2L0BV33VfHgSuBhCRP3kZlIlszaqXoluzKn6HYc7BxRdfTJMmTXjxxRepVauW3+GYCJFp1ZCI5BWRriLysIjUc9+7QUTWAi+GLUJjzDlLS0vjhRdeoFevXgDUrVuXpUuXWhIwv5NViWAaUBn4DBgvIt8DzYEBqrogDLGZCJL+EJk9IBY9UlJS6N27N5988gnXX3+9dRJnMpVVIkgELlXV0yJSCNgLXKKqP4cnNBNJApOAtQ9EthMnTvDMM88wbNgw4uLieP311+nWrRtBY4oYc0ZWieCEqp4GUNVUEfnmbJOAiLTD6cI6L/CKqo7KYJ1WwDggP7BXVVuezTFM+NhDZNHh119/ZezYsdx8882MHz+ecuXK+R2SiXBZJYI6IrLJfS1ADXdeAFXVS7PasYjkBSYCf8YZx+BzEVmoqikB65QEJgHtVHWniNhfrDHn4Pjx40ybNo177rmHcuXKsXnzZi666CK/wzJRIqtEcL5jDjQFdqjqtwAiMgfoAKQErNMNmK+qOwFU9ZfzPKY5C9Z5XO6watUqevfuzb/+9S/q
1q1L69atLQmYs5LpXUOq+n1WUwj7rgj8EDC/y30vUC3gAhFZISJfiEiPjHYkIskisl5E1u/ZY71b5JSzGTTG2gYiz6FDh7jnnnto2bIlp06dYtmyZbRu3drvsEwU8vJJkoxapjSD4zcGWgOFgU9E5FNV/eZ3G6lOBaYCJCYmBu/DnAer949eHTt2ZMWKFTz00EMMGzaMokWL+h2SiVJeJoJdOLefpqsE/JTBOntV9ShwVERWAQ2AbzDG/MHevXspUqQIRYoUYcSIEYgIl19uXX+Z8xNKX0OISGERqX2W+/4cqCki1UWkAHAbsDBonbeBq0Qkn4gUAZoB287yOMbkeqrKnDlzqFu3Lk8++SQAzZs3tyRgckS2iUBEbgQ2Aovd+YYiEnxB/wNVPQXcCyzBubi/oapbRaSviPR119nm7ncTzoNrr6jqlnM8F2NypR9//JGOHTvStWtXqlevTo8eGTalGXPOQqkaGoJzB9AKAFXdKCLVQtm5qi4CFgW991LQ/BhgTCj7MybWvPvuuyQlJXHy5EmeffZZHnzwQfLmtSHDTc4KJRGcUtWD9lSiMeF3ySWXcMUVVzBhwgQuueQSv8MxuVQobQRbRKQbkFdEaorIBGCtx3EZE5PS0tIYO3Ysd955JwB16tTh/ffftyRgPBVKieA+YCDwGzAbp85/uJdBmXNnD4lFr61bt9KrVy/WrVtH+/btrZM4EzahlAhqq+pAVW3iTk+oaqrnkZlzYg+JRZ8TJ07w1FNP0ahRI/79738ze/Zs3nnnHUsCJmxCKRE8LyIVgLnAHFXd6nFM5jzZQ2LR5ddff2X8+PHccsstjBs3jrJly/odkokx2ZYIVPVqoBWwB5gqIptF5AmvAzMmNzt27BgvvPACaWlpZzqJmzVrliUB44uQnix2u58eLyIfAf8HDMbaCcIulPp/q/ePfB999BG9e/fm22+/pV69erRu3ZoKFSr4HZaJYaE8UFZXRIaIyBacISrX4nQXYcIslPp/q/ePXAcPHqRPnz5cc801iAgfffSRdRJnIkIoJYJXgX8A16pqcF9BJsys/j96dezYkVWrVvHII48wZMgQihQp4ndIxgAhJAJVtc5MjDlHe/bsoWjRohQpUoSnn36avHnz0qRJE7/DMuZ3Mq0aEpE33J+bRWRTwLQ5YOQyY0wGVJXZs2f/rpO4yy+/3JKAiUhZlQgecH/eEI5AjMktdu3axd133827775Ls2bNzjwlbEykymqEst3uy3syGJ3snvCEZ0x0WbhwIfHx8Xz44YeMHTuWNWvWkJCQ4HdYxmQplCeL/5zBe9fldCDG5Aa1atXiyiuvZPPmzdZTqIkamVYNicjdON/8Lw5qE4gD1ngdmDHR4NSpU4wbN45NmzYxc+ZM6tSpw6JFi7Lf0JgIklUbwWzgfeBpYEDA+4dVdb+nUcWo7B4Ys4fFIsumTZvo1asX69evp0OHDtZJnIlaWVUNqar+B+gHHA6YEJFS3ocWe7J7YMweFosMv/32G08++SSNGzdm586dvPHGG7z11luWBEzUyq5EcAPwBaBA4Mg0ClzsYVwxyx4Yi3yHDh1i0qRJdO3albFjx1K6dGm/QzLmvGSaCFT1Bvdn9fCFY0xkOnr0KFOnTuX++++nbNmybNmyhfLly/sdljE5IpS+hv4kIkXd17eLyPMiUsX70IyJDMuXL6d+/fr079+flStXAlgSMLlKKLePTgaOiUgDnJ5Hvwf+7mlUMWb2up38ZconIQ8oY8Lj119/pXfv3rRp04Z8+fKxcuVKrrnmGr/DMibHhZIITqmqAh2AF1T1BZxbSE0OSW8ktsbgyHLzzTczY8YMHn30Ub766itatGjhd0jGeCKU3kcPi8hjQHfgKhHJC+T3NqzYY43EkeG///0vxYoVo2jRoowaNYp8+fLRuHFjv8MyxlOhlAj+gjNw/V3uADUVgTGeRmVMmKkqf//734mPjz/TSVyzZs0sCZiYEMpQlT8Ds4ASInID
kKqqMz2PzJgw2blzJ+3bt6dHjx7Url2bXr16+R2SMWEVyl1DtwKfAbcAtwLrRKSL14EZEw5vv/02CQkJrFq1ivHjx7N69Wrq1q3rd1jGhFUobQQDgSaq+guAiJQFlgHzvAzMGC+pKiJCnTp1aNWqFRMmTKBatWp+h2WML0JpI8iTngRc+0LczpiIc+rUKUaPHk337t0BqF27Nu+8844lARPTQrmgLxaRJSJyp4jcCbwHWPeKJup89dVXNGvWjAEDBnDs2DFSU1P9DsmYiBBKY/EjwBTgUqABMFVVH/U6MGNySmpqKk888QSJiYn8+OOPzJs3j/nz51sncca4shqPoCbwLFAD2Aw8rKqZ95FsTIQ6fPgwU6ZMISkpieeff55SpazzXGMCZVUimA68C3TG6YF0wtnuXETaich2EdkhIgOyWK+JiKTZ3Ugmpxw5coRnn32WtLQ0ypYtS0pKCjNmzLAkYEwGsrprKE5VX3ZfbxeRDWezY/cJ5Ik4Q13uAj4XkYWqmpLBeqOBJWezf2Mys3TpUpKTk9m5cyeNGzfm6quvpmzZsn6HZUzEyqpEUEhEGonIZSJyGVA4aD47TYEdqvqtqp4A5uD0VxTsPuBN4JcMluV6s9ftZN13NuBbTti/fz89e/akbdu2FCpUiNWrV3P11Vf7HZYxES+rEsFu4PmA+Z8D5hXIrhvGisAPAfO7gGaBK4hIReBmd19NMtuRiCQDyQBVquSuHrDTh6a0zubO380338yaNWt4/PHHGTRokDUGGxOirAamOd+vUpLBexo0Pw54VFXTRDJa/UwsU4GpAImJicH7iHrNqpeiW7PcleDC5eeffyYuLo6iRYsyZswYChQoQMOGDf0Oy5io4uWDYbuAygHzlYCfgtZJBOaIyH+ALsAkEenoYUwml1BVZsyYQXx8PIMHDwagadOmlgSMOQehdDFxrj4HaopIdeBH4DagW+AKgcNgisgM4F1VXeBhTGExe93OM1U+2Ukfh8CE7j//+Q99+vRh6dKlXHnllSQnJ/sdkjFRzbMSgaqeAu7FuRtoG/CGqm4Vkb4i0ter40aC9IFmQmGD0Zydt956i3r16rF27VpefPFFVq5cSe3atf0Oy5iolm2JQJzK+yTgYlV9yh2v+EJV/Sy7bVV1EUHdUajqS5mse2dIEUcJG2gmZ6V3EpeQkECbNm144YUXqFq1qt9hGZMrhFIimAQ0B7q684dxng8wxnMnT55k5MiRJCUlAVCrVi0WLFhgScCYHBRKImimqv2AVABVPQAU8DQqY4ANGzbQtGlTBg4cSFpaGr/99pvfIRmTK4XSWHzSffpX4cx4BKc9jSoKBTYQWwPw+Tl+/DhPPfUUY8aMoWzZsrz11lt07NjR77CMybVCKRGMB94CyonICOBjYKSnUUWhwAZiawA+P0ePHmXatGnccccdpKSkWBIwxmPZlghUdZaIfAG0xnlIrKOqbvM8sihkDcTn7vDhw0yePJm//e1vlClThpSUFMqUKeN3WMbEhFDGLK4CHAPeARYCR933jMkRixcvpl69egwYMIDVq1cDWBIwJoxCaSN4D6d9QIBCQHVgO5DgYVwmBuzbt4/+/fszc+ZM6taty5o1a2je3EpUxoRbKFVD9QPn3Z5H+3gWkYkZnTp1Yu3atQwaNIiBAwdSsGBBv0MyJiaddRcTqrpBRDLtKdSYrOzevZu4uDiKFSvGs88+S4ECBWjQoIHfYRkT00J5srh/wGwe4DJgj2cRmVxJVXn11Vfp378/d911F88//zxNmtj3CWMiQSglgriA16dw2gze9Cac6GLPDoTm22+/pU+fPixbtowWLVrQt2+u7mrKmKiTZSJwHyQrpqqPhCmeqJL+7EB8heL27EAm5s+fT/fu3cmbNy+TJ08mOTmZPHm87P3cGHO2Mk0EIpJPVU+FOCxlzLJnBzKW3klc/fr1adeuHePGjaNy5crZb2iMCbusSgSf4bQHbBSRhcBc4Gj6QlWd73Fs
JgqdOHGCZ555hq1btzJ79mxq1qzJm29aTaIxkSyUMnopYB/OuMI3ADe6P435nfXr19OkSRMGDRoEOEnBGBP5sioRlHPvGNrC/x4oS5frxg0OlTUQ/9Hx48d58sknee6557jwwgt5++23uemmm/wOyxgToqxKBHmBYu4UF/A6fYpJ1rncHx09epQZM2bQq1cvtm7daknAmCiTVYlgt6o+FbZIoog1EMOhQ4eYNGkSjzzyCGXKlGHbtm2ULl3a77CMMecgqxKBZLHMxLD33nuPhIQEBg4ceKaTOEsCxkSvrEoErcMWRQQLbBOA2G4X2LNnDw8++CCzZ88mISGBefPm0axZM7/DMsacp0xLBKq6P5yBRKrANgGI7XaBzp07M3fuXIYMGcKGDRssCRiTS5x1p3OxKJbbBH788UdKlChBsWLFGDt2LAULFqRevXp+h2WMyUH2rL/JkKry8ssvEx8fz+DBgwFo3LixJQFjciFLBOYP/v3vf9O6dWuSk5Np3Lgx/fr18zskY4yHLBFkYfa6naz7LraaSubNm0f9+vX54osvmDp1KsuXL6dGjRp+h2WM8ZC1EWQh/W6hWGgcTu8krkGDBrRv356xY8dSqVIlv8MyxoSBlQiy0ax6Kbo1q+J3GJ45ceIEQ4cO5bbbbkNVqVmzJnPnzrUkYEwMsUQQwz777DMaN27MkCFDyJcvn3USZ0yMskQQg44dO8bDDz9M8+bNOXDgAO+88w6zZs2yweONiVGWCGLQ8ePHef3110lOTiYlJYUbbrBexY2JZZ4mAhFpJyLbRWSHiAzIYHmSiGxyp7Ui0sDLeGLZwYMHGTFiBKdOnaJ06dJs27aNyZMnU7x4bHaXYYz5H88SgTve8UTgOiAe6Coi8UGrfQe0VNVLgWHAVK/iiWXvvPPOmQfDPv74YwAuuOACn6MyxkQKL0sETYEdqvqtqp4A5gAdAldQ1bWqesCd/RSIiFtVZq/byV+mfPK7Poai0Z49e+jatSs33XQTpUuXZt26dbRq1crvsIwxEcbLRFAR+CFgfpf7XmZ6Ae9ntEBEkkVkvYis37NnTw6GmLH0juaivYO5zp078+abb/LUU0+xfv16EhMT/Q7JGBOBvHygLKPxDDIc4lJErsZJBFdmtFxVp+JWGyUmJoZlmMxo7Whu165dlCxZkmLFijFu3DgKFixIQkKC32EZYyKYlyWCXUDlgPlKwE/BK4nIpcArQAdV3edhPLna6dOnmTJlCvHx8WcGj7/sssssCRhjsuVlIvgcqCki1UWkAHAbsDBwBRGpAswHuqvqNx7Gkqv961//4pprrqFv3740bdqU++67z++QjDFRxLOqIVU9JSL3AkuAvMB0Vd0qIn3d5S8Bg4HSwCQRATilqr5UZAeORBZNo5DNnTuXHj16ULBgQaZNm0bPnj1xP0tjjAmJp53OqeoiYFHQey8FvO4N9PYyhlAFNhBHQyNxeidxjRo1okOHDjz//PNcdNFFfodljIlC1vtogGhoIP7tt98YMWIE27Zt44033uCSSy5hzpw5fodljIli1sVEFPn000+57LLLGDZsGIULF7ZO4owxOcISQRQ4evQoDz30EFdccQWHDx9m0aJFzJw50zqJM8bkCEsEUSA1NZU5c+Zwzz33sHXrVq677jq/QzLG5CLWRhChfv31VyZMmMBjjz12ppO4kiVL+h2WMSYXshJBBFqwYAHx8fEMHTqUtWvXAlgSMMZ4xhJBBPnvf//Lrbfeys0330y5cuVYt24dLVq08DssY0wuZ1VDEaRLly589tlnDB8+nP/7v/8jf/78fodkjIkBlgh8tnPnTi644ALi4uIYP348BQsWJD4+eNgGY4zxjlUN+eT06dNMnDiRhIQEBg8eDECjRo0sCRhjws4SgQ+2b99Oy5Ytuffee2nevDkPPPCA3yEZY2JYTFcN+dHR3BtvvEGPHj0oXLgwr776KnfccYd1EmeM8VVMlwjSO5oDPO9oTtUZT6dx48Z0
6tSJbdu2ceedd1oSMMb4LqZLBOB9R3OpqakMGzaMr7/+mnnz5lGjRg1mz57t2fGMMeZsxXSJwGtr166lUaNGjBw5kri4OOskzhgTkSwReODIkSPcf//9XHnllRw7dozFixczY8YM6yTOGBORLBF44MSJE8ybN49+/fqxZcsW2rZt63dIxhiTqZhvI8gp+/fvZ/z48TzxxBOUKlWKbdu2UaJECb/DMsaYbFmJIAe8+eabxMfHM3z48DOdxFkSMMZEC0sE52H37t107tyZLl26cNFFF7F+/XrrJM4YE3ViNhHMXreTdd/tP6993Hrrrbz33nuMGjWKzz77jIYNG+ZMcMYYE0Yx20aQ/kTx2T5E9v3331OqVCni4uKYMGEChQsXpnbt2l6EaIwxYRGzJQKAZtVL0a1ZlZDWPX36NBMmTCAhIYFBgwYB0LBhQ0sCxpioF7MlgrPx9ddf07t3b9asWUO7du146KGH/A7JGGNyTEyXCEIxZ84cGjRowLZt25g5cyaLFi2iatWqfodljDE5xhJBJk6fPg1AkyZNuOWWW0hJSaF79+7WSZwxJtexRBDk+PHjDBgwgM6dO6Oq1KhRg9dff53y5cv7HZoxxnjCEkGA1atX07BhQ0aPHk3p0qU5efKk3yEZY4znLBEAhw8fpl+/frRo0YKTJ0/ywQcf8Morr1CgQAG/QzPGGM/FzF1DgaORwe9HJDt58iQLFizgwQcfZPjw4RQtWtSvMI0xJuxiJhGkj0aWfvGvVbYwsnM9p041oVSpUnz99dfExcX5HKUxxoSfp1VDItJORLaLyA4RGZDBchGR8e7yTSJymZfxxFcozpzky+lSahcfP3ULbz7Tn08++QTAkoAxJmZ5lghEJC8wEbgOiAe6ikh80GrXATXdKRmY7FU84IwT0KlTJ2699VYqV67M+vXrueqqq7w8pDHGRDwvSwRNgR2q+q2qngDmAB2C1ukAzFTHp0BJEangVUBbU7ayePFinnnmGT799FMaNGjg1aGMMSZqeNlGUBH4IWB+F9AshHUqArsDVxKRZJwSA1WqhNY3ULD4i4pTLn8C9z30FbVq1TqnfRhjTG7kZSLI6BFcPYd1UNWpwFSAxMTEPywPxZM3JpzLZsYYk+t5WTW0C6gcMF8J+Okc1jHGGOMhLxPB50BNEakuIgWA24CFQessBHq4dw9dDhxU1d3BOzLGGOMdz6qGVPWUiNwLLAHyAtNVdauI9HWXvwQsAq4HdgDHgJ5exWOMMSZjnj5QpqqLcC72ge+9FPBagX5exmCMMSZr1teQMcbEOEsExhgT4ywRGGNMjLNEYIwxMU6c9troISJ7gO/PcfMywN4cDCca2DnHBjvn2HA+51xVVctmtCDqEsH5EJH1qprodxzhZOccG+ycY4NX52xVQ8YYE+MsERhjTIyLtUQw1e8AfGDnHBvsnGODJ+ccU20Exhhj/ijWSgTGGGOCWCIwxpgYlysTgYi0E5HtIrJDRAZksFxEZLy7fJOIXOZHnDkphHNOcs91k4isFZGoH6czu3MOWK+JiKSJSJdwxueFUM5ZRFqJyEYR2SoiK8MdY04L4W+7hIi8IyJfuecc1b0Yi8h0EflFRLZksjznr1+qmqsmnC6v/w1cDBQAvgLig9a5HngfZ4S0y4F1fscdhnO+ArjAfX1dLJxzwHof4vSC28XvuMPwey4JpABV3PlyfscdhnN+HBjtvi4L7AcK+B37eZxzC+AyYEsmy3P8+pUbSwRNgR2q+q2qngDmAB2C1ukAzFTHp0BJEakQ7kBzULbnrKprVfWAO/spzmhw0SyU3zPAfcCbwC/hDM4joZxzN2C+qu4EUNVoP+9QzlmBOBERoBhOIjgV3jBzjqquwjmHzOT49Ss3JoKKwA8B87vc9852nWhytufTC+cbRTTL9pxFpCJwM/ASuUMov+dawAUiskJEvhCRHmGLzhuhnPOLQF2cYW43Aw+o6unwhOeLHL9+eTowjU8kg/eC75EN
ZZ1oEvL5iMjVOIngSk8j8l4o5zwOeFRV05wvi1EvlHPOBzQGWgOFgU9E5FNV/cbr4DwSyjm3BTYC1wA1gA9EZLWqHvI4Nr/k+PUrNyaCXUDlgPlKON8UznadaBLS+YjIpcArwHWqui9MsXkllHNOBOa4SaAMcL2InFLVBWGJMOeF+re9V1WPAkdFZBXQAIjWRBDKOfcERqlTgb5DRL4D6gCfhSfEsMvx61durBr6HKgpItVFpABwG7AwaJ2FQA+39f1y4KCq7g53oDko23MWkSrAfKB7FH87DJTtOatqdVWtpqrVgHnAPVGcBCC0v+23gatEJJ+IFAGaAdvCHGdOCuWcd+KUgBCR8kBt4NuwRhleOX79ynUlAlU9JSL3Aktw7jiYrqpbRaSvu/wlnDtIrgd2AMdwvlFErRDPeTBQGpjkfkM+pVHcc2OI55yrhHLOqrpNRBYDm4DTwCuqmuFtiNEgxN/zMGCGiGzGqTZ5VFWjtntqEfkH0AooIyK7gCeB/ODd9cu6mDDGmBiXG6uGjDHGnAVLBMYYE+MsERhjTIyzRGCMMTHOEoExxsQ4SwQmIrm9hW4MmKplse6RHDjeDBH5zj3WBhFpfg77eEVE4t3XjwctW3u+Mbr7Sf9ctrg9bpbMZv2GInJ9Thzb5F52+6iJSCJyRFWL5fS6WexjBvCuqs4TkWuBZ1X10vPY33nHlN1+ReQ14BtVHZHF+ncCiap6b07HYnIPKxGYqCAixURkufttfbOI/KGnURGpICKrAr4xX+W+f62IfOJuO1dEsrtArwIucbft7+5ri4g86L5XVETec/u/3yIif3HfXyEiiSIyCijsxjHLXXbE/fnPwG/obkmks4jkFZExIvK5OH3M9wnhY/kEt7MxEWkqzjgTX7o/a7tP4j4F/MWN5S9u7NPd43yZ0edoYpDffW/bZFNGE5CG05HYRuAtnKfgi7vLyuA8VZleoj3i/vwbMNB9nReIc9ddBRR1338UGJzB8WbgjlcA3AKsw+m8bTNQFKd7461AI6Az8HLAtiXcnytwvn2fiSlgnfQYbwZec18XwOlFsjCQDDzhvl8QWA9UzyDOIwHnNxdo584XB/K5r9sAb7qv7wReDNh+JHC7+7okTh9ERf3+fdvk75TrupgwucZxVW2YPiMi+YGRItICp+uEikB54OeAbT4HprvrLlDVjSLSEogH1rhdaxTA+SadkTEi8gSwB6eH1tbAW+p04IaIzAeuAhYDz4rIaJzqpNVncV7vA+NFpCDQDlilqsfd6qhL5X+jqJUAagLfBW1fWEQ2AtWAL4APAtZ/TURq4vREmT+T418L3CQiD7vzhYAqRHd/ROY8WSIw0SIJZ/Spxqp6UkT+g3MRO0NVV7mJoj3wdxEZAxwAPlDVriEc4xFVnZc+IyJtMlpJVb8RkcY4/b08LSJLVfWpUE5CVVNFZAVO18l/Af6RfjjgPlVdks0ujqtqQxEpAbwL9APG4/S385Gq3uw2rK/IZHsBOqvq9lDiNbHB2ghMtCgB/OImgauBqsEriEhVd52XgWk4w/19CvxJRNLr/IuISK0Qj7kK6OhuUxSnWme1iFwEHFPV14Fn3eMEO+mWTDIyB6ejsKtwOlPD/Xl3+jYiUss9ZoZU9SBwP/Cwu00J4Ed38Z0Bqx7GqSJLtwS4T9zikYg0yuwYJnZYIjDRYhaQKCLrcUoHX2ewTitgo4h8iVOP/4Kq7sG5MP5DRDbhJIY6oRxQVTfgtB18htNm8IqqfgnUBz5zq2gGAsMz2HwqsCm9sTjIUpxxaZepM/wiOONEpAAbxBm0fArZlNjdWL7C6Zr5GZzSyRqc9oN0HwHx6Y3FOCWH/G5sW9x5E+Ps9lFjjIlxViIwxpgYZ4nAGGNinCUCY4yJcZYIjDEmxlkiMMaYGGeJwBhjYpwlAmOMiXH/D95IOHfpsjmgAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Evaluate the SVM classifier with a ROC curve and its AUC.\n",
    "# roc_auc_score is imported together with roc_curve so this cell does not\n",
    "# rely on an import made by some other cell; plt is provided by the\n",
    "# notebook's first import cell (import matplotlib.pyplot as plt).\n",
    "from sklearn.metrics import roc_auc_score, roc_curve\n",
    "\n",
    "# Column 1 of predict_proba is used as the score for the ROC analysis;\n",
    "# presumably this is the positive class - confirm against model_2_svm.classes_.\n",
    "y_pred_prob_svm = model_2_svm.predict_proba(x_test_svm)[:, 1]\n",
    "fpr, tpr, threshold = roc_curve(y_test_svm, y_pred_prob_svm)\n",
    "auc = roc_auc_score(y_test_svm, y_pred_prob_svm)\n",
    "\n",
    "# Dashed black diagonal from (0, 0) to (1, 1) as a reference line,\n",
    "# then the model's ROC curve on top of it.\n",
    "plt.plot([0, 1], [0, 1], 'k--')\n",
    "plt.plot(fpr, tpr)\n",
    "plt.title(f'AUC: {auc}')\n",
    "plt.xlabel('False Positive Rate')\n",
    "plt.ylabel('True Positive Rate')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}