{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Naive Bayes Classification",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "N9Go17xQ0uDv",
"colab_type": "text"
},
"source": [
"# Naive Bayes Classification"
]
},
{
"cell_type": "code",
"metadata": {
"id": "f3LF08QLrjXg",
"colab_type": "code",
"outputId": "fcd38855-451f-4b73-ff98-5475bdcc2fc8",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
}
},
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# Load the customer-behaviour CSV from the notebook-relative data folder.\n",
"data = pd.read_csv('./data/Customer_Behaviour.csv')\n",
"\n",
"# Preview the first rows only: head(400) hard-coded the dataset's row count\n",
"# and asked for every row instead of a short sample.\n",
"data.head()"
],
"execution_count": 47,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>User ID</th>\n",
" <th>Gender</th>\n",
" <th>Age</th>\n",
" <th>EstimatedSalary</th>\n",
" <th>Purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>15624510</td>\n",
" <td>Male</td>\n",
" <td>19</td>\n",
" <td>19000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>15810944</td>\n",
" <td>Male</td>\n",
" <td>35</td>\n",
" <td>20000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>15668575</td>\n",
" <td>Female</td>\n",
" <td>26</td>\n",
" <td>43000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>15603246</td>\n",
" <td>Female</td>\n",
" <td>27</td>\n",
" <td>57000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>15804002</td>\n",
" <td>Male</td>\n",
" <td>19</td>\n",
" <td>76000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>395</th>\n",
" <td>15691863</td>\n",
" <td>Female</td>\n",
" <td>46</td>\n",
" <td>41000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>396</th>\n",
" <td>15706071</td>\n",
" <td>Male</td>\n",
" <td>51</td>\n",
" <td>23000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>397</th>\n",
" <td>15654296</td>\n",
" <td>Female</td>\n",
" <td>50</td>\n",
" <td>20000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>398</th>\n",
" <td>15755018</td>\n",
" <td>Male</td>\n",
" <td>36</td>\n",
" <td>33000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>399</th>\n",
" <td>15594041</td>\n",
" <td>Female</td>\n",
" <td>49</td>\n",
" <td>36000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>400 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" User ID Gender Age EstimatedSalary Purchased\n",
"0 15624510 Male 19 19000 0\n",
"1 15810944 Male 35 20000 0\n",
"2 15668575 Female 26 43000 0\n",
"3 15603246 Female 27 57000 0\n",
"4 15804002 Male 19 76000 0\n",
".. ... ... ... ... ...\n",
"395 15691863 Female 46 41000 1\n",
"396 15706071 Male 51 23000 1\n",
"397 15654296 Female 50 20000 1\n",
"398 15755018 Male 36 33000 0\n",
"399 15594041 Female 49 36000 1\n",
"\n",
"[400 rows x 5 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 47
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Urr8Uoy72bbO",
"colab_type": "text"
},
"source": [
"**Gender**\n",
"\n",
"- 0 - Female\n",
"- 1 - Male"
]
},
{
"cell_type": "code",
"metadata": {
"id": "KawwwlAPyBQo",
"colab_type": "code",
"colab": {}
},
"source": [
"from sklearn import preprocessing\n",
"\n",
"# Use one encoder per column: refitting a single shared LabelEncoder keeps only\n",
"# the last mapping, so the earlier columns could not be encoded again for new\n",
"# samples or decoded with inverse_transform.\n",
"gender_encoder = preprocessing.LabelEncoder()\n",
"age_encoder = preprocessing.LabelEncoder()\n",
"income_encoder = preprocessing.LabelEncoder()\n",
"# `label_encoder` keeps its original name and, as before, ends up fitted on the target column.\n",
"label_encoder = preprocessing.LabelEncoder()\n",
"\n",
"# LabelEncoder maps the sorted distinct values of a column to integer codes.\n",
"gender_encoded = gender_encoder.fit_transform(data[\"Gender\"])\n",
"# NOTE(review): Age and EstimatedSalary are numeric; label-encoding replaces the\n",
"# real values with integer codes. Confirm this is intended before reusing the model.\n",
"age_encoded = age_encoder.fit_transform(data[\"Age\"])\n",
"income_encoded = income_encoder.fit_transform(data[\"EstimatedSalary\"])\n",
"purchase_encoded = label_encoder.fit_transform(data[\"Purchased\"])"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "1uXA6V0-y6kL",
"colab_type": "code",
"outputId": "50b834ac-7024-4454-9ef4-46f1c24b7462",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 54
}
},
"source": [
"# Build one (gender, age, salary) tuple of encoded values per row.\n",
"features = list(zip(gender_encoded, age_encoded, income_encoded))\n",
"\n",
"# Show the row count and a short preview instead of printing every tuple.\n",
"print(len(features), \"rows; first 5:\", features[:5])"
],
"execution_count": 60,
"outputs": [
{
"output_type": "stream",
"text": [
"[(1, 1, 4), (1, 17, 5), (0, 8, 26), (0, 9, 39), (1, 1, 57), (1, 9, 40), (0, 9, 65), (0, 14, 116), (1, 7, 17), (0, 17, 47), (0, 8, 61), (0, 8, 35), (1, 2, 67), (1, 14, 3), (1, 0, 63), (1, 11, 61), (1, 29, 9), (1, 27, 10), (1, 28, 12), (0, 30, 13), (1, 27, 7), (0, 29, 32), (1, 30, 24), (0, 27, 7), (1, 28, 8), (1, 29, 5), (1, 31, 12), (0, 29, 14), (1, 11, 26), (1, 13, 3), (1, 13, 55), (0, 9, 105), (0, 3, 1), (0, 10, 27), (1, 9, 71), (1, 17, 11), (0, 15, 12), (1, 12, 32), (0, 8, 53), (0, 9, 15), (0, 9, 2), (0, 15, 34), (1, 17, 85), (1, 12, 0), (0, 10, 65), (1, 5, 5), (1, 7, 60), (0, 9, 37), (1, 12, 104), (0, 13, 70), (0, 6, 16), (0, 0, 27), (0, 11, 64), (0, 17, 8), (0, 9, 40), (0, 6, 38), (0, 5, 31), (1, 10, 60), (1, 4, 3), (0, 14, 91), (1, 9, 5), (1, 7, 68), (0, 5, 48), (1, 14, 94), (0, 41, 64), (1, 6, 40), (1, 6, 4), (0, 5, 63), (0, 4, 45), (0, 13, 49), (1, 7, 61), (0, 6, 11), (0, 2, 8), (0, 15, 87), (1, 14, 3), (1, 16, 86), (1, 0, 35), (0, 4, 11), (0, 10, 68), (0, 8, 2), (1, 12, 61), (1, 21, 25), (1, 2, 32), (1, 17, 69), (0, 12, 44), (0, 13, 92), (1, 6, 38), (0, 10, 66), (1, 8, 62), (1, 17, 33), (1, 4, 62), (0, 12, 90), (1, 8, 0), (0, 11, 12), (0, 11, 64), (0, 17, 27), (0, 17, 9), (1, 10, 96), (1, 17, 54), (0, 10, 21), (1, 9, 69), (1, 10, 41), (0, 14, 67), (0, 15, 115), (0, 1, 6), (1, 3, 53), (0, 8, 19), (1, 9, 70), (1, 8, 67), (0, 20, 61), (0, 21, 52), (0, 19, 52), (1, 20, 43), (1, 19, 38), (1, 24, 61), (1, 22, 39), (1, 17, 56), (1, 18, 35), (1, 22, 41), (1, 23, 41), (0, 18, 56), (1, 19, 53), (0, 22, 56), (1, 17, 36), (0, 23, 34), (0, 21, 43), (1, 24, 47), (1, 8, 16), (1, 12, 2), (0, 8, 65), (1, 13, 40), (1, 15, 15), (1, 12, 68), (0, 3, 49), (0, 10, 38), (1, 5, 45), (0, 2, 63), (1, 12, 84), (0, 10, 41), (1, 1, 9), (1, 1, 66), (0, 0, 49), (1, 17, 41), (1, 12, 70), (0, 16, 9), (0, 6, 70), (0, 9, 75), (0, 23, 14), (1, 11, 43), (1, 2, 55), (0, 8, 0), (1, 23, 28), (1, 13, 57), (0, 18, 33), (1, 22, 30), (0, 13, 0), (1, 28, 41), (1, 11, 56), (1, 8, 14), (0, 14, 104), (1, 
14, 78), (1, 7, 71), (0, 19, 17), (1, 17, 22), (0, 15, 50), (0, 0, 67), (0, 4, 38), (0, 17, 52), (1, 11, 114), (0, 11, 30), (1, 3, 69), (1, 16, 89), (0, 8, 92), (0, 16, 26), (0, 16, 53), (0, 5, 12), (0, 17, 30), (1, 7, 7), (1, 6, 8), (0, 13, 18), (1, 8, 1), (0, 13, 52), (0, 14, 91), (1, 15, 26), (0, 15, 42), (1, 13, 48), (0, 2, 63), (0, 15, 24), (1, 17, 53), (1, 10, 16), (1, 6, 65), (0, 1, 10), (1, 11, 26), (1, 1, 51), (1, 10, 70), (1, 16, 26), (0, 12, 60), (0, 2, 20), (1, 8, 61), (1, 17, 7), (1, 17, 23), (1, 31, 55), (0, 21, 103), (0, 23, 52), (0, 40, 79), (0, 29, 30), (0, 37, 100), (0, 34, 88), (0, 22, 109), (0, 28, 7), (0, 30, 75), (1, 34, 116), (0, 41, 25), (1, 17, 40), (1, 29, 26), (0, 42, 85), (1, 31, 47), (1, 22, 59), (0, 28, 75), (1, 41, 110), (0, 23, 61), (1, 17, 72), (1, 19, 111), (1, 42, 80), (0, 17, 42), (1, 19, 36), (0, 18, 98), (1, 38, 102), (0, 22, 53), (0, 24, 61), (0, 17, 113), (1, 21, 25), (1, 22, 84), (1, 31, 67), (0, 20, 86), (1, 28, 60), (1, 22, 39), (0, 19, 61), (0, 28, 63), (0, 35, 110), (1, 24, 115), (1, 20, 41), (0, 32, 69), (0, 38, 81), (0, 23, 53), (0, 33, 112), (0, 17, 33), (0, 39, 95), (1, 23, 35), (0, 17, 76), (0, 26, 23), (1, 19, 35), (0, 30, 103), (0, 19, 112), (0, 32, 27), (0, 34, 71), (0, 23, 53), (1, 22, 39), (0, 40, 74), (0, 27, 101), (0, 17, 58), (1, 18, 111), (0, 37, 97), (0, 17, 53), (1, 30, 71), (0, 24, 85), (1, 22, 56), (1, 19, 55), (0, 29, 111), (1, 22, 43), (0, 25, 102), (0, 41, 57), (1, 42, 25), (1, 21, 83), (0, 39, 10), (1, 39, 55), (1, 20, 52), (1, 31, 69), (0, 34, 22), (0, 32, 20), (0, 41, 69), (1, 17, 43), (1, 19, 51), (0, 34, 6), (1, 30, 108), (0, 19, 73), (0, 19, 44), (0, 30, 106), (1, 23, 60), (0, 19, 59), (1, 21, 103), (1, 31, 70), (1, 37, 23), (1, 19, 58), (0, 17, 39), (0, 18, 45), (1, 24, 54), (0, 25, 86), (1, 27, 60), (1, 28, 91), (0, 40, 22), (1, 30, 55), (0, 19, 105), (1, 19, 60), (0, 22, 42), (1, 24, 37), (0, 33, 103), (0, 29, 87), (1, 18, 97), (0, 20, 33), (0, 24, 51), (1, 21, 75), (0, 20, 33), (0, 31, 
108), (0, 21, 60), (0, 21, 56), (0, 36, 81), (1, 17, 38), (1, 27, 16), (1, 18, 42), (0, 34, 106), (0, 35, 63), (1, 23, 35), (0, 30, 14), (0, 30, 101), (0, 23, 42), (1, 23, 53), (0, 24, 56), (1, 18, 92), (0, 29, 84), (1, 20, 34), (0, 30, 93), (1, 24, 47), (1, 22, 47), (1, 39, 42), (0, 18, 37), (1, 40, 111), (1, 17, 60), (0, 20, 38), (1, 21, 95), (0, 35, 81), (1, 17, 56), (0, 20, 47), (0, 29, 34), (1, 29, 82), (0, 23, 45), (1, 35, 53), (0, 36, 85), (1, 21, 58), (1, 20, 43), (0, 20, 87), (1, 19, 56), (0, 24, 71), (0, 19, 39), (1, 18, 77), (1, 42, 18), (1, 36, 51), (0, 23, 53), (1, 22, 52), (1, 24, 37), (1, 25, 99), (0, 35, 18), (0, 29, 33), (0, 24, 60), (1, 24, 81), (0, 41, 13), (0, 40, 30), (1, 28, 69), (1, 20, 52), (0, 36, 10), (0, 42, 29), (1, 42, 64), (0, 21, 54), (1, 41, 100), (0, 19, 61), (0, 28, 16), (0, 28, 55), (0, 24, 36), (1, 23, 68), (0, 40, 8), (1, 24, 46), (1, 30, 17), (0, 26, 107), (1, 31, 12), (0, 39, 17), (1, 38, 42), (0, 31, 23), (1, 21, 52), (1, 29, 18), (0, 30, 19), (1, 30, 17), (1, 29, 8), (0, 27, 28), (1, 42, 25), (0, 21, 41), (0, 28, 24), (1, 33, 8), (0, 32, 5), (1, 18, 17), (0, 31, 20)]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "-ybOTy2K38JP",
"colab_type": "code",
"colab": {}
},
"source": [
"# Feature tuples and target labels under the names the train/test split cell uses.\n",
"x, y = features, purchase_encoded"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "m63cqqwe3vg3",
"colab_type": "code",
"colab": {}
},
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out 10% of the rows for evaluation; the fixed random_state makes the\n",
"# split repeatable across runs.\n",
"x_train, x_test, y_train, y_test = train_test_split(\n",
"    x,\n",
"    y,\n",
"    test_size=0.1,\n",
"    random_state=1,\n",
")"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "4bH-zYvB3f82",
"colab_type": "text"
},
"source": [
"**Purchase**\n",
"\n",
"- 0 - No\n",
"- 1 - Yes"
]
},
{
"cell_type": "code",
"metadata": {
"id": "UXcmw_00zycc",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "e2560cdc-2e78-4e81-f55a-ef790a55152e"
},
"source": [
"from sklearn import metrics\n",
"from sklearn.naive_bayes import GaussianNB\n",
"\n",
"# Fit Gaussian Naive Bayes on the training split; fit() returns the estimator itself.\n",
"model = GaussianNB().fit(x_train, y_train)\n",
"\n",
"# Predict the held-out rows and report accuracy as a percentage.\n",
"purchase_predicted = model.predict(x_test)\n",
"print(\n",
"    \"Model accuracy(in %):\",\n",
"    metrics.accuracy_score(y_true=y_test, y_pred=purchase_predicted) * 100,\n",
")"
],
"execution_count": 77,
"outputs": [
{
"output_type": "stream",
"text": [
"Model accuracy(in %): 87.5\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "DD3P0tqa-0K_",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 54
},
"outputId": "e17c6642-ee9d-40b4-83d0-5e862016f42a"
},
"source": [
"print(x_test)"
],
"execution_count": 78,
"outputs": [
{
"output_type": "stream",
"text": [
"[(1, 18, 17), (0, 21, 43), (1, 18, 92), (1, 21, 95), (0, 8, 92), (0, 20, 47), (0, 2, 20), (1, 31, 70), (1, 13, 3), (1, 30, 108), (0, 16, 53), (0, 21, 54), (1, 17, 53), (0, 30, 101), (0, 35, 63), (1, 38, 102), (1, 42, 64), (1, 9, 40), (0, 10, 68), (1, 42, 80), (0, 22, 56), (0, 32, 69), (0, 26, 107), (1, 29, 26), (1, 27, 10), (1, 8, 0), (0, 40, 30), (1, 31, 55), (0, 35, 18), (0, 34, 88), (1, 21, 25), (1, 1, 57), (0, 0, 67), (1, 39, 55), (0, 9, 65), (1, 12, 61), (1, 4, 3), (0, 14, 67), (0, 32, 5), (1, 1, 9)]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Ba8oKhN9-30b",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"outputId": "b14b0540-cc86-4b4d-e4bf-24e05020354e"
},
"source": [
"print(purchase_predicted)"
],
"execution_count": 79,
"outputs": [
{
"output_type": "stream",
"text": [
"[0 0 1 1 0 0 0 1 0 1 0 0 0 1 1 1 1 0 0 1 0 1 1 1 0 0 1 1 1 1 0 0 0 1 0 0 0\n",
" 0 1 0]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "NWNHZDVt5jAJ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "3a149903-b6b7-4c71-f18c-c8d67f28b473"
},
"source": [
"# One hand-written sample in the same (gender, age, salary) layout as `features`.\n",
"# The numbers are LabelEncoder codes, not raw age or salary values; gender code 1 is Male.\n",
"encoded_sample = (1, 40, 92)\n",
"\n",
"test = model.predict([encoded_sample])\n",
"print(\"Purchased: {}\".format(test))"
],
"execution_count": 80,
"outputs": [
{
"output_type": "stream",
"text": [
"Purchased: [1]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "elx8Ybro-hmU",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}