The Algorithms logo
The Algorithms
AboutDonate

Naive Bayes Classification

H
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Naive Bayes Classification",
      "provenance": [],
      "collapsed_sections": [],
      "toc_visible": true
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "N9Go17xQ0uDv",
        "colab_type": "text"
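      },
      "source": [
        "# Naive Bayes Classification\n",
        "\n",
        "Trains a Gaussian Naive Bayes model on `./data/Customer_Behaviour.csv` to predict `Purchased` from `Gender`, `Age` and `EstimatedSalary`."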
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "f3LF08QLrjXg",
        "colab_type": "code",
        "outputId": "fcd38855-451f-4b73-ff98-5475bdcc2fc8",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 419
        }
      },
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "\n",
        "# Load the customer-behaviour CSV from the notebook-relative ./data directory.\n",
        "# NOTE(review): np is not referenced by any cell visible in this notebook - confirm it is needed.\n",
        "data = pd.read_csv('./data/Customer_Behaviour.csv')\n",
        "# Preview up to 400 rows; the saved output reports 400 rows x 5 columns, i.e. the whole frame.\n",
        "data.head(400)"
      ],
      "execution_count": 47,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>User ID</th>\n",
              "      <th>Gender</th>\n",
              "      <th>Age</th>\n",
              "      <th>EstimatedSalary</th>\n",
              "      <th>Purchased</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>15624510</td>\n",
              "      <td>Male</td>\n",
              "      <td>19</td>\n",
              "      <td>19000</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>15810944</td>\n",
              "      <td>Male</td>\n",
              "      <td>35</td>\n",
              "      <td>20000</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>15668575</td>\n",
              "      <td>Female</td>\n",
              "      <td>26</td>\n",
              "      <td>43000</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>15603246</td>\n",
              "      <td>Female</td>\n",
              "      <td>27</td>\n",
              "      <td>57000</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>15804002</td>\n",
              "      <td>Male</td>\n",
              "      <td>19</td>\n",
              "      <td>76000</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>395</th>\n",
              "      <td>15691863</td>\n",
              "      <td>Female</td>\n",
              "      <td>46</td>\n",
              "      <td>41000</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>396</th>\n",
              "      <td>15706071</td>\n",
              "      <td>Male</td>\n",
              "      <td>51</td>\n",
              "      <td>23000</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>397</th>\n",
              "      <td>15654296</td>\n",
              "      <td>Female</td>\n",
              "      <td>50</td>\n",
              "      <td>20000</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>398</th>\n",
              "      <td>15755018</td>\n",
              "      <td>Male</td>\n",
              "      <td>36</td>\n",
              "      <td>33000</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>399</th>\n",
              "      <td>15594041</td>\n",
              "      <td>Female</td>\n",
              "      <td>49</td>\n",
              "      <td>36000</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>400 rows × 5 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "      User ID  Gender  Age  EstimatedSalary  Purchased\n",
              "0    15624510    Male   19            19000          0\n",
              "1    15810944    Male   35            20000          0\n",
              "2    15668575  Female   26            43000          0\n",
              "3    15603246  Female   27            57000          0\n",
              "4    15804002    Male   19            76000          0\n",
              "..        ...     ...  ...              ...        ...\n",
              "395  15691863  Female   46            41000          1\n",
              "396  15706071    Male   51            23000          1\n",
              "397  15654296  Female   50            20000          1\n",
              "398  15755018    Male   36            33000          0\n",
              "399  15594041  Female   49            36000          1\n",
              "\n",
              "[400 rows x 5 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 47
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Urr8Uoy72bbO",
        "colab_type": "text"
      },
      "source": [
        "**Gender** (label-encoded)\n",
        "\n",
        "- 0 - Female\n",
        "- 1 - Male"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "KawwwlAPyBQo",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "from sklearn import preprocessing\n",
        "\n",
        "label_encoder = preprocessing.LabelEncoder()\n",
        "\n",
        "gender_encoded = label_encoder.fit_transform(data[\"Gender\"])\n",
        "age_encoded = label_encoder.fit_transform(data[\"Age\"])\n",
        "income_encoded = label_encoder.fit_transform(data[\"EstimatedSalary\"])\n",
        "purchase_encoded = label_encoder.fit_transform(data[\"Purchased\"])"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "1uXA6V0-y6kL",
        "colab_type": "code",
        "outputId": "50b834ac-7024-4454-9ef4-46f1c24b7462",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 54
        }
      },
      "source": [
        "features = list(zip(gender_encoded, age_encoded, income_encoded))\n",
        "print(features)"
      ],
      "execution_count": 60,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[(1, 1, 4), (1, 17, 5), (0, 8, 26), (0, 9, 39), (1, 1, 57), (1, 9, 40), (0, 9, 65), (0, 14, 116), (1, 7, 17), (0, 17, 47), (0, 8, 61), (0, 8, 35), (1, 2, 67), (1, 14, 3), (1, 0, 63), (1, 11, 61), (1, 29, 9), (1, 27, 10), (1, 28, 12), (0, 30, 13), (1, 27, 7), (0, 29, 32), (1, 30, 24), (0, 27, 7), (1, 28, 8), (1, 29, 5), (1, 31, 12), (0, 29, 14), (1, 11, 26), (1, 13, 3), (1, 13, 55), (0, 9, 105), (0, 3, 1), (0, 10, 27), (1, 9, 71), (1, 17, 11), (0, 15, 12), (1, 12, 32), (0, 8, 53), (0, 9, 15), (0, 9, 2), (0, 15, 34), (1, 17, 85), (1, 12, 0), (0, 10, 65), (1, 5, 5), (1, 7, 60), (0, 9, 37), (1, 12, 104), (0, 13, 70), (0, 6, 16), (0, 0, 27), (0, 11, 64), (0, 17, 8), (0, 9, 40), (0, 6, 38), (0, 5, 31), (1, 10, 60), (1, 4, 3), (0, 14, 91), (1, 9, 5), (1, 7, 68), (0, 5, 48), (1, 14, 94), (0, 41, 64), (1, 6, 40), (1, 6, 4), (0, 5, 63), (0, 4, 45), (0, 13, 49), (1, 7, 61), (0, 6, 11), (0, 2, 8), (0, 15, 87), (1, 14, 3), (1, 16, 86), (1, 0, 35), (0, 4, 11), (0, 10, 68), (0, 8, 2), (1, 12, 61), (1, 21, 25), (1, 2, 32), (1, 17, 69), (0, 12, 44), (0, 13, 92), (1, 6, 38), (0, 10, 66), (1, 8, 62), (1, 17, 33), (1, 4, 62), (0, 12, 90), (1, 8, 0), (0, 11, 12), (0, 11, 64), (0, 17, 27), (0, 17, 9), (1, 10, 96), (1, 17, 54), (0, 10, 21), (1, 9, 69), (1, 10, 41), (0, 14, 67), (0, 15, 115), (0, 1, 6), (1, 3, 53), (0, 8, 19), (1, 9, 70), (1, 8, 67), (0, 20, 61), (0, 21, 52), (0, 19, 52), (1, 20, 43), (1, 19, 38), (1, 24, 61), (1, 22, 39), (1, 17, 56), (1, 18, 35), (1, 22, 41), (1, 23, 41), (0, 18, 56), (1, 19, 53), (0, 22, 56), (1, 17, 36), (0, 23, 34), (0, 21, 43), (1, 24, 47), (1, 8, 16), (1, 12, 2), (0, 8, 65), (1, 13, 40), (1, 15, 15), (1, 12, 68), (0, 3, 49), (0, 10, 38), (1, 5, 45), (0, 2, 63), (1, 12, 84), (0, 10, 41), (1, 1, 9), (1, 1, 66), (0, 0, 49), (1, 17, 41), (1, 12, 70), (0, 16, 9), (0, 6, 70), (0, 9, 75), (0, 23, 14), (1, 11, 43), (1, 2, 55), (0, 8, 0), (1, 23, 28), (1, 13, 57), (0, 18, 33), (1, 22, 30), (0, 13, 0), (1, 28, 41), (1, 11, 56), (1, 8, 14), (0, 
14, 104), (1, 14, 78), (1, 7, 71), (0, 19, 17), (1, 17, 22), (0, 15, 50), (0, 0, 67), (0, 4, 38), (0, 17, 52), (1, 11, 114), (0, 11, 30), (1, 3, 69), (1, 16, 89), (0, 8, 92), (0, 16, 26), (0, 16, 53), (0, 5, 12), (0, 17, 30), (1, 7, 7), (1, 6, 8), (0, 13, 18), (1, 8, 1), (0, 13, 52), (0, 14, 91), (1, 15, 26), (0, 15, 42), (1, 13, 48), (0, 2, 63), (0, 15, 24), (1, 17, 53), (1, 10, 16), (1, 6, 65), (0, 1, 10), (1, 11, 26), (1, 1, 51), (1, 10, 70), (1, 16, 26), (0, 12, 60), (0, 2, 20), (1, 8, 61), (1, 17, 7), (1, 17, 23), (1, 31, 55), (0, 21, 103), (0, 23, 52), (0, 40, 79), (0, 29, 30), (0, 37, 100), (0, 34, 88), (0, 22, 109), (0, 28, 7), (0, 30, 75), (1, 34, 116), (0, 41, 25), (1, 17, 40), (1, 29, 26), (0, 42, 85), (1, 31, 47), (1, 22, 59), (0, 28, 75), (1, 41, 110), (0, 23, 61), (1, 17, 72), (1, 19, 111), (1, 42, 80), (0, 17, 42), (1, 19, 36), (0, 18, 98), (1, 38, 102), (0, 22, 53), (0, 24, 61), (0, 17, 113), (1, 21, 25), (1, 22, 84), (1, 31, 67), (0, 20, 86), (1, 28, 60), (1, 22, 39), (0, 19, 61), (0, 28, 63), (0, 35, 110), (1, 24, 115), (1, 20, 41), (0, 32, 69), (0, 38, 81), (0, 23, 53), (0, 33, 112), (0, 17, 33), (0, 39, 95), (1, 23, 35), (0, 17, 76), (0, 26, 23), (1, 19, 35), (0, 30, 103), (0, 19, 112), (0, 32, 27), (0, 34, 71), (0, 23, 53), (1, 22, 39), (0, 40, 74), (0, 27, 101), (0, 17, 58), (1, 18, 111), (0, 37, 97), (0, 17, 53), (1, 30, 71), (0, 24, 85), (1, 22, 56), (1, 19, 55), (0, 29, 111), (1, 22, 43), (0, 25, 102), (0, 41, 57), (1, 42, 25), (1, 21, 83), (0, 39, 10), (1, 39, 55), (1, 20, 52), (1, 31, 69), (0, 34, 22), (0, 32, 20), (0, 41, 69), (1, 17, 43), (1, 19, 51), (0, 34, 6), (1, 30, 108), (0, 19, 73), (0, 19, 44), (0, 30, 106), (1, 23, 60), (0, 19, 59), (1, 21, 103), (1, 31, 70), (1, 37, 23), (1, 19, 58), (0, 17, 39), (0, 18, 45), (1, 24, 54), (0, 25, 86), (1, 27, 60), (1, 28, 91), (0, 40, 22), (1, 30, 55), (0, 19, 105), (1, 19, 60), (0, 22, 42), (1, 24, 37), (0, 33, 103), (0, 29, 87), (1, 18, 97), (0, 20, 33), (0, 24, 51), (1, 21, 75), (0, 20, 
33), (0, 31, 108), (0, 21, 60), (0, 21, 56), (0, 36, 81), (1, 17, 38), (1, 27, 16), (1, 18, 42), (0, 34, 106), (0, 35, 63), (1, 23, 35), (0, 30, 14), (0, 30, 101), (0, 23, 42), (1, 23, 53), (0, 24, 56), (1, 18, 92), (0, 29, 84), (1, 20, 34), (0, 30, 93), (1, 24, 47), (1, 22, 47), (1, 39, 42), (0, 18, 37), (1, 40, 111), (1, 17, 60), (0, 20, 38), (1, 21, 95), (0, 35, 81), (1, 17, 56), (0, 20, 47), (0, 29, 34), (1, 29, 82), (0, 23, 45), (1, 35, 53), (0, 36, 85), (1, 21, 58), (1, 20, 43), (0, 20, 87), (1, 19, 56), (0, 24, 71), (0, 19, 39), (1, 18, 77), (1, 42, 18), (1, 36, 51), (0, 23, 53), (1, 22, 52), (1, 24, 37), (1, 25, 99), (0, 35, 18), (0, 29, 33), (0, 24, 60), (1, 24, 81), (0, 41, 13), (0, 40, 30), (1, 28, 69), (1, 20, 52), (0, 36, 10), (0, 42, 29), (1, 42, 64), (0, 21, 54), (1, 41, 100), (0, 19, 61), (0, 28, 16), (0, 28, 55), (0, 24, 36), (1, 23, 68), (0, 40, 8), (1, 24, 46), (1, 30, 17), (0, 26, 107), (1, 31, 12), (0, 39, 17), (1, 38, 42), (0, 31, 23), (1, 21, 52), (1, 29, 18), (0, 30, 19), (1, 30, 17), (1, 29, 8), (0, 27, 28), (1, 42, 25), (0, 21, 41), (0, 28, 24), (1, 33, 8), (0, 32, 5), (1, 18, 17), (0, 31, 20)]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-ybOTy2K38JP",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "x = features\n",
        "y = purchase_encoded"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "m63cqqwe3vg3",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "from sklearn.model_selection import train_test_split \n",
        "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=1) "
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "4bH-zYvB3f82",
        "colab_type": "text"
      },
      "source": [
        "**Purchase** (target label)\n",
        "\n",
        "- 0 - No\n",
        "- 1 - Yes"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UXcmw_00zycc",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "e2560cdc-2e78-4e81-f55a-ef790a55152e"
      },
      "source": [
        "from sklearn.naive_bayes import GaussianNB\n",
        "\n",
        "model = GaussianNB()\n",
        "\n",
        "model.fit(x_train, y_train)\n",
        "\n",
        "purchase_predicted = model.predict(x_test)\n",
        "\n",
        "from sklearn import metrics\n",
        "\n",
        "print(\"Model accuracy(in %):\", metrics.accuracy_score(y_test, purchase_predicted)*100)"
      ],
      "execution_count": 77,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Model accuracy(in %): 87.5\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "DD3P0tqa-0K_",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 54
        },
        "outputId": "e17c6642-ee9d-40b4-83d0-5e862016f42a"
      },
      "source": [
        "# Held-out feature tuples in (gender, age, income) order, as encoded values.\n",
        "print(x_test)"
      ],
      "execution_count": 78,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[(1, 18, 17), (0, 21, 43), (1, 18, 92), (1, 21, 95), (0, 8, 92), (0, 20, 47), (0, 2, 20), (1, 31, 70), (1, 13, 3), (1, 30, 108), (0, 16, 53), (0, 21, 54), (1, 17, 53), (0, 30, 101), (0, 35, 63), (1, 38, 102), (1, 42, 64), (1, 9, 40), (0, 10, 68), (1, 42, 80), (0, 22, 56), (0, 32, 69), (0, 26, 107), (1, 29, 26), (1, 27, 10), (1, 8, 0), (0, 40, 30), (1, 31, 55), (0, 35, 18), (0, 34, 88), (1, 21, 25), (1, 1, 57), (0, 0, 67), (1, 39, 55), (0, 9, 65), (1, 12, 61), (1, 4, 3), (0, 14, 67), (0, 32, 5), (1, 1, 9)]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Ba8oKhN9-30b",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 51
        },
        "outputId": "b14b0540-cc86-4b4d-e4bf-24e05020354e"
      },
      "source": [
        "# Predicted Purchased labels for x_test, one per held-out row.\n",
        "print(purchase_predicted)"
      ],
      "execution_count": 79,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[0 0 1 1 0 0 0 1 0 1 0 0 0 1 1 1 1 0 0 1 0 1 1 1 0 0 1 1 1 1 0 0 0 1 0 0 0\n",
            " 0 1 0]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "NWNHZDVt5jAJ",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "3a149903-b6b7-4c71-f18c-c8d67f28b473"
      },
      "source": [
        "# Classify one hand-written sample given in (gender, age, income) order.\n",
        "# NOTE(review): the model was fitted on label-encoded columns, so 40 and 92 are\n",
        "# presumably encoder codes rather than a raw age and salary - confirm.\n",
        "test = model.predict([(1, 40, 92)])\n",
        "print(\"Purchased: {}\".format(test))"
      ],
      "execution_count": 80,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Purchased: [1]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "elx8Ybro-hmU",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}