diff --git a/Assignment Colab/.ipynb_checkpoints/Talent_Paul-checkpoint.ipynb b/Assignment Colab/.ipynb_checkpoints/Talent_Paul-checkpoint.ipynb new file mode 100644 index 0000000..9f51018 --- /dev/null +++ b/Assignment Colab/.ipynb_checkpoints/Talent_Paul-checkpoint.ipynb @@ -0,0 +1,950 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Talent Paul" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing packages to use" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", + "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/kaggle/input/ace-class-assignment/Test.csv\n", + "/kaggle/input/ace-class-assignment/AMP_TrainSet.csv\n" + ] + } + ], + "source": [ + "\n", + "\n", + "import numpy as np # linear algebra\n", + "import pandas as pd # data processing, CSV file I/O (e.g. 
pd.read_csv)\n", + "import matplotlib.pyplot as plt # Plotting\n", + "\n", + "import os\n", + "for dirname, _, filenames in os.walk('/kaggle/input'):\n", + " for filename in filenames:\n", + " print(os.path.join(dirname, filename))\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suppressing warnings that may arise" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reading the datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "### Loading the data\n", + "Train = pd.read_csv(\"../input/ace-class-assignment/AMP_TrainSet.csv\")\n", + "Test = pd.read_csv(\"../input/ace-class-assignment/Test.csv\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Looking at the first five rows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Train.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Checking the dimensions of the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#This returns the number of rows and columns\n", + "\n", + "Train.shape\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The data has 3038 rows and 12 columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Looking at missing values in the dataset\n", + "Train.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### The above output shows there are no missing values in the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, 
+ "outputs": [], + "source": [ + "##Exploring the data and its data types\n", + "Train.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The data types of the Train dataset are float and int" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Descriptive statistics\n", + "### The statistics summary shows the count for each attribute, the mean, the standard deviation, the minimum value, the 25th, 50th and 75th percentiles and the maximum value of each numeric attribute. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#\n", + "Train.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### AS_DAYM780201 and FULL_DAYM780201 have the highest mean and highest maximum. FULL_OOBM850104 has a negative mean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Correlation\n", + "> Correlation helps us determine how the different attributes relate to each other. It shows which attributes to select, especially in the presence of highly correlated attributes, in which case one of the two attributes would be sufficient." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Looking at the correlation of \"CLASS\" with other attributes\n", + "#Train.corr(method='pearson')\n", + "\n", + "Train.corr()['CLASS']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualising Correlation of different attributes with the CLASS attribute" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAFoCAYAAAC/l/tEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xm8JFV9/vHPwyiKIAIyIAIjqKCCQdAb3BcQDPiTTZTFaNAgk8R9iRH3BFeCJEZFDSoRUUFEgdGAoEDcUQZBZEBkxIURFFzABRWR5/fHqZ7p6ek7W5+qO7f7eb9e9zXd1X3rW7en+ltVp875HtkmIiImy3ozvQEREdG9JP+IiAmU5B8RMYGS/CMiJlCSf0TEBEryj4iYQEn+ERETKMk/ImICJflHREygu8z0Bkxn880393bbbTfTmxERMatceumlv7A9d1XvW2eT/3bbbcfChQtnejMiImYVST9enfel2SciYgIl+UdETKAk/4iICZTkHxExgZL8IyImUJXkL+kkSTdJunKa1yXp3ZIWS7pC0sNrxI2IiLVT68z/I8A+K3l9X2CH5mc+8P5KcSMiYi1USf62vwz8aiVvOQD4qIuLgU0kbVUjdkRErLmu2vy3Bq7ve76kWbYcSfMlLZS08Oabb+5o0yIiZoi0dj8VdJX8h23tCjPH2z7R9pTtqblzVzk6OSIi1lJXyX8JsG3f822AGzqKHRERA7pK/guAv2t6/TwKuNX2jR3FjoiIAVUKu0k6FXgSsLmkJcCbgLsC2P4AcA7wVGAxcBvwvBpxIyJi7VRJ/rYPX8XrBl5YI1ZERIwuI3wjIiZQkn9ExARK8o+ImEBJ/hEREyjJPyJiAiX5R0RMoCT/iIgJlOQfETGBkvwjIiZQkn9ExARK8o+ImEBJ/hEREyjJPyJiAiX5R0RMoCT/iIgJVCX5S9pH0jWSFks6esjr8yRdJOkySVdIemqNuBERsXZGTv6S5gAnAPsCOwGHS9pp4G2vB063vRtwGPC+UeNGRMTaq3Hmvzuw2PZ1tm8HTgMOGHiPgY2bx/cik7dHRMyoGtM4bg1c3/d8CfDIgff8K3C+pBcDGwJ7VYgbERFrqcaZv4Ys88Dzw4GP2N6GMpH7KZJWiC1pvqSFkhbefPPNFTYtIiKGqZH8lwDb9j3fhhWbdY4ETgew/Q3g7sDmgyuyfaLtKdtTc+fOrbBpERExTI3kfwmwg6TtJa1PuaG7YOA9PwGeDCDpIZTkn1P7iIgZMnLyt30H8CLgPOBqSq+eRZKOkbR/87ZXAkdJ+g5wKvBc24NNQxER0ZEaN3yxfQ5wzsCy
N/Y9vgp4bI1YERExuozwjYiYQEn+ERETKMk/ImICJflHREygJP+IiAmU5B8RMYGS/CMiJlCSf0TEBKoyyCsiZpCG1VZcDRlkP9Fy5h8RMYGS/CMiJlCSf0TEBEryj4iYQEn+ERETKMk/ImICJflHREygKslf0j6SrpG0WNLR07znEElXSVok6RM14kZExNoZeZCXpDnACcDelMncL5G0oJm9q/eeHYDXAI+1/WtJW4waNyIi1l6NM//dgcW2r7N9O3AacMDAe44CTrD9awDbN1WIGxERa6lG8t8auL7v+ZJmWb8dgR0lfU3SxZL2qRA3IiLWUo3aPsMKiwwWDbkLsAPwJGAb4CuSHmr7luVWJM0H5gPMmzevwqZFRMQwNc78lwDb9j3fBrhhyHvOtv1n2z8ErqEcDJZj+0TbU7an5s6dW2HTIiJimBrJ/xJgB0nbS1ofOAxYMPCes4A9ACRtTmkGuq5C7IiIWAsjJ3/bdwAvAs4DrgZOt71I0jGS9m/edh7wS0lXARcBr7L9y1FjR0TE2pHX0ZreU1NTXrhw4UxvRsS6L/X8Z68W/u8kXWp7alWryAjfiIgJlOQfETGBkvwjIibQ7JzDN22cEREjyZl/RMQESvKPiJhASf4RERMoyT8iYgIl+UdETKAk/4iICZTkHxExgZL8IyImUJJ/RMQESvKPiJhASf4RERMoyT8iYgJVSf6S9pF0jaTFko5eyfueIcmSVjnRQEREtGfk5C9pDnACsC+wE3C4pJ2GvO+ewEuAb44aMyIiRlPjzH93YLHt62zfDpwGHDDkfW8G/h34Y4WYERExghrJf2vg+r7nS5plS0naDdjW9udWtiJJ8yUtlLTw5ptvrrBpERExTI3kP2xmlaWzpkhaD/hP4JWrWpHtE21P2Z6aO3duhU2LiIhhaiT/JcC2fc+3AW7oe35P4KHA/0n6EfAoYEFu+kZEzJwayf8SYAdJ20taHzgMWNB70fattje3vZ3t7YCLgf1tL6wQOyIi1sLIyd/2HcCLgPOAq4HTbS+SdIyk/Uddf0RE1FdlAnfb5wDnDCx74zTvfVKNmBERsfYywjciYgIl+UdETKAk/4iICZTkHxExgZL8IyImUJJ/RMQESvKPiJhASf4RERMoyT8iYgIl+UdETKAk/4iICVSltk9ExFjQsOlJVoO96vesY5L8I2qboAQSs1eafSIiJlCSf0TEBKqS/CXtI+kaSYslHT3k9VdIukrSFZIukHS/GnEjImLtjJz8Jc0BTgD2BXYCDpe008DbLgOmbO8CnAH8+6hxIyJi7dU4898dWGz7Otu3A6cBB/S/wfZFtm9rnl5MmeQ9IiJmSI3kvzVwfd/zJc2y6RwJnFshbkRErKUaXT2H9Wsb2mdN0rOBKeCJ07w+H5gPMG/evAqbFhERw9Q4818CbNv3fBvghsE3SdoLeB2wv+0/DVuR7RNtT9memjt3boVNi4iIYWok/0uAHSRtL2l94DBgQf8bJO0G/Dcl8d9UIWZERIxg5ORv+w7gRcB5wNXA6bYXSTpG0v7N244DNgI+JelySQumWV1ERHSgSnkH2+cA5wwse2Pf471qxImIiDoywjciYgIl+UdETKAk/4iICZTkHxExgZL8IyImUJJ/RMQESvKPiJhASf4RERMoyT8iYgIl+UdETKAk/4iICZTkHxExgZL8IyImUJJ/RMQESvKPiJhASf4REROoSvKXtI+kayQtlnT0kNfvJumTzevflLRdjbgRMQGkNf+JVRo5+UuaA5wA7AvsBBwuaaeBtx0J/Nr2A4H/BI4dNW5ERKy9Gmf+uwOLbV9n+3bgNOCAgfccAJzcPD4DeLKUw3NExEypkfy3Bq7ve76kWTb0Pc2E77cC964QOyIi1kKNCdyHncF7Ld6DpPnAfIB58+ZNH9Er/Gq71vYiZW23M/Fmd7yu989x/z50+feN+/9dnxpn/kuAbfuebwPcMN17JN0FuBfwq8EV2T7R9pTtqblz51bY
tIiIGKZG8r8E2EHS9pLWBw4DFgy8ZwFwRPP4GcCF9gwe8iIiJtzIzT6275D0IuA8YA5wku1Fko4BFtpeAHwYOEXSYsoZ/2Gjxo2IiLVXo80f2+cA5wwse2Pf4z8Cz6wRKyIiRpcRvhEREyjJPyJiAiX5R0RMoCT/iIgJlOQfETGBkvwjIiZQkn9ExARK8o+ImEBJ/hEREyjJPyJiAiX5R0RMoCT/iIgJlOQfETGBkvwjIiZQkn9ExARK8o+ImEAjJX9Jm0n6gqRrm383HfKeXSV9Q9IiSVdIOnSUmBERMbpRz/yPBi6wvQNwQfN80G3A39neGdgHeJekTUaMGxERIxg1+R8AnNw8Phk4cPANtr9v+9rm8Q3ATcDcEeNGRMQIRk3+W9q+EaD5d4uVvVnS7sD6wA9GjBsRESNY5QTukr4I3GfIS69bk0CStgJOAY6wfec075kPzAeYN2/emqw+IiLWwCqTv+29pntN0s8lbWX7xia53zTN+zYG/hd4ve2LVxLrROBEgKmpKa9q2yIiYu2M2uyzADiieXwEcPbgGyStD5wJfNT2p0aMFxERFYya/N8B7C3pWmDv5jmSpiR9qHnPIcATgOdKurz52XXEuBERMYJVNvusjO1fAk8esnwh8Pzm8ceAj40SJyIi6soI34iICZTkHxExgUZq9olYK05HroiZljP/iIgJlOQfETGBkvwjIiZQkn9ExARK8o+ImEBJ/hEREyjJPyJiAiX5R0RMoCT/iIgJlOQfETGBkvwjIiZQkn9ExARK8o+ImEAjJX9Jm0n6gqRrm383Xcl7N5b0U0nvHSVmRESMbtQz/6OBC2zvAFzQPJ/Om4EvjRgvIiIqGDX5HwCc3Dw+GThw2JskPQLYEjh/xHgREVHBqMl/S9s3AjT/bjH4BknrAccDr1rVyiTNl7RQ0sKbb755xE2LiIjprHImL0lfBO4z5KXXrWaMFwDn2L5e0krfaPtE4ESAqampTPcUEdGSVSZ/23tN95qkn0vayvaNkrYCbhrytkcDj5f0AmAjYH1Jv7O9svsDERHRolHn8F0AHAG8o/n37ME32P7b3mNJzwWmkvgjImbWqG3+7wD2lnQtsHfzHElTkj406sZFREQ7ZK+bTetTU1NeuHDhTG9GsYp7FdNa28923ON1bdz/vog+ki61PbWq92WEb0TEBBq1zT9i3Zcz+IgV5Mw/ImICJflHREygJP+IiAmU5B8RMYGS/CMiJlCSf0TEBEryj4iYQEn+ERETKMk/ImICJflHREygJP+IiAmU2j6R2jcREyhn/hERE2ik5C9pM0lfkHRt8++m07xvnqTzJV0t6SpJ240SNyIiRjPqmf/RwAW2dwAuaJ4P81HgONsPAXZn+Fy/ERHRkVGT/wHAyc3jk4EDB98gaSfgLra/AGD7d7ZvGzHueLPX7iciYjWNmvy3tH0jQPPvFkPesyNwi6TPSLpM0nGS5owYNyIiRrDK3j6SvgjcZ8hLr1uDGI8HdgN+AnwSeC7w4SGx5gPzAebNm7eaq+9AzqojYsysMvnb3mu61yT9XNJWtm+UtBXD2/KXAJfZvq75nbOARzEk+ds+ETgRygTuq/cnRETEmhq12WcBcETz+Ajg7CHvuQTYVNLc5vmewFUjxo2IiBGMmvzfAewt6Vpg7+Y5kqYkfQjA9l+AfwYukPRdQMAHR4wbEREjGGmEr+1fAk8esnwh8Py+518AdhklVkRE1JMRvhEREyjJPyJiAiX5R0RMoCT/iIgJlOQfETGB5HV09Kqkm4Efr8Wvbg78ovLmJF7iJV7izZa/7X62567qTets8l9bkhbankq8xEu8xJutsbqIl2afiIgJlOQfETGBxjH5n5h4iZd4iTfLY7Ueb+za/CMiYtXG8cw/IiJWIck/ImICJflHREygsUn+Kp4t6Y3N83mSdp/p7RqVpE1meht6JO0909tQm6T9Z3obapJ0yuosa3kbxm4/6Zqku0raTdKwedGrGJvkD7wP
eDRwePP8t8AJbQWT9Lb+xCxpU0lvaSHULyR9UdKR68CBYIWpN9vSO4hXXufTB34OBk7sPW8h3uYDz58t6d2S5ktS7XiNnQdizgEe0VKs6VTfTyT9laSLJV0v6URJm/a99q3a8abZhu+3uO4PSNq5eXwv4DvAR4HLJB2+0l9e25jj0ttH0rdtP1zSZbZ3a5Z9x/bDWoq3NM7gNlSO813gNZSD2j7AV4FTgbNt/6FmrCbeguleAva0vWHtmNNsx09sz6u8zjuAz1Pmmu4l32cAZwC2/feV4y3dHyS9Hng88AngacAS2y+vGOs1wGuBDYDbeouB24ETbb+mVqwmXqf7iaSvAm8BLqZMFPU8YH/bPxj2XawQ77dALzn29pV7UD5b2964crxFtnvJ/2XAk2wfKOk+wLm1/z4YcSavdcyfm7McAzRzBt/ZYrw5ku5m+09NvA2Au7UQ58+2Pwd8romxH3AYcIKk82w/q3K8xwPPBn43sFxA1WY0Sb+Z7iVKEqvt0ZSpRi8BPmDbkp5k+3ktxIJlSQPg6cDjbf9e0ieAb9cMZPvtwNslvb12op9GZ/tJYyPbn28ev1PSpcDnJT2HZUm6po8A9wJeZfvnAJJ+aHv7FmJBOUj37A18CsD2z9q6SByn5P9u4ExgC0lvpZzRvb7FeB+jzEv8P5Sd7++Bk1uIs/R/vjnTPx04vbk0PLCFeBcDt9n+0gobIl1TOdYtwF/3vlwDsa6vHAvblzTt0S8GLpT0atpJHD0bSNqN0rw6x/bvm+34s6S/tBHQ9mskbQ3cj77vt+0vVw7V5X7SrFb3sn0rgO2Lmma7TwOb1Q5m+8WSHgGcKuks4L20u6/cIulpwE+BxwJHAki6C+2cCI1Psw+ApAdT5hQWcIHtq1uOtw+wVxPvfNvntRDjn22/s/Z61wXNPZIFtldos5V0rO1Xtxh7a+A/gSnb928pxkUDi55l+0ZJ9wbOa6Nol6R3UK4MrwJ6BxjbntU3tiU9C7jO9sUDy+cBb7B9VEtx1wNeBDwTeIDt+7YUZ0fKCex9gHfZ/kiz/G+Ap9h+ZfWY45L8JQ07+v/W9p9biDWH8uXdq/a61xWStgS2ppzt3DDs7DzWTrP/3M32bat885qv+xpgl15zZNsmZT+RtBWwm+1zZnpbahmnZp9vA9sCv6aciW8C3CjpJuAo25fWCmT7L5Ju678MbUvTvPMaShNPr0b3TcDZwDts31I53m7A+yntnT9tFm8j6RbgBbartlU3f98+9CUQyoG16t/VF6uzz7Iv7hRl37wDuNb291h2U7a264C7Aq0m/673k1Vsy4m257ew3gcDB9C3bzbt/tVbFCQdBfyf7WubnmAnAQcDPwKOsH1Z7ZjjlPw/D5zZa3qR9BRKUjmd0g30kZXj/RH4rqQvAL/vLbT9kspxTgcupNz9/xlA0wPgCMpNodp9qv8H+Afb3+xfKOlRzWvVek9J+jvgTcD5LEsgewBvk/Rvtj9aK1aj089S0hOB4yn3Nh4BfA3YVNKfgefYrn5fg3JQuVzSBfQdAFrYLzvbT5r1TteuL+CpNWM18V5N6WF3GtBrltyGcg/gNNvvqBzypZSbzDRxdwG2B3ajNAc9vnK8sWr2WWHig94ySZfb3rVyvCOGLbdd9aavpGtsP2hNXxsh3rW2d5jmtcW2H1gx1jXAIwfPuJs+3N+0vWOtWL14HX+Wl1Haa2+WtD3wH7YPam46v8r2U2rGa2J2tV92tp806/wLZWa//q4vbp5vbXv9yvG+D+w82GwsaX1g0XR/+wjxluaopjfYN23/V/O8ehdyGK8z/181R+vTmueHAr9u2lerd/m0fXLT9XKe7TZ6N/T8WNK/ACf3dTnbEngu0MaZ47mS/pcywKS3/m2Bv6NcXdUkhveguJPlv+S1dP1ZzrF9c/P4J5QeONj+gqR3tRCvy/2yy/0ESnPWk23/ZPCFNnqGUfbB+7LiVLJb0U4X8jub+wq/pnRaeWvfa6309hmn
5P8sShPCWc3zrzbL5gCH1A4maT/gncD6wPaSdgWOaaFXxaHA0cCXtGyo98+BBbTwd9l+iaR9WdbWKWAJcEILN7veCnxb0vksSyDzKM0vb64cCzr+LIGFkj4MXED5PP8PQNI9KPtldV3tlx3vJwDvAjalHEQH/XsL8V5G6cp9Lcvvmw+k9P6p7Y3AQsp+scD2IljadHhdC/HGo9mnObt/h+1XdRjzUmBPyk2a3oji79r+q662YRw0TTx/w/IJ5Dzbv57RDatA0l2Bo4CdKMP1T2o6C2wAbGF78KyyRszsl5U03Tx3Z/l98xLbrYzRaPr037N/3++dKNj+be14Y3Hm33yhuq5fcoftW7X86LtOj6SSnmf7fyqvcw5l+Pw2lGHlX+977fW2q9Yvanb001b5xpa18Vk27cXvG7L8D6zYnFBLJ/tl1/vJKrZlb9tfqL1e23dSBrMNxtvI9uDI5hrx7qA0+9D0+NmD0nqxH7Bl7XjjVNjtMkkLJD1HfcW7Wox3pcrAkzmSdpD0HuDrq/qlyv6thXX+N/BE4JfAeyT9R99rbX6ey1GpadSlNj7LaUk6t6VVd7VfrhP7SaOzgoONq9pasaRHSvovysnBAuArwINbiTUOzT4AKmUWBtmVi3X1xbsH8Dqg12PjPOAttv9YOc4V070E7Gi7aj0hSVfY3qV5fBfKmevmlO5nF7tigamVHJxFqb0zd5rX1zZe15/ldD00BHzO9lY14zUx+/dLUfbLN7exX3a1nzQxui4k94qVxHud7aolJVRK0hxCuadxKqVUzUK3V0tofJJ/l1SKxt0PWNzWwKC+WD+ntIkPtoEL+Hrt4eaSvmf7wQPL3thswxY1u7g1/d0/zvBmiWfYvmetWE28rj/LvwBfYnjPpUfZbqUXRxe63E+adf+a6QvJfdJ21WYRSX8EjqMMzBv0cttVy6tLuhm4hnJj+3O2/yjpOrdUegTGpM0fQNLdKcWQdgbu3lte+8xf0vOBtwE/oPSmmG97urOSGj5HqWh4+ZBt+b8W4i2UtI+XVVDE9jGSbqCM6KzpCuCdtq8cfEFSG6Uzuv4sr6YMhLp2SLw2uif2RhO/FtiO5Qu77VI5VJf7CXRfSO7bwFkeUhmgyQG13YdytXY48C6VulAbSLpLcy+gurE585f0KeB7lBskxwB/C1xt+6WV41wJ7NEM3Lk/8HHbj64ZY1JIejzw42n6bk/ZXjgDm1WNpGcA3x3W317SgbbPGvJro8a8BngV8F36+qO30bNonEl6EPCrvnEa/a9t6RZrGDUnsk+jHAgeRylSWbt0+1gl/8ts79Zri2y62Z1ne8/KcZYbbTf4vA3Nnf9el7Ne/ZtvuaX/PA2paULpe9xqldQudP1Zdk3SV20/rqNYne8nmpBCcj2SNqbUJju++rrHZJ9H0rds7y7py8ALgJ9RvtRV28xUCsX1d008rP+5K9dQUalR9D7gWvoKaFEGm7zA9vmV4/XXNFnSF+8woGpNk+ZG4ZHAQZTRlL0Ecjbw4cGh9RXidf1ZvgK41faHB5a/mNJ3u/ooX0lPpvz/Ddb2+UzlOJ3tJ028oYXkKHWT2io42HkRwGm2pfqsdjBeyf/5lIkddqEUltoIeKPtD1SOM7R2So/r11C5GtjX9o8Glm8PnGP7IZXjdVbTRNKplC/vySyfQI4ANrN9aK1YTbyuP8srgYfbvn1g+d0og4Vqt8Mj6WOUroGLWNbsU73XW5f7SbPey5m+kNx/u/J0rZLOoxQBPNkrFgHcy3Znk9RLut72trXXOzY3fG1/qHn4JaC1O+S1k/tquAvLEmO/n1JK99bWZU2Th3vFYmpLgIvVzmTZXX+WHkz8zcI/Nc1PbXiYuxnN23Xtmw0HEz+A7YsltTGv9Ha2jx2I9TPgWEmtdB9fiVbO0Mcm+TdnUwezYi+HYyrH+Swr+c9w/do+JwGXSDqN5WuMHEo7g1u6rGnya0nPBD7djKbsDal/Jit2x6xh2Ge5LaWpopWB
QsNuDjbt1m25WNJOtlsbiNTouvZN14XkfqwOiwCqDGoclldEC6N7YbyafT4P3ApcyrLp66h9o0Sl0NK0hnVFqxBzJ2B/lq8xsqCtL7g6qmkiaTvgWEotmv5JeC4Ejrb9w5rxmpgPYcViZK18lirzFbwEeCXLJmx/BKUQ2QltXEU2TVsPAH5IafMX5QqkjSamrmvfDCskt8AtFJJTqTl1dBNvsAjgsbZ/VTneDpQkP3hguR/lxvbimvFgvJL/lbYf2nHM9YFezflrat+gXEnczW3/osX1zwN+Y/uWJkFPUbrNLmox5r0p+2Nrf9dMaBLW0cBDKWd2iyg3DFsp7yDpfsOWt9HVcyb2k3El6XPAa21fMbB8CniT7f1qxxyn2j5fl9RZ5UJJT6L0GjmB0oPk+5Ke0EKcfSX9UNJXJe0maRHwTUlLmp4dteMdTblvcnFzE/3zwL7A6Zp+yPso8XaX9Ne2fwlsIekVTcKsTtI+fY/vJelDkq6Q9Im2mmJsn2v7ibbvbXvz5nFbdX16SX4TSjGw/YBNWkr8ne4nq9iWE1ta74MlPXnwnkL/flTRdoOJH6AZ67JdC/HA9qz+oQxmuYJSbOnPlCHSV/SWtxj3UuBBfc93BC5tIc7lwEOAR1OKaD2qWf4Q4NstxFtEmTzi3sBvgbnN8g2BKyvHehNl5OZC4O2U5p43Al+m1E+p/bd9u+/xh4C3UC6rX04ZzVk73vl9j1/Txn44JOZLgSspAx2Pab4HL57N+0mz3s2m+bk3sKSFeC9pcslZlHl0Dxi2H1WMt3htXhvlZxxu+D5thuLe1X0jN21/vxlYVtudbgbNSLrN9sVNvKubNtfa/mL7D5JuB/5AOeBg+/ctdFB5BrArcDfKuIxtbP9G0nHAN1l+NqPaprxsas//XFUX3rXUX5jumZQDXNuOpEyN+XsASccC3wDeUzlOl/sJwM1MP43jFkN/YzRHAY+w/bumSesMSdu5TK3Yxh94iaSjbH+wf6GkIyknmtWNQ/LfAtjcA5fSKjMa3UB7ddN7szSd0jz/W9r5T7pF0j8AG1N6x7ycMhH5XqxY5KqGb6vMIbohZaDQyc3N9D2pX8r2Dpebg7dJ+oHt30Cpdy+pje6CWzRNEgI2liQ3p1a00wQ6EzfURF+Hh+ZxG8mqy/0Eup/GcY6bmv22f9Q0857R3FNp4/N8GXCmpP48MkWZke2gFuKNRfI/jtL9atDVwImUnbEN/wS8kHJ5KEpTxQoTd1RwBPB6SiLpFX46j3JQO6qFeM+nnKUaOIPSm+NZlEvgEyrHul3SPWzfRukFAywdXdlG8v8g0KsUejKlBPHNzeCdFYq9VXB/lVLE6nu8lOt3C4YywPGbks5snh9IO91Yu9xPoPtpHH8maVc3RQCbK4CnUboLV7+36NKd9DGS9qB0DgD4X9sX1o7VM+t7+2glU9RJ+o4rj/yLeiTdzfafhizfHNjKdtcTulQ1E92Cm7gPpxQEE/Bl25e1EWecSdqGcmX6syGvPdb212Zgs6oah+S/2PYD1/S1EeJNNyEI0ErpXJqzgYMpg1ruoPQy+qDtH7QQ69vAZ4BT21j/kHidFVqTtBllANINlLPh11JupF8NvM2V5w2WtHGvKWvIa/OGNWGMEGulk4u4fr/0TveTJmanheSm6cr6PQ8pQT4bjUNXzy9KeqsG7jJJ+jdK75Ha7qS0o55CmXlnv4GfqiS9gzKK8WJKb6brKHMJnKEyOra2TSldBS+S9C1JL5dUdZKTHpVCa9cC/wo8Ffh/lOl1lzbdAAAYl0lEQVQUr21eq+1jlDbqRwAXUWqoH0u5YfmRFuL9X++BpAsGXqtdzvkXlKarhc3PpX0/bZTG7mw/gaWF5E6jXM18C7ikeXxq0+20drzpurJ+suuurK1powtRlz+UL/OplIT46eZnMWVH2ailmA+mJKlvUxLKU4G7tBTru32P7wJ8rXm8Ke10qevvDvl4yn2Mn1GS5fzKsa6m9G8eXL49
ZbBQ7b/t8uZfAT8d9lrleJcNezzseYVY/wV8p/n/ejzNVX1bP13uJ02M71N62A0uXx+4toV4nXZlnYmfWX/mb/v3tg8H9qacvX0EeIrtw9zcrQeQtHPFmN+z/SaXOv6fpdQbeXmt9Q+4s++S/r7AnGYbeuUQWmP7K7ZfQLnMPpbSRFJT14XW1lMZtr8tsFFzKd8bXbx+C/E8zeNhz0cLVCYt2hX4FPAc4DJJ/65SsbRVHewnsKyQ3KC2Csn9xfYfKFVnl+vK2kKsGTEOvX0AsH0dpUlkOqcAVSZdkbQ1pRjYQZSaNC+nTLjchrdRvsjXUK44/qnZhrmUM73aVqim6dId8/PUL6DVdaG1t1NmewP4e+BDkgzsRLmSq62/a+kWfc0FYvkxAFW4nJpeJOkyymf4Zpr7Q7Vj0e1+At0XkpuuK+uTaacra+dm/Q3f1aVmpq8K6/kSpbvg6ZQubsvdSHPlG2tNzM0oZapbnzC+a+qw0FoTbw5lv79DZTKZXSlNQDe2EOtNK3vddrUDTlOC4ABKtde5lJuxn7TdylzBM0EdFpJr9o3+rqyPpHSz/gmlKN+svwKYpORfZbpFST9i2SV7/4fXq57Y2lwCA9vxIOCfbVft67+qm1m2/6NmvJVsR/XudE0XyGm58mxQq0vSa2yPNPpX0u8pZ/mnUu55LffFdv2ZvDrdTyTdA/izm+KJzf7/VOBHttu66u6Pf1dK//uf2r6p7XhdSPJfx0naBXgnpb3zLMow/fdRzkSOt/2flePdSek1ci7LSgIvVflsdQ6lx9TWwOdtX9kMpHktsEGNK7WBeHdSbuT1JuVerlSAK8/3vLpq7JuSPsL09xHs+jN5dbafNPG+DBxp+1pJD6T0+Pk4pcnuW7ZfUzneB4D32F7UDDr8BqWX32aUk65Ta8abCWOd/CXd1/YNzeOLbT+q4roPAi60fWvzfBPgSbarduGT9E3K3KXfAPYB/gX4BPAG23+sGauJtyulvXgfSjfBU4EL3MKO0iSsbSlf5EdSRi0/mlLLv3ZXSFRKYxxMmffhNODM/k4BM6VWk2SXutxPmnhLB3NKejNlms8XqpRVv9SVZy+TtMj2zs3jl1G+2wc2o8HPnW3/X8OMe/JvZeLjZt2Xe1lhsN6y6l/iwTgqdUy2a6Odc0jsx1DaOfcCXm17wSp+ZU3XfyWwi+07Jd2d0lf9gR4yqrJy3O0pf9cBlAPO29wM458Jlc78n237Y9M1x7TZXNf2ftLEuMLNAEpJXwOO650gtDGSv/+7rDKD2Kdsf2TwtdlsbHr7TKPNrpDDusm28XneXdJuLPtbfgfs0hvU1lY7ddObaDdKHZMlQBvtnLe7mb7R9h8lfb/txN/E+qGksyn9uJ9DKcc9Y8mfOvtpr+b8PVf6rso62k8ArpD0Tko34AcC5zfxN2kp3i1NE+RPgcdSqqX2bgRv0FLMTuXMf+3XfRKlD/AJlLbWFwOb2n5u5TgXreTl6u3Ukp5H6TFyd0ovh9PbusEl6TbKzUkoCfABzfNWph6UdH9KU8UBlO6CpwGfa6P5rIn3ItvvXY33vdb229rYhrZ0uZ808TagzFWwFXCS7e80yx8DPMD2KSv7/bWItyPwbsoo8Hf1nfX/DWUc0StrxpsJsz75S3oP0098fITtjVuKuyHwBsqlrihnIm+Z7V3Amht532VZ9cTBXiPVKlFqmikH+2JVLcfd/G1XAGcDv2HFv612D5XOOxk0Z+JHUWZ/Wnol2tIN3072kybe3ra/MM1rx9p+dc14k2Acmn1WVrekjZomwNKRftVriqwuSXsD/2J778qr3qPy+qbVS+5NG/zOlARydTNgrw3HsCxJbdRSjJl2NvAV4IssX9e/ts72k8YJkl5u+397C5p+/ydRzs6rkvTvwHW2PzCw/OXAfcbhYDPrz/xXRtI7bf9z5XW+y/bLJH2WIVccLZzx7Al8gGVdPd9GKSch4K0t9N/+F0oX0i5uKG9MmU5xitLmLuBh
lN4jR3qaipizhaQ7gNuGvURp1qp+VTqsI0IbutxPmnjbUUYOv9b2Z5oOAmdQruCO6PX/rxjvKuChvXtSfcvXo0wP+9Dhvzl7jMOZ/8ocAlRN/iybueudldc7neOB+ZSunvtSqnu+wWU6uTbcD7hU0gtrD7Ia4t2UofKH9b5kzY3sNwDvpVQzraZJGIdSSnJ8FngV8ARKUcA32/5FzXiUonxd9wr5nKSn2j6n5Thd7ie4zKa1F3CepC0oN+q/abutCpseTPzNwjt7nS1mu3E/87/e9rYzvR2jGGw3Vpnu8AEtx3w4ZTDZ9yhjDJZ+CWr2LpJ0re0d1vS1EeKdTimLvSFNVVTKQeBxwK62q84H3WWXQEm/ZdmcthsCt1P+VmjvKqOT/aQvFpQbvh8FvkDfDF4txLsEeJbtaweW70CZw2CqZryZMOvP/DX9JBaiha6ekr7LSioy1u6hAmwi6enLb8Ky57WbfZp1flvS6yjlsR/A8uUsavYu6voMaifbD2266y2x3Ztp6/OS2iiS96kW1jmU7U67eDYxu9pPoFwB91wBbNm3rI14bwTOlfQWlp9T9zWUInOz3qxP/pT/mN4Zz6DbW4jXOzt8YfNv/wTuw9p3R/Ullp8kpv+5KQW8qmkuqY+nFJLbs9elriVfk/RGSpPL0gOqpDdQmrdqux3ApajbDQOvtdF2fdfm7xvGtt/cQkyak4PHUfaPr7Q0WrrL/QTbnd5gtn2upAMpTYMvbhYvAg72LJ9etGesm33aJOlrth+7qmWzjaTrgHdQpolsdedobvh+mFJq+3JKstoNuAx4vitXMJV0E8tmgzq0eUzz/BDbW1aON6wv+D0ok5/f23b1HkeS3kcZBNWrPXMo8APbL5z+t9YqTmf7SRPv6QOLTDN7me3fth2/bzvuDuxnu7OrurbM+uSvFSs1GviFWy5lK+ly4EW2v9o8fwzwvto9LYYM1+/t9F+1/cOasZp4c23fPGT5tpQbs8e1EPMBlAJdAha5pTlhJR2xstdtn9xG3Cb2PSmDlI6klAM/vo1BUZIWUXqpuHm+HuXGc7XJjJr1drqfSPqfIYs3A3ah9AxrY8rWXuw5wFMoJSz+hnI19Yy24nVlHJp9jh+ybDOVgk+Hu72aLUcCJ6lU/IMy2rfqQJrGsLbc7YDXSfpX26cNeX2t9X+hJW1OqWl+OKXyZtXSuWrq0dj+gaT79PcaWd3RsWuiP7lL2qgsandQXnNP6hWUZsGTgYe78kTxA66hTHLSGyC3LaWNvKou95Mm3vOGLW8GCp5OKQxYlaQnAM+izC39LUqZh+1tt9G827lZf+Y/HUlTwH/YfkLLcTamfI63thlnSNzNgC/WHkHanKEeRNnpd6R8kQ+1vU3NOE2spT2ZhvRqamV0rKR/oty069XC+R1wrO33tRDrOODpwImUCUBaryCqMtnQX1OSFc3jb9Dcj6o1DqXL/WQ1tqX6viJpCWX08vuBs2z/VtIPbbc+LWZXxuHMfyjbC5uzu6o0TfXEXtdfdzTZie1ftdTf+CZK4ng9pWnJKuWr26BpHg97Pnow6fXAYyjlea9rlt0f+C9Jm9l+S+WQr6TUun895Upt6abQUvdLSi+VLnS5n0xLZVKXP7Ww6k8DB1LumfxFpRDgWJ0pj23yl7Ql7fxnzUj1xEHNyN82mg9eSyl+9n7gE5I+2UKMns4mOG88B3iY+wq52b5O0iGU+ZCrJn/bwyq/tsr2l2DpFWl/bZ/a04t2uZ8wzYj6zSj9/p9dO57tl6rU8d+D0px1HLBxs6+c08VVXNtmfbOPhhd224xyhvdS25/tfqvqmWZcwWbADZRh7Ve3FPf+lJ3+MGAH4E2UyU9WmLh7hBi9qp79FT1pnt/f9obT/e5axrvG9oOmee17th9cM95MkDSfMnH7HyiDrlqdXrSL/aSJ88SBRQZ+CVxru40u3YPx70qZuOZwSlXPzduO2bZxSP6DPTh6O8UlbfSm6It7MuXgckvzfFNK
D47a1RMHK18a+GXbNyoHtuGvKG27h9QcXTzkb1uO61f1vIAyccsFA8v3pJTM6LpYWXWSrgUe7fqlKlYndiv7SbPuR9luY+zHGpP0BNtfnuntGNU4JP95tn+y6ndWj7vC0P2uhvOrlJM+kDL8/P+1FGMTypkcwPe7vqHdBkk7U6pefpVlgwP/mtKL4wDbi2Zw86qQ9Hng6V31SOlqPxnoHPAN249uI05fvE7nl54J49DmfxZlkBCSPm374I7iridp0163vab3TWufZ9N19amUM6t9KDekPrDSX1r7OCdSDi4/pDQb3E/SmcA/1rzElnQkZS7W45rnP6XcSxGlXPX7a8UCcJmM+6GUz3DnJs6XgX9wSxO6zIDXAF9Xmft56Y1Q2y+pGaTL/aQXsu/x3Suve5gPs2x+6XdLanV+6ZkwDsm/f6dopV1zGsdTvmRnUM4gD6GUW65KpW5/b3DJRZRyErtP1++5gtcDdwW27Y2cbLr1nUCptvmGirH+kXIg67nJ9tbNKMrzKTcTq3KZLvIiSm+V3vwB45L4Af4buJAy0coKVSkr6nI/geZkizJ9au/x0u9+Cze0p5iB+aW7NA7NPtP2Fe8g9k6UglICLrB9VQsx7qRMzvHc3oheSde1eAPvSsrB5baB5RsBF7tiHXNJl9p+RN/zpdMZSrrE9l/XitWsszd/wCMo5STWY4zmDwCQ9HXbj+kgTmf7SbPeH7HsBvag6je0uxp3MpPG4cz/YZJ+Q9kpNmgeQ7t9qaGs/CrgqqYN/iBJx7XQBv8ISk+KL6rUUzkNmFM5Rr87h7UX2/6dpNpnCvfqf9KX+NcD7l05FnQ8f8AMuajp8fNZlm/2qX1m3OV+gu3tVud9knaudO/mwZJ6I6MFPKB53sr80jNh1p/5z5Rp2uA/02bXUkmPpTQBHUw5cz3T9omVY3wHeBLDz7Ausv2wirHeB/zK9usHlr8F2Nz2P9aK1ay30/kDZoKkYfWe2jgz7mw/WRO1ztC77ok2E5L819CQNvhPAu9Z3TOTStuwHrA35Qz2ec2yKmc8XV5eN1dMH6L0uOmVBH4YZe7l59ceSCNpse0HTvPaWCT/YSStX/sGbNfNMKurqx53ffFa73nUliT/NdR1G/wabFfX9ztqXV73Bgr1qk5e5YGqnhUPbCezbMrGwfkDdrT9nFFjrCua5qw9KFem+7lyueo12I5q+8lqxuv6e9DpwaamcWjz71rXbfCrq+tZsU6h6WI7Kpc6O9d1EOvFlC58i1VKcvfPH3BkhfXPOEmPpCT8gygjwV9ImZBkplTbT9ZRs/bsOcl/Ddm+jJIsXt3XBr++pHNpoQ1+TTat43hdHmyqxGp68zxTy88f8Oq2rjS6JOmtlO7GP6FM5HIMsNAtzlGwmro+KWm91MO46Lzw1Dix/TXbL6KMAnwXZRAIsHQ06Tjr8mBTNZbtH9j+rO0Fg4m/ccqQZeu6+cDPKWMjPmb7l6wbZ6VVtkHS/bRs7gwk7SHpvyS9oul8UYLZj6oRb002reN41ST5V2D7TtvnDQy8aj2BSLpv39Oc8dQzG7/Q9wHeCuxPadY6hdL1eVyu7k+nqagraVfgU5SrnIcBbczFcP5qvnXW3icalx1jXdRFArmYMmtTJ2c8ku5ruzfxeasHmy5jDbEunDGvEdt/Ac4Fzm1GpD6NMl/wTyVdYPtZM7Rptf7vNujbH54NnGT7+KbnWxuz9c1dnTfZvrKF2J1I8m9PFwmk6zPULg82nR7YxklTruIM4Iym5MLg5Ocja/rB3+KmkJukPSh1fn4MvLfXtbTi/13/vr4npYYRTfmFNr4H99KKk8YvZfszLcTsVJL/7JabvLWCzeyVRjWS7kYZBLgd7X6/T6f0KLq1rxnm7Sxrhnl+5XgXSjoduBHYlFK/CElbAW3UZroX5epp6DgGIMk/lmkjgWj4ZDVQdspNasRYA7P2Ju9qGJcrjbOBWyn1itqY3rCn62aYl1GmVNwKeJztPzfLd6B0
aa3tx648N8e6Jsm/rjYSyMK1fG2tdHmwWccObLPxJu8w29jeZ9VvG1mnzTDNoLzToNzwlfRSStfWH1J62tX2IEmPtf21/oWSHg/cME0vsVklyb+uNnb6aftpS3pn7Xh0e7Dp9MC2CrPuJu80vi7pr2x/t+U4nTbDSNqRMrjycMpMfZ+kVChoa/a1bwK/HbL8D5SDzX4txe1Mkn9dXSeQQ4B/rrnCLg82XR/Y1rErjbY8DnhuU+DtT7RXhbLrZpjvUcqq7Gd7MYCkl7cQp2cL21cMLrS9UNJ2LcbtTJL/GlrHEkjXTRXVDzYdx1qXrjTasm8XQWagGeZgypn/RSpTVZ5Gu/v/ymYL26DFuJ1J8l9zXbfBT3cWJbpP/rO6t88MNKF1rldqWNIWtDjdYdfNMLbPBM7UsvmrXw5sKen9lLIqqzsoa3VdIuko2x/sX6gy9eillWPNiFT1rEjSO21XPVttLt9NdzMYrexg8x3b28zGWKuxLT+xPa+reG2RtD9litH7UqaqvB9lqsqq5Ub6qtse2dcM02l122b/eSZwqO09K697S+BMSq+9XrKfAtYHDvIYTOeY5F/ROCSQLg82XR/YVrEt19vetqt4bWkmWdkT+KLt3ZrBV4fbnl85zkGUM//HAL1mmA/Z3r5mnJnWfH69KSkX2b5wJrenpiT/itpIIJIGy+Ea+IXt62vGmQTr0pVGWyQttD3VHAR2a7pefsv27i3F6zXDHE456JxMO80wUVna/NfQDLTBHz9k2WZNJcPDbVcdUNPlwWYGDmyXMv2Vxqwd1TvgFpVJ1L8CfFzSTcAdbQWz/Xvg402sXjPM0UCS/zouZ/5raF1pqpA0BfyH7SdUXu9FQxZvRmnrrHqw6TLWpGjOxP9Aqdj7t5QyBR9vSjxHLJXkP4t1OWVdWwebLmNNShNaU3RtB9tflHQPYI7tYQOWYoKl2WcNrSsJpOmN0NmRuxncstEsj9VpE9pMkHQUZWKXzYAHUCYa+gDw5Jncrlj3JPmvua7b4IcNKtuM0svipTVjrWI7OjvYtBVruj7ozZXGu4HWr2o68EJgd0p5Amxf2/T5j1hOkv8amoEEMjhwzJRBNa+wfVPlWJ0ebNaVA1uXVzUd+JPt23u11ZqZvNK2GytI8q+kxQRyke2ftLDe6XR5sOn0wDadrpvQWvYlSa+lTOG4N/AC4LMzvE2xDsoN30qaBHKO7UdUXu/Sm7qSPm374JrrHxJvXlcHmy5jNfFWeqVhe9Ynyaae/pHAUyg90s6jDL7KFz2Wk+S/hrpOIJIus73b4OO2dHmwmYED2xEDi3pXGpd0eaURsS5Is8+a67qpwtM8bkv/+IW2xyx0GQu6b0LrjKQVyg/3a6Gkc8xySf5rrusE8jBJv6Ekyg2ax7CsTvvGleN1ebDp+sB2FtDZlUbH7qR8hp+gtPH/YWY3J9Z1afZZQ103VXRN0l+A39McbIDbei9R+WDTZawmXqdNaF2T9GBKjZ39gKsoB4LzbbdW3iFmr5z5r7mumyo6ZXvOOMbqhZzm8Viw/T3gTcCbJB0KfBQ4FjhuRjcs1klJ/mturBPImOu6Ca1TkramlFk+CPg1ZcKTM2d0o2KdlWafNdR1U0XE6pD0JeCewOnAGcCv+l+3/athvxeTK8k/YgxI+hHLrkT7v9S9k5Kxa6KM0ST5R0wQSTvbXjTT2xEzb72Z3oCI6NQpM70BsW5I8o+YLG3MNhezUJJ/xGRJO28ASf4RERMpyT9isozLRPUxovT2iRgDzby9t9i+tXm+B3Ag8GPgvbaT9GM5OfOPGA+nAxsCSNoV+BTwE+BhwPtmcLtiHZXyDhHjYQPbNzSPnw2cZPv4ZnKXWT8xfdSXM/+I8dDfhXNP4AIA23eS7p0xRM78I8bDhZJOB24ENgUuBJC0FfDHmdywWDcl+UeMh5cBhwJbAY+z/edm+Q6UaUYjlpPePhFjprnh+yzgEOCHwGdsv2dmtyrWNTnzjxgDknak1PI/
nDKn9CcpJ3d7zOiGxTorZ/4RY0DSncBXgCNtL26WXZdSzjGd9PaJGA8HAz8DLpL0QUlPJr18YiVy5h8xRiRtSBnZezily+fJwJm2z5/RDYt1TpJ/xJiStBnwTOBQ23vO9PbEuiXJPyJiAqXNPyJiAiX5R0RMoCT/iIgJlOQfETGBkvwjIibQ/weuDEpH0Hgi/gAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "Train.corr(method='pearson')['CLASS'].plot(kind='bar', color=('red'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###### Most of the attributes are negatively correlated to class" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### FULL_Charge and AS_MeanAmphiMoment have the highest positive correlation values with CLASS whereas FULL_AcidicMolPerc, FULL_AURR980107 and FULL_DAYM780201 have the most negative correlation values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Classification\n", + "Train.groupby('CLASS').size().plot(kind='bar')\n", + "Train.groupby('CLASS').size()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Both classes have the same number of entries: 1519 each" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data With Visualization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Univariate Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#DataFrame.hist() creates its own figure, so the size is passed to it directly\n", + "#instead of opening a separate (empty) figure with plt.figure()\n", + "Train.hist(figsize=(20, 20))\n", + "plt.tight_layout() #keeps the subplot titles and tick labels from overlapping\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### The above shows CLASS and NT_EFC195 are categorical variables. 
CLASS is uniformly distributed and NT_EFC195 is not equally distributed" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Density Plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Train.plot(kind='density', subplots=True, layout=(4,3), figsize=(10,10))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Values for AS_FUK010112, CT_RACS820104, FULL_GEOR030101 and FULL_AURR980107 lie close to zero compared to the rest of the variables." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Preparing data for machine learning" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data transformation using MinMaxScaler to set all values between 0 and 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##Rescaling \n", + "from numpy import set_printoptions\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "\n", + "###rescaling the data\n", + "array = Train.values\n", + "# separate array into input and output components\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "##Scaling the data so that it's within the range of 0 and 1\n", + "scaler = MinMaxScaler(feature_range=(0, 1))\n", + "rescaledX = scaler.fit_transform(X)\n", + "# summarize transformed data\n", + "set_printoptions(precision=3) #number of decimal points\n", + "print(rescaledX[0:11,:])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Feature selection\n", + "> `I have used Recursive Feature Elimination (RFE)`\n", + "`RFE` works by recursively removing attributes and building a model on those that remain. It uses the model accuracy to identify the attributes, and their combinations, that contribute the most to predicting the target attribute." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#feature selection using recursive feature elimination\n", + "\n", + "from sklearn.feature_selection import RFE\n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "# feature extraction\n", + "model = LogisticRegression()\n", + "#n_features_to_select must be passed by keyword (it is keyword-only in current scikit-learn)\n", + "rfe = RFE(model, n_features_to_select=4)\n", + "\n", + "#I chose RFE because it eliminates worst performing features\n", + "fit = rfe.fit(rescaledX, Y)\n", + "print(\"Num Features: \", fit.n_features_)\n", + "print(\"Selected Features:\", fit.support_)\n", + "print(\"Feature Ranking: \", fit.ranking_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### I tried to test for the performance of the model with more features and the results hardly changed.\n", + "#I used 11 then 7, and finally 4 features. I selected the features randomly.\n", + "#As a trade-off for faster performance, I decided to go with 4 features.\n", + "* #Accuracy at 11 =91.72482552342971\n", + "* #Accuracy at 7 =91.72482552342971\n", + "* #Accuracy at 4 =91.72482552342971" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extracting features of interest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#NOTE: the selected columns are only displayed here; the result is not assigned to a variable,\n", + "#so it is not used by any later cell\n", + "rescaledX[:,fit.support_] \n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Algorithms" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using Logistic Regression on the rescaled data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Splitting data into Train and Test Sets\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression \n", + "\n", + "test_size = 0.33 #Size of the test data\n", + "seed = 7\n", + "rescaledX_train, 
rescaledX_test, Y_train, Y_test = train_test_split(rescaledX, Y, test_size=test_size,\n", + "random_state=seed)\n", + "model = LogisticRegression() #Using Logistic Regression\n", + "model.fit(rescaledX_train, Y_train)\n", + "result = model.score(rescaledX_test, Y_test)\n", + "print(\"Accuracy: \", (result*100.0))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using Logistic Regression on unscaled data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "###Algorithm used on unscaled data\n", + "#Using Logistic Regression\n", + "#Splitting data into Train and Test Sets\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression \n", + "\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "test_size = 0.33 #Size of the test data\n", + "seed = 7\n", + "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size,\n", + "random_state=seed)\n", + "model = LogisticRegression() #Using Logistic Regression\n", + "model.fit(X_train, Y_train)\n", + "result = model.score(X_test, Y_test)\n", + "print(\"Accuracy: \", (result*100.0))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Rescaled data gives an accuracy of 91.72482552342971 and unscaled data with all the features gives 91.92422731804587. 
I have used unscaled data for the rest of the algorithms below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##Use the test dataset to see how the algorithm is performing\n", + "out = model.predict(Test.values)\n", + "\n", + "out1 = pd.DataFrame(out) #Converting to data frame\n", + "out1.columns=[\"CLASS\"] #Naming the column\n", + "out1.index.name=\"Index\" #Creating a column index\n", + "out1[\"CLASS\"]=out1[\"CLASS\"].map({0.0:False,1.0:True}) # Changing 0 to \"False\" 1 to \"True\"\n", + "\n", + "out1.to_csv(\"talz_csv3\") ## Writing a csv file\n", + "print(out1['CLASS'].unique())\n", + "print(out1['CLASS'].nunique())\n", + "\n", + "#printing the numbers of False and True\n", + "#counting on the boolean column directly also works when only one class was predicted\n", + "print((~out1['CLASS']).sum()) #number of False\n", + "print(out1['CLASS'].sum()) #number of True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### False has 383 instances and True has 375 instances" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Naive Bayes Algorithm\n", + "\n", + "### Assumes that all features are independent of each other and each feature contributes equally to the resulting class" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "from sklearn.naive_bayes import GaussianNB\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "#random_state only has an effect when shuffle=True (current scikit-learn raises an error otherwise)\n", + "kfold = KFold(n_splits=10, shuffle=True, random_state=7)\n", + "model = GaussianNB()\n", + "model.fit(X, Y)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##Use the test dataset to see how the algorithm is performing\n", + "f = model.predict(Test.values)\n", +
"\n", + "f1 = pd.DataFrame(f) #Converting to data frame\n", + "f1.columns=[\"CLASS\"] #Naming the column\n", + "f1.index.name=\"Index\" #Creating a column index\n", + "f1[\"CLASS\"]=f1[\"CLASS\"].map({0.0:False,1.0:True}) # Changing 0 to \"False\" 1 to \"True\"\n", + "\n", + "f1.to_csv(\"talz_csv8\") ## Writing a csv file\n", + "print(f1['CLASS'].unique())\n", + "print(f1['CLASS'].nunique())\n", + "\n", + "#printing the numbers of False and True\n", + "print((~f1['CLASS']).sum()) #number of False\n", + "print(f1['CLASS'].sum()) #number of True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Classification and Regression Trees" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Classification and Regression Trees\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "kfold = KFold(n_splits=10, shuffle=True, random_state=7)\n", + "model = DecisionTreeClassifier(random_state=7) #seeded so that the tree is reproducible\n", + "model.fit(X, Y)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fd = model.predict(Test.values)\n", + "\n", + "fd1 = pd.DataFrame(fd) #Converting to data frame\n", + "fd1.columns=[\"CLASS\"] #Naming the column\n", + "fd1.index.name=\"Index\" #Creating a column index\n", + "fd1[\"CLASS\"]=fd1[\"CLASS\"].map({0.0:False,1.0:True}) # Changing 0 to \"False\" 1 to \"True\"\n", + "\n", + "fd1.to_csv(\"talz_csv9\") ## Writing a csv file\n", + "print(fd1['CLASS'].unique())\n", + "print(fd1['CLASS'].nunique())\n", + "\n", + "#printing the numbers of False and True\n", + "print((~fd1['CLASS']).sum()) #number of False\n", + "print(fd1['CLASS'].sum()) #number of True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Support Vector Machines (SVM)" + ] + }, + { +
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.svm import SVC\n", + "\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "kfold = KFold(n_splits=10, shuffle=True, random_state=7)\n", + "model = SVC()\n", + "model.fit(X, Y)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "svm = model.predict(Test.values)\n", + "\n", + "svm1 = pd.DataFrame(svm) #Converting to data frame\n", + "svm1.columns=[\"CLASS\"] #Naming the column\n", + "svm1.index.name=\"Index\" #Creating a column index\n", + "svm1[\"CLASS\"]=svm1[\"CLASS\"].map({0.0:False,1.0:True}) # Changing 0 to \"False\" 1 to \"True\"\n", + "\n", + "svm1.to_csv(\"talz_csv10\") ## Writing a csv file\n", + "print(svm1['CLASS'].unique())\n", + "print(svm1['CLASS'].nunique())\n", + "\n", + "#printing the numbers of False and True\n", + "print((~svm1['CLASS']).sum()) #number of False\n", + "print(svm1['CLASS'].sum()) #number of True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Linear Discriminant Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "num_folds = 10\n", + "kfold = KFold(n_splits=num_folds, shuffle=True, random_state=7)\n", + "model = LinearDiscriminantAnalysis()\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## K-Nearest Neighbors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsRegressor\n", + "\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "kfold = KFold(n_splits=10, 
random_state=7)\n", + "model = KNeighborsRegressor()\n", + "scoring = 'neg_mean_squared_error'\n", + "results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ridge Regression" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import Ridge\n", + "\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "num_folds = 10\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = Ridge()\n", + "scoring = 'neg_mean_squared_error'\n", + "results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Random Forest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Random Forest Classification\n", + "from pandas import read_csv\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "array = Train.values\n", + "\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "num_trees = 1000\n", + "\n", + "max_features = 3\n", + "\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = RandomForestClassifier(n_estimators=num_trees, max_features=max_features)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Stochastic Gradient Boosting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Stochastic Gradient Boosting Classification\n", + "from pandas import read_csv\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection 
import cross_val_score\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "\n", + "array = Train.values\n", + "\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "seed = 7\n", + "num_trees = 100\n", + "\n", + "kfold = KFold(n_splits=10, random_state=seed)\n", + "model = GradientBoostingClassifier(n_estimators=num_trees, random_state=seed)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## XGB" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Stochastic X Gradient Boosting Classification\n", + "from pandas import read_csv\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "from xgboost import XGBClassifier\n", + "\n", + "array = Train.values\n", + "\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "seed = 7\n", + "num_trees = 100\n", + "\n", + "kfold = KFold(n_splits=10, random_state=seed)\n", + "model = XGBClassifier(n_estimators=num_trees, random_state=seed)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## # Comparing the Algorithms used" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from matplotlib import pyplot\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "from sklearn.naive_bayes import GaussianNB\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import 
RandomForestClassifier\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "from xgboost import XGBClassifier\n", + "# load dataset\n", + "\n", + "array = Train.values\n", + "\n", + "#split the dataset \n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "# prepare models and add them to a list\n", + "models = []\n", + "models.append(('LR', LogisticRegression()))\n", + "models.append(('CART', DecisionTreeClassifier()))\n", + "models.append(('NB', GaussianNB()))\n", + "models.append(('SVM', SVC()))\n", + "models.append(('LDA', LinearDiscriminantAnalysis()))\n", + "models.append(('DTC', DecisionTreeClassifier()))\n", + "models.append(('KNN', KNeighborsClassifier()))\n", + "models.append(('RFC', RandomForestClassifier()))\n", + "models.append(('GBC', GradientBoostingClassifier()))\n", + "models.append(('XGB', XGBClassifier()))\n", + "# evaluate each model in turn\n", + "results = []\n", + "names = []\n", + "scoring = 'accuracy'\n", + "\n", + "for name, model in models:\n", + " kfold = KFold(n_splits=10, random_state=7)\n", + " cv_results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)\n", + " results.append(cv_results)\n", + " names.append(name)\n", + " msg = (name, cv_results.mean(), cv_results.std())\n", + " print(msg)\n", + "\n", + "# boxplot algorithm comparison\n", + "fig = pyplot.figure()\n", + "fig.suptitle('Algorithm Comparison')\n", + "ax = fig.add_subplot(111)\n", + "pyplot.boxplot(results)\n", + "ax.set_xticklabels(names)\n", + "pyplot.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " ### Naive Bayes gives the best accuracy. 
However, other metrics, such as the Matthews correlation coefficient, area under the ROC curve, or logarithmic loss, should also be used to measure model performance, since a single metric is not enough to choose the best model" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Assignment Colab/Talent Paul.ipynb b/Assignment Colab/Talent Paul.ipynb index 648ba8b..caad52e 100644 --- a/Assignment Colab/Talent Paul.ipynb +++ b/Assignment Colab/Talent Paul.ipynb @@ -1866,7 +1866,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.6.5" }, "varInspector": { "cols": { diff --git a/Assignment Colab/Talent_Paul.ipynb b/Assignment Colab/Talent_Paul.ipynb new file mode 100644 index 0000000..9f51018 --- /dev/null +++ b/Assignment Colab/Talent_Paul.ipynb @@ -0,0 +1,950 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Talent Paul" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing packages to use" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", + "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/kaggle/input/ace-class-assignment/Test.csv\n", + "/kaggle/input/ace-class-assignment/AMP_TrainSet.csv\n" + ] + } + ], + "source": [ + "\n", + "\n", + "import numpy as np # linear algebra\n", + "import pandas as pd # data processing, CSV file 
I/O (e.g. pd.read_csv)\n", + "import matplotlib.pyplot as plt # Plotting\n", + "\n", + "import os\n", + "for dirname, _, filenames in os.walk('/kaggle/input'):\n", + " for filename in filenames:\n", + " print(os.path.join(dirname, filename))\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suppressing warnings that may arise" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reading the datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "### Loading the data\n", + "Train = pd.read_csv(\"../input/ace-class-assignment/AMP_TrainSet.csv\")\n", + "Test = pd.read_csv(\"../input/ace-class-assignment/Test.csv\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Looking at the first five rows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Train.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Checking the dimensions of the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#This returns the number of rows and columns\n", + "\n", + "Train.shape\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The data has 3038 rows and 12 columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Looking at missing values in the dataset\n", + "Train.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### The above output shows there are no missing values in the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "##Exploring the data and its data types\n", + "Train.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The data types of the Train dataset are float and int" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Descriptive statistics\n", + "### The statistical summary shows the count for each attribute, the mean, standard deviation, the minimum value for numerical attributes, the 25th, 50th and 75th percentiles for each numeric attribute and the maximum value. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#\n", + "Train.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### AS_DAYM780201 and FULL_DAYM780201 have the highest mean and highest maximum. FULL_OOBM850104 has a negative mean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Correlation\n", + "> Correlation helps us determine how the different attributes relate to each other. It shows which attributes to select, especially in the presence of highly correlated attributes, in which case one of the two attributes would be sufficient." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Looking at the correlation of \"CLASS\" with other attributes\n", + "#Train.corr(method='pearson')\n", + "\n", + "Train.corr()['CLASS']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualising Correlation of different attributes with the CLASS attribute" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAFoCAYAAAC/l/tEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xm8JFV9/vHPwyiKIAIyIAIjqKCCQdAb3BcQDPiTTZTFaNAgk8R9iRH3BFeCJEZFDSoRUUFEgdGAoEDcUQZBZEBkxIURFFzABRWR5/fHqZ7p6ek7W5+qO7f7eb9e9zXd1X3rW7en+ltVp875HtkmIiImy3ozvQEREdG9JP+IiAmU5B8RMYGS/CMiJlCSf0TEBEryj4iYQEn+ERETKMk/ImICJflHREygu8z0Bkxn880393bbbTfTmxERMatceumlv7A9d1XvW2eT/3bbbcfChQtnejMiImYVST9enfel2SciYgIl+UdETKAk/4iICZTkHxExgZL8IyImUJXkL+kkSTdJunKa1yXp3ZIWS7pC0sNrxI2IiLVT68z/I8A+K3l9X2CH5mc+8P5KcSMiYi1USf62vwz8aiVvOQD4qIuLgU0kbVUjdkRErLmu2vy3Bq7ve76kWbYcSfMlLZS08Oabb+5o0yIiZoi0dj8VdJX8h23tCjPH2z7R9pTtqblzVzk6OSIi1lJXyX8JsG3f822AGzqKHRERA7pK/guAv2t6/TwKuNX2jR3FjoiIAVUKu0k6FXgSsLmkJcCbgLsC2P4AcA7wVGAxcBvwvBpxIyJi7VRJ/rYPX8XrBl5YI1ZERIwuI3wjIiZQkn9ExARK8o+ImEBJ/hEREyjJPyJiAiX5R0RMoCT/iIgJlOQfETGBkvwjIiZQkn9ExARK8o+ImEBJ/hEREyjJPyJiAiX5R0RMoCT/iIgJVCX5S9pH0jWSFks6esjr8yRdJOkySVdIemqNuBERsXZGTv6S5gAnAPsCOwGHS9pp4G2vB063vRtwGPC+UeNGRMTaq3Hmvzuw2PZ1tm8HTgMOGHiPgY2bx/cik7dHRMyoGtM4bg1c3/d8CfDIgff8K3C+pBcDGwJ7VYgbERFrqcaZv4Ys88Dzw4GP2N6GMpH7KZJWiC1pvqSFkhbefPPNFTYtIiKGqZH8lwDb9j3fhhWbdY4ETgew/Q3g7sDmgyuyfaLtKdtTc+fOrbBpERExTI3kfwmwg6TtJa1PuaG7YOA9PwGeDCDpIZTkn1P7iIgZMnLyt30H8CLgPOBqSq+eRZKOkbR/87ZXAkdJ+g5wKvBc24NNQxER0ZEaN3yxfQ5wzsCy
N/Y9vgp4bI1YERExuozwjYiYQEn+ERETKMk/ImICJflHREygJP+IiAmU5B8RMYGS/CMiJlCSf0TEBKoyyCsiZpCG1VZcDRlkP9Fy5h8RMYGS/CMiJlCSf0TEBEryj4iYQEn+ERETKMk/ImICJflHREygKslf0j6SrpG0WNLR07znEElXSVok6RM14kZExNoZeZCXpDnACcDelMncL5G0oJm9q/eeHYDXAI+1/WtJW4waNyIi1l6NM//dgcW2r7N9O3AacMDAe44CTrD9awDbN1WIGxERa6lG8t8auL7v+ZJmWb8dgR0lfU3SxZL2qRA3IiLWUo3aPsMKiwwWDbkLsAPwJGAb4CuSHmr7luVWJM0H5gPMmzevwqZFRMQwNc78lwDb9j3fBrhhyHvOtv1n2z8ErqEcDJZj+0TbU7an5s6dW2HTIiJimBrJ/xJgB0nbS1ofOAxYMPCes4A9ACRtTmkGuq5C7IiIWAsjJ3/bdwAvAs4DrgZOt71I0jGS9m/edh7wS0lXARcBr7L9y1FjR0TE2pHX0ZreU1NTXrhw4UxvRsS6L/X8Z68W/u8kXWp7alWryAjfiIgJlOQfETGBkvwjIibQ7JzDN22cEREjyZl/RMQESvKPiJhASf4RERMoyT8iYgIl+UdETKAk/4iICZTkHxExgZL8IyImUJJ/RMQESvKPiJhASf4RERMoyT8iYgJVSf6S9pF0jaTFko5eyfueIcmSVjnRQEREtGfk5C9pDnACsC+wE3C4pJ2GvO+ewEuAb44aMyIiRlPjzH93YLHt62zfDpwGHDDkfW8G/h34Y4WYERExghrJf2vg+r7nS5plS0naDdjW9udWtiJJ8yUtlLTw5ptvrrBpERExTI3kP2xmlaWzpkhaD/hP4JWrWpHtE21P2Z6aO3duhU2LiIhhaiT/JcC2fc+3AW7oe35P4KHA/0n6EfAoYEFu+kZEzJwayf8SYAdJ20taHzgMWNB70fattje3vZ3t7YCLgf1tL6wQOyIi1sLIyd/2HcCLgPOAq4HTbS+SdIyk/Uddf0RE1FdlAnfb5wDnDCx74zTvfVKNmBERsfYywjciYgIl+UdETKAk/4iICZTkHxExgZL8IyImUJJ/RMQESvKPiJhASf4RERMoyT8iYgIl+UdETKAk/4iICVSltk9ExFjQsOlJVoO96vesY5L8I2qboAQSs1eafSIiJlCSf0TEBKqS/CXtI+kaSYslHT3k9VdIukrSFZIukHS/GnEjImLtjJz8Jc0BTgD2BXYCDpe008DbLgOmbO8CnAH8+6hxIyJi7dU4898dWGz7Otu3A6cBB/S/wfZFtm9rnl5MmeQ9IiJmSI3kvzVwfd/zJc2y6RwJnFshbkRErKUaXT2H9Wsb2mdN0rOBKeCJ07w+H5gPMG/evAqbFhERw9Q4818CbNv3fBvghsE3SdoLeB2wv+0/DVuR7RNtT9memjt3boVNi4iIYWok/0uAHSRtL2l94DBgQf8bJO0G/Dcl8d9UIWZERIxg5ORv+w7gRcB5wNXA6bYXSTpG0v7N244DNgI+JelySQumWV1ERHSgSnkH2+cA5wwse2Pf471qxImIiDoywjciYgIl+UdETKAk/4iICZTkHxExgZL8IyImUJJ/RMQESvKPiJhASf4RERMoyT8iYgIl+UdETKAk/4iICZTkHxExgZL8IyImUJJ/RMQESvKPiJhASf4REROoSvKXtI+kayQtlnT0kNfvJumTzevflLRdjbgRMQGkNf+JVRo5+UuaA5wA7AvsBBwuaaeBtx0J/Nr2A4H/BI4dNW5ERKy9Gmf+uwOLbV9n+3bgNOCAgfccAJzcPD4DeLKUw3NExEypkfy3Bq7ve76kWTb0Pc2E77cC964QOyIi1kKNCdyHncF7Ld6DpPnAfIB58+ZNH9Er/Gq71vYiZW23M/Fmd7yu989x/z50+feN+/9dnxpn/kuAbfuebwPcMN17JN0FuBfwq8EV2T7R9pTtqblz51bY
tIiIGKZG8r8E2EHS9pLWBw4DFgy8ZwFwRPP4GcCF9gwe8iIiJtzIzT6275D0IuA8YA5wku1Fko4BFtpeAHwYOEXSYsoZ/2Gjxo2IiLVXo80f2+cA5wwse2Pf4z8Cz6wRKyIiRpcRvhEREyjJPyJiAiX5R0RMoCT/iIgJlOQfETGBkvwjIiZQkn9ExARK8o+ImEBJ/hEREyjJPyJiAiX5R0RMoCT/iIgJlOQfETGBkvwjIiZQkn9ExARK8o+ImEAjJX9Jm0n6gqRrm383HfKeXSV9Q9IiSVdIOnSUmBERMbpRz/yPBi6wvQNwQfN80G3A39neGdgHeJekTUaMGxERIxg1+R8AnNw8Phk4cPANtr9v+9rm8Q3ATcDcEeNGRMQIRk3+W9q+EaD5d4uVvVnS7sD6wA9GjBsRESNY5QTukr4I3GfIS69bk0CStgJOAY6wfec075kPzAeYN2/emqw+IiLWwCqTv+29pntN0s8lbWX7xia53zTN+zYG/hd4ve2LVxLrROBEgKmpKa9q2yIiYu2M2uyzADiieXwEcPbgGyStD5wJfNT2p0aMFxERFYya/N8B7C3pWmDv5jmSpiR9qHnPIcATgOdKurz52XXEuBERMYJVNvusjO1fAk8esnwh8Pzm8ceAj40SJyIi6soI34iICZTkHxExgUZq9olYK05HroiZljP/iIgJlOQfETGBkvwjIiZQkn9ExARK8o+ImEBJ/hEREyjJPyJiAiX5R0RMoCT/iIgJlOQfETGBkvwjIiZQkn9ExARK8o+ImEAjJX9Jm0n6gqRrm383Xcl7N5b0U0nvHSVmRESMbtQz/6OBC2zvAFzQPJ/Om4EvjRgvIiIqGDX5HwCc3Dw+GThw2JskPQLYEjh/xHgREVHBqMl/S9s3AjT/bjH4BknrAccDr1rVyiTNl7RQ0sKbb755xE2LiIjprHImL0lfBO4z5KXXrWaMFwDn2L5e0krfaPtE4ESAqampTPcUEdGSVSZ/23tN95qkn0vayvaNkrYCbhrytkcDj5f0AmAjYH1Jv7O9svsDERHRolHn8F0AHAG8o/n37ME32P7b3mNJzwWmkvgjImbWqG3+7wD2lnQtsHfzHElTkj406sZFREQ7ZK+bTetTU1NeuHDhTG9GsYp7FdNa28923ON1bdz/vog+ki61PbWq92WEb0TEBBq1zT9i3Zcz+IgV5Mw/ImICJflHREygJP+IiAmU5B8RMYGS/CMiJlCSf0TEBEryj4iYQEn+ERETKMk/ImICJflHREygJP+IiAmU2j6R2jcREyhn/hERE2ik5C9pM0lfkHRt8++m07xvnqTzJV0t6SpJ240SNyIiRjPqmf/RwAW2dwAuaJ4P81HgONsPAXZn+Fy/ERHRkVGT/wHAyc3jk4EDB98gaSfgLra/AGD7d7ZvGzHueLPX7iciYjWNmvy3tH0jQPPvFkPesyNwi6TPSLpM0nGS5owYNyIiRrDK3j6SvgjcZ8hLr1uDGI8HdgN+AnwSeC7w4SGx5gPzAebNm7eaq+9AzqojYsysMvnb3mu61yT9XNJWtm+UtBXD2/KXAJfZvq75nbOARzEk+ds+ETgRygTuq/cnRETEmhq12WcBcETz+Ajg7CHvuQTYVNLc5vmewFUjxo2IiBGMmvzfAewt6Vpg7+Y5kqYkfQjA9l+AfwYukPRdQMAHR4wbEREjGGmEr+1fAk8esnwh8Py+518AdhklVkRE1JMRvhEREyjJPyJiAiX5R0RMoCT/iIgJlOQfETGB5HV09Kqkm4Efr8Wvbg78ovLmJF7iJV7izZa/7X62567qTets8l9bkhbankq8xEu8xJutsbqIl2afiIgJlOQfETGBxjH5n5h4iZd4iTfLY7Ueb+za/CMiYtXG8cw/IiJWIck/ImICJflHREygsUn+Kp4t6Y3N83mSdp/p7RqVpE1meht6JO0909tQm6T9Z3obapJ0yuosa3kbxm4/6Zqku0raTdKwedGrGJvkD7wP
eDRwePP8t8AJbQWT9Lb+xCxpU0lvaSHULyR9UdKR68CBYIWpN9vSO4hXXufTB34OBk7sPW8h3uYDz58t6d2S5ktS7XiNnQdizgEe0VKs6VTfTyT9laSLJV0v6URJm/a99q3a8abZhu+3uO4PSNq5eXwv4DvAR4HLJB2+0l9e25jj0ttH0rdtP1zSZbZ3a5Z9x/bDWoq3NM7gNlSO813gNZSD2j7AV4FTgbNt/6FmrCbeguleAva0vWHtmNNsx09sz6u8zjuAz1Pmmu4l32cAZwC2/feV4y3dHyS9Hng88AngacAS2y+vGOs1wGuBDYDbeouB24ETbb+mVqwmXqf7iaSvAm8BLqZMFPU8YH/bPxj2XawQ77dALzn29pV7UD5b2964crxFtnvJ/2XAk2wfKOk+wLm1/z4YcSavdcyfm7McAzRzBt/ZYrw5ku5m+09NvA2Au7UQ58+2Pwd8romxH3AYcIKk82w/q3K8xwPPBn43sFxA1WY0Sb+Z7iVKEqvt0ZSpRi8BPmDbkp5k+3ktxIJlSQPg6cDjbf9e0ieAb9cMZPvtwNslvb12op9GZ/tJYyPbn28ev1PSpcDnJT2HZUm6po8A9wJeZfvnAJJ+aHv7FmJBOUj37A18CsD2z9q6SByn5P9u4ExgC0lvpZzRvb7FeB+jzEv8P5Sd7++Bk1uIs/R/vjnTPx04vbk0PLCFeBcDt9n+0gobIl1TOdYtwF/3vlwDsa6vHAvblzTt0S8GLpT0atpJHD0bSNqN0rw6x/bvm+34s6S/tBHQ9mskbQ3cj77vt+0vVw7V5X7SrFb3sn0rgO2Lmma7TwOb1Q5m+8WSHgGcKuks4L20u6/cIulpwE+BxwJHAki6C+2cCI1Psw+ApAdT5hQWcIHtq1uOtw+wVxPvfNvntRDjn22/s/Z61wXNPZIFtldos5V0rO1Xtxh7a+A/gSnb928pxkUDi55l+0ZJ9wbOa6Nol6R3UK4MrwJ6BxjbntU3tiU9C7jO9sUDy+cBb7B9VEtx1wNeBDwTeIDt+7YUZ0fKCex9gHfZ/kiz/G+Ap9h+ZfWY45L8JQ07+v/W9p9biDWH8uXdq/a61xWStgS2ppzt3DDs7DzWTrP/3M32bat885qv+xpgl15zZNsmZT+RtBWwm+1zZnpbahmnZp9vA9sCv6aciW8C3CjpJuAo25fWCmT7L5Ju678MbUvTvPMaShNPr0b3TcDZwDts31I53m7A+yntnT9tFm8j6RbgBbartlU3f98+9CUQyoG16t/VF6uzz7Iv7hRl37wDuNb291h2U7a264C7Aq0m/673k1Vsy4m257ew3gcDB9C3bzbt/tVbFCQdBfyf7WubnmAnAQcDPwKOsH1Z7ZjjlPw/D5zZa3qR9BRKUjmd0g30kZXj/RH4rqQvAL/vLbT9kspxTgcupNz9/xlA0wPgCMpNodp9qv8H+Afb3+xfKOlRzWvVek9J+jvgTcD5LEsgewBvk/Rvtj9aK1aj089S0hOB4yn3Nh4BfA3YVNKfgefYrn5fg3JQuVzSBfQdAFrYLzvbT5r1TteuL+CpNWM18V5N6WF3GtBrltyGcg/gNNvvqBzypZSbzDRxdwG2B3ajNAc9vnK8sWr2WWHig94ySZfb3rVyvCOGLbdd9aavpGtsP2hNXxsh3rW2d5jmtcW2H1gx1jXAIwfPuJs+3N+0vWOtWL14HX+Wl1Haa2+WtD3wH7YPam46v8r2U2rGa2J2tV92tp806/wLZWa//q4vbp5vbXv9yvG+D+w82GwsaX1g0XR/+wjxluaopjfYN23/V/O8ehdyGK8z/181R+vTmueHAr9u2lerd/m0fXLT9XKe7TZ6N/T8WNK/ACf3dTnbEngu0MaZ47mS/pcywKS3/m2Bv6NcXdUkhveguJPlv+S1dP1ZzrF9c/P4J5QeONj+gqR3tRCvy/2yy/0ESnPWk23/ZPCFNnqGUfbB+7LiVLJb0U4X8jub+wq/pnRaeWvfa6309hmn
5P8sShPCWc3zrzbL5gCH1A4maT/gncD6wPaSdgWOaaFXxaHA0cCXtGyo98+BBbTwd9l+iaR9WdbWKWAJcEILN7veCnxb0vksSyDzKM0vb64cCzr+LIGFkj4MXED5PP8PQNI9KPtldV3tlx3vJwDvAjalHEQH/XsL8V5G6cp9Lcvvmw+k9P6p7Y3AQsp+scD2IljadHhdC/HGo9mnObt/h+1XdRjzUmBPyk2a3oji79r+q662YRw0TTx/w/IJ5Dzbv57RDatA0l2Bo4CdKMP1T2o6C2wAbGF78KyyRszsl5U03Tx3Z/l98xLbrYzRaPr037N/3++dKNj+be14Y3Hm33yhuq5fcoftW7X86LtOj6SSnmf7fyqvcw5l+Pw2lGHlX+977fW2q9Yvanb001b5xpa18Vk27cXvG7L8D6zYnFBLJ/tl1/vJKrZlb9tfqL1e23dSBrMNxtvI9uDI5hrx7qA0+9D0+NmD0nqxH7Bl7XjjVNjtMkkLJD1HfcW7Wox3pcrAkzmSdpD0HuDrq/qlyv6thXX+N/BE4JfAeyT9R99rbX6ey1GpadSlNj7LaUk6t6VVd7VfrhP7SaOzgoONq9pasaRHSvovysnBAuArwINbiTUOzT4AKmUWBtmVi3X1xbsH8Dqg12PjPOAttv9YOc4V070E7Gi7aj0hSVfY3qV5fBfKmevmlO5nF7tigamVHJxFqb0zd5rX1zZe15/ldD00BHzO9lY14zUx+/dLUfbLN7exX3a1nzQxui4k94qVxHud7aolJVRK0hxCuadxKqVUzUK3V0tofJJ/l1SKxt0PWNzWwKC+WD+ntIkPtoEL+Hrt4eaSvmf7wQPL3thswxY1u7g1/d0/zvBmiWfYvmetWE28rj/LvwBfYnjPpUfZbqUXRxe63E+adf+a6QvJfdJ21WYRSX8EjqMMzBv0cttVy6tLuhm4hnJj+3O2/yjpOrdUegTGpM0fQNLdKcWQdgbu3lte+8xf0vOBtwE/oPSmmG97urOSGj5HqWh4+ZBt+b8W4i2UtI+XVVDE9jGSbqCM6KzpCuCdtq8cfEFSG6Uzuv4sr6YMhLp2SLw2uif2RhO/FtiO5Qu77VI5VJf7CXRfSO7bwFkeUhmgyQG13YdytXY48C6VulAbSLpLcy+gurE585f0KeB7lBskxwB/C1xt+6WV41wJ7NEM3Lk/8HHbj64ZY1JIejzw42n6bk/ZXjgDm1WNpGcA3x3W317SgbbPGvJro8a8BngV8F36+qO30bNonEl6EPCrvnEa/a9t6RZrGDUnsk+jHAgeRylSWbt0+1gl/8ts79Zri2y62Z1ne8/KcZYbbTf4vA3Nnf9el7Ne/ZtvuaX/PA2paULpe9xqldQudP1Zdk3SV20/rqNYne8nmpBCcj2SNqbUJju++rrHZJ9H0rds7y7py8ALgJ9RvtRV28xUCsX1d008rP+5K9dQUalR9D7gWvoKaFEGm7zA9vmV4/XXNFnSF+8woGpNk+ZG4ZHAQZTRlL0Ecjbw4cGh9RXidf1ZvgK41faHB5a/mNJ3u/ooX0lPpvz/Ddb2+UzlOJ3tJ028oYXkKHWT2io42HkRwGm2pfqsdjBeyf/5lIkddqEUltoIeKPtD1SOM7R2So/r11C5GtjX9o8Glm8PnGP7IZXjdVbTRNKplC/vySyfQI4ANrN9aK1YTbyuP8srgYfbvn1g+d0og4Vqt8Mj6WOUroGLWNbsU73XW5f7SbPey5m+kNx/u/J0rZLOoxQBPNkrFgHcy3Znk9RLut72trXXOzY3fG1/qHn4JaC1O+S1k/tquAvLEmO/n1JK99bWZU2Th3vFYmpLgIvVzmTZXX+WHkz8zcI/Nc1PbXiYuxnN23Xtmw0HEz+A7YsltTGv9Ha2jx2I9TPgWEmtdB9fiVbO0Mcm+TdnUwezYi+HYyrH+Swr+c9w/do+JwGXSDqN5WuMHEo7g1u6rGnya0nPBD7djKbsDal/Jit2x6xh2Ge5LaWpopWB
QsNuDjbt1m25WNJOtlsbiNTouvZN14XkfqwOiwCqDGoclldEC6N7YbyafT4P3ApcyrLp66h9o0Sl0NK0hnVFqxBzJ2B/lq8xsqCtL7g6qmkiaTvgWEotmv5JeC4Ejrb9w5rxmpgPYcViZK18lirzFbwEeCXLJmx/BKUQ2QltXEU2TVsPAH5IafMX5QqkjSamrmvfDCskt8AtFJJTqTl1dBNvsAjgsbZ/VTneDpQkP3hguR/lxvbimvFgvJL/lbYf2nHM9YFezflrat+gXEnczW3/osX1zwN+Y/uWJkFPUbrNLmox5r0p+2Nrf9dMaBLW0cBDKWd2iyg3DFsp7yDpfsOWt9HVcyb2k3El6XPAa21fMbB8CniT7f1qxxyn2j5fl9RZ5UJJT6L0GjmB0oPk+5Ke0EKcfSX9UNJXJe0maRHwTUlLmp4dteMdTblvcnFzE/3zwL7A6Zp+yPso8XaX9Ne2fwlsIekVTcKsTtI+fY/vJelDkq6Q9Im2mmJsn2v7ibbvbXvz5nFbdX16SX4TSjGw/YBNWkr8ne4nq9iWE1ta74MlPXnwnkL/flTRdoOJH6AZ67JdC/HA9qz+oQxmuYJSbOnPlCHSV/SWtxj3UuBBfc93BC5tIc7lwEOAR1OKaD2qWf4Q4NstxFtEmTzi3sBvgbnN8g2BKyvHehNl5OZC4O2U5p43Al+m1E+p/bd9u+/xh4C3UC6rX04ZzVk73vl9j1/Txn44JOZLgSspAx2Pab4HL57N+0mz3s2m+bk3sKSFeC9pcslZlHl0Dxi2H1WMt3htXhvlZxxu+D5thuLe1X0jN21/vxlYVtudbgbNSLrN9sVNvKubNtfa/mL7D5JuB/5AOeBg+/ctdFB5BrArcDfKuIxtbP9G0nHAN1l+NqPaprxsas//XFUX3rXUX5jumZQDXNuOpEyN+XsASccC3wDeUzlOl/sJwM1MP43jFkN/YzRHAY+w/bumSesMSdu5TK3Yxh94iaSjbH+wf6GkIyknmtWNQ/LfAtjcA5fSKjMa3UB7ddN7szSd0jz/W9r5T7pF0j8AG1N6x7ycMhH5XqxY5KqGb6vMIbohZaDQyc3N9D2pX8r2Dpebg7dJ+oHt30Cpdy+pje6CWzRNEgI2liQ3p1a00wQ6EzfURF+Hh+ZxG8mqy/0Eup/GcY6bmv22f9Q0857R3FNp4/N8GXCmpP48MkWZke2gFuKNRfI/jtL9atDVwImUnbEN/wS8kHJ5KEpTxQoTd1RwBPB6SiLpFX46j3JQO6qFeM+nnKUaOIPSm+NZlEvgEyrHul3SPWzfRukFAywdXdlG8v8g0KsUejKlBPHNzeCdFYq9VXB/lVLE6nu8lOt3C4YywPGbks5snh9IO91Yu9xPoPtpHH8maVc3RQCbK4CnUboLV7+36NKd9DGS9qB0DgD4X9sX1o7VM+t7+2glU9RJ+o4rj/yLeiTdzfafhizfHNjKdtcTulQ1E92Cm7gPpxQEE/Bl25e1EWecSdqGcmX6syGvPdb212Zgs6oah+S/2PYD1/S1EeJNNyEI0ErpXJqzgYMpg1ruoPQy+qDtH7QQ69vAZ4BT21j/kHidFVqTtBllANINlLPh11JupF8NvM2V5w2WtHGvKWvIa/OGNWGMEGulk4u4fr/0TveTJmanheSm6cr6PQ8pQT4bjUNXzy9KeqsG7jJJ+jdK75Ha7qS0o55CmXlnv4GfqiS9gzKK8WJKb6brKHMJnKEyOra2TSldBS+S9C1JL5dUdZKTHpVCa9cC/wo8Ffh/lOl1lzbdAAAYl0lEQVQUr21eq+1jlDbqRwAXUWqoH0u5YfmRFuL9X++BpAsGXqtdzvkXlKarhc3PpX0/bZTG7mw/gaWF5E6jXM18C7ikeXxq0+20drzpurJ+suuurK1powtRlz+UL/OplIT46eZnMWVH2ailmA+mJKlvUxLKU4G7tBTru32P7wJ8rXm8Ke10qevvDvl4yn2Mn1GS5fzKsa6m9G8eXL49
ZbBQ7b/t8uZfAT8d9lrleJcNezzseYVY/wV8p/n/ejzNVX1bP13uJ02M71N62A0uXx+4toV4nXZlnYmfWX/mb/v3tg8H9qacvX0EeIrtw9zcrQeQtHPFmN+z/SaXOv6fpdQbeXmt9Q+4s++S/r7AnGYbeuUQWmP7K7ZfQLnMPpbSRFJT14XW1lMZtr8tsFFzKd8bXbx+C/E8zeNhz0cLVCYt2hX4FPAc4DJJ/65SsbRVHewnsKyQ3KC2Csn9xfYfKFVnl+vK2kKsGTEOvX0AsH0dpUlkOqcAVSZdkbQ1pRjYQZSaNC+nTLjchrdRvsjXUK44/qnZhrmUM73aVqim6dId8/PUL6DVdaG1t1NmewP4e+BDkgzsRLmSq62/a+kWfc0FYvkxAFW4nJpeJOkyymf4Zpr7Q7Vj0e1+At0XkpuuK+uTaacra+dm/Q3f1aVmpq8K6/kSpbvg6ZQubsvdSHPlG2tNzM0oZapbnzC+a+qw0FoTbw5lv79DZTKZXSlNQDe2EOtNK3vddrUDTlOC4ABKtde5lJuxn7TdylzBM0EdFpJr9o3+rqyPpHSz/gmlKN+svwKYpORfZbpFST9i2SV7/4fXq57Y2lwCA9vxIOCfbVft67+qm1m2/6NmvJVsR/XudE0XyGm58mxQq0vSa2yPNPpX0u8pZ/mnUu55LffFdv2ZvDrdTyTdA/izm+KJzf7/VOBHttu66u6Pf1dK//uf2r6p7XhdSPJfx0naBXgnpb3zLMow/fdRzkSOt/2flePdSek1ci7LSgIvVflsdQ6lx9TWwOdtX9kMpHktsEGNK7WBeHdSbuT1JuVerlSAK8/3vLpq7JuSPsL09xHs+jN5dbafNPG+DBxp+1pJD6T0+Pk4pcnuW7ZfUzneB4D32F7UDDr8BqWX32aUk65Ta8abCWOd/CXd1/YNzeOLbT+q4roPAi60fWvzfBPgSbarduGT9E3K3KXfAPYB/gX4BPAG23+sGauJtyulvXgfSjfBU4EL3MKO0iSsbSlf5EdSRi0/mlLLv3ZXSFRKYxxMmffhNODM/k4BM6VWk2SXutxPmnhLB3NKejNlms8XqpRVv9SVZy+TtMj2zs3jl1G+2wc2o8HPnW3/X8OMe/JvZeLjZt2Xe1lhsN6y6l/iwTgqdUy2a6Odc0jsx1DaOfcCXm17wSp+ZU3XfyWwi+07Jd2d0lf9gR4yqrJy3O0pf9cBlAPO29wM458Jlc78n237Y9M1x7TZXNf2ftLEuMLNAEpJXwOO650gtDGSv/+7rDKD2Kdsf2TwtdlsbHr7TKPNrpDDusm28XneXdJuLPtbfgfs0hvU1lY7ddObaDdKHZMlQBvtnLe7mb7R9h8lfb/txN/E+qGksyn9uJ9DKcc9Y8mfOvtpr+b8PVf6rso62k8ArpD0Tko34AcC5zfxN2kp3i1NE+RPgcdSqqX2bgRv0FLMTuXMf+3XfRKlD/AJlLbWFwOb2n5u5TgXreTl6u3Ukp5H6TFyd0ovh9PbusEl6TbKzUkoCfABzfNWph6UdH9KU8UBlO6CpwGfa6P5rIn3ItvvXY33vdb229rYhrZ0uZ808TagzFWwFXCS7e80yx8DPMD2KSv7/bWItyPwbsoo8Hf1nfX/DWUc0StrxpsJsz75S3oP0098fITtjVuKuyHwBsqlrihnIm+Z7V3Amht532VZ9cTBXiPVKlFqmikH+2JVLcfd/G1XAGcDv2HFv612D5XOOxk0Z+JHUWZ/Wnol2tIN3072kybe3ra/MM1rx9p+dc14k2Acmn1WVrekjZomwNKRftVriqwuSXsD/2J778qr3qPy+qbVS+5NG/zOlARydTNgrw3HsCxJbdRSjJl2NvAV4IssX9e/ts72k8YJkl5u+397C5p+/ydRzs6rkvTvwHW2PzCw/OXAfcbhYDPrz/xXRtI7bf9z5XW+y/bLJH2WIVccLZzx7Al8gGVdPd9GKSch4K0t9N/+F0oX0i5uKG9MmU5xitLmLuBh
lN4jR3qaipizhaQ7gNuGvURp1qp+VTqsI0IbutxPmnjbUUYOv9b2Z5oOAmdQruCO6PX/rxjvKuChvXtSfcvXo0wP+9Dhvzl7jMOZ/8ocAlRN/iybueudldc7neOB+ZSunvtSqnu+wWU6uTbcD7hU0gtrD7Ia4t2UofKH9b5kzY3sNwDvpVQzraZJGIdSSnJ8FngV8ARKUcA32/5FzXiUonxd9wr5nKSn2j6n5Thd7ie4zKa1F3CepC0oN+q/abutCpseTPzNwjt7nS1mu3E/87/e9rYzvR2jGGw3Vpnu8AEtx3w4ZTDZ9yhjDJZ+CWr2LpJ0re0d1vS1EeKdTimLvSFNVVTKQeBxwK62q84H3WWXQEm/ZdmcthsCt1P+VmjvKqOT/aQvFpQbvh8FvkDfDF4txLsEeJbtaweW70CZw2CqZryZMOvP/DX9JBaiha6ekr7LSioy1u6hAmwi6enLb8Ky57WbfZp1flvS6yjlsR/A8uUsavYu6voMaifbD2266y2x3Ztp6/OS2iiS96kW1jmU7U67eDYxu9pPoFwB91wBbNm3rI14bwTOlfQWlp9T9zWUInOz3qxP/pT/mN4Zz6DbW4jXOzt8YfNv/wTuw9p3R/Ullp8kpv+5KQW8qmkuqY+nFJLbs9elriVfk/RGSpPL0gOqpDdQmrdqux3ApajbDQOvtdF2fdfm7xvGtt/cQkyak4PHUfaPr7Q0WrrL/QTbnd5gtn2upAMpTYMvbhYvAg72LJ9etGesm33aJOlrth+7qmWzjaTrgHdQpolsdedobvh+mFJq+3JKstoNuAx4vitXMJV0E8tmgzq0eUzz/BDbW1aON6wv+D0ok5/f23b1HkeS3kcZBNWrPXMo8APbL5z+t9YqTmf7SRPv6QOLTDN7me3fth2/bzvuDuxnu7OrurbM+uSvFSs1GviFWy5lK+ly4EW2v9o8fwzwvto9LYYM1+/t9F+1/cOasZp4c23fPGT5tpQbs8e1EPMBlAJdAha5pTlhJR2xstdtn9xG3Cb2PSmDlI6klAM/vo1BUZIWUXqpuHm+HuXGc7XJjJr1drqfSPqfIYs3A3ah9AxrY8rWXuw5wFMoJSz+hnI19Yy24nVlHJp9jh+ybDOVgk+Hu72aLUcCJ6lU/IMy2rfqQJrGsLbc7YDXSfpX26cNeX2t9X+hJW1OqWl+OKXyZtXSuWrq0dj+gaT79PcaWd3RsWuiP7lL2qgsandQXnNP6hWUZsGTgYe78kTxA66hTHLSGyC3LaWNvKou95Mm3vOGLW8GCp5OKQxYlaQnAM+izC39LUqZh+1tt9G827lZf+Y/HUlTwH/YfkLLcTamfI63thlnSNzNgC/WHkHanKEeRNnpd6R8kQ+1vU3NOE2spT2ZhvRqamV0rKR/oty069XC+R1wrO33tRDrOODpwImUCUBaryCqMtnQX1OSFc3jb9Dcj6o1DqXL/WQ1tqX6viJpCWX08vuBs2z/VtIPbbc+LWZXxuHMfyjbC5uzu6o0TfXEXtdfdzTZie1ftdTf+CZK4ng9pWnJKuWr26BpHg97Pnow6fXAYyjlea9rlt0f+C9Jm9l+S+WQr6TUun895Upt6abQUvdLSi+VLnS5n0xLZVKXP7Ww6k8DB1LumfxFpRDgWJ0pj23yl7Ql7fxnzUj1xEHNyN82mg9eSyl+9n7gE5I+2UKMns4mOG88B3iY+wq52b5O0iGU+ZCrJn/bwyq/tsr2l2DpFWl/bZ/a04t2uZ8wzYj6zSj9/p9dO57tl6rU8d+D0px1HLBxs6+c08VVXNtmfbOPhhd224xyhvdS25/tfqvqmWZcwWbADZRh7Ve3FPf+lJ3+MGAH4E2UyU9WmLh7hBi9qp79FT1pnt/f9obT/e5axrvG9oOmee17th9cM95MkDSfMnH7HyiDrlqdXrSL/aSJ88SBRQZ+CVxru40u3YPx70qZuOZwSlXPzduO2bZxSP6DPTh6O8UlbfSm6It7MuXgckvzfFNK
D47a1RMHK18a+GXbNyoHtuGvKG27h9QcXTzkb1uO61f1vIAyccsFA8v3pJTM6LpYWXWSrgUe7fqlKlYndiv7SbPuR9luY+zHGpP0BNtfnuntGNU4JP95tn+y6ndWj7vC0P2uhvOrlJM+kDL8/P+1FGMTypkcwPe7vqHdBkk7U6pefpVlgwP/mtKL4wDbi2Zw86qQ9Hng6V31SOlqPxnoHPAN249uI05fvE7nl54J49DmfxZlkBCSPm374I7iridp0163vab3TWufZ9N19amUM6t9KDekPrDSX1r7OCdSDi4/pDQb3E/SmcA/1rzElnQkZS7W45rnP6XcSxGlXPX7a8UCcJmM+6GUz3DnJs6XgX9wSxO6zIDXAF9Xmft56Y1Q2y+pGaTL/aQXsu/x3Suve5gPs2x+6XdLanV+6ZkwDsm/f6dopV1zGsdTvmRnUM4gD6GUW65KpW5/b3DJRZRyErtP1++5gtcDdwW27Y2cbLr1nUCptvmGirH+kXIg67nJ9tbNKMrzKTcTq3KZLvIiSm+V3vwB45L4Af4buJAy0coKVSkr6nI/geZkizJ9au/x0u9+Cze0p5iB+aW7NA7NPtP2Fe8g9k6UglICLrB9VQsx7qRMzvHc3oheSde1eAPvSsrB5baB5RsBF7tiHXNJl9p+RN/zpdMZSrrE9l/XitWsszd/wCMo5STWY4zmDwCQ9HXbj+kgTmf7SbPeH7HsBvag6je0uxp3MpPG4cz/YZJ+Q9kpNmgeQ7t9qaGs/CrgqqYN/iBJx7XQBv8ISk+KL6rUUzkNmFM5Rr87h7UX2/6dpNpnCvfqf9KX+NcD7l05FnQ8f8AMuajp8fNZlm/2qX1m3OV+gu3tVud9knaudO/mwZJ6I6MFPKB53sr80jNh1p/5z5Rp2uA/02bXUkmPpTQBHUw5cz3T9omVY3wHeBLDz7Ausv2wirHeB/zK9usHlr8F2Nz2P9aK1ay30/kDZoKkYfWe2jgz7mw/WRO1ztC77ok2E5L819CQNvhPAu9Z3TOTStuwHrA35Qz2ec2yKmc8XV5eN1dMH6L0uOmVBH4YZe7l59ceSCNpse0HTvPaWCT/YSStX/sGbNfNMKurqx53ffFa73nUliT/NdR1G/wabFfX9ztqXV73Bgr1qk5e5YGqnhUPbCezbMrGwfkDdrT9nFFjrCua5qw9KFem+7lyueo12I5q+8lqxuv6e9DpwaamcWjz71rXbfCrq+tZsU6h6WI7Kpc6O9d1EOvFlC58i1VKcvfPH3BkhfXPOEmPpCT8gygjwV9ImZBkplTbT9ZRs/bsOcl/Ddm+jJIsXt3XBr++pHNpoQ1+TTat43hdHmyqxGp68zxTy88f8Oq2rjS6JOmtlO7GP6FM5HIMsNAtzlGwmro+KWm91MO46Lzw1Dix/TXbL6KMAnwXZRAIsHQ06Tjr8mBTNZbtH9j+rO0Fg4m/ccqQZeu6+cDPKWMjPmb7l6wbZ6VVtkHS/bRs7gwk7SHpvyS9oul8UYLZj6oRb002reN41ST5V2D7TtvnDQy8aj2BSLpv39Oc8dQzG7/Q9wHeCuxPadY6hdL1eVyu7k+nqagraVfgU5SrnIcBbczFcP5qvnXW3icalx1jXdRFArmYMmtTJ2c8ku5ruzfxeasHmy5jDbEunDGvEdt/Ac4Fzm1GpD6NMl/wTyVdYPtZM7Rptf7vNujbH54NnGT7+KbnWxuz9c1dnTfZvrKF2J1I8m9PFwmk6zPULg82nR7YxklTruIM4Iym5MLg5Ocja/rB3+KmkJukPSh1fn4MvLfXtbTi/13/vr4npYYRTfmFNr4H99KKk8YvZfszLcTsVJL/7JabvLWCzeyVRjWS7kYZBLgd7X6/T6f0KLq1rxnm7Sxrhnl+5XgXSjoduBHYlFK/CElbAW3UZroX5epp6DgGIMk/lmkjgWj4ZDVQdspNasRYA7P2Ju9qGJcrjbOBWyn1itqY3rCn62aYl1GmVNwKeJztPzfLd6B0
aa3tx648N8e6Jsm/rjYSyMK1fG2tdHmwWccObLPxJu8w29jeZ9VvG1mnzTDNoLzToNzwlfRSStfWH1J62tX2IEmPtf21/oWSHg/cME0vsVklyb+uNnb6aftpS3pn7Xh0e7Dp9MC2CrPuJu80vi7pr2x/t+U4nTbDSNqRMrjycMpMfZ+kVChoa/a1bwK/HbL8D5SDzX4txe1Mkn9dXSeQQ4B/rrnCLg82XR/Y1rErjbY8DnhuU+DtT7RXhbLrZpjvUcqq7Gd7MYCkl7cQp2cL21cMLrS9UNJ2LcbtTJL/GlrHEkjXTRXVDzYdx1qXrjTasm8XQWagGeZgypn/RSpTVZ5Gu/v/ymYL26DFuJ1J8l9zXbfBT3cWJbpP/rO6t88MNKF1rldqWNIWtDjdYdfNMLbPBM7UsvmrXw5sKen9lLIqqzsoa3VdIuko2x/sX6gy9eillWPNiFT1rEjSO21XPVttLt9NdzMYrexg8x3b28zGWKuxLT+xPa+reG2RtD9litH7UqaqvB9lqsqq5Ub6qtse2dcM02l122b/eSZwqO09K697S+BMSq+9XrKfAtYHDvIYTOeY5F/ROCSQLg82XR/YVrEt19vetqt4bWkmWdkT+KLt3ZrBV4fbnl85zkGUM//HAL1mmA/Z3r5mnJnWfH69KSkX2b5wJrenpiT/itpIIJIGy+Ea+IXt62vGmQTr0pVGWyQttD3VHAR2a7pefsv27i3F6zXDHE456JxMO80wUVna/NfQDLTBHz9k2WZNJcPDbVcdUNPlwWYGDmyXMv2Vxqwd1TvgFpVJ1L8CfFzSTcAdbQWz/Xvg402sXjPM0UCS/zouZ/5raF1pqpA0BfyH7SdUXu9FQxZvRmnrrHqw6TLWpGjOxP9Aqdj7t5QyBR9vSjxHLJXkP4t1OWVdWwebLmNNShNaU3RtB9tflHQPYI7tYQOWYoKl2WcNrSsJpOmN0NmRuxncstEsj9VpE9pMkHQUZWKXzYAHUCYa+gDw5Jncrlj3JPmvua7b4IcNKtuM0svipTVjrWI7OjvYtBVruj7ozZXGu4HWr2o68EJgd0p5Amxf2/T5j1hOkv8amoEEMjhwzJRBNa+wfVPlWJ0ebNaVA1uXVzUd+JPt23u11ZqZvNK2GytI8q+kxQRyke2ftLDe6XR5sOn0wDadrpvQWvYlSa+lTOG4N/AC4LMzvE2xDsoN30qaBHKO7UdUXu/Sm7qSPm374JrrHxJvXlcHmy5jNfFWeqVhe9Ynyaae/pHAUyg90s6jDL7KFz2Wk+S/hrpOIJIus73b4OO2dHmwmYED2xEDi3pXGpd0eaURsS5Is8+a67qpwtM8bkv/+IW2xyx0GQu6b0LrjKQVyg/3a6Gkc8xySf5rrusE8jBJv6Ekyg2ax7CsTvvGleN1ebDp+sB2FtDZlUbH7qR8hp+gtPH/YWY3J9Z1afZZQ103VXRN0l+A39McbIDbei9R+WDTZawmXqdNaF2T9GBKjZ39gKsoB4LzbbdW3iFmr5z5r7mumyo6ZXvOOMbqhZzm8Viw/T3gTcCbJB0KfBQ4FjhuRjcs1klJ/mturBPImOu6Ca1TkramlFk+CPg1ZcKTM2d0o2KdlWafNdR1U0XE6pD0JeCewOnAGcCv+l+3/athvxeTK8k/YgxI+hHLrkT7v9S9k5Kxa6KM0ST5R0wQSTvbXjTT2xEzb72Z3oCI6NQpM70BsW5I8o+YLG3MNhezUJJ/xGRJO28ASf4RERMpyT9isozLRPUxovT2iRgDzby9t9i+tXm+B3Ag8GPgvbaT9GM5OfOPGA+nAxsCSNoV+BTwE+BhwPtmcLtiHZXyDhHjYQPbNzSPnw2cZPv4ZnKXWT8xfdSXM/+I8dDfhXNP4AIA23eS7p0xRM78I8bDhZJOB24ENgUuBJC0FfDHmdywWDcl+UeMh5cBhwJbAY+z/edm+Q6UaUYjlpPePhFjprnh+yzgEOCHwGdsv2dmtyrWNTnzjxgDknak1PI/
nDKn9CcpJ3d7zOiGxTorZ/4RY0DSncBXgCNtL26WXZdSzjGd9PaJGA8HAz8DLpL0QUlPJr18YiVy5h8xRiRtSBnZezily+fJwJm2z5/RDYt1TpJ/xJiStBnwTOBQ23vO9PbEuiXJPyJiAqXNPyJiAiX5R0RMoCT/iIgJlOQfETGBkvwjIibQ/weuDEpH0Hgi/gAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "Train.corr(method='pearson')['CLASS'].plot(kind='bar', color=('red'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###### Most of the attributes are negatively correlated to class" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### FULL_Charge and AS_MeanAmphiMoment have the highest positive correlation values with CLASS whereas FULL_AcidicMolPerc,FULL_AURR980107 and FULL_DAYM780201 have the most negative correlation values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Classification\n", + "Train.groupby('CLASS').size().plot(kind='bar')\n", + "Train.groupby('CLASS').size()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Both Classes have the same entries of 1519 and 1519" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data With Visualization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Univariate Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "plt.figure(figsize=(50,50))\n", + "Train.hist()\n", + "plt.subplots_adjust(bottom=1,right=2,top=3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### The above shows CLASS and NT_EFC195 are categorical variables. 
CLASS is uniformly distributed and NT_EFC195 is not equally distributed" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Density Plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Train.plot(kind='density', subplots=True, layout=(4,3), figsize=(10,10))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Values for AS_FUK010112, CT_RACS820104, FULL_GEOR030101 and FULL_AURR980107 lie close to zero compared to the rest of the variables." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Preparing data for machine learning" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data transformation using MinMaxScaler to set all values between 0 and 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##Rescaling \n", + "from numpy import set_printoptions\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "\n", + "###rescaling the data\n", + "array = Train.values\n", + "# separate array into input and output components\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "##Scaling the data so that it's within the range of 0 and 1\n", + "scaler = MinMaxScaler(feature_range=(0, 1))\n", + "rescaledX = scaler.fit_transform(X)\n", + "# summarize transformed data\n", + "set_printoptions(precision=3) #number of decimal points\n", + "print(rescaledX[0:11,:])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Feature selection\n", + "> `I have used Recursive Feature Elimination (RFE)`\n", + "`RFE` works by recursively removing attributes and building a model on those that remain. It uses the model accuracy to identify the attributes, and their combinations, that contribute the most to predicting the target attribute."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#feature selection using recursive feature elimination\n", + "\n", + "from sklearn.feature_selection import RFE\n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "# feature extraction\n", + "model = LogisticRegression()\n", + "rfe = RFE(model, 4)\n", + "\n", + "#I chose RFE because it eliminates worst performing features\n", + "fit = rfe.fit(rescaledX, Y)\n", + "print(\"Num Features: \", fit.n_features_)\n", + "print(\"Selected Features:\", fit.support_)\n", + "print(\"Feature Ranking: \", fit.ranking_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### I tested the performance of the model with more features and the results hardly changed.\n", + "I used 11, then 7, and finally 4 features. I selected the features randomly.\n", + "As a trade-off for faster performance, I decided to go with 4 features.\n", + "* Accuracy at 11 features = 91.72482552342971\n", + "* Accuracy at 7 features = 91.72482552342971\n", + "* Accuracy at 4 features = 91.72482552342971\n\n**Note:** in this notebook the logistic-regression cell below trains on all 11 columns of `rescaledX`, and the RFE-selected columns (`rescaledX[:,fit.support_]`) are only displayed, never passed to a model, which would explain why the three figures are identical." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extracting features of interest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rescaledX[:,fit.support_] \n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Algorithms" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using Logistic Regression on the rescaled data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Splitting data into Train and Test Sets\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression \n", + "\n", + "test_size = 0.33 #Size of the test data\n", + "seed = 7\n", + "rescaledX_train, 
rescaledX_test, Y_train, Y_test = train_test_split(rescaledX, Y, test_size=test_size,\n", + "random_state=seed)\n", + "model = LogisticRegression() #Using Logistic Regression\n", + "model.fit(rescaledX_train, Y_train)\n", + "result = model.score(rescaledX_test, Y_test)\n", + "print(\"Accuracy: \", (result*100.0))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using Logistic Regression on unscaled data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "###Algorithm used on unscaled data\n", + "#Using Logistic Regression\n", + "#Splitting data into Train and Test Sets\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression \n", + "\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "test_size = 0.33 #Size of the test data\n", + "seed = 7\n", + "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size,\n", + "random_state=seed)\n", + "model = LogisticRegression() #Using Logistic Regression\n", + "model.fit(X_train, Y_train)\n", + "result = model.score(X_test, Y_test)\n", + "print(\"Accuracy: \", (result*100.0))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Rescaled data gives an accuracy of 91.72482552342971 and unscaled data with all the features gives 91.92422731804587. 
I have used unscaled data for the rest of the algorithms below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##Use the test dataset to see how the aligorithm is performing\n", + "out = model.predict(Test.values)\n", + "\n", + "out1 = pd.DataFrame(out) #Converting to data frame\n", + "out1.columns=[\"CLASS\"] #Naming the column\n", + "out1.index.name=\"Index\" #Creating a column index\n", + "out1[\"CLASS\"]=out1[\"CLASS\"].map({0.0:False,1.0:True}) # Chaninging 0 to \"False\" 1 to \"True\"\n", + "\n", + "out1.to_csv(\"talz_csv3\") ## Writing a csv file\n", + "print(out1['CLASS'].unique())\n", + "print(out1['CLASS'].nunique())\n", + "\n", + "#printing the numbers of False and True\n", + "print(out1.groupby('CLASS').size()[0].sum()) #\n", + "print(out1.groupby('CLASS').size()[1].sum()) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### False has 383 instances and True has 375 instances" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Naive Bayes Algorithm\n", + "\n", + "### Assumes that all features are conditionally independent of each other given the class and that each feature contributes equally to the resulting class" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "from sklearn.naive_bayes import GaussianNB\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = GaussianNB()\n", + "model.fit(X, Y)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##Use the test dataset to see how the aligorithm is performing\n", + "f = model.predict(Test.values)\n", + 
"\n", + "f1 = pd.DataFrame(f) #Converting to data frame\n", + "f1.columns=[\"CLASS\"] #Naming the column\n", + "f1.index.name=\"Index\" #Creating a column index\n", + "f1[\"CLASS\"]=f1[\"CLASS\"].map({0.0:False,1.0:True}) # Chaninging 0 to \"False\" 1 to \"True\"\n", + "\n", + "f1.to_csv(\"talz_csv8\") ## Writing a csv file\n", + "print(f1['CLASS'].unique())\n", + "print(f1['CLASS'].nunique())\n", + "\n", + "#printing the numbers of False and True\n", + "print(f1.groupby('CLASS').size()[0].sum()) #\n", + "print(f1.groupby('CLASS').size()[1].sum()) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Classification and Regression Trees" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Classiffication and Regression Trees\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = DecisionTreeClassifier()\n", + "model.fit(X, Y)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fd = model.predict(Test.values)\n", + "\n", + "fd1 = pd.DataFrame(fd) #Converting to data frame\n", + "fd1.columns=[\"CLASS\"] #Naming the column\n", + "fd1.index.name=\"Index\" #Creating a column index\n", + "fd1[\"CLASS\"]=fd1[\"CLASS\"].map({0.0:False,1.0:True}) # Chaninging 0 to \"False\" 1 to \"True\"\n", + "\n", + "fd1.to_csv(\"talz_csv9\") ## Writing a csv file\n", + "print(fd1['CLASS'].unique())\n", + "print(fd1['CLASS'].nunique())\n", + "\n", + "#printing the numbers of False and True\n", + "print(fd1.groupby('CLASS').size()[0].sum()) #\n", + "print(fd1.groupby('CLASS').size()[1].sum()) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Support Vector Machines (SVM)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.svm import SVC\n", + "\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = SVC()\n", + "model.fit(X, Y)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "svm = model.predict(Test.values)\n", + "\n", + "svm1 = pd.DataFrame(svm) #Converting to data frame\n", + "svm1.columns=[\"CLASS\"] #Naming the column\n", + "svm1.index.name=\"Index\" #Creating a column index\n", + "svm1[\"CLASS\"]=svm1[\"CLASS\"].map({0.0:False,1.0:True}) # Chaninging 0 to \"False\" 1 to \"True\"\n", + "\n", + "svm1.to_csv(\"talz_csv10\") ## Writing a csv file\n", + "print(svm1['CLASS'].unique())\n", + "print(svm1['CLASS'].nunique())\n", + "\n", + "#printing the numbers of False and True\n", + "print(svm1.groupby('CLASS').size()[0].sum()) #\n", + "print(svm1.groupby('CLASS').size()[1].sum()) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Linear Discriminant Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "num_folds = 10\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = LinearDiscriminantAnalysis()\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## K-Nearest Neighbors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsRegressor\n", + "\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "kfold = KFold(n_splits=10, 
random_state=7)\n", + "model = KNeighborsRegressor()\n", + "scoring = 'neg_mean_squared_error'\n", + "results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ridge Regression" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import Ridge\n", + "\n", + "array = Train.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "num_folds = 10\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = Ridge()\n", + "scoring = 'neg_mean_squared_error'\n", + "results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Random Forest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Random Forest Classification\n", + "from pandas import read_csv\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "array = Train.values\n", + "\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "num_trees = 1000\n", + "\n", + "max_features = 3\n", + "\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = RandomForestClassifier(n_estimators=num_trees, max_features=max_features)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Stochastic Gradient Boosting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Stochastic Gradient Boosting Classification\n", + "from pandas import read_csv\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection 
import cross_val_score\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "\n", + "array = Train.values\n", + "\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "seed = 7\n", + "num_trees = 100\n", + "\n", + "kfold = KFold(n_splits=10, random_state=seed)\n", + "model = GradientBoostingClassifier(n_estimators=num_trees, random_state=seed)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## XGB" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Stochastic X Gradient Boosting Classification\n", + "from pandas import read_csv\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "from xgboost import XGBClassifier\n", + "\n", + "array = Train.values\n", + "\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "seed = 7\n", + "num_trees = 100\n", + "\n", + "kfold = KFold(n_splits=10, random_state=seed)\n", + "model = XGBClassifier(n_estimators=num_trees, random_state=seed)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## # Comparing the Algorithms used" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from matplotlib import pyplot\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "from sklearn.naive_bayes import GaussianNB\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import 
RandomForestClassifier\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "from xgboost import XGBClassifier\n", + "# load dataset\n", + "\n", + "array = Train.values\n", + "\n", + "#split the dataset \n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "# prepare models and add them to a list\n", + "models = []\n", + "models.append(('LR', LogisticRegression()))\n", + "models.append(('CART', DecisionTreeClassifier()))\n", + "models.append(('NB', GaussianNB()))\n", + "models.append(('SVM', SVC()))\n", + "models.append(('LDA', LinearDiscriminantAnalysis()))\n", + "models.append(('DTC', DecisionTreeClassifier()))\n", + "models.append(('KNN', KNeighborsClassifier()))\n", + "models.append(('RFC', RandomForestClassifier()))\n", + "models.append(('GBC', GradientBoostingClassifier()))\n", + "models.append(('XGB', XGBClassifier()))\n", + "# evaluate each model in turn\n", + "results = []\n", + "names = []\n", + "scoring = 'accuracy'\n", + "\n", + "for name, model in models:\n", + " kfold = KFold(n_splits=10, random_state=7)\n", + " cv_results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)\n", + " results.append(cv_results)\n", + " names.append(name)\n", + " msg = (name, cv_results.mean(), cv_results.std())\n", + " print(msg)\n", + "\n", + "# boxplot algorithm comparison\n", + "fig = pyplot.figure()\n", + "fig.suptitle('Algorithm Comparison')\n", + "ax = fig.add_subplot(111)\n", + "pyplot.boxplot(results)\n", + "ax.set_xticklabels(names)\n", + "pyplot.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " ### Naive Bayes gives the best accuracy. 
However, other metrics have to be put in place to measure performance of a model such as Matthews correlation coefficient, area under the ROC curve or logarithmic loss since a single parameter is not enough to measure model performance and thus help in choosing the best model" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}